I’ve been recording my progress on Chopin’s Étude Op. 10 No. 1 for a few years now, and figured I’d see where I’m at.
I asked ChatGPT for help. I’ve been recording to `.m4a` with the Mac Voice Memos app, and I just downloaded my files. I had been using a more-or-less consistent file-naming scheme. ChatGPT generated a quick script to use with `polars` (because I had stopped using `pandas` about a year ago).
And of course, for the record, I’m aware this in no way reflects mistakes or how good the playing sounds, only speed. Apparently there is some super interesting research being done on quality too. (Not sure if it is available to the mainstream yet, but see a cool blog post here: https://www.launchpad.ai/blog/an-ai-based-method-for-student-piano-performance-assessment ).
In any case, for speed, here is what I have. This assumes `ffprobe` is available, which comes with `brew install ffmpeg`.
from pathlib import Path
workdir = Path().home() / "Dropbox/AudioRecordings/piano"
import polars as pl
durations = process_audio_files(workdir)
durations[:4]
# Out[6]:
[{'filename': '2019-10-15-Chopin-etude-op10-no2.m4a', 'duration': '06:25'},
{'filename': '2023-02-19-chopin-etude-op-10-no-1.m4a', 'duration': '05:13'},
{'filename': '2020-11-02-Chopin-op10-etude1.m4a', 'duration': '06:41'},
{'filename': '2019-10-26-Chopin-etude-op10-no2.m4a', 'duration': '05:42'}]
df = pl.from_dicts(durations)
df.filter(pl.col("filename").str.contains("op-10-no-1")).sort(by="duration")
# Out[13]:
shape: (27, 2)
┌─────────────────────────────────┬──────────┐
│ filename ┆ duration │
│ --- ┆ --- │
│ str ┆ str │
╞═════════════════════════════════╪══════════╡
│ 2023-02-19-chopin-etude-op-10-… ┆ 05:13 │
│ 2023-02-14-chopin-etude-op-10-… ┆ 05:34 │
│ 2023-10-14-Chopin-etude-op-10-… ┆ 05:45 │
│ 2021-04-17-23.08-Chopin-Étude… ┆ 05:53 │
│ 2025-02-22-Chopin-etude-op-10-… ┆ 05:56 │
│ … ┆ … │
│ 2020-08-05-Chopin-etude-op-10-… ┆ 09:05 │
│ 2020-05-30-T1214-Chopin-etude-… ┆ 09:19 │
│ 2020-09-08-Chopin-etude-op-10-… ┆ 09:23 │
│ 2020-06-23-Chopin-etude-op-10-… ┆ 09:29 │
│ 2020-05-30-Chopin-etude-op-10-… ┆ 10:04 │
└─────────────────────────────────┴──────────┘
# Raise the row limit so all 27 matching rows are printed instead of being
# elided with "…" as in the default display above.
with pl.Config(tbl_rows=40):
    print(df.filter(pl.col("filename").str.contains("op-10-no-1")).sort(by="duration"))
shape: (27, 2)
┌─────────────────────────────────┬──────────┐
│ filename ┆ duration │
│ --- ┆ --- │
│ str ┆ str │
╞═════════════════════════════════╪══════════╡
│ 2023-02-19-chopin-etude-op-10-… ┆ 05:13 │
│ 2023-02-14-chopin-etude-op-10-… ┆ 05:34 │
│ 2023-10-14-Chopin-etude-op-10-… ┆ 05:45 │
│ 2021-04-17-23.08-Chopin-Étude… ┆ 05:53 │
│ 2025-02-22-Chopin-etude-op-10-… ┆ 05:56 │
│ 2021-05-22-Chopin-etude-op-10-… ┆ 05:59 │
│ 2021-02-28-Chopin-etude-op-10-… ┆ 06:06 │
│ 2023-07-01-Chopin-etude-op-10-… ┆ 06:20 │
│ 2020-12-22-Chopin-etude-op-10-… ┆ 06:31 │
│ 2021-03-06-Chopin-etude-op-10-… ┆ 06:37 │
│ 2020-11-02-Chopin-etude-op-10-… ┆ 06:41 │
│ 2021-01-22-Chopin-etude-op-10-… ┆ 06:54 │
│ 2020-11-09-Chopin-etude-op-10-… ┆ 07:00 │
│ 2020-11-29-Chopin-etude-op-10-… ┆ 07:00 │
│ 2020-12-20-Chopin-etude-op-10-… ┆ 07:33 │
│ 2020-10-29-Chopin-etude-op-10-… ┆ 07:37 │
│ 2020-09-27-Chopin-etude-op-10-… ┆ 07:50 │
│ 2020-08-26-Chopin-etude-op-10-… ┆ 07:56 │
│ 2020-10-11-chopin-etude-op-10-… ┆ 08:02 │
│ 2020-12-27-Chopin-etude-op-10-… ┆ 08:12 │
│ 2020-07-12-Chopin-etude-op-10-… ┆ 08:29 │
│ 2020-06-10-chopin-etude-op-10-… ┆ 08:38 │
│ 2020-08-05-Chopin-etude-op-10-… ┆ 09:05 │
│ 2020-05-30-T1214-Chopin-etude-… ┆ 09:19 │
│ 2020-09-08-Chopin-etude-op-10-… ┆ 09:23 │
│ 2020-06-23-Chopin-etude-op-10-… ┆ 09:29 │
│ 2020-05-30-Chopin-etude-op-10-… ┆ 10:04 │
└─────────────────────────────────┴──────────┘
For reference, here is the script (it defines the `process_audio_files` function used above):
import subprocess
import os
def get_audio_duration(file_path):
    """Return the playing time of an audio file as a zero-padded "MM:SS" string.

    The duration is read by running the ``ffprobe`` command, which must be
    available on PATH.

    Args:
        file_path: Path to the audio file.

    Returns:
        One of:
        * "MM:SS", with seconds truncated. Minutes are not wrapped at 60, so
          a 75-minute file yields "75:00".
        * "00:00" for a zero-byte file.
        * A string starting with "Error: " when the file cannot be read,
          ffprobe cannot be run or fails, or its output is not a number.
    """
    try:
        # The size check lives inside the try so that a missing or unreadable
        # file is reported the same way as every other failure.
        if os.path.getsize(file_path) == 0:
            return "00:00"  # Nothing to probe in an empty file

        result = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
             "-of", "default=noprint_wrappers=1:nokey=1", file_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            # Surface ffprobe's own message; otherwise its empty stdout would
            # show up as an unhelpful float-conversion error.
            detail = (result.stderr.strip()
                      or f"ffprobe exited with status {result.returncode}")
            return f"Error: {detail}"

        whole_minutes, leftover = divmod(float(result.stdout.strip()), 60)
        minutes = int(whole_minutes)
        seconds = int(leftover)
    except (OSError, ValueError, OverflowError, subprocess.SubprocessError) as e:
        return f"Error: {e}"
    return f"{minutes:02}:{seconds:02}"
def process_audio_files(folder_path):
    """Collect the duration of every M4A file directly inside a folder.

    Args:
        folder_path: Directory to scan. Sub-folders are not searched.

    Returns:
        A list of ``{"filename": ..., "duration": ...}`` dicts, one per entry
        whose name ends in ".m4a" (in any letter case), in the order
        ``os.listdir`` reports them. "duration" is "00:00" for a zero-byte
        file and otherwise whatever ``get_audio_duration`` returns.
    """
    audio_durations = []
    for name in os.listdir(folder_path):
        # Compare case-insensitively so "TAKE.M4A" is not silently dropped.
        if not name.lower().endswith(".m4a"):
            continue
        file_path = os.path.join(folder_path, name)
        if os.path.getsize(file_path) == 0:
            # Empty files are still listed, with a zero duration.
            duration = "00:00"
        else:
            duration = get_audio_duration(file_path)
        audio_durations.append({"filename": name, "duration": duration})
    return audio_durations