Figured I’m not good at Logseq Datalog queries yet, so I may as well just read the Logseq LOGBOOK data using plain Python.

And with the assistance of ChatGPT, I have a nice proof of concept.

# Load every matching journal file and flatten the parsed CLOCK rows
# into a polars DataFrame (one dict per time entry).
from pathlib import Path
import polars as pl
import time_log as tl

# Directory holding the Logseq graph's journal markdown files.
journals_dir = "mgraphblah/journals"
# Glob pattern restricting the scan to 2025 journals.
pattern = "2025_*.md"
out_vec = tl.iterate_across_journals(journals_dir, pattern)
df = pl.from_dicts(out_vec)

How much time did I spend on taxes?

# Explode the per-row "Tags" lists so each (row, tag) pair becomes its own
# row, then sum the minutes per tag.
per_tag_stats = df.explode("Tags").group_by("Tags").agg(pl.col("Duration (mins)").sum().alias("Total minutes"))
per_tag_stats.filter(pl.col("Tags") == "my taxes/2024")
┌───────────────┬───────────────┐
│ Tags          ┆ Total minutes │
│ ---           ┆ ---           │
│ str           ┆ i64           │
╞═══════════════╪═══════════════╡
│ my taxes/2024 ┆ 260           │
└───────────────┴───────────────┘

Source

time_log.py

import re
import csv
import argparse
from datetime import datetime
from pathlib import Path
import polars as pl



# Logseq LOGBOOK lines look like:
#   CLOCK: [2025-01-02 Thu 09:15:00]--[2025-01-02 Thu 10:05:00]
_CLOCK_RE = re.compile(r"CLOCK: \[(.*?)\]--\[(.*?)\]")
_CLOCK_TS_FORMAT = "%Y-%m-%d %a %H:%M:%S"


def parse_clock_line(line):
    """Parse a Logseq ``CLOCK:`` line into (start, end, duration).

    Parameters
    ----------
    line : str
        A single (already stripped) line from a journal file.

    Returns
    -------
    tuple
        ``(datetime, datetime, timedelta)`` on success, otherwise
        ``(None, None, None)`` — including when the text inside the
        brackets does not match the expected timestamp format.
        (Previously a malformed/truncated timestamp raised
        ``ValueError`` and aborted the whole journal scan.)
    """
    match = _CLOCK_RE.search(line)
    if not match:
        return None, None, None
    start_str, end_str = match.groups()
    try:
        start_dt = datetime.strptime(start_str, _CLOCK_TS_FORMAT)
        end_dt = datetime.strptime(end_str, _CLOCK_TS_FORMAT)
    except ValueError:
        # Hand-edited or truncated entries: treat like "no clock data"
        # instead of crashing.
        return None, None, None
    return start_dt, end_dt, end_dt - start_dt

def extract_time_blocks(filepath):
    """Collect one row per CLOCK entry found in a Logseq journal file.

    Each row records the date, start/end times, rounded duration in
    minutes, the text of the most recent bullet (``- ...``) as the task,
    and the tags parsed out of that task line.
    """
    entries = []
    task = None
    with open(filepath, 'r', encoding='utf-8') as fh:
        for raw in fh:
            text = raw.strip()
            if text.startswith('- '):
                # A bullet begins a new task; subsequent CLOCK lines
                # are attributed to it.
                task = text[2:]
                continue
            if not text.startswith('CLOCK:'):
                continue
            begin, finish, elapsed = parse_clock_line(text)
            if begin and finish:
                entries.append({
                    'Date': begin.date(),
                    'Start': begin.time(),
                    'End': finish.time(),
                    'Duration (mins)': round(elapsed.total_seconds() / 60),
                    'Task': task,
                    'Tags': parse_tags_from_task(task),
                })
    return entries

def parse_tags_from_task(task_line):
    """Extract Logseq tags from a task line.

    Recognizes ``[[page ref]]`` links (spaces allowed inside) and bare
    ``#hashtag`` tags; bracket tags come first in the result. A missing
    or empty task line yields an empty list.
    """
    if not task_line:
        return []
    bracket_tags = re.findall(r"\[\[([^\]]+)\]\]", task_line)
    hash_tags = re.findall(r"#([a-zA-Z0-9/_\-]+)", task_line)
    return bracket_tags + hash_tags

def iterate_across_journals(journals_dir, pattern):
    """Parse every journal file matching *pattern* under *journals_dir*.

    Parameters
    ----------
    journals_dir : str | Path
        Directory containing Logseq journal markdown files.
    pattern : str
        Glob pattern relative to the directory, e.g. ``"2025_*.md"``.

    Returns
    -------
    list[dict]
        Concatenated rows from :func:`extract_time_blocks`, in sorted
        filename order.
    """
    out_vec = []
    # sorted() makes the row order deterministic; raw glob order is
    # filesystem-dependent. (Also removed a leftover `...` placeholder.)
    for input_md_file in sorted(Path(journals_dir).glob(pattern)):
        out_vec.extend(extract_time_blocks(input_md_file))
    return out_vec