# -*- coding: utf-8 -*-
"""
md_to_audit_json.py

Parse audit_60_etats_B12_mod4096_horizon7.md and output
audit_60_etats_B12_mod4096_horizon7.json with residue_to_state mapping
and state_table.
"""

from __future__ import annotations

import argparse
import json
import re
from pathlib import Path

# Columns of the state table whose cells are converted to int when possible.
_INT_COLUMNS = frozenset(
    {"État", "Somme A", "Effectif", "C7", "n7 mod 3", "n7 mod 2187"}
)

# One cell of a markdown delimiter row: dashes with optional alignment colons.
_SEPARATOR_CELL_RE = re.compile(r":?-+:?")

# Start of a per-state section; ``(?m)^`` also matches at offset 0, so a
# document that opens directly with a state heading is not lost.
_STATE_HEADING_RE = re.compile(r"(?m)^### État ")

# Residue list that follows the "Résidus (mod 4096), effectif N :" line.
_RESIDUES_RE = re.compile(
    r"Résidus \(mod 4096\), effectif \d+ :\s*\n\s*([\d,\s]+)"
)


def _split_cells(line: str) -> list[str]:
    """Split one markdown table line into its stripped cell strings."""
    return [cell.strip() for cell in line.strip().strip("|").split("|")]


def _is_separator_row(line: str) -> bool:
    """Return True when every cell of *line* is a dash-only delimiter cell."""
    return all(_SEPARATOR_CELL_RE.fullmatch(cell) for cell in _split_cells(line))


def _coerce_cell(column: str, value: str) -> int | str:
    """Convert *value* to int for integer columns, else return it unchanged."""
    if column not in _INT_COLUMNS:
        return value
    try:
        return int(value)
    except ValueError:
        # Non-numeric content in a numeric column is kept as text.
        return value


def parse_state_table(text: str) -> list[dict]:
    """Parse the markdown table '| État | Mot (a0..a6) | ...' into a list of dicts.

    The table starts at the first line containing a pipe, "État" and
    "Mot (a0..a6)", and ends at the first following line that does not start
    with a pipe. Delimiter rows (cells made only of dashes and optional
    colons) are skipped; a data row that merely contains a "---" cell is kept.

    Args:
        text: Full markdown document.

    Returns:
        One dict per data row, keyed by the header cell text. Cells of the
        integer columns are converted to int when they parse as such. Rows
        with fewer cells than the header are dropped. An empty list is
        returned when no header plus at least one further row is found.
    """
    table_lines: list[str] = []
    in_table = False
    for line in text.splitlines():
        if not in_table:
            if "|" in line and "État" in line and "Mot (a0..a6)" in line:
                in_table = True
            else:
                continue
        stripped = line.strip()
        if not stripped.startswith("|"):
            # First non-table line after the header ends the table.
            break
        if _is_separator_row(stripped):
            continue
        table_lines.append(line)

    if len(table_lines) < 2:
        return []

    header = _split_cells(table_lines[0])
    rows = []
    for line in table_lines[1:]:
        cells = _split_cells(line)
        if len(cells) < len(header):
            continue
        # zip() ignores any surplus cells beyond the header width.
        rows.append(
            {name: _coerce_cell(name, cell) for name, cell in zip(header, cells)}
        )
    return rows


def parse_residues_by_state(text: str) -> dict[int, list[int]]:
    """Parse '### État N' sections and extract residues for each state.

    Args:
        text: Full markdown document.

    Returns:
        Mapping from state id to the list of residues found after that
        section's "Résidus (mod 4096), effectif N :" line. Sections without a
        numeric id or without a residue list are omitted.

    Raises:
        ValueError: If a comma-separated token of the residue list is not an
            integer.
    """
    residue_by_state: dict[int, list[int]] = {}
    sections = _STATE_HEADING_RE.split(text)
    for section in sections[1:]:  # sections[0] precedes the first heading
        id_match = re.match(r"(\d+)\s", section)
        if not id_match:
            continue
        residues_match = _RESIDUES_RE.search(section)
        if not residues_match:
            continue
        tokens = residues_match.group(1).split(",")
        residue_by_state[int(id_match.group(1))] = [
            int(token) for token in tokens if token.strip()
        ]
    return residue_by_state


def build_residue_to_state(residue_by_state: dict[int, list[int]]) -> dict[str, int]:
    """Build {str(residue): state_id} mapping.

    Keys are strings so the mapping can be written as a JSON object. If a
    residue is listed under several states, the state iterated last wins.
    """
    return {
        str(residue): state_id
        for state_id, residues in residue_by_state.items()
        for residue in residues
    }


def main() -> None:
    """Read the audit markdown file and write the derived JSON file."""
    ap = argparse.ArgumentParser(description="Parse audit MD to JSON")
    ap.add_argument("--input", "-i", default="audit_60_etats_B12_mod4096_horizon7.md")
    ap.add_argument("--output", "-o", default="audit_60_etats_B12_mod4096_horizon7.json")
    args = ap.parse_args()

    text = Path(args.input).read_text(encoding="utf-8")
    state_table = parse_state_table(text)
    residue_by_state = parse_residues_by_state(text)
    residue_to_state = build_residue_to_state(residue_by_state)

    out = {
        "residue_to_state": residue_to_state,
        "state_table": state_table,
    }
    Path(args.output).write_text(
        json.dumps(out, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    print(f"Wrote {args.output}: {len(residue_to_state)} residues, {len(state_table)} states")


if __name__ == "__main__":
    main()