# -*- coding: utf-8 -*-
"""
collatz_audit.py

Audit the classes covered by a CSV of candidate clauses.
Produces a Markdown report with sizes, distributions and per-state impact.

Usage: --input CSV_PATH --output MD_PATH [--audit60 JSON_PATH]
"""

from __future__ import annotations

import argparse
import csv
import json
from collections import Counter
from pathlib import Path

# State table looked up next to this script when no explicit path is given.
_DEFAULT_AUDIT60_NAME = "audit_60_etats_B12_mod4096_horizon7.json"


def _find_column(row: dict, *candidates: str) -> str | None:
    """Return the first key of *row* that matches one of *candidates*.

    Candidates are tried in the order given (earlier candidates win).  A key
    matches a candidate when the candidate is a substring of the key, or when
    both are equal after removing spaces and lower-casing.

    Keys are scanned in the mapping's own order (header order for a
    ``csv.DictReader`` row), so the result is deterministic when several
    columns match.  Non-string keys -- e.g. the ``None`` key DictReader uses
    for surplus fields -- are ignored.

    Returns ``None`` when nothing matches.
    """
    keys = [k for k in row if isinstance(k, str)]
    for candidate in candidates:
        normalized = candidate.replace(" ", "").lower()
        for key in keys:
            if candidate in key or key.replace(" ", "").lower() == normalized:
                return key
    return None


def _parse_int_cell(value: object) -> int | None:
    """Return a CSV cell as an int, or ``None`` if blank, missing or invalid."""
    if value is None:
        return None
    text = str(value).strip()
    if not text:
        return None
    try:
        return int(text)
    except ValueError:
        return None


def load_state_table(audit60_path: str | None) -> dict[int, str]:
    """Load the ``state_id -> mot_7`` mapping from an audit60 JSON file.

    The JSON root is expected to be an object with a ``"state_table"`` list
    whose entries carry the keys ``"État"`` (state id) and
    ``"Mot (a0..a6)"`` (word).  Entries without an ``"État"`` key, or whose
    state id cannot be converted to ``int``, are skipped individually.

    Returns ``{}`` when the path is empty, is not an existing file, cannot be
    read or parsed, or does not have the expected top-level structure.
    """
    if not audit60_path:
        return {}
    path = Path(audit60_path)
    if not path.is_file():
        return {}

    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}

    if not isinstance(data, dict):
        return {}
    state_table = data.get("state_table", [])
    if not isinstance(state_table, list):
        return {}

    mot_key = "Mot (a0..a6)"
    etat_key = "État"
    mapping: dict[int, str] = {}
    for entry in state_table:
        if not isinstance(entry, dict) or etat_key not in entry:
            continue
        try:
            state_id = int(entry[etat_key])
        except (TypeError, ValueError):
            continue  # one malformed entry must not discard the whole table
        mot = entry.get(mot_key, "")
        # Always store a str: the report formats this value with a width spec.
        mapping[state_id] = "" if mot is None else str(mot)
    return mapping


def run_audit(csv_path: str, out_md_path: str, audit60_path: str | None = None) -> None:
    """Read the candidates CSV and write the audit report as Markdown.

    Args:
        csv_path: Input CSV.  The class, sister ("sœur"/"soeur") and state-id
            columns are located by name from the keys of the first data row.
        out_md_path: Destination Markdown file (overwritten).
        audit60_path: Optional audit60 JSON used to label states with their
            word; when omitted, a default file next to this script is tried.

    Cells that are blank, missing or not integers are ignored rather than
    being counted as class/state ``0``.  A CSV without data rows produces a
    short "empty" report.
    """
    # newline="" is what the csv module asks for so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with Path(csv_path).open("r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))

    if not rows:
        Path(out_md_path).write_text("# Audit (vide)\n\nAucune clause.\n", encoding="utf-8")
        print(f"Wrote {out_md_path} (empty)")
        return

    first_row = rows[0]
    classe_col = _find_column(
        first_row, "classe_mod_2^m", "classe_mod_2^27", "classe_mod_2^28", "classe_mod_2"
    )
    soeur_col = _find_column(first_row, "sœur", "soeur")
    etat_col = _find_column(first_row, "etat_id", "état_id")

    clauses: set[int] = set()
    covered: set[int] = set()
    etat_counts: Counter[int] = Counter()

    for row in rows:
        if classe_col:
            classe = _parse_int_cell(row.get(classe_col))
            if classe is not None:
                clauses.add(classe)
                covered.add(classe)
        if soeur_col:
            soeur = _parse_int_cell(row.get(soeur_col))
            if soeur is not None:
                covered.add(soeur)
        if etat_col:
            etat = _parse_int_cell(row.get(etat_col))
            if etat is not None:
                etat_counts[etat] += 1

    n_clauses = len(clauses)
    n_covered = len(covered)
    name = Path(csv_path).stem

    state_mot = load_state_table(
        audit60_path or str(Path(__file__).parent / _DEFAULT_AUDIT60_NAME)
    )

    lines = [
        f"# Audit {name}",
        "",
        "## Introduction",
        "",
        f"Audit des clauses extraites de {Path(csv_path).name}.",
        "",
        "## Résultats globaux",
        "",
        f"- Nombre de clauses : {n_clauses}",
        f"- Classes couvertes (clauses + sœurs) : {n_covered}",
        f"- États distincts représentés : {len(etat_counts)}",
        "",
    ]

    if etat_counts:
        lines.extend([
            "## Distribution par état (60 états de base)",
            "",
            "| état_id | mot_7 | effectif |",
            "|--------:|:------|--------:|",
        ])
        # Most frequent states first; ties broken by ascending state id.
        ranked = sorted(etat_counts.items(), key=lambda item: (-item[1], item[0]))
        for etat_id, effectif in ranked:
            mot = state_mot.get(etat_id, "")
            lines.append(f"| {etat_id:8} | {mot:20} | {effectif:8} |")
        lines.append("")

    Path(out_md_path).write_text("\n".join(lines), encoding="utf-8")
    print(f"Wrote {out_md_path}: {n_clauses} clauses, {n_covered} covered")


def main() -> None:
    """Parse the command-line arguments and run the audit."""
    ap = argparse.ArgumentParser(description="Audit Collatz CSV → Markdown")
    ap.add_argument("--input", "-i", required=True, help="Input CSV path")
    ap.add_argument("--output", "-o", required=True, help="Output Markdown path")
    ap.add_argument("--audit60", help="Path to audit_60_etats JSON (optional)")
    args = ap.parse_args()
    run_audit(args.input, args.output, args.audit60)


if __name__ == "__main__":
    main()