**Motivations:**
- Implémenter le workflow complet de démonstration Collatz (commandes.md)
- Permettre la reprise après interruption au palier D20

**Évolutions:**
- Scripts 01-12 et run-full-workflow alignés sur commandes.md sections 1-10
- collatz_recover_noyau.py : recréation de noyau_post_D20 à partir du CSV candidats
- Option --resume-from D20 dans collatz_k_pipeline pour reprendre sans recalculer D18-D19-F15
- Détection automatique : si candidats_D20 existe sans noyau_post_D20, récupération puis poursuite
- Filtres --cible=critique et --modulo dans collatz_fusion_pipeline
- ROOT par défaut = collatz_k_scripts (plus data/source vide)

**Pages affectées:**
- .gitignore (__pycache__, out/)
- applications/collatz/collatz_k_scripts/*.py
- applications/collatz/scripts/*.sh
- applications/collatz/scripts/README.md
99 lines
3.5 KiB
Python
99 lines
3.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
md_to_audit_json.py
|
|
|
|
Parse audit_60_etats_B12_mod4096_horizon7.md and output audit_60_etats_B12_mod4096_horizon7.json
|
|
with residue_to_state mapping and state_table.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
import argparse
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
|
|
def _split_table_row(line: str) -> list[str]:
    """Split one markdown table line into stripped cell texts.

    Only a single leading and a single trailing pipe are removed, so an empty
    first or last cell written without spaces (``...| 5 ||``) is preserved.
    """
    stripped = line.strip()
    if stripped.startswith("|"):
        stripped = stripped[1:]
    if stripped.endswith("|"):
        stripped = stripped[:-1]
    return [cell.strip() for cell in stripped.split("|")]


def _is_separator_row(cells: list[str]) -> bool:
    """Return True when every cell is a markdown delimiter such as '---' or ':--:'."""
    return bool(cells) and all(re.fullmatch(r":?-+:?", cell) for cell in cells)


def parse_state_table(text: str) -> list[dict]:
    """Parse the markdown table '| État | Mot (a0..a6) | ...' into a list of dicts.

    The table starts at the first line containing a pipe, "État" and
    "Mot (a0..a6)", and ends at the first following line that does not start
    with a pipe. Delimiter rows (cells made only of dashes and optional
    colons) are skipped; a data row that merely contains '---' in one cell is
    kept.

    Args:
        text: Full markdown document.

    Returns:
        One dict per data row, keyed by the header cell texts. Cells of the
        integer columns are converted to ``int`` when they parse as integers
        and are kept as strings otherwise. Rows with fewer cells than the
        header are dropped. Returns ``[]`` when fewer than two table lines
        (header included) are found.
    """
    integer_columns = ("État", "Somme A", "Effectif", "C7", "n7 mod 3", "n7 mod 2187")

    table_lines = []
    in_table = False
    for line in text.splitlines():
        if not in_table:
            if "|" in line and "État" in line and "Mot (a0..a6)" in line:
                in_table = True
            else:
                continue
        # The first line that is not a table line closes the table.
        if not line.strip().startswith("|"):
            break
        if _is_separator_row(_split_table_row(line)):
            continue
        table_lines.append(line)

    if len(table_lines) < 2:
        return []

    header = _split_table_row(table_lines[0])
    rows = []
    for line in table_lines[1:]:
        cells = _split_table_row(line)
        if len(cells) < len(header):
            continue
        row = {}
        # zip stops at the header length, so surplus cells are ignored.
        for name, value in zip(header, cells):
            if name in integer_columns:
                try:
                    row[name] = int(value)
                except ValueError:
                    row[name] = value
            else:
                row[name] = value
        rows.append(row)
    return rows
|
|
|
|
|
|
def parse_residues_by_state(text: str) -> dict[int, list[int]]:
    """Parse '### État N' sections and extract the residues listed for each state.

    A section starts at a line beginning with '### État ' followed by the
    state number and whitespace. Inside a section, the residues are the
    integers following the line 'Résidus (mod 4096), effectif K :'.

    Args:
        text: Full markdown document.

    Returns:
        Mapping from state number to its residues, in document order.
        Sections without a parsable state number or without a residue line
        are left out.
    """
    residue_by_state: dict[int, list[int]] = {}
    # Anchor on line starts (not on a preceding newline) so that a section
    # whose heading is the very first line of the document is not lost.
    sections = re.split(r"(?m)^### État ", text)
    for section in sections[1:]:  # sections[0] is the text before the first heading
        heading = re.match(r"(\d+)\s", section)
        if not heading:
            continue
        state_id = int(heading.group(1))
        listing = re.search(
            r"Résidus \(mod 4096\), effectif \d+ :\s*\n\s*([\d,\s]+)", section
        )
        if listing:
            # Pull out the numbers directly: a list wrapped over several lines
            # without a trailing comma would otherwise yield chunks such as
            # "31\n47" that int() rejects.
            residue_by_state[state_id] = [
                int(token) for token in re.findall(r"\d+", listing.group(1))
            ]
    return residue_by_state
|
|
|
|
|
|
def build_residue_to_state(residue_by_state: dict[int, list[int]]) -> dict[str, int]:
    """Invert the per-state residue lists into a {str(residue): state_id} mapping.

    Keys are the residues converted to strings. If a residue is listed under
    several states, the state encountered last in iteration order wins.
    """
    return {
        str(residue): state_id
        for state_id, residues in residue_by_state.items()
        for residue in residues
    }
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: convert the audit markdown file to JSON.

    Reads the file given by ``--input``/``-i``, parses the state table and the
    per-state residue lists, and writes them to ``--output``/``-o`` as a JSON
    object with the keys ``residue_to_state`` and ``state_table``. A one-line
    summary is printed once the file is written.
    """
    parser = argparse.ArgumentParser(description="Parse audit MD to JSON")
    parser.add_argument("--input", "-i", default="audit_60_etats_B12_mod4096_horizon7.md")
    parser.add_argument("--output", "-o", default="audit_60_etats_B12_mod4096_horizon7.json")
    args = parser.parse_args()

    markdown = Path(args.input).read_text(encoding="utf-8")
    state_table = parse_state_table(markdown)
    residue_to_state = build_residue_to_state(parse_residues_by_state(markdown))

    payload = {
        "residue_to_state": residue_to_state,
        "state_table": state_table,
    }
    # ensure_ascii=False keeps accented header names such as "État" readable.
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    Path(args.output).write_text(serialized, encoding="utf-8")
    print(f"Wrote {args.output}: {len(residue_to_state)} residues, {len(state_table)} states")


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|