# -*- coding: utf-8 -*-
"""
collatz_scission.py

Read a CSV produced by the collatz pipeline, extract the covered classes
(the ``classe_mod_2^m`` column and the ``sœur`` column), and write a JSON
certificate containing the clauses, the covered set and the palier
(power-of-two exponent of the modulus).

Usage: --input CSV_PATH --output JSON_PATH
"""

from __future__ import annotations

import argparse
import csv
import json
import re
from pathlib import Path


def _normalize(name: str) -> str:
    """Return *name* lower-cased with spaces removed, for lenient comparison."""
    return name.replace(" ", "").lower()


def _find_column(row: dict, *candidates: str) -> str | None:
    """
    Return the first column of *row* matching one of *candidates*.

    Candidates are tried in the order given. For each candidate a column whose
    normalized name (spaces removed, lower-cased) equals the candidate wins;
    otherwise the first column, in header order, that contains the candidate
    as a substring is returned. Returns None when nothing matches.
    """
    # csv.DictReader stores surplus fields under the key None; only real
    # (string) header names can be matched.
    headers = [key for key in row if isinstance(key, str)]
    for candidate in candidates:
        wanted = _normalize(candidate)
        for header in headers:
            if _normalize(header) == wanted:
                return header
        # Header order (not set order) keeps the choice reproducible when
        # several columns contain the candidate.
        for header in headers:
            if candidate in header:
                return header
    return None


def _find_column_exact(row: dict, *candidates: str) -> str | None:
    """
    Return the first column whose normalized key equals one of the candidates.

    Use this for short keys like 'm' where substring matching is unsafe
    (e.g. 'classe_mod_2^m' contains 'm' but is not an exponent column).
    """
    normalized: dict[str, str] = {}
    for key in row:
        if isinstance(key, str):
            # First header wins when two headers normalize to the same name.
            normalized.setdefault(_normalize(key), key)
    for candidate in candidates:
        match = normalized.get(_normalize(candidate))
        if match is not None:
            return match
    return None


def _try_parse_int(value: object) -> int | None:
    """Return *value* as an int, or None if it is not an int or an integer string."""
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        try:
            return int(value.strip())
        except ValueError:
            # Covers both empty strings and non-numeric text.
            return None
    return None


def _parse_cell(raw: object) -> int | None:
    """Return a CSV cell as an int, or None when it is missing, blank or not an integer."""
    if raw is None or not str(raw).strip():
        return None
    try:
        return int(raw)
    except (ValueError, TypeError):
        return None


def infer_palier(rows: list[dict], classe_col: str | None, csv_path: Path | None = None) -> int:
    """
    Infer modulus power m.

    Priority order:
    - explicit numeric column 'palier' (or 'm' / 'modulus_power'), read from
      the first row and accepted only when strictly positive
    - class column name containing '2^<digits>' (e.g. 'classe_mod_2^27')
    - filename containing 'palier2p<digits>'
    - fallback heuristic from the max class value (legacy; not reliable when
      values are sparse); cells that are not integers are ignored

    Returns 0 when nothing can be inferred.
    """
    if rows:
        first = rows[0]
        for names in (("palier",), ("m", "modulus_power")):
            column = _find_column_exact(first, *names)
            if column:
                explicit = _try_parse_int(first.get(column))
                if explicit is not None and explicit > 0:
                    return explicit

    if classe_col:
        in_name = re.search(r"2\^(\d+)", classe_col)
        if in_name:
            return int(in_name.group(1))

    if csv_path is not None:
        in_path = re.search(r"palier2p(\d+)", str(csv_path))
        if in_path:
            return int(in_path.group(1))

    if rows and classe_col:
        values: list[int] = []
        for row in rows:
            raw = row.get(classe_col)
            if not raw:
                continue
            try:
                values.append(int(raw))
            except (ValueError, TypeError):
                # Skip the bad cell instead of abandoning the whole heuristic.
                continue
        if values:
            # Smallest m with 2**m > max value; never negative.
            return max(max(values), 0).bit_length()

    return 0


def _collect_classes(
    rows: list[dict], classe_col: str | None, soeur_col: str | None
) -> tuple[set[int], set[int]]:
    """Return (clauses, covered): class-column values, and those plus sister-column values."""
    clauses: set[int] = set()
    covered: set[int] = set()
    for row in rows:
        if classe_col:
            value = _parse_cell(row.get(classe_col))
            if value is not None:
                clauses.add(value)
                covered.add(value)
        if soeur_col:
            sister = _parse_cell(row.get(soeur_col))
            if sister is not None:
                covered.add(sister)
    return clauses, covered


def run_scission(csv_path: str, out_json_path: str) -> None:
    """
    Read CSV, extract clauses and covered set, write JSON certificate.

    The certificate is a JSON object with keys 'clauses' (sorted distinct
    values of the class column), 'covered' (sorted distinct values of the
    class and sister columns) and 'palier'. The output directory is created
    if needed. A CSV without data rows yields an empty certificate.
    """
    out_path = Path(out_json_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # utf-8-sig strips a leading BOM so the first header name stays clean;
    # newline="" is what the csv module expects from its input file.
    with Path(csv_path).open("r", encoding="utf-8-sig", newline="") as f:
        rows: list[dict] = [dict(row) for row in csv.DictReader(f)]

    if not rows:
        cert = {"clauses": [], "covered": [], "palier": 0}
        out_path.write_text(json.dumps(cert, indent=2), encoding="utf-8")
        print(f"Wrote {out_json_path} (empty)")
        return

    classe_col = _find_column(
        rows[0], "classe_mod_2^m", "classe_mod_2^27", "classe_mod_2^28", "classe_mod_2"
    )
    soeur_col = _find_column(rows[0], "sœur", "soeur")

    clause_set, covered_set = _collect_classes(rows, classe_col, soeur_col)
    clauses = sorted(clause_set)
    covered_list = sorted(covered_set)
    palier = infer_palier(rows, classe_col, csv_path=Path(csv_path))

    cert = {
        "clauses": clauses,
        "covered": covered_list,
        "palier": palier,
    }
    out_path.write_text(json.dumps(cert, indent=2), encoding="utf-8")
    print(f"Wrote {out_json_path}: {len(clauses)} clauses, {len(covered_list)} covered, palier 2^{palier}")


def main() -> None:
    """Parse --input/--output from the command line and run the extraction."""
    ap = argparse.ArgumentParser(description="Extract scission certificate from Collatz CSV")
    ap.add_argument("--input", "-i", required=True, help="Input CSV path")
    ap.add_argument("--output", "-o", required=True, help="Output JSON path")
    args = ap.parse_args()
    run_scission(args.input, args.output)


if __name__ == "__main__":
    main()