**Motivations:**
- Make certificates reproducible when CSV columns do not encode the palier
- Avoid FileNotFoundError when writing certificates into new folders
- Reuse scission in the local H6 generator to avoid duplicated certificate logic

**Root causes:**
- palier inference relied on max residue value when the class column was generic
- scission assumed output directories already exist
- empty CSV fields were coerced to 0

**Fixes:**
- Infer palier from explicit columns (palier/m) or filename, keep heuristic fallback
- Create parent directory for output JSON
- Skip empty class/sister values instead of adding residue 0

**Evolutions:**
- Use collatz_scission for certificate generation in local H6 artefacts generator

**Affected files:**
- applications/collatz/collatz_k_scripts/collatz_scission.py
- applications/collatz/collatz_k_scripts/collatz_generate_local_h6_artefacts.py
- docs/fixKnowledge/collatz_scission_palier_inference_and_output_dirs.md
156 lines
4.7 KiB
Python
156 lines
4.7 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
collatz_scission.py

Read a CSV from the Collatz pipeline, extract the covered classes (columns
classe_mod_2^m and sœur), and output a JSON certificate with clauses,
covered set, and residual kernel info.

Usage: python collatz_scission.py --input CSV_PATH --output JSON_PATH
"""
|
|
|
|
from __future__ import annotations
|
|
import argparse
|
|
import csv
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
|
|
def _find_column(row: dict, *candidates: str) -> str | None:
    """Return the name of the first column of *row* matching one of *candidates*.

    Candidates are tried in the order given. For each candidate, a column whose
    name equals it once spaces and letter case are ignored is preferred;
    otherwise the first column whose name contains the candidate as a
    substring is used. Columns are examined in the row's own (header) order,
    so the result is reproducible from one run to the next.

    Args:
        row: One CSV record mapping column names to values.
        candidates: Column names, or fragments of column names, to look for.

    Returns:
        The matching key of *row*, or None when no column matches.
    """
    # csv.DictReader files surplus fields under a None key; only real header
    # names (strings) can be matched against a candidate.
    names = [key for key in row if isinstance(key, str)]
    normalized = {name: name.replace(" ", "").lower() for name in names}

    for candidate in candidates:
        wanted = candidate.replace(" ", "").lower()
        # An exact (normalised) name is a stronger signal than a fragment.
        for name in names:
            if normalized[name] == wanted:
                return name
        for name in names:
            if candidate in name:
                return name
    return None
|
|
|
|
|
|
def _try_parse_int(value: object) -> int | None:
    """Convert *value* to an int when possible, otherwise return None.

    Integers are returned unchanged. Strings are stripped of surrounding
    whitespace and parsed with ``int``; blank or non-numeric strings give
    None. Any other type (including None) gives None.
    """
    if isinstance(value, int):
        return value
    if not isinstance(value, str):
        return None

    text = value.strip()
    if text == "":
        return None

    try:
        parsed = int(text)
    except ValueError:
        return None
    return parsed
|
|
|
|
|
|
def _exponent_column(row: dict) -> str | None:
    """Return the column of *row* that explicitly stores the exponent m, if any."""
    names = [key for key in row if isinstance(key, str)]
    # 'm' must be the whole column name: as a substring it would match almost
    # any header (e.g. 'classe_mod_2^27') and a residue would be read as m.
    for name in names:
        if name.replace(" ", "").lower() == "m":
            return name
    for name in names:
        if "modulus_power" in name.replace(" ", "").lower():
            return name
    return None


def infer_palier(rows: list[dict], classe_col: str | None, csv_path: Path | None = None) -> int:
    """
    Infer the modulus power m (the palier; residues are taken modulo 2^m).

    Sources are tried in priority order and the first usable one wins:

    - explicit numeric column in the first row: a column matching 'palier',
      then a column named exactly 'm' or containing 'modulus_power'
      (the value must be a positive integer)
    - class column name containing '2^<m>' (e.g. 'classe_mod_2^27')
    - file name containing 'palier2p<m>', then the full path as a fallback
    - legacy heuristic: bit length of the largest class value (not reliable
      when values are sparse)

    Args:
        rows: CSV records as dictionaries keyed by column name.
        classe_col: Name of the class column, or None when there is none.
        csv_path: Path of the CSV the rows were read from, if known.

    Returns:
        The inferred exponent, or 0 when nothing could be inferred.
    """
    if rows:
        first = rows[0]
        for column in (_find_column(first, "palier"), _exponent_column(first)):
            if column is None:
                continue
            value = _try_parse_int(first.get(column))
            if value is not None and value > 0:
                return value

    if classe_col:
        in_name = re.search(r"2\^(\d+)", str(classe_col))
        if in_name:
            return int(in_name.group(1))

    if csv_path is not None:
        # The file name is checked before the whole path so that a parent
        # directory such as 'palier2p15/' cannot override the file's own tag.
        for text in (Path(csv_path).name, str(csv_path)):
            in_path = re.search(r"palier2p(\d+)", text)
            if in_path:
                return int(in_path.group(1))

    if rows and classe_col:
        # Unparsable cells are skipped one by one instead of abandoning the
        # whole heuristic on the first bad value.
        parsed = (_try_parse_int(r.get(classe_col)) for r in rows)
        values = [v for v in parsed if v is not None]
        if values:
            # Smallest m with 2^m > max value; negative maxima count as 0.
            return max(max(values), 0).bit_length()

    return 0
|
|
|
|
|
|
def run_scission(csv_path: str, out_json_path: str) -> None:
    """Read a Collatz pipeline CSV and write its scission certificate as JSON.

    The certificate is a JSON object with three keys: ``clauses`` (sorted
    distinct values of the class column), ``covered`` (sorted distinct values
    of the class and sister columns together) and ``palier`` (the modulus
    power returned by ``infer_palier``). Blank or non-integer cells are
    skipped. A CSV without data rows yields an empty certificate with
    palier 0.

    Args:
        csv_path: Path of the input CSV (UTF-8, with or without BOM).
        out_json_path: Path of the JSON file to write; missing parent
            directories are created.

    Raises:
        OSError: If the input cannot be read or the output cannot be written
            (e.g. FileNotFoundError for a missing input).
    """
    # newline="" is what the csv module expects from its file object, and
    # utf-8-sig drops a leading BOM that would otherwise end up glued to the
    # first header name.
    with Path(csv_path).open("r", encoding="utf-8-sig", newline="") as f:
        rows: list[dict] = [dict(record) for record in csv.DictReader(f)]

    # Created only once the input has been read, so a bad input path does not
    # leave an empty output directory behind.
    out_path = Path(out_json_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    if not rows:
        cert = {"clauses": [], "covered": [], "palier": 0}
        out_path.write_text(json.dumps(cert, indent=2), encoding="utf-8")
        print(f"Wrote {out_json_path} (empty)")
        return

    classe_col = _find_column(rows[0], "classe_mod_2^m", "classe_mod_2^27", "classe_mod_2^28", "classe_mod_2")
    soeur_col = _find_column(rows[0], "sœur", "soeur")

    clause_set: set[int] = set()
    covered: set[int] = set()
    for r in rows:
        if classe_col:
            c = _try_parse_int(r.get(classe_col))
            if c is not None:
                clause_set.add(c)
                covered.add(c)
        if soeur_col:
            s = _try_parse_int(r.get(soeur_col))
            if s is not None:
                covered.add(s)

    clauses = sorted(clause_set)
    covered_list = sorted(covered)
    palier = infer_palier(rows, classe_col, csv_path=Path(csv_path))

    cert = {
        "clauses": clauses,
        "covered": covered_list,
        "palier": palier,
    }
    out_path.write_text(json.dumps(cert, indent=2), encoding="utf-8")
    print(f"Wrote {out_json_path}: {len(clauses)} clauses, {len(covered_list)} covered, palier 2^{palier}")
|
|
|
|
|
|
def main() -> None:
    """Parse the command-line options and run the scission on them.

    Both ``--input``/``-i`` (CSV to read) and ``--output``/``-o`` (JSON to
    write) are required.
    """
    parser = argparse.ArgumentParser(description="Extract scission certificate from Collatz CSV")
    option_specs = (
        ("--input", "-i", "Input CSV path"),
        ("--output", "-o", "Output JSON path"),
    )
    for long_flag, short_flag, help_text in option_specs:
        parser.add_argument(long_flag, short_flag, required=True, help=help_text)

    options = parser.parse_args()
    run_scission(options.input, options.output)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|