ncantu bd529682bf collatz: add palier2p15/p16 artefacts and Sm refinement tooling
**Motivations:**
- Publish new Collatz palier runs and associated artefacts (C3 local descent, universal clauses, iteration protocol).
- Extend the scripts toolbox to generate/verify clauses and build refinement certificates over S_m.

**Root causes:**
- Universal clause witnesses were lifted to 2^(A+1) even when the witness is already fixed modulo the domain palier, leading to unstable or unnecessarily weak/ambiguous modulus choices.
- CSV palier inference in scission could mis-detect short column names (e.g. "m") by substring matching.

**Correctifs:**
- Lift D_exact/F witnesses to m_stable := max(m, A+1) in universal clause extraction and run reports.
- Make scission palier/m column detection exact-match to avoid false positives.
- Update C3 local descent verification/reporting to use strict fusion witness selection prioritizing lower modular stability and refreshed D/F metrics.
- Add a dedicated run report profile for per-palier universal clauses.

**Evolutions:**
- Add scripts for terminal clauses and minorated descent clauses over S_m, their deterministic verification, and multi-level refinement certificate building.
- Add modular tooling for register_K and incremental comparison of D_minor families.
- Add/update feature documentation for the new pipelines and generated reports.

**Pages affectées:**
- applications/collatz/collatz_k_scripts/README.md
- applications/collatz/collatz_k_scripts/collatz_extract_universal_clauses.py
- applications/collatz/collatz_k_scripts/collatz_generate_run_report.py
- applications/collatz/collatz_k_scripts/collatz_iterate_palier_protocol.py
- applications/collatz/collatz_k_scripts/collatz_scission.py
- applications/collatz/collatz_k_scripts/collatz_verify_c3_local_descent.py
- applications/collatz/collatz_k_scripts/collatz_verify_universal_clauses.py
- applications/collatz/collatz_k_scripts/*refinement*over_Sm*.py
- applications/collatz/collatz_k_scripts/collatz_generate_*clauses_over_Sm.py
- applications/collatz/collatz_k_scripts/collatz_verify_minorated_descent_clauses_over_Sm.py
- applications/collatz/collatz_k_scripts/collatz_build_register_K_modular.py
- applications/collatz/collatz_k_scripts/collatz_compare_dminor_families_incremental.py
- applications/collatz/*.md
- docs/features/*.md
- docs/artefacts/collatz/**
- docs/collatz_run_report_2026-03-09_*.md
2026-03-09 23:29:59 +01:00

172 lines
5.2 KiB
Python

# -*- coding: utf-8 -*-
"""
collatz_scission.py
Read CSV from collatz pipeline, extract covered classes (classe_mod_2^m and sœur),
output JSON certificate with clauses, covered set, and residual kernel info.
Usage: --input CSV_PATH --output JSON_PATH
"""
from __future__ import annotations
import argparse
import csv
import json
import re
from pathlib import Path
def _find_column(row: dict, *candidates: str) -> str | None:
    """
    Return the first column name of ``row`` that matches one of ``candidates``.

    Candidates are tried in the order given. For each candidate, a column
    whose normalized name (spaces removed, lower-cased) equals the normalized
    candidate is preferred; failing that, the first column whose name contains
    the candidate as a case-sensitive substring is returned.

    Columns are scanned in the row's own key order, so the result does not
    depend on string hashing and is reproducible from one run to the next.
    Non-string keys are ignored: ``csv.DictReader`` files surplus fields under
    the key ``None``, which cannot be compared with a column name.

    Returns ``None`` when no candidate matches any column.
    """
    names = [key for key in row if isinstance(key, str)]
    for cand in candidates:
        wanted = cand.replace(" ", "").lower()
        # An exact (normalized) hit is more specific than a substring hit.
        for name in names:
            if name.replace(" ", "").lower() == wanted:
                return name
        for name in names:
            if cand in name:
                return name
    return None
def _find_column_exact(row: dict, *candidates: str) -> str | None:
    """
    Return the first column whose normalized name equals one of ``candidates``.

    Normalization removes spaces and lower-cases both sides. Use this for
    short keys like 'm' where substring matching is unsafe
    (e.g. 'classe_mod_2^m' contains 'm' but is not an exponent column).

    Candidates are tried in the order given. When two columns normalize to the
    same name, the one that comes first in the row wins, so the result is
    reproducible. Non-string keys are ignored: ``csv.DictReader`` files
    surplus fields under the key ``None``.

    Returns ``None`` when no candidate matches.
    """
    by_normalized: dict[str, str] = {}
    for key in row:
        if isinstance(key, str):
            # setdefault keeps the earliest column for a given normalized name.
            by_normalized.setdefault(key.replace(" ", "").lower(), key)
    for cand in candidates:
        hit = by_normalized.get(cand.replace(" ", "").lower())
        if hit is not None:
            return hit
    return None
def _try_parse_int(value: object) -> int | None:
    """
    Convert ``value`` to an int when that is possible, otherwise return None.

    An int is returned unchanged. A string is stripped of surrounding
    whitespace and parsed with ``int``; a blank or non-numeric string gives
    None. Every other type, including None itself, gives None.
    """
    if isinstance(value, int):
        return value
    if not isinstance(value, str):
        return None
    text = value.strip()
    if not text:
        return None
    try:
        parsed = int(text)
    except ValueError:
        return None
    return parsed
def infer_palier(rows: list[dict], classe_col: str | None, csv_path: Path | None = None) -> int:
    """
    Infer the modulus power m.

    Sources are tried in this order; the first one that yields a value wins:
      1. an explicit numeric column named exactly 'palier', then 'm' or
         'modulus_power', read from the first row (positive integers only);
      2. the class column name when it embeds '2^<m>' (e.g. 'classe_mod_2^27');
      3. a 'palier2p<m>' marker in the CSV file name, then anywhere in the
         path when the file name itself carries none;
      4. legacy heuristic: the bit length of the largest integer found in the
         class column (not reliable when values are sparse).

    Args:
        rows: CSV rows as dictionaries keyed by column name.
        classe_col: Name of the class column, or None when it is unknown.
        csv_path: Path of the CSV file, used only for the file-name marker.

    Returns:
        The inferred m, or 0 when nothing can be inferred.
    """
    if rows:
        first = rows[0]
        # 'palier' outranks 'm'/'modulus_power'. Exact matching keeps names
        # such as 'classe_mod_2^m' from being mistaken for an exponent column.
        for names in (("palier",), ("m", "modulus_power")):
            col = _find_column_exact(first, *names)
            if col:
                v = _try_parse_int(first.get(col))
                if v is not None and v > 0:
                    return v

    if classe_col:
        in_name = re.search(r"2\^(\d+)", classe_col)
        if in_name:
            return int(in_name.group(1))

    if csv_path is not None:
        # Look at the file name first: a parent directory may carry the marker
        # of a different palier (e.g. 'palier2p15/..._palier2p16.csv').
        for text in (Path(csv_path).name, str(csv_path)):
            in_path = re.search(r"palier2p(\d+)", text)
            if in_path:
                return int(in_path.group(1))

    if rows and classe_col:
        values: list[int] = []
        for r in rows:
            raw = r.get(classe_col)
            if not raw:
                continue
            try:
                values.append(int(raw))
            except (ValueError, TypeError):
                # One malformed cell must not discard the whole estimate.
                continue
        if values:
            # Smallest m with 2^m > max value; non-positive maxima give 0.
            return max(max(values), 0).bit_length()
    return 0
def _int_cells(rows: list[dict], column: str | None) -> set[int]:
    """Collect the integers found in ``column``, skipping blank or non-integer cells."""
    found: set[int] = set()
    if not column:
        return found
    for row in rows:
        raw = row.get(column)
        if raw is None or not str(raw).strip():
            continue
        try:
            found.add(int(raw))
        except (ValueError, TypeError):
            # Best effort: a malformed cell is ignored, not fatal.
            continue
    return found


def run_scission(csv_path: str, out_json_path: str) -> None:
    """
    Read the pipeline CSV and write a JSON scission certificate.

    The certificate is a JSON object with three keys:
      - "clauses": sorted distinct integers found in the class column;
      - "covered": sorted distinct integers found in the class column or in
        the sœur column;
      - "palier": the modulus power returned by infer_palier (0 when the CSV
        has no data rows).

    Cells that are blank or not integers are skipped. The parent directory of
    the output file is created if needed, but only once the input has been
    read, so an unreadable input leaves no stray directory behind. A one-line
    summary is printed after the certificate is written.

    Args:
        csv_path: Path of the input CSV.
        out_json_path: Path of the JSON certificate to write.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # utf-8-sig decodes plain UTF-8 unchanged and strips a leading BOM, which
    # would otherwise become part of the first column name. newline="" lets
    # the csv module handle line endings inside quoted fields itself.
    with Path(csv_path).open("r", encoding="utf-8-sig", newline="") as f:
        rows: list[dict] = [dict(row) for row in csv.DictReader(f)]

    out_path = Path(out_json_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    if not rows:
        cert = {"clauses": [], "covered": [], "palier": 0}
        out_path.write_text(json.dumps(cert, indent=2), encoding="utf-8")
        print(f"Wrote {out_json_path} (empty)")
        return

    classe_col = _find_column(rows[0], "classe_mod_2^m", "classe_mod_2^27", "classe_mod_2^28", "classe_mod_2")
    soeur_col = _find_column(rows[0], "sœur", "soeur")

    clause_values = _int_cells(rows, classe_col)
    clauses = sorted(clause_values)
    # Every clause class is covered, together with its sister class.
    covered_list = sorted(clause_values | _int_cells(rows, soeur_col))
    palier = infer_palier(rows, classe_col, csv_path=Path(csv_path))

    cert = {
        "clauses": clauses,
        "covered": covered_list,
        "palier": palier,
    }
    out_path.write_text(json.dumps(cert, indent=2), encoding="utf-8")
    print(f"Wrote {out_json_path}: {len(clauses)} clauses, {len(covered_list)} covered, palier 2^{palier}")
def main() -> None:
    """Parse the command-line options and run the scission on the given paths."""
    parser = argparse.ArgumentParser(description="Extract scission certificate from Collatz CSV")
    # Both options are mandatory and differ only by their flags and help text.
    option_specs = (
        ("--input", "-i", "Input CSV path"),
        ("--output", "-o", "Output JSON path"),
    )
    for long_flag, short_flag, help_text in option_specs:
        parser.add_argument(long_flag, short_flag, required=True, help=help_text)
    options = parser.parse_args()
    run_scission(options.input, options.output)
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()