#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ collatz_extract_universal_clauses.py Option A (Lift(B12)-scoped): convert deterministic *local witnesses* from the C3 verification artefact into *universal clause candidates* (D/F) by lifting each witnessed residue to the minimal modulus that fixes its prefix of valuations. Inputs: - docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json Outputs (versionable, no transcripts): - docs/artefacts/collatz/universal_clauses/clauses_universelles.json - docs/artefacts/collatz/universal_clauses/clauses_universelles.md Notes: - D_exact and F witnesses are lifted to modulus 2^m_stable, where m_stable := max(domain_palier, A+1), because the witness is already fixed modulo 2^domain_palier. - D_brother witnesses are preserved as *local* dependencies (mate_exact) at the domain palier; they are not asserted as standalone universal reductions here. """ from __future__ import annotations import argparse import json from dataclasses import dataclass from pathlib import Path from typing import Literal from collatz_k_core import prefix_data ClauseKind = Literal["D_exact", "F", "D_brother_local"] def _read_json(path: Path) -> object: return json.loads(path.read_text(encoding="utf-8", errors="strict")) def _display_path(path: Path, repo_root: Path) -> str: try: rel = path.resolve().relative_to(repo_root.resolve()) return str(rel) except ValueError: return str(path) @dataclass(frozen=True) class ExtractedClause: kind: ClauseKind domain_palier: int source_n: int stable_modulus_power: int | None stable_residue_mod_2p: int | None # shared: prefix length + affine constants k_or_t: int | None A: int | None C: int | None word: tuple[int, ...] | None # D N0: int | None # F a: int | None y: int | None preimage_m: int | None DeltaF: int | None Nf: int | None source_csv: str | None # brother mate_exact: int | None def to_json(self) -> dict[str, object]: return { "kind": self.kind, "domain_palier": self.domain_palier, "source_n": self.source_n, "stable_modulus_power": self.stable_modulus_power, "stable_residue_mod_2p": self.stable_residue_mod_2p, "k_or_t": self.k_or_t, "A": self.A, "C": self.C, "word": list(self.word) if self.word is not None else None, "N0": self.N0, "a": self.a, "y": self.y, "preimage_m": self.preimage_m, "DeltaF": self.DeltaF, "Nf": self.Nf, "source_csv": self.source_csv, "mate_exact": self.mate_exact, } def _lift_to_stable_residue( *, n0: int, base_palier: int, target_palier: int, prefix_len: int, expected_word: tuple[int, ...], ) -> int: """ Deterministically lift a residue from 2^base_palier to 2^target_palier by choosing, at each step, the unique child (r or r+2^m) that preserves the expected prefix word. """ if target_palier < base_palier: raise ValueError("target_palier must be >= base_palier") if prefix_len < 1: raise ValueError("prefix_len must be >= 1") r = n0 for m in range(base_palier, target_palier): shift = 1 << m c0 = r c1 = r + shift w0 = prefix_data(c0, prefix_len).word w1 = prefix_data(c1, prefix_len).word ok0 = w0 == expected_word ok1 = w1 == expected_word if ok0 and not ok1: r = c0 elif ok1 and not ok0: r = c1 else: raise ValueError( "Ambiguous lift (expected exactly one child to preserve the prefix word): " f"m={m}, base_palier={base_palier}, target_palier={target_palier}, n0={n0}, ok0={ok0}, ok1={ok1}" ) return r def _format_int_list(values: list[int], per_line: int = 16) -> list[str]: lines: list[str] = [] for i in range(0, len(values), per_line): lines.append(", ".join(str(x) for x in values[i : i + per_line])) return lines def run(*, verification_json: Path, output_dir: Path, repo_root: Path) -> None: obj = _read_json(verification_json) if not isinstance(obj, dict): raise ValueError("Invalid C3 verification JSON: expected object") domain = obj.get("domain") records = obj.get("records") if not isinstance(domain, dict) or not isinstance(records, list): raise ValueError("Invalid C3 verification JSON: missing domain/records") palier = domain.get("palier") if not isinstance(palier, int): raise ValueError("Invalid C3 verification JSON: domain.palier must be int") extracted: list[ExtractedClause] = [] for rec in records: if not isinstance(rec, dict): raise ValueError("Invalid C3 record: expected object") n = rec.get("n") kind = rec.get("kind") if not isinstance(n, int) or not isinstance(kind, str): raise ValueError("Invalid C3 record: missing n/kind") if kind == "D_exact": k = rec.get("k") A_k = rec.get("A_k") C_k = rec.get("C_k") N0 = rec.get("N0") if not all(isinstance(x, int) for x in [k, A_k, C_k, N0]): raise ValueError(f"Invalid D_exact record for n={n}: missing k/A_k/C_k/N0") pref = prefix_data(n, k) if pref.A != A_k or pref.C != C_k: raise ValueError(f"D_exact mismatch for n={n}: prefix_data differs from record") m_stab = max(palier, A_k + 1) r_stab = _lift_to_stable_residue( n0=n, base_palier=palier, target_palier=m_stab, prefix_len=k, expected_word=pref.word, ) pref_stab = prefix_data(r_stab, k) if pref_stab.word != pref.word or pref_stab.A != pref.A or pref_stab.C != pref.C: raise ValueError(f"D_exact lift check failed for n={n} at modulus 2^{m_stab}") extracted.append( ExtractedClause( kind="D_exact", domain_palier=palier, source_n=n, stable_modulus_power=m_stab, stable_residue_mod_2p=r_stab, k_or_t=k, A=A_k, C=C_k, word=pref.word, N0=N0, a=None, y=None, preimage_m=None, DeltaF=None, Nf=None, source_csv=None, mate_exact=None, ) ) continue if kind == "F": t = rec.get("t") a = rec.get("a") y = rec.get("y") preimage_m = rec.get("preimage_m") A_t = rec.get("A_t") C_t = rec.get("C_t") DeltaF = rec.get("DeltaF") Nf = rec.get("Nf") source_csv = rec.get("source_csv") ints_ok = all(isinstance(x, int) for x in [t, a, y, preimage_m, A_t, C_t, DeltaF, Nf]) if not ints_ok or not isinstance(source_csv, str): raise ValueError(f"Invalid F record for n={n}: missing fields") pref = prefix_data(n, t) if pref.A != A_t or pref.C != C_t or pref.y != y: raise ValueError(f"F mismatch for n={n}: prefix_data differs from record") m_stab = max(palier, A_t + 1) r_stab = _lift_to_stable_residue( n0=n, base_palier=palier, target_palier=m_stab, prefix_len=t, expected_word=pref.word, ) pref_stab = prefix_data(r_stab, t) if pref_stab.word != pref.word or pref_stab.A != pref.A or pref_stab.C != pref.C or pref_stab.y != pref.y: raise ValueError(f"F lift check failed for n={n} at modulus 2^{m_stab}") extracted.append( ExtractedClause( kind="F", domain_palier=palier, source_n=n, stable_modulus_power=m_stab, stable_residue_mod_2p=r_stab, k_or_t=t, A=A_t, C=C_t, word=pref.word, N0=None, a=a, y=y, preimage_m=preimage_m, DeltaF=DeltaF, Nf=Nf, source_csv=source_csv, mate_exact=None, ) ) continue if kind == "D_brother": k = rec.get("k") A_k = rec.get("A_k") C_k = rec.get("C_k") mate = rec.get("mate_exact") if not all(isinstance(x, int) for x in [k, A_k, C_k, mate]): raise ValueError(f"Invalid D_brother record for n={n}: missing fields") pref = prefix_data(n, k) if pref.A != A_k or pref.C != C_k: raise ValueError(f"D_brother mismatch for n={n}: prefix_data differs from record") extracted.append( ExtractedClause( kind="D_brother_local", domain_palier=palier, source_n=n, stable_modulus_power=None, stable_residue_mod_2p=None, k_or_t=k, A=A_k, C=C_k, word=pref.word, N0=None, a=None, y=None, preimage_m=None, DeltaF=None, Nf=None, source_csv=None, mate_exact=mate, ) ) continue raise ValueError(f"Unknown record kind in C3 verification JSON: {kind}") extracted = sorted(extracted, key=lambda c: (c.kind, c.source_n)) max_stab = max([c.stable_modulus_power for c in extracted if c.stable_modulus_power is not None], default=None) out_json = output_dir / "clauses_universelles.json" out_md = output_dir / "clauses_universelles.md" output_dir.mkdir(parents=True, exist_ok=True) summary = { "inputs": { "verification_c3_local_descent_json": _display_path(verification_json, repo_root), }, "domain": {"palier": palier, "max_stable_modulus_power": max_stab}, "counts": { "total": len(extracted), "D_exact": sum(1 for c in extracted if c.kind == "D_exact"), "F": sum(1 for c in extracted if c.kind == "F"), "D_brother_local": sum(1 for c in extracted if c.kind == "D_brother_local"), }, "clauses": [c.to_json() for c in extracted], } out_json.write_text(json.dumps(summary, indent=2, ensure_ascii=False) + "\n", encoding="utf-8") # Markdown output (short, human-checkable) lines: list[str] = [] lines.append("**Auteur** : Équipe 4NK") lines.append("") lines.append("# Clauses universelles extraites — Option A (Lift(B12))") lines.append("") lines.append("## Entrée") lines.append("") lines.append(f"- C3 vérification JSON : `{_display_path(verification_json, repo_root)}`") lines.append("") lines.append("## Domaine") lines.append("") lines.append(f"- palier (domaine des témoins) : 2^{palier}") if max_stab is not None: lines.append(f"- max(2^m_stable) observé : 2^{max_stab}") lines.append("") lines.append("## Compteurs") lines.append("") lines.append(f"- total : {summary['counts']['total']}") lines.append(f"- D_exact : {summary['counts']['D_exact']}") lines.append(f"- F : {summary['counts']['F']}") lines.append(f"- D_brother_local : {summary['counts']['D_brother_local']}") lines.append("") lines.append("## Table (extrait)") lines.append("") header = ["kind", "source_n", "m_stable", "residue_mod_2^m", "k/t", "A", "N0/Nf", "mate_exact"] lines.append("| " + " | ".join(header) + " |") lines.append("| " + " | ".join(["---"] * len(header)) + " |") for c in extracted: m_st = "" if c.stable_modulus_power is None else str(c.stable_modulus_power) r_st = "" if c.stable_residue_mod_2p is None else str(c.stable_residue_mod_2p) kt = "" if c.k_or_t is None else str(c.k_or_t) A = "" if c.A is None else str(c.A) if c.kind == "D_exact": thr = "" if c.N0 is None else f"N0={c.N0}" elif c.kind == "F": thr = "" if c.Nf is None else f"Nf={c.Nf}" else: thr = "" mate = "" if c.mate_exact is None else str(c.mate_exact) lines.append(f"| {c.kind} | {c.source_n} | {m_st} | {r_st} | {kt} | {A} | {thr} | {mate} |") lines.append("") lines.append("## Notes") lines.append("") lines.append( "- `D_exact` et `F` sont relevées au module \\(2^{m_{stable}}\\) avec " "\\(m_{stable}=\\max(m, A+1)\\), où \\(m\\) est le palier du domaine, afin de figer le mot de valuations du témoin." ) lines.append("- `D_brother_local` encode une dépendance locale (palier du domaine) vers un `mate_exact` ; elle n’est pas utilisée ici comme clause universelle autonome.") lines.append("") out_md.write_text("\n".join(lines) + "\n", encoding="utf-8") def main() -> None: ap = argparse.ArgumentParser(description="Extract universal clause candidates from deterministic C3 witnesses (Option A)") ap.add_argument( "--verification-json", default="docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json", help="Path to deterministic C3 verification JSON", ) ap.add_argument( "--output-dir", default="docs/artefacts/collatz/universal_clauses", help="Output directory for extracted clause artefacts (JSON + MD)", ) ap.add_argument( "--repo-root", default="", help="Repository root used to display relative paths in outputs (defaults to current working directory)", ) args = ap.parse_args() repo_root = Path(args.repo_root).resolve() if args.repo_root.strip() else Path.cwd().resolve() run( verification_json=Path(args.verification_json).resolve(), output_dir=Path(args.output_dir).resolve(), repo_root=repo_root, ) if __name__ == "__main__": main()