algo/applications/collatz/collatz_k_scripts/collatz_verify_universal_clauses.py
ncantu 94280b93fd collatz: extract and verify universal clause candidates (Option A)
**Motivations:**
- Produce a deterministic bridge from C3 local witnesses to universal-clause candidates (D/F) usable in later H4/H5 formalization.

**Root causes:**
- Local witnesses (Lift(B12)-scoped) were not exportable into a versioned, machine-checkable clause artefact.

**Correctifs:**
- Add deterministic extraction with minimal stable modulus lifting for D_exact/F and local encoding for D_brother dependencies.
- Add deterministic verifier checking arithmetic consistency, lifting stability, and brother↔mate relation.

**Evolutions:**
- Version universal clause artefacts (JSON/MD) under `docs/artefacts/collatz/universal_clauses/`.
- Document the feature and reproduction steps in `docs/features/`.

**Pages affectées:**
- applications/collatz/collatz_k_scripts/collatz_extract_universal_clauses.py
- applications/collatz/collatz_k_scripts/collatz_verify_universal_clauses.py
- docs/artefacts/collatz/universal_clauses/*
- docs/features/collatz_universal_clause_extraction_optionA.md
2026-03-09 03:26:07 +01:00

340 lines
13 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
collatz_verify_universal_clauses.py
Deterministic verifier for the extracted clause artefact produced by
collatz_extract_universal_clauses.py.
Scope (Option A):
- check internal arithmetic consistency for each extracted D_exact / F clause
against the originating C3 witness record
- check deterministic lifting (stable residue) preserves the prefix word and
affine constants on the canonical representative
- check D_brother_local records point to an existing D_exact mate and match the
"sister" relation at the domain palier (xor 2^(m-1))
Outputs:
- docs/artefacts/collatz/universal_clauses/verification_universal_clauses.json
- docs/artefacts/collatz/universal_clauses/verification_universal_clauses.md
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from collatz_k_core import (
N0_D,
Nf_F,
U_step,
delta_F,
fusion_choice_a,
prefix_data,
preimage_m,
)
def _read_json(path: Path) -> object:
return json.loads(path.read_text(encoding="utf-8", errors="strict"))
def _display_path(path: Path, repo_root: Path) -> str:
try:
rel = path.resolve().relative_to(repo_root.resolve())
return str(rel)
except ValueError:
return str(path)
def _req_int(obj: dict[str, object], key: str) -> int:
v = obj.get(key)
if not isinstance(v, int):
raise ValueError(f"Expected int for {key}")
return v
def _opt_int(obj: dict[str, object], key: str) -> int | None:
v = obj.get(key)
if v is None:
return None
if not isinstance(v, int):
raise ValueError(f"Expected int|null for {key}")
return v
def _opt_str(obj: dict[str, object], key: str) -> str | None:
v = obj.get(key)
if v is None:
return None
if not isinstance(v, str):
raise ValueError(f"Expected str|null for {key}")
return v
def _opt_word(obj: dict[str, object], key: str) -> tuple[int, ...] | None:
v = obj.get(key)
if v is None:
return None
if not isinstance(v, list) or not all(isinstance(x, int) for x in v):
raise ValueError(f"Expected list[int]|null for {key}")
return tuple(v)
def run(
*,
verification_json: Path,
clauses_json: Path,
output_dir: Path,
repo_root: Path,
) -> None:
vobj = _read_json(verification_json)
if not isinstance(vobj, dict):
raise ValueError("Invalid C3 verification JSON: expected object")
domain = vobj.get("domain")
records = vobj.get("records")
if not isinstance(domain, dict) or not isinstance(records, list):
raise ValueError("Invalid C3 verification JSON: missing domain/records")
palier = domain.get("palier")
if not isinstance(palier, int):
raise ValueError("Invalid C3 verification JSON: domain.palier must be int")
cobj = _read_json(clauses_json)
if not isinstance(cobj, dict):
raise ValueError("Invalid clauses JSON: expected object")
clauses = cobj.get("clauses")
if not isinstance(clauses, list) or not all(isinstance(x, dict) for x in clauses):
raise ValueError("Invalid clauses JSON: missing clauses list")
by_source_n: dict[int, dict[str, object]] = {}
for c in clauses:
sn = c.get("source_n")
if not isinstance(sn, int):
raise ValueError("Invalid clause: missing source_n")
if sn in by_source_n:
raise ValueError(f"Duplicate source_n in clauses JSON: {sn}")
by_source_n[sn] = c
by_record_n: dict[int, dict[str, object]] = {}
for r in records:
if not isinstance(r, dict):
raise ValueError("Invalid C3 record: expected object")
n = r.get("n")
if not isinstance(n, int):
raise ValueError("Invalid C3 record: missing n")
if n in by_record_n:
raise ValueError(f"Duplicate n in C3 records: {n}")
by_record_n[n] = r
missing = sorted(set(by_record_n.keys()) - set(by_source_n.keys()))
if missing:
raise ValueError(f"Missing extracted clauses for {len(missing)} records (e.g. {missing[:20]})")
ok_records = 0
kind_counts: dict[str, int] = {"D_exact": 0, "F": 0, "D_brother_local": 0}
max_stable_mod: int | None = None
for n, r in by_record_n.items():
c = by_source_n[n]
kind = c.get("kind")
if not isinstance(kind, str):
raise ValueError(f"Invalid clause for n={n}: missing kind")
if kind not in kind_counts:
raise ValueError(f"Unknown clause kind for n={n}: {kind}")
kind_counts[kind] += 1
domain_palier = _req_int(c, "domain_palier")
if domain_palier != palier:
raise ValueError(f"Clause domain_palier mismatch for n={n}: {domain_palier} != {palier}")
word = _opt_word(c, "word")
A = _opt_int(c, "A")
C = _opt_int(c, "C")
k_or_t = _opt_int(c, "k_or_t")
m_stab = _opt_int(c, "stable_modulus_power")
r_stab = _opt_int(c, "stable_residue_mod_2p")
if m_stab is not None:
max_stable_mod = m_stab if max_stable_mod is None else max(max_stable_mod, m_stab)
if kind == "D_exact":
if r.get("kind") != "D_exact":
raise ValueError(f"Record kind mismatch for n={n}: expected D_exact")
k = _req_int(r, "k")
A_k = _req_int(r, "A_k")
C_k = _req_int(r, "C_k")
N0 = _req_int(r, "N0")
if k_or_t != k or A != A_k or C != C_k:
raise ValueError(f"D_exact clause params mismatch for n={n}")
if m_stab is None or r_stab is None or word is None:
raise ValueError(f"D_exact clause missing stability fields for n={n}")
if m_stab != A_k + 1:
raise ValueError(f"D_exact stable modulus mismatch for n={n}: {m_stab} != {A_k+1}")
pref = prefix_data(n, k)
if pref.word != word or pref.A != A_k or pref.C != C_k:
raise ValueError(f"D_exact prefix mismatch for n={n}")
pref_stab = prefix_data(r_stab, k)
if pref_stab.word != word or pref_stab.A != A_k or pref_stab.C != C_k:
raise ValueError(f"D_exact lift mismatch for n={n}")
N0_calc = N0_D(C_k, A_k, k)
if N0_calc != N0:
raise ValueError(f"D_exact N0 mismatch for n={n}: {N0_calc} != {N0}")
if n >= N0:
next_n = prefix_data(n, k).y
if not next_n < n:
raise ValueError(f"D_exact descent failed on representative for n={n}: U^k(n)={next_n} not < n")
ok_records += 1
continue
if kind == "F":
if r.get("kind") != "F":
raise ValueError(f"Record kind mismatch for n={n}: expected F")
t = _req_int(r, "t")
a = _req_int(r, "a")
y = _req_int(r, "y")
pre_m = _req_int(r, "preimage_m")
A_t = _req_int(r, "A_t")
C_t = _req_int(r, "C_t")
dF = _req_int(r, "DeltaF")
Nf = _req_int(r, "Nf")
src = r.get("source_csv")
if not isinstance(src, str):
raise ValueError(f"F record missing source_csv for n={n}")
a2 = _opt_int(c, "a")
y2 = _opt_int(c, "y")
pre2 = _opt_int(c, "preimage_m")
dF2 = _opt_int(c, "DeltaF")
Nf2 = _opt_int(c, "Nf")
src2 = _opt_str(c, "source_csv")
if (k_or_t, A, C, a2, y2, pre2, dF2, Nf2, src2) != (t, A_t, C_t, a, y, pre_m, dF, Nf, src):
raise ValueError(f"F clause params mismatch for n={n}")
if m_stab is None or r_stab is None or word is None:
raise ValueError(f"F clause missing stability fields for n={n}")
if m_stab != A_t + 1:
raise ValueError(f"F stable modulus mismatch for n={n}: {m_stab} != {A_t+1}")
pref = prefix_data(n, t)
if pref.word != word or pref.A != A_t or pref.C != C_t or pref.y != y:
raise ValueError(f"F prefix mismatch for n={n}")
pref_stab = prefix_data(r_stab, t)
if pref_stab.word != word or pref_stab.A != A_t or pref_stab.C != C_t or pref_stab.y != y:
raise ValueError(f"F lift mismatch for n={n}")
a_calc = fusion_choice_a(y)
if a_calc is None or a_calc != a:
raise ValueError(f"F a mismatch for n={n}: fusion_choice_a(y)={a_calc} != {a}")
dF_calc = delta_F(A_t, t, a)
if dF_calc != dF:
raise ValueError(f"F DeltaF mismatch for n={n}: {dF_calc} != {dF}")
Nf_calc = Nf_F(C_t, A_t, t, a)
if Nf_calc != Nf:
raise ValueError(f"F Nf mismatch for n={n}: {Nf_calc} != {Nf}")
pre_calc = preimage_m(y, a)
if pre_calc != pre_m:
raise ValueError(f"F preimage mismatch for n={n}: {pre_calc} != {pre_m}")
um, _ = U_step(pre_m)
if um != y:
raise ValueError(f"F witness failed for n={n}: U(preimage_m)={um} != y={y}")
if n >= Nf and not (pre_m < n):
raise ValueError(f"F m<n failed on representative for n={n}: m={pre_m} >= n")
ok_records += 1
continue
if kind == "D_brother_local":
if r.get("kind") != "D_brother":
raise ValueError(f"Record kind mismatch for n={n}: expected D_brother")
mate = _req_int(r, "mate_exact")
mate2 = _opt_int(c, "mate_exact")
if mate2 != mate:
raise ValueError(f"D_brother_local mate mismatch for n={n}")
shift = 1 << (palier - 1)
if (n ^ shift) != mate:
raise ValueError(f"D_brother_local mate relation mismatch for n={n}: n^2^(m-1) != mate_exact")
mate_rec = by_record_n.get(mate)
if not isinstance(mate_rec, dict) or mate_rec.get("kind") != "D_exact":
raise ValueError(f"D_brother_local mate is not a D_exact record for n={n}: mate={mate}")
ok_records += 1
continue
raise ValueError(f"Unhandled kind for n={n}: {kind}")
output_dir.mkdir(parents=True, exist_ok=True)
out_json = output_dir / "verification_universal_clauses.json"
out_md = output_dir / "verification_universal_clauses.md"
summary = {
"inputs": {
"verification_c3_local_descent_json": _display_path(verification_json, repo_root),
"clauses_universelles_json": _display_path(clauses_json, repo_root),
},
"domain": {"palier": palier, "max_stable_modulus_power": max_stable_mod},
"counts": {"ok_records": ok_records, **kind_counts},
"ok": True,
}
out_json.write_text(json.dumps(summary, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
lines: list[str] = []
lines.append("**Auteur** : Équipe 4NK")
lines.append("")
lines.append("# Vérification déterministe — clauses universelles extraites (Option A)")
lines.append("")
lines.append("## Entrées")
lines.append("")
lines.append(f"- C3 vérification JSON : `{_display_path(verification_json, repo_root)}`")
lines.append(f"- clauses JSON : `{_display_path(clauses_json, repo_root)}`")
lines.append("")
lines.append("## Résultat")
lines.append("")
lines.append(f"- ok : {summary['ok']}")
lines.append(f"- palier (domaine) : 2^{palier}")
if max_stable_mod is not None:
lines.append(f"- max(2^m_stable) observé : 2^{max_stable_mod}")
lines.append("")
lines.append("## Compteurs")
lines.append("")
lines.append(f"- ok_records : {ok_records}")
lines.append(f"- D_exact : {kind_counts['D_exact']}")
lines.append(f"- F : {kind_counts['F']}")
lines.append(f"- D_brother_local : {kind_counts['D_brother_local']}")
lines.append("")
out_md.write_text("\n".join(lines) + "\n", encoding="utf-8")
def main() -> None:
ap = argparse.ArgumentParser(description="Verify extracted universal clauses (Option A)")
ap.add_argument(
"--verification-json",
default="docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json",
help="Path to deterministic C3 verification JSON",
)
ap.add_argument(
"--clauses-json",
default="docs/artefacts/collatz/universal_clauses/clauses_universelles.json",
help="Path to extracted clauses JSON",
)
ap.add_argument(
"--output-dir",
default="docs/artefacts/collatz/universal_clauses",
help="Output directory for verification artefacts (JSON + MD)",
)
ap.add_argument(
"--repo-root",
default="",
help="Repository root used to display relative paths in outputs (defaults to current working directory)",
)
args = ap.parse_args()
repo_root = Path(args.repo_root).resolve() if args.repo_root.strip() else Path.cwd().resolve()
run(
verification_json=Path(args.verification_json).resolve(),
clauses_json=Path(args.clauses_json).resolve(),
output_dir=Path(args.output_dir).resolve(),
repo_root=repo_root,
)
if __name__ == "__main__":
main()