algo/applications/collatz/collatz_k_scripts/collatz_extract_universal_clauses.py
ncantu 94280b93fd collatz: extract and verify universal clause candidates (Option A)
**Motivations:**
- Produce a deterministic bridge from C3 local witnesses to universal-clause candidates (D/F) usable in later H4/H5 formalization.

**Root causes:**
- Local witnesses (Lift(B12)-scoped) were not exportable into a versioned, machine-checkable clause artefact.

**Correctifs:**
- Add deterministic extraction with minimal stable modulus lifting for D_exact/F and local encoding for D_brother dependencies.
- Add deterministic verifier checking arithmetic consistency, lifting stability, and brother↔mate relation.

**Evolutions:**
- Version universal clause artefacts (JSON/MD) under `docs/artefacts/collatz/universal_clauses/`.
- Document the feature and reproduction steps in `docs/features/`.

**Pages affectées:**
- applications/collatz/collatz_k_scripts/collatz_extract_universal_clauses.py
- applications/collatz/collatz_k_scripts/collatz_verify_universal_clauses.py
- docs/artefacts/collatz/universal_clauses/*
- docs/features/collatz_universal_clause_extraction_optionA.md
2026-03-09 03:26:07 +01:00

393 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
collatz_extract_universal_clauses.py
Option A (Lift(B12)-scoped): convert deterministic *local witnesses* from the C3
verification artefact into *universal clause candidates* (D/F) by lifting each
witnessed residue to the minimal modulus that fixes its prefix of valuations.
Inputs:
- docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json
Outputs (versionable, no transcripts):
- docs/artefacts/collatz/universal_clauses/clauses_universelles.json
- docs/artefacts/collatz/universal_clauses/clauses_universelles.md
Notes:
- D_exact and F witnesses are lifted to modulus 2^(A+1) where A is the prefix sum.
- D_brother witnesses are preserved as *local* dependencies (mate_exact) at the
domain palier; they are not asserted as standalone universal reductions here.
"""
from __future__ import annotations
import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Literal
from collatz_k_core import prefix_data
ClauseKind = Literal["D_exact", "F", "D_brother_local"]
def _read_json(path: Path) -> object:
return json.loads(path.read_text(encoding="utf-8", errors="strict"))
def _display_path(path: Path, repo_root: Path) -> str:
try:
rel = path.resolve().relative_to(repo_root.resolve())
return str(rel)
except ValueError:
return str(path)
@dataclass(frozen=True)
class ExtractedClause:
kind: ClauseKind
domain_palier: int
source_n: int
stable_modulus_power: int | None
stable_residue_mod_2p: int | None
# shared: prefix length + affine constants
k_or_t: int | None
A: int | None
C: int | None
word: tuple[int, ...] | None
# D
N0: int | None
# F
a: int | None
y: int | None
preimage_m: int | None
DeltaF: int | None
Nf: int | None
source_csv: str | None
# brother
mate_exact: int | None
def to_json(self) -> dict[str, object]:
return {
"kind": self.kind,
"domain_palier": self.domain_palier,
"source_n": self.source_n,
"stable_modulus_power": self.stable_modulus_power,
"stable_residue_mod_2p": self.stable_residue_mod_2p,
"k_or_t": self.k_or_t,
"A": self.A,
"C": self.C,
"word": list(self.word) if self.word is not None else None,
"N0": self.N0,
"a": self.a,
"y": self.y,
"preimage_m": self.preimage_m,
"DeltaF": self.DeltaF,
"Nf": self.Nf,
"source_csv": self.source_csv,
"mate_exact": self.mate_exact,
}
def _lift_to_stable_residue(
*,
n0: int,
base_palier: int,
target_palier: int,
prefix_len: int,
expected_word: tuple[int, ...],
) -> int:
"""
Deterministically lift a residue from 2^base_palier to 2^target_palier by choosing,
at each step, the unique child (r or r+2^m) that preserves the expected prefix word.
"""
if target_palier < base_palier:
raise ValueError("target_palier must be >= base_palier")
if prefix_len < 1:
raise ValueError("prefix_len must be >= 1")
r = n0
for m in range(base_palier, target_palier):
shift = 1 << m
c0 = r
c1 = r + shift
w0 = prefix_data(c0, prefix_len).word
w1 = prefix_data(c1, prefix_len).word
ok0 = w0 == expected_word
ok1 = w1 == expected_word
if ok0 and not ok1:
r = c0
elif ok1 and not ok0:
r = c1
else:
raise ValueError(
"Ambiguous lift (expected exactly one child to preserve the prefix word): "
f"m={m}, base_palier={base_palier}, target_palier={target_palier}, n0={n0}, ok0={ok0}, ok1={ok1}"
)
return r
def _format_int_list(values: list[int], per_line: int = 16) -> list[str]:
lines: list[str] = []
for i in range(0, len(values), per_line):
lines.append(", ".join(str(x) for x in values[i : i + per_line]))
return lines
def run(*, verification_json: Path, output_dir: Path, repo_root: Path) -> None:
obj = _read_json(verification_json)
if not isinstance(obj, dict):
raise ValueError("Invalid C3 verification JSON: expected object")
domain = obj.get("domain")
records = obj.get("records")
if not isinstance(domain, dict) or not isinstance(records, list):
raise ValueError("Invalid C3 verification JSON: missing domain/records")
palier = domain.get("palier")
if not isinstance(palier, int):
raise ValueError("Invalid C3 verification JSON: domain.palier must be int")
extracted: list[ExtractedClause] = []
for rec in records:
if not isinstance(rec, dict):
raise ValueError("Invalid C3 record: expected object")
n = rec.get("n")
kind = rec.get("kind")
if not isinstance(n, int) or not isinstance(kind, str):
raise ValueError("Invalid C3 record: missing n/kind")
if kind == "D_exact":
k = rec.get("k")
A_k = rec.get("A_k")
C_k = rec.get("C_k")
N0 = rec.get("N0")
if not all(isinstance(x, int) for x in [k, A_k, C_k, N0]):
raise ValueError(f"Invalid D_exact record for n={n}: missing k/A_k/C_k/N0")
pref = prefix_data(n, k)
if pref.A != A_k or pref.C != C_k:
raise ValueError(f"D_exact mismatch for n={n}: prefix_data differs from record")
m_stab = A_k + 1
r_stab = _lift_to_stable_residue(
n0=n,
base_palier=palier,
target_palier=m_stab,
prefix_len=k,
expected_word=pref.word,
)
pref_stab = prefix_data(r_stab, k)
if pref_stab.word != pref.word or pref_stab.A != pref.A or pref_stab.C != pref.C:
raise ValueError(f"D_exact lift check failed for n={n} at modulus 2^{m_stab}")
extracted.append(
ExtractedClause(
kind="D_exact",
domain_palier=palier,
source_n=n,
stable_modulus_power=m_stab,
stable_residue_mod_2p=r_stab,
k_or_t=k,
A=A_k,
C=C_k,
word=pref.word,
N0=N0,
a=None,
y=None,
preimage_m=None,
DeltaF=None,
Nf=None,
source_csv=None,
mate_exact=None,
)
)
continue
if kind == "F":
t = rec.get("t")
a = rec.get("a")
y = rec.get("y")
preimage_m = rec.get("preimage_m")
A_t = rec.get("A_t")
C_t = rec.get("C_t")
DeltaF = rec.get("DeltaF")
Nf = rec.get("Nf")
source_csv = rec.get("source_csv")
ints_ok = all(isinstance(x, int) for x in [t, a, y, preimage_m, A_t, C_t, DeltaF, Nf])
if not ints_ok or not isinstance(source_csv, str):
raise ValueError(f"Invalid F record for n={n}: missing fields")
pref = prefix_data(n, t)
if pref.A != A_t or pref.C != C_t or pref.y != y:
raise ValueError(f"F mismatch for n={n}: prefix_data differs from record")
m_stab = A_t + 1
r_stab = _lift_to_stable_residue(
n0=n,
base_palier=palier,
target_palier=m_stab,
prefix_len=t,
expected_word=pref.word,
)
pref_stab = prefix_data(r_stab, t)
if pref_stab.word != pref.word or pref_stab.A != pref.A or pref_stab.C != pref.C or pref_stab.y != pref.y:
raise ValueError(f"F lift check failed for n={n} at modulus 2^{m_stab}")
extracted.append(
ExtractedClause(
kind="F",
domain_palier=palier,
source_n=n,
stable_modulus_power=m_stab,
stable_residue_mod_2p=r_stab,
k_or_t=t,
A=A_t,
C=C_t,
word=pref.word,
N0=None,
a=a,
y=y,
preimage_m=preimage_m,
DeltaF=DeltaF,
Nf=Nf,
source_csv=source_csv,
mate_exact=None,
)
)
continue
if kind == "D_brother":
k = rec.get("k")
A_k = rec.get("A_k")
C_k = rec.get("C_k")
mate = rec.get("mate_exact")
if not all(isinstance(x, int) for x in [k, A_k, C_k, mate]):
raise ValueError(f"Invalid D_brother record for n={n}: missing fields")
pref = prefix_data(n, k)
if pref.A != A_k or pref.C != C_k:
raise ValueError(f"D_brother mismatch for n={n}: prefix_data differs from record")
extracted.append(
ExtractedClause(
kind="D_brother_local",
domain_palier=palier,
source_n=n,
stable_modulus_power=None,
stable_residue_mod_2p=None,
k_or_t=k,
A=A_k,
C=C_k,
word=pref.word,
N0=None,
a=None,
y=None,
preimage_m=None,
DeltaF=None,
Nf=None,
source_csv=None,
mate_exact=mate,
)
)
continue
raise ValueError(f"Unknown record kind in C3 verification JSON: {kind}")
extracted = sorted(extracted, key=lambda c: (c.kind, c.source_n))
max_stab = max([c.stable_modulus_power for c in extracted if c.stable_modulus_power is not None], default=None)
out_json = output_dir / "clauses_universelles.json"
out_md = output_dir / "clauses_universelles.md"
output_dir.mkdir(parents=True, exist_ok=True)
summary = {
"inputs": {
"verification_c3_local_descent_json": _display_path(verification_json, repo_root),
},
"domain": {"palier": palier, "max_stable_modulus_power": max_stab},
"counts": {
"total": len(extracted),
"D_exact": sum(1 for c in extracted if c.kind == "D_exact"),
"F": sum(1 for c in extracted if c.kind == "F"),
"D_brother_local": sum(1 for c in extracted if c.kind == "D_brother_local"),
},
"clauses": [c.to_json() for c in extracted],
}
out_json.write_text(json.dumps(summary, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
# Markdown output (short, human-checkable)
lines: list[str] = []
lines.append("**Auteur** : Équipe 4NK")
lines.append("")
lines.append("# Clauses universelles extraites — Option A (Lift(B12))")
lines.append("")
lines.append("## Entrée")
lines.append("")
lines.append(f"- C3 vérification JSON : `{_display_path(verification_json, repo_root)}`")
lines.append("")
lines.append("## Domaine")
lines.append("")
lines.append(f"- palier (domaine des témoins) : 2^{palier}")
if max_stab is not None:
lines.append(f"- max(2^m_stable) observé : 2^{max_stab}")
lines.append("")
lines.append("## Compteurs")
lines.append("")
lines.append(f"- total : {summary['counts']['total']}")
lines.append(f"- D_exact : {summary['counts']['D_exact']}")
lines.append(f"- F : {summary['counts']['F']}")
lines.append(f"- D_brother_local : {summary['counts']['D_brother_local']}")
lines.append("")
lines.append("## Table (extrait)")
lines.append("")
header = ["kind", "source_n", "m_stable", "residue_mod_2^m", "k/t", "A", "N0/Nf", "mate_exact"]
lines.append("| " + " | ".join(header) + " |")
lines.append("| " + " | ".join(["---"] * len(header)) + " |")
for c in extracted:
m_st = "" if c.stable_modulus_power is None else str(c.stable_modulus_power)
r_st = "" if c.stable_residue_mod_2p is None else str(c.stable_residue_mod_2p)
kt = "" if c.k_or_t is None else str(c.k_or_t)
A = "" if c.A is None else str(c.A)
if c.kind == "D_exact":
thr = "" if c.N0 is None else f"N0={c.N0}"
elif c.kind == "F":
thr = "" if c.Nf is None else f"Nf={c.Nf}"
else:
thr = ""
mate = "" if c.mate_exact is None else str(c.mate_exact)
lines.append(f"| {c.kind} | {c.source_n} | {m_st} | {r_st} | {kt} | {A} | {thr} | {mate} |")
lines.append("")
lines.append("## Notes")
lines.append("")
lines.append("- `D_exact` et `F` sont relevées au module minimal \\(2^{A+1}\\) qui fige le mot de valuations du témoin.")
lines.append("- `D_brother_local` encode une dépendance locale (palier du domaine) vers un `mate_exact` ; elle nest pas utilisée ici comme clause universelle autonome.")
lines.append("")
out_md.write_text("\n".join(lines) + "\n", encoding="utf-8")
def main() -> None:
ap = argparse.ArgumentParser(description="Extract universal clause candidates from deterministic C3 witnesses (Option A)")
ap.add_argument(
"--verification-json",
default="docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json",
help="Path to deterministic C3 verification JSON",
)
ap.add_argument(
"--output-dir",
default="docs/artefacts/collatz/universal_clauses",
help="Output directory for extracted clause artefacts (JSON + MD)",
)
ap.add_argument(
"--repo-root",
default="",
help="Repository root used to display relative paths in outputs (defaults to current working directory)",
)
args = ap.parse_args()
repo_root = Path(args.repo_root).resolve() if args.repo_root.strip() else Path.cwd().resolve()
run(
verification_json=Path(args.verification_json).resolve(),
output_dir=Path(args.output_dir).resolve(),
repo_root=repo_root,
)
if __name__ == "__main__":
main()