algo/applications/collatz/collatz_k_scripts/collatz_extract_universal_clauses.py
ncantu bd529682bf collatz: add palier2p15/p16 artefacts and Sm refinement tooling
**Motivations:**
- Publish new Collatz palier runs and associated artefacts (C3 local descent, universal clauses, iteration protocol).
- Extend the scripts toolbox to generate/verify clauses and build refinement certificates over S_m.

**Root causes:**
- Universal clause witnesses were lifted to 2^(A+1) even when the witness is already fixed modulo the domain palier, leading to unstable or unnecessarily weak/ambiguous modulus choices.
- CSV palier inference in scission could mis-detect short column names (e.g. "m") by substring matching.

**Correctifs:**
- Lift D_exact/F witnesses to m_stable := max(m, A+1) in universal clause extraction and run reports.
- Make scission palier/m column detection exact-match to avoid false positives.
- Update C3 local descent verification/reporting to use strict fusion witness selection prioritizing lower modular stability and refreshed D/F metrics.
- Add a dedicated run report profile for per-palier universal clauses.

**Evolutions:**
- Add scripts for terminal clauses and minorated descent clauses over S_m, their deterministic verification, and multi-level refinement certificate building.
- Add modular tooling for register_K and incremental comparison of D_minor families.
- Add/update feature documentation for the new pipelines and generated reports.

**Pages affectées:**
- applications/collatz/collatz_k_scripts/README.md
- applications/collatz/collatz_k_scripts/collatz_extract_universal_clauses.py
- applications/collatz/collatz_k_scripts/collatz_generate_run_report.py
- applications/collatz/collatz_k_scripts/collatz_iterate_palier_protocol.py
- applications/collatz/collatz_k_scripts/collatz_scission.py
- applications/collatz/collatz_k_scripts/collatz_verify_c3_local_descent.py
- applications/collatz/collatz_k_scripts/collatz_verify_universal_clauses.py
- applications/collatz/collatz_k_scripts/*refinement*over_Sm*.py
- applications/collatz/collatz_k_scripts/collatz_generate_*clauses_over_Sm.py
- applications/collatz/collatz_k_scripts/collatz_verify_minorated_descent_clauses_over_Sm.py
- applications/collatz/collatz_k_scripts/collatz_build_register_K_modular.py
- applications/collatz/collatz_k_scripts/collatz_compare_dminor_families_incremental.py
- applications/collatz/*.md
- docs/features/*.md
- docs/artefacts/collatz/**
- docs/collatz_run_report_2026-03-09_*.md
2026-03-09 23:29:59 +01:00

397 lines
14 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
collatz_extract_universal_clauses.py
Option A (Lift(B12)-scoped): convert deterministic *local witnesses* from the C3
verification artefact into *universal clause candidates* (D/F) by lifting each
witnessed residue to the minimal modulus that fixes its prefix of valuations.
Inputs:
- docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json
Outputs (versionable, no transcripts):
- docs/artefacts/collatz/universal_clauses/clauses_universelles.json
- docs/artefacts/collatz/universal_clauses/clauses_universelles.md
Notes:
- D_exact and F witnesses are lifted to modulus 2^m_stable, where
m_stable := max(domain_palier, A+1), because the witness is already fixed modulo 2^domain_palier.
- D_brother witnesses are preserved as *local* dependencies (mate_exact) at the
domain palier; they are not asserted as standalone universal reductions here.
"""
from __future__ import annotations
import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Literal
from collatz_k_core import prefix_data
ClauseKind = Literal["D_exact", "F", "D_brother_local"]
def _read_json(path: Path) -> object:
return json.loads(path.read_text(encoding="utf-8", errors="strict"))
def _display_path(path: Path, repo_root: Path) -> str:
try:
rel = path.resolve().relative_to(repo_root.resolve())
return str(rel)
except ValueError:
return str(path)
@dataclass(frozen=True)
class ExtractedClause:
kind: ClauseKind
domain_palier: int
source_n: int
stable_modulus_power: int | None
stable_residue_mod_2p: int | None
# shared: prefix length + affine constants
k_or_t: int | None
A: int | None
C: int | None
word: tuple[int, ...] | None
# D
N0: int | None
# F
a: int | None
y: int | None
preimage_m: int | None
DeltaF: int | None
Nf: int | None
source_csv: str | None
# brother
mate_exact: int | None
def to_json(self) -> dict[str, object]:
return {
"kind": self.kind,
"domain_palier": self.domain_palier,
"source_n": self.source_n,
"stable_modulus_power": self.stable_modulus_power,
"stable_residue_mod_2p": self.stable_residue_mod_2p,
"k_or_t": self.k_or_t,
"A": self.A,
"C": self.C,
"word": list(self.word) if self.word is not None else None,
"N0": self.N0,
"a": self.a,
"y": self.y,
"preimage_m": self.preimage_m,
"DeltaF": self.DeltaF,
"Nf": self.Nf,
"source_csv": self.source_csv,
"mate_exact": self.mate_exact,
}
def _lift_to_stable_residue(
*,
n0: int,
base_palier: int,
target_palier: int,
prefix_len: int,
expected_word: tuple[int, ...],
) -> int:
"""
Deterministically lift a residue from 2^base_palier to 2^target_palier by choosing,
at each step, the unique child (r or r+2^m) that preserves the expected prefix word.
"""
if target_palier < base_palier:
raise ValueError("target_palier must be >= base_palier")
if prefix_len < 1:
raise ValueError("prefix_len must be >= 1")
r = n0
for m in range(base_palier, target_palier):
shift = 1 << m
c0 = r
c1 = r + shift
w0 = prefix_data(c0, prefix_len).word
w1 = prefix_data(c1, prefix_len).word
ok0 = w0 == expected_word
ok1 = w1 == expected_word
if ok0 and not ok1:
r = c0
elif ok1 and not ok0:
r = c1
else:
raise ValueError(
"Ambiguous lift (expected exactly one child to preserve the prefix word): "
f"m={m}, base_palier={base_palier}, target_palier={target_palier}, n0={n0}, ok0={ok0}, ok1={ok1}"
)
return r
def _format_int_list(values: list[int], per_line: int = 16) -> list[str]:
lines: list[str] = []
for i in range(0, len(values), per_line):
lines.append(", ".join(str(x) for x in values[i : i + per_line]))
return lines
def run(*, verification_json: Path, output_dir: Path, repo_root: Path) -> None:
obj = _read_json(verification_json)
if not isinstance(obj, dict):
raise ValueError("Invalid C3 verification JSON: expected object")
domain = obj.get("domain")
records = obj.get("records")
if not isinstance(domain, dict) or not isinstance(records, list):
raise ValueError("Invalid C3 verification JSON: missing domain/records")
palier = domain.get("palier")
if not isinstance(palier, int):
raise ValueError("Invalid C3 verification JSON: domain.palier must be int")
extracted: list[ExtractedClause] = []
for rec in records:
if not isinstance(rec, dict):
raise ValueError("Invalid C3 record: expected object")
n = rec.get("n")
kind = rec.get("kind")
if not isinstance(n, int) or not isinstance(kind, str):
raise ValueError("Invalid C3 record: missing n/kind")
if kind == "D_exact":
k = rec.get("k")
A_k = rec.get("A_k")
C_k = rec.get("C_k")
N0 = rec.get("N0")
if not all(isinstance(x, int) for x in [k, A_k, C_k, N0]):
raise ValueError(f"Invalid D_exact record for n={n}: missing k/A_k/C_k/N0")
pref = prefix_data(n, k)
if pref.A != A_k or pref.C != C_k:
raise ValueError(f"D_exact mismatch for n={n}: prefix_data differs from record")
m_stab = max(palier, A_k + 1)
r_stab = _lift_to_stable_residue(
n0=n,
base_palier=palier,
target_palier=m_stab,
prefix_len=k,
expected_word=pref.word,
)
pref_stab = prefix_data(r_stab, k)
if pref_stab.word != pref.word or pref_stab.A != pref.A or pref_stab.C != pref.C:
raise ValueError(f"D_exact lift check failed for n={n} at modulus 2^{m_stab}")
extracted.append(
ExtractedClause(
kind="D_exact",
domain_palier=palier,
source_n=n,
stable_modulus_power=m_stab,
stable_residue_mod_2p=r_stab,
k_or_t=k,
A=A_k,
C=C_k,
word=pref.word,
N0=N0,
a=None,
y=None,
preimage_m=None,
DeltaF=None,
Nf=None,
source_csv=None,
mate_exact=None,
)
)
continue
if kind == "F":
t = rec.get("t")
a = rec.get("a")
y = rec.get("y")
preimage_m = rec.get("preimage_m")
A_t = rec.get("A_t")
C_t = rec.get("C_t")
DeltaF = rec.get("DeltaF")
Nf = rec.get("Nf")
source_csv = rec.get("source_csv")
ints_ok = all(isinstance(x, int) for x in [t, a, y, preimage_m, A_t, C_t, DeltaF, Nf])
if not ints_ok or not isinstance(source_csv, str):
raise ValueError(f"Invalid F record for n={n}: missing fields")
pref = prefix_data(n, t)
if pref.A != A_t or pref.C != C_t or pref.y != y:
raise ValueError(f"F mismatch for n={n}: prefix_data differs from record")
m_stab = max(palier, A_t + 1)
r_stab = _lift_to_stable_residue(
n0=n,
base_palier=palier,
target_palier=m_stab,
prefix_len=t,
expected_word=pref.word,
)
pref_stab = prefix_data(r_stab, t)
if pref_stab.word != pref.word or pref_stab.A != pref.A or pref_stab.C != pref.C or pref_stab.y != pref.y:
raise ValueError(f"F lift check failed for n={n} at modulus 2^{m_stab}")
extracted.append(
ExtractedClause(
kind="F",
domain_palier=palier,
source_n=n,
stable_modulus_power=m_stab,
stable_residue_mod_2p=r_stab,
k_or_t=t,
A=A_t,
C=C_t,
word=pref.word,
N0=None,
a=a,
y=y,
preimage_m=preimage_m,
DeltaF=DeltaF,
Nf=Nf,
source_csv=source_csv,
mate_exact=None,
)
)
continue
if kind == "D_brother":
k = rec.get("k")
A_k = rec.get("A_k")
C_k = rec.get("C_k")
mate = rec.get("mate_exact")
if not all(isinstance(x, int) for x in [k, A_k, C_k, mate]):
raise ValueError(f"Invalid D_brother record for n={n}: missing fields")
pref = prefix_data(n, k)
if pref.A != A_k or pref.C != C_k:
raise ValueError(f"D_brother mismatch for n={n}: prefix_data differs from record")
extracted.append(
ExtractedClause(
kind="D_brother_local",
domain_palier=palier,
source_n=n,
stable_modulus_power=None,
stable_residue_mod_2p=None,
k_or_t=k,
A=A_k,
C=C_k,
word=pref.word,
N0=None,
a=None,
y=None,
preimage_m=None,
DeltaF=None,
Nf=None,
source_csv=None,
mate_exact=mate,
)
)
continue
raise ValueError(f"Unknown record kind in C3 verification JSON: {kind}")
extracted = sorted(extracted, key=lambda c: (c.kind, c.source_n))
max_stab = max([c.stable_modulus_power for c in extracted if c.stable_modulus_power is not None], default=None)
out_json = output_dir / "clauses_universelles.json"
out_md = output_dir / "clauses_universelles.md"
output_dir.mkdir(parents=True, exist_ok=True)
summary = {
"inputs": {
"verification_c3_local_descent_json": _display_path(verification_json, repo_root),
},
"domain": {"palier": palier, "max_stable_modulus_power": max_stab},
"counts": {
"total": len(extracted),
"D_exact": sum(1 for c in extracted if c.kind == "D_exact"),
"F": sum(1 for c in extracted if c.kind == "F"),
"D_brother_local": sum(1 for c in extracted if c.kind == "D_brother_local"),
},
"clauses": [c.to_json() for c in extracted],
}
out_json.write_text(json.dumps(summary, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
# Markdown output (short, human-checkable)
lines: list[str] = []
lines.append("**Auteur** : Équipe 4NK")
lines.append("")
lines.append("# Clauses universelles extraites — Option A (Lift(B12))")
lines.append("")
lines.append("## Entrée")
lines.append("")
lines.append(f"- C3 vérification JSON : `{_display_path(verification_json, repo_root)}`")
lines.append("")
lines.append("## Domaine")
lines.append("")
lines.append(f"- palier (domaine des témoins) : 2^{palier}")
if max_stab is not None:
lines.append(f"- max(2^m_stable) observé : 2^{max_stab}")
lines.append("")
lines.append("## Compteurs")
lines.append("")
lines.append(f"- total : {summary['counts']['total']}")
lines.append(f"- D_exact : {summary['counts']['D_exact']}")
lines.append(f"- F : {summary['counts']['F']}")
lines.append(f"- D_brother_local : {summary['counts']['D_brother_local']}")
lines.append("")
lines.append("## Table (extrait)")
lines.append("")
header = ["kind", "source_n", "m_stable", "residue_mod_2^m", "k/t", "A", "N0/Nf", "mate_exact"]
lines.append("| " + " | ".join(header) + " |")
lines.append("| " + " | ".join(["---"] * len(header)) + " |")
for c in extracted:
m_st = "" if c.stable_modulus_power is None else str(c.stable_modulus_power)
r_st = "" if c.stable_residue_mod_2p is None else str(c.stable_residue_mod_2p)
kt = "" if c.k_or_t is None else str(c.k_or_t)
A = "" if c.A is None else str(c.A)
if c.kind == "D_exact":
thr = "" if c.N0 is None else f"N0={c.N0}"
elif c.kind == "F":
thr = "" if c.Nf is None else f"Nf={c.Nf}"
else:
thr = ""
mate = "" if c.mate_exact is None else str(c.mate_exact)
lines.append(f"| {c.kind} | {c.source_n} | {m_st} | {r_st} | {kt} | {A} | {thr} | {mate} |")
lines.append("")
lines.append("## Notes")
lines.append("")
lines.append(
"- `D_exact` et `F` sont relevées au module \\(2^{m_{stable}}\\) avec "
"\\(m_{stable}=\\max(m, A+1)\\), où \\(m\\) est le palier du domaine, afin de figer le mot de valuations du témoin."
)
lines.append("- `D_brother_local` encode une dépendance locale (palier du domaine) vers un `mate_exact` ; elle nest pas utilisée ici comme clause universelle autonome.")
lines.append("")
out_md.write_text("\n".join(lines) + "\n", encoding="utf-8")
def main() -> None:
ap = argparse.ArgumentParser(description="Extract universal clause candidates from deterministic C3 witnesses (Option A)")
ap.add_argument(
"--verification-json",
default="docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json",
help="Path to deterministic C3 verification JSON",
)
ap.add_argument(
"--output-dir",
default="docs/artefacts/collatz/universal_clauses",
help="Output directory for extracted clause artefacts (JSON + MD)",
)
ap.add_argument(
"--repo-root",
default="",
help="Repository root used to display relative paths in outputs (defaults to current working directory)",
)
args = ap.parse_args()
repo_root = Path(args.repo_root).resolve() if args.repo_root.strip() else Path.cwd().resolve()
run(
verification_json=Path(args.verification_json).resolve(),
output_dir=Path(args.output_dir).resolve(),
repo_root=repo_root,
)
if __name__ == "__main__":
main()