algo/applications/collatz/collatz_k_scripts/collatz_verify_universal_clauses.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
collatz_verify_universal_clauses.py

Deterministic verifier for the extracted clause artefact produced by
collatz_extract_universal_clauses.py.

Scope (Option A):
- check internal arithmetic consistency for each extracted D_exact / F clause
  against the originating C3 witness record
- check deterministic lifting (stable residue) preserves the prefix word and
  affine constants on the canonical representative
- check D_brother_local records point to an existing D_exact mate and match the
  "sister" relation at the domain palier (xor 2^(m-1))

Outputs:
- docs/artefacts/collatz/universal_clauses/verification_universal_clauses.json
- docs/artefacts/collatz/universal_clauses/verification_universal_clauses.md
"""

from __future__ import annotations

import argparse
import json
from pathlib import Path

from collatz_k_core import (
    N0_D,
    Nf_F,
    U_step,
    delta_F,
    fusion_choice_a,
    prefix_data,
    preimage_m,
)


def _read_json(path: Path) -> object:
    return json.loads(path.read_text(encoding="utf-8", errors="strict"))


def _display_path(path: Path, repo_root: Path) -> str:
    try:
        rel = path.resolve().relative_to(repo_root.resolve())
        return str(rel)
    except ValueError:
        return str(path)


def _req_int(obj: dict[str, object], key: str) -> int:
    v = obj.get(key)
    if not isinstance(v, int):
        raise ValueError(f"Expected int for {key}")
    return v


def _opt_int(obj: dict[str, object], key: str) -> int | None:
    v = obj.get(key)
    if v is None:
        return None
    if not isinstance(v, int):
        raise ValueError(f"Expected int|null for {key}")
    return v


def _opt_str(obj: dict[str, object], key: str) -> str | None:
    v = obj.get(key)
    if v is None:
        return None
    if not isinstance(v, str):
        raise ValueError(f"Expected str|null for {key}")
    return v


def _opt_word(obj: dict[str, object], key: str) -> tuple[int, ...] | None:
    v = obj.get(key)
    if v is None:
        return None
    if not isinstance(v, list) or not all(isinstance(x, int) for x in v):
        raise ValueError(f"Expected list[int]|null for {key}")
    return tuple(v)


def run(
    *,
    verification_json: Path,
    clauses_json: Path,
    output_dir: Path,
    repo_root: Path,
) -> None:
    vobj = _read_json(verification_json)
    if not isinstance(vobj, dict):
        raise ValueError("Invalid C3 verification JSON: expected object")
    domain = vobj.get("domain")
    records = vobj.get("records")
    if not isinstance(domain, dict) or not isinstance(records, list):
        raise ValueError("Invalid C3 verification JSON: missing domain/records")
    palier = domain.get("palier")
    if not isinstance(palier, int):
        raise ValueError("Invalid C3 verification JSON: domain.palier must be int")

    cobj = _read_json(clauses_json)
    if not isinstance(cobj, dict):
        raise ValueError("Invalid clauses JSON: expected object")
    clauses = cobj.get("clauses")
    if not isinstance(clauses, list) or not all(isinstance(x, dict) for x in clauses):
        raise ValueError("Invalid clauses JSON: missing clauses list")

    by_source_n: dict[int, dict[str, object]] = {}
    for c in clauses:
        sn = c.get("source_n")
        if not isinstance(sn, int):
            raise ValueError("Invalid clause: missing source_n")
        if sn in by_source_n:
            raise ValueError(f"Duplicate source_n in clauses JSON: {sn}")
        by_source_n[sn] = c

    by_record_n: dict[int, dict[str, object]] = {}
    for r in records:
        if not isinstance(r, dict):
            raise ValueError("Invalid C3 record: expected object")
        n = r.get("n")
        if not isinstance(n, int):
            raise ValueError("Invalid C3 record: missing n")
        if n in by_record_n:
            raise ValueError(f"Duplicate n in C3 records: {n}")
        by_record_n[n] = r

    missing = sorted(set(by_record_n.keys()) - set(by_source_n.keys()))
    if missing:
        raise ValueError(f"Missing extracted clauses for {len(missing)} records (e.g. {missing[:20]})")

    ok_records = 0
    kind_counts: dict[str, int] = {"D_exact": 0, "F": 0, "D_brother_local": 0}
    max_stable_mod: int | None = None

    for n, r in by_record_n.items():
        c = by_source_n[n]
        kind = c.get("kind")
        if not isinstance(kind, str):
            raise ValueError(f"Invalid clause for n={n}: missing kind")
        if kind not in kind_counts:
            raise ValueError(f"Unknown clause kind for n={n}: {kind}")
        kind_counts[kind] += 1

        domain_palier = _req_int(c, "domain_palier")
        if domain_palier != palier:
            raise ValueError(f"Clause domain_palier mismatch for n={n}: {domain_palier} != {palier}")

        word = _opt_word(c, "word")
        A = _opt_int(c, "A")
        C = _opt_int(c, "C")
        k_or_t = _opt_int(c, "k_or_t")
        m_stab = _opt_int(c, "stable_modulus_power")
        r_stab = _opt_int(c, "stable_residue_mod_2p")
        if m_stab is not None:
            max_stable_mod = m_stab if max_stable_mod is None else max(max_stable_mod, m_stab)

        if kind == "D_exact":
            if r.get("kind") != "D_exact":
                raise ValueError(f"Record kind mismatch for n={n}: expected D_exact")
            k = _req_int(r, "k")
            A_k = _req_int(r, "A_k")
            C_k = _req_int(r, "C_k")
            N0 = _req_int(r, "N0")
            if k_or_t != k or A != A_k or C != C_k:
                raise ValueError(f"D_exact clause params mismatch for n={n}")
            if m_stab is None or r_stab is None or word is None:
                raise ValueError(f"D_exact clause missing stability fields for n={n}")
            if m_stab != A_k + 1:
                raise ValueError(f"D_exact stable modulus mismatch for n={n}: {m_stab} != {A_k+1}")
            pref = prefix_data(n, k)
            if pref.word != word or pref.A != A_k or pref.C != C_k:
                raise ValueError(f"D_exact prefix mismatch for n={n}")
            pref_stab = prefix_data(r_stab, k)
            if pref_stab.word != word or pref_stab.A != A_k or pref_stab.C != C_k:
                raise ValueError(f"D_exact lift mismatch for n={n}")
            N0_calc = N0_D(C_k, A_k, k)
            if N0_calc != N0:
                raise ValueError(f"D_exact N0 mismatch for n={n}: {N0_calc} != {N0}")
            if n >= N0:
                next_n = prefix_data(n, k).y
                if not next_n < n:
                    raise ValueError(f"D_exact descent failed on representative for n={n}: U^k(n)={next_n} not < n")
            ok_records += 1
            continue

        if kind == "F":
            if r.get("kind") != "F":
                raise ValueError(f"Record kind mismatch for n={n}: expected F")
            t = _req_int(r, "t")
            a = _req_int(r, "a")
            y = _req_int(r, "y")
            pre_m = _req_int(r, "preimage_m")
            A_t = _req_int(r, "A_t")
            C_t = _req_int(r, "C_t")
            dF = _req_int(r, "DeltaF")
            Nf = _req_int(r, "Nf")
            src = r.get("source_csv")
            if not isinstance(src, str):
                raise ValueError(f"F record missing source_csv for n={n}")

            a2 = _opt_int(c, "a")
            y2 = _opt_int(c, "y")
            pre2 = _opt_int(c, "preimage_m")
            dF2 = _opt_int(c, "DeltaF")
            Nf2 = _opt_int(c, "Nf")
            src2 = _opt_str(c, "source_csv")
            if (k_or_t, A, C, a2, y2, pre2, dF2, Nf2, src2) != (t, A_t, C_t, a, y, pre_m, dF, Nf, src):
                raise ValueError(f"F clause params mismatch for n={n}")
            if m_stab is None or r_stab is None or word is None:
                raise ValueError(f"F clause missing stability fields for n={n}")
            if m_stab != A_t + 1:
                raise ValueError(f"F stable modulus mismatch for n={n}: {m_stab} != {A_t+1}")

            pref = prefix_data(n, t)
            if pref.word != word or pref.A != A_t or pref.C != C_t or pref.y != y:
                raise ValueError(f"F prefix mismatch for n={n}")
            pref_stab = prefix_data(r_stab, t)
            if pref_stab.word != word or pref_stab.A != A_t or pref_stab.C != C_t or pref_stab.y != y:
                raise ValueError(f"F lift mismatch for n={n}")

            a_calc = fusion_choice_a(y)
            if a_calc is None or a_calc != a:
                raise ValueError(f"F a mismatch for n={n}: fusion_choice_a(y)={a_calc} != {a}")
            dF_calc = delta_F(A_t, t, a)
            if dF_calc != dF:
                raise ValueError(f"F DeltaF mismatch for n={n}: {dF_calc} != {dF}")
            Nf_calc = Nf_F(C_t, A_t, t, a)
            if Nf_calc != Nf:
                raise ValueError(f"F Nf mismatch for n={n}: {Nf_calc} != {Nf}")
            pre_calc = preimage_m(y, a)
            if pre_calc != pre_m:
                raise ValueError(f"F preimage mismatch for n={n}: {pre_calc} != {pre_m}")
            um, _ = U_step(pre_m)
            if um != y:
                raise ValueError(f"F witness failed for n={n}: U(preimage_m)={um} != y={y}")
            if n >= Nf and not (pre_m < n):
                raise ValueError(f"F m<n failed on representative for n={n}: m={pre_m} >= n")
            ok_records += 1
            continue

        if kind == "D_brother_local":
            if r.get("kind") != "D_brother":
                raise ValueError(f"Record kind mismatch for n={n}: expected D_brother")
            mate = _req_int(r, "mate_exact")
            mate2 = _opt_int(c, "mate_exact")
            if mate2 != mate:
                raise ValueError(f"D_brother_local mate mismatch for n={n}")
            shift = 1 << (palier - 1)
            if (n ^ shift) != mate:
                raise ValueError(f"D_brother_local mate relation mismatch for n={n}: n^2^(m-1) != mate_exact")
            mate_rec = by_record_n.get(mate)
            if not isinstance(mate_rec, dict) or mate_rec.get("kind") != "D_exact":
                raise ValueError(f"D_brother_local mate is not a D_exact record for n={n}: mate={mate}")
            ok_records += 1
            continue

        raise ValueError(f"Unhandled kind for n={n}: {kind}")

    output_dir.mkdir(parents=True, exist_ok=True)
    out_json = output_dir / "verification_universal_clauses.json"
    out_md = output_dir / "verification_universal_clauses.md"

    summary = {
        "inputs": {
            "verification_c3_local_descent_json": _display_path(verification_json, repo_root),
            "clauses_universelles_json": _display_path(clauses_json, repo_root),
        },
        "domain": {"palier": palier, "max_stable_modulus_power": max_stable_mod},
        "counts": {"ok_records": ok_records, **kind_counts},
        "ok": True,
    }
    out_json.write_text(json.dumps(summary, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")

    lines: list[str] = []
    lines.append("**Auteur** : Équipe 4NK")
    lines.append("")
    lines.append("# Vérification déterministe — clauses universelles extraites (Option A)")
    lines.append("")
    lines.append("## Entrées")
    lines.append("")
    lines.append(f"- C3 vérification JSON : `{_display_path(verification_json, repo_root)}`")
    lines.append(f"- clauses JSON : `{_display_path(clauses_json, repo_root)}`")
    lines.append("")
    lines.append("## Résultat")
    lines.append("")
    lines.append(f"- ok : {summary['ok']}")
    lines.append(f"- palier (domaine) : 2^{palier}")
    if max_stable_mod is not None:
        lines.append(f"- max(2^m_stable) observé : 2^{max_stable_mod}")
    lines.append("")
    lines.append("## Compteurs")
    lines.append("")
    lines.append(f"- ok_records : {ok_records}")
    lines.append(f"- D_exact : {kind_counts['D_exact']}")
    lines.append(f"- F : {kind_counts['F']}")
    lines.append(f"- D_brother_local : {kind_counts['D_brother_local']}")
    lines.append("")
    out_md.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> None:
    ap = argparse.ArgumentParser(description="Verify extracted universal clauses (Option A)")
    ap.add_argument(
        "--verification-json",
        default="docs/artefacts/collatz/c3_local_descent/verification_c3_local_descent.json",
        help="Path to deterministic C3 verification JSON",
    )
    ap.add_argument(
        "--clauses-json",
        default="docs/artefacts/collatz/universal_clauses/clauses_universelles.json",
        help="Path to extracted clauses JSON",
    )
    ap.add_argument(
        "--output-dir",
        default="docs/artefacts/collatz/universal_clauses",
        help="Output directory for verification artefacts (JSON + MD)",
    )
    ap.add_argument(
        "--repo-root",
        default="",
        help="Repository root used to display relative paths in outputs (defaults to current working directory)",
    )
    args = ap.parse_args()

    repo_root = Path(args.repo_root).resolve() if args.repo_root.strip() else Path.cwd().resolve()
    run(
        verification_json=Path(args.verification_json).resolve(),
        clauses_json=Path(args.clauses_json).resolve(),
        output_dir=Path(args.output_dir).resolve(),
        repo_root=repo_root,
    )


if __name__ == "__main__":
    main()