# -*- coding: utf-8 -*-
"""
collatz_k_pipeline.py

Main pipeline (reproduction of the "after fusion" audits and of the D16/D17
packets).

Expected inputs:
- audit_60_etats_B12_mod4096_horizon7.json
- complétion_minorée_m15_vers_m16.md
- candidats_D10_palier2p17.md

Outputs:
- Markdown audits
- exhaustive CSV files
- Markdown exhaustive lists (```csv``` block)
"""

from __future__ import annotations

from pathlib import Path
import csv
import json
import re
import tempfile
from collections import Counter  # kept: file-level import, currently unused here
from typing import List, Set, Dict, Tuple, Iterable

from collatz_k_core import A_k, prefix_data, N0_D
from collatz_k_utils import parse_markdown_table_to_rows, write_text
from collatz_k_fusion import build_fusion_clauses


def load_state_map_60(audit60_json_path: str) -> Tuple[Dict[int, int], Dict[int, str]]:
    """Load the two lookup tables stored in the "audit 60 states" JSON file.

    Returns:
        A pair ``(res_to_state, state_mot7)`` where ``res_to_state`` maps each
        key of ``data["residue_to_state"]`` (as int) to its state id (as int),
        and ``state_mot7`` maps each row's ``"État"`` (as int) to the row's
        ``"Mot (a0..a6)"`` value.
    """
    data = json.loads(Path(audit60_json_path).read_text(encoding="utf-8"))
    res_to_state = {int(k): int(v) for k, v in data["residue_to_state"].items()}
    state_mot7 = {int(row["État"]): row["Mot (a0..a6)"] for row in data["state_table"]}
    return res_to_state, state_mot7


def _lift_parents_both_to_R17(completion_m15_to_m16_md: str) -> Set[int]:
    """Parse the "Parents « both »" section and lift it to residues mod 2^17.

    Every integer found after the section heading is collected (B15); each is
    then duplicated with +2^15 (R16) and the result duplicated with +2^16 (R17).

    Raises:
        ValueError: if the section heading is not found in the Markdown file.
    """
    text = Path(completion_m15_to_m16_md).read_text(encoding="utf-8")
    m = re.search(r"### Parents « both ».*?\n(.*)\Z", text, flags=re.S)
    if not m:
        raise ValueError("Section 'Parents both' introuvable")
    B15 = set(map(int, re.findall(r"\b\d+\b", m.group(1))))
    shift15 = 1 << 15
    shift16 = 1 << 16
    R16 = B15 | {p + shift15 for p in B15}
    return R16 | {x + shift16 for x in R16}


def build_R17_from_completion(completion_m15_to_m16_md: str) -> List[int]:
    """Build R17 from completion (Parents both) without D10 subtraction.

    Returns the sorted list of lifted residues.

    Raises:
        ValueError: if the "Parents « both »" section is missing.
    """
    return sorted(_lift_parents_both_to_R17(completion_m15_to_m16_md))


def rebuild_R17_after_full_D10(completion_m15_to_m16_md: str, candidats_D10_md: str) -> List[int]:
    """Build R17 and remove everything covered by the D10 candidates.

    Two covers are subtracted in turn:

    1. every ``low``/``high`` pair read from the first two columns of the D10
       Markdown table (the first two parsed rows are skipped);
    2. every remaining residue ``x`` with ``A_k(x, 10) == 16`` together with
       ``x - 2^16``.

    Returns the sorted list of residues that survive both subtractions.

    Raises:
        ValueError: if the "Parents « both »" section is missing.
    """
    shift16 = 1 << 16
    R17 = _lift_parents_both_to_R17(completion_m15_to_m16_md)

    rows = parse_markdown_table_to_rows(candidats_D10_md)
    cover175: Set[int] = set()
    # rows[0:2] are skipped (presumably table header and separator -- confirm
    # against parse_markdown_table_to_rows).
    for parts in rows[2:]:
        cover175.add(int(parts[0]))
        cover175.add(int(parts[1]))
    R17_after_175 = R17 - cover175

    cover171: Set[int] = set()
    for x in R17_after_175:
        if A_k(x, 10) == 16:
            cover171.add(x)
            # NOTE(review): other stages use ``n ^ shift`` for the sister; here
            # the sister is ``x - 2^16``, which only lands in R17 when x is in
            # the upper half. Kept as in the original -- confirm intent.
            cover171.add(x - shift16)
    return sorted(R17_after_175 - cover171)


def lift_set(residues: Iterable[int], shift: int, count: int) -> List[int]:
    """Return ``r + j * shift`` for every residue ``r`` and ``j`` in ``range(count)``.

    The output keeps the input order, with the ``count`` lifts of each residue
    adjacent to each other.
    """
    return [r + j * shift for r in residues for j in range(count)]


def csv_to_md_list(csv_path: str, md_path: str, title: str, intro: str) -> None:
    """Wrap the raw content of a CSV file inside a Markdown document.

    The Markdown file gets a title, an "Introduction" section containing
    ``intro`` (stripped), and a "Liste exhaustive" section holding the CSV
    lines verbatim inside a ```csv fenced block.
    """
    p_csv = Path(csv_path)
    p_md = Path(md_path)
    with p_csv.open("r", encoding="utf-8") as fin, p_md.open("w", encoding="utf-8") as fout:
        fout.write(f"# {title}\n\n")
        fout.write("## Introduction\n\n")
        fout.write(intro.strip() + "\n\n")
        fout.write("## Liste exhaustive\n\n")
        fout.write("```csv\n")
        last = ""
        for line in fin:
            fout.write(line)
            last = line
        # Make sure the closing fence starts on its own line.
        if last and not last.endswith("\n"):
            fout.write("\n")
        fout.write("```\n")


def _drop_candidates_and_sisters(
    lifted: List[int], horizon: int, target: int, sister_shift: int
) -> List[int]:
    """Remove from ``lifted`` the residues with ``A_k(n, horizon) == target`` and their sisters.

    The sister of ``n`` is ``n ^ sister_shift``. Input order is preserved.
    """
    cand = {n for n in lifted if A_k(n, horizon) == target}
    cover = cand | {n ^ sister_shift for n in cand}
    return [n for n in lifted if n not in cover]


def _load_hitset(csv_path: str) -> Set[int]:
    """Read the ``classe_mod_2^m`` column of a fusion CSV as a set of ints.

    An empty file (size 0) yields an empty set.
    """
    hs: Set[int] = set()
    p = Path(csv_path)
    if p.stat().st_size == 0:
        return hs
    with p.open("r", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            hs.add(int(row["classe_mod_2^m"]))
    return hs


def run_after_fusion_D16_D17(
    audit60_json: str,
    completion_m15_to_m16_md: str,
    candidats_D10_md: str,
    out_dir: str,
) -> None:
    """Run the full chain D10 -> D15, fusion (t=11, 12, 14), then D16 and D17.

    Files written under ``out_dir``:
    - ``fusion_t{11,12,14}_palier2p25.{md,csv}`` (via ``build_fusion_clauses``)
    - ``candidats_D16_apres_fusion_palier2p27.csv`` plus its audit and
      exhaustive-list Markdown files
    - ``candidats_D17_apres_fusion_palier2p28.csv`` plus its audit and
      exhaustive-list Markdown files
    - ``noyaux/noyau_post_D17.json`` with the residual set at palier 28

    Raises:
        ValueError: if the completion Markdown lacks the "Parents both" section.
        KeyError: if a candidate's residue mod 4096 is absent from the state map.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    res_to_state, state_mot7 = load_state_map_60(audit60_json)

    # R17 after the complete D10 subtraction
    R17_after_full = rebuild_R17_after_full_D10(completion_m15_to_m16_md, candidats_D10_md)

    # D11 (2^19)
    R19 = lift_set(R17_after_full, 1 << 17, 4)
    R19_after = _drop_candidates_and_sisters(R19, 11, 18, 1 << 18)

    # D12 (2^21)
    R21 = lift_set(R19_after, 1 << 19, 4)
    R21_after = _drop_candidates_and_sisters(R21, 12, 20, 1 << 20)

    # D13 (2^22)
    shift21 = 1 << 21
    R22 = list(R21_after) + [n + shift21 for n in R21_after]
    R22_after = _drop_candidates_and_sisters(R22, 13, 21, shift21)

    # D14 (2^24)
    R24 = lift_set(R22_after, 1 << 22, 4)
    R24_after = _drop_candidates_and_sisters(R24, 14, 23, 1 << 23)

    # D15 (2^25)
    shift24 = 1 << 24
    R25 = list(R24_after) + [n + shift24 for n in R24_after]
    R25_after = sorted(_drop_candidates_and_sisters(R25, 15, 24, shift24))

    # Fusion (t=11,12,14) at palier 2^25
    md_f11 = str(out / "fusion_t11_palier2p25.md")
    csv_f11 = str(out / "fusion_t11_palier2p25.csv")
    md_f12 = str(out / "fusion_t12_palier2p25.md")
    csv_f12 = str(out / "fusion_t12_palier2p25.csv")
    md_f14 = str(out / "fusion_t14_palier2p25.md")
    csv_f14 = str(out / "fusion_t14_palier2p25.csv")

    build_fusion_clauses(R25_after, 11, res_to_state, state_mot7, md_f11, csv_f11, 25)
    build_fusion_clauses(R25_after, 12, res_to_state, state_mot7, md_f12, csv_f12, 25)
    build_fusion_clauses(R25_after, 14, res_to_state, state_mot7, md_f14, csv_f14, 25)

    unionF = _load_hitset(csv_f11) | _load_hitset(csv_f12) | _load_hitset(csv_f14)
    R25_after_F = [n for n in R25_after if n not in unionF]

    # D16 after fusion (2^27)
    shift25 = 1 << 25
    shift26 = 1 << 26
    k16 = 16
    A16_target = 26

    lifted_27 = lift_set(R25_after_F, shift25, 4)
    cand_D16: Set[int] = {n for n in lifted_27 if A_k(n, k16) == A16_target}
    cover_D16 = cand_D16 | {n ^ shift26 for n in cand_D16}

    delta16 = (1 << 26) - (3**16)
    csv_d16 = str(out / "candidats_D16_apres_fusion_palier2p27.csv")
    with Path(csv_d16).open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["classe_mod_2^27", "sœur", "mot_a0..a15", "A16", "C16", "delta", "N0", "U^16(n)", "etat_id", "base_mod_4096"])
        for n in sorted(cand_D16):
            pref = prefix_data(n, 16)
            N0 = N0_D(pref.C, pref.A, 16)
            # Strict lookup: an unknown residue mod 4096 raises KeyError.
            w.writerow([n, n ^ shift26, " ".join(map(str, pref.word)), pref.A, pref.C, delta16, N0, pref.y, res_to_state[n % 4096], n % 4096])

    # D16 audit (minimal): largest A16 among the lifts left uncovered, floor 0
    residual_27 = [n for n in lifted_27 if n not in cover_D16]
    maxA16_after = max(0, max((A_k(n, k16) for n in residual_27), default=0))

    md_d16 = str(out / "candidats_D16_apres_fusion_palier2p27_et_impact.md")
    write_text(
        md_d16,
        "\n".join(
            [
                "# Paquet D16 minimal après fusion (palier 2^27)",
                "",
                "## Introduction",
                "",
                "Audit D16 sur le noyau au palier 2^25 après fusion F(11)∪F(12)∪F(14).",
                "",
                "## Tailles",
                "",
                f"- noyau après D15 : {len(R25_after)}",
                f"- noyau après fusion : {len(R25_after_F)}",
                f"- relèvements 2^27 : {4 * len(R25_after_F)}",
                f"- candidats D16 : {len(cand_D16)}",
                f"- couverture (avec sœurs) : {len(cover_D16)}",
                f"- invariant max A16 après : {maxA16_after}",
                "",
                "## CSV exhaustif",
                "",
                f"- {Path(csv_d16).name}",
                "",
            ]
        )
        + "\n",
    )

    csv_to_md_list(
        csv_d16,
        str(out / "candidats_D16_apres_fusion_palier2p27_liste_exhaustive.md"),
        "Liste exhaustive des clauses D16 après fusion (palier 2^27)",
        "Liste exhaustive (format CSV copiable).",
    )

    # D17 after fusion and D16 (2^28)
    shift27 = 1 << 27
    k17 = 17
    A17_target = 27

    # A pair (low, low + 2^27) is a candidate when either member hits the target.
    pair_low_set: Set[int] = {
        low
        for low in residual_27
        if A_k(low, k17) == A17_target or A_k(low + shift27, k17) == A17_target
    }

    delta17 = (1 << 27) - (3**17)
    csv_d17 = str(out / "candidats_D17_apres_fusion_palier2p28.csv")
    with Path(csv_d17).open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["classe_mod_2^28", "sœur", "côté", "mot_a0..a16", "A17", "C17", "delta", "N0", "U^17(n)", "etat_id", "base_mod_4096"])
        for low in sorted(pair_low_set):
            high = low + shift27
            # The representative is the low member when it hits the target itself.
            rep = low if A_k(low, k17) == A17_target else high
            side = "basse" if rep == low else "haute"
            pref = prefix_data(rep, 17)
            N0 = N0_D(pref.C, pref.A, 17)
            w.writerow([rep, rep ^ shift27, side, " ".join(map(str, pref.word)), pref.A, pref.C, delta17, N0, pref.y, res_to_state[rep % 4096], rep % 4096])

    md_d17 = str(out / "candidats_D17_apres_fusion_palier2p28_et_impact.md")
    write_text(
        md_d17,
        "\n".join(
            [
                "# Paquet D17 minimal après fusion (palier 2^28)",
                "",
                "## Introduction",
                "",
                "Audit D17 sur le domaine résiduel après fusion et après D16.",
                "",
                "## Tailles",
                "",
                f"- paires candidates D17 : {len(pair_low_set)}",
                "",
                "## CSV exhaustif",
                "",
                f"- {Path(csv_d17).name}",
                "",
            ]
        )
        + "\n",
    )

    csv_to_md_list(
        csv_d17,
        str(out / "candidats_D17_apres_fusion_palier2p28_liste_exhaustive.md"),
        "Liste exhaustive des clauses D17 après fusion (palier 2^28)",
        "Liste exhaustive (format CSV copiable).",
    )

    # Residual set at palier 28: both lifts of every uncovered 2^27 residue,
    # minus both members of every D17 candidate pair.
    cover_D17 = pair_low_set | {low + shift27 for low in pair_low_set}
    all_lifted_28: Set[int] = set(residual_27) | {low + shift27 for low in residual_27}
    R28_after_D17 = sorted(all_lifted_28 - cover_D17)

    noyau_path = out / "noyaux" / "noyau_post_D17.json"
    noyau_path.parent.mkdir(parents=True, exist_ok=True)
    noyau_path.write_text(
        json.dumps({"noyau": R28_after_D17, "palier": 28}),
        encoding="utf-8",
    )


def run_extended_D18_to_D21(
    audit60_json: str,
    out_dir: str,
    noyau_post_D17_path: str | None = None,
    resume_from: str | None = None,
) -> None:
    """Continue from D17 to D18, D19, F15, D20, F16, D21.

    Args:
        audit60_json: path of the "audit 60 states" JSON file.
        out_dir: output directory; ``noyaux``, ``candidats`` and
            ``certificats`` sub-directories are created inside it.
        noyau_post_D17_path: starting noyau JSON; defaults to
            ``<out_dir>/noyaux/noyau_post_D17.json``.
        resume_from: ``"D20"`` skips D18, D19 and F15 and starts from
            ``<out_dir>/noyaux/noyau_post_F15.json``. Any other value runs
            every step.

    Raises:
        FileNotFoundError: if the required starting noyau file is missing.
    """
    from collatz_fusion_pipeline import run_fusion_pipeline
    from collatz_scission import run_scission
    from collatz_update_noyau import run_update_noyau

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    (out / "noyaux").mkdir(exist_ok=True)
    (out / "candidats").mkdir(exist_ok=True)
    (out / "certificats").mkdir(exist_ok=True)

    if resume_from == "D20":
        prev_noyau = str(out / "noyaux" / "noyau_post_F15.json")
        if not Path(prev_noyau).exists():
            raise FileNotFoundError(f"Resume D20 requires {prev_noyau}")
    else:
        noyau_d17 = noyau_post_D17_path or str(out / "noyaux" / "noyau_post_D17.json")
        if not Path(noyau_d17).exists():
            raise FileNotFoundError(f"Run full pipeline first to produce {noyau_d17}")
        prev_noyau = noyau_d17

        # D18 then D19, each feeding its residual noyau to the next step
        for horizon, palier, valeur, label in [(18, 30, 29, "D18"), (19, 32, 31, "D19")]:
            next_noyau = str(out / "noyaux" / f"noyau_post_{label}.json")
            run_single_palier(
                horizon=horizon,
                palier=palier,
                valeur=valeur,
                input_noyau=prev_noyau,
                output_csv=str(out / "candidats" / f"candidats_{label}_palier2p{palier}.csv"),
                audit60_json=audit60_json,
                output_noyau_path=next_noyau,
            )
            prev_noyau = next_noyau

        # F15 at palier 2^32
        csv_f15 = str(out / "candidats" / "candidats_F15_palier2p32.csv")
        cert_f15 = str(out / "certificats" / "certificat_F15_palier2p32.json")
        run_fusion_pipeline(
            horizons=[15],
            palier=32,
            input_noyau=prev_noyau,
            output_csv=csv_f15,
            audit60_json=audit60_json,
            cible="critique",
        )
        run_scission(csv_f15, cert_f15)
        noyau_f15 = str(out / "noyaux" / "noyau_post_F15.json")
        run_update_noyau(cert_f15, prev_noyau, noyau_f15)
        prev_noyau = noyau_f15

    # D20 at palier 2^34: reuse an existing noyau, else rebuild it from an
    # existing candidates CSV, else compute the step from scratch.
    csv_d20 = str(out / "candidats" / "candidats_D20_palier2p34.csv")
    noyau_d20 = str(out / "noyaux" / "noyau_post_D20.json")
    if Path(noyau_d20).exists():
        print(f"Using existing {noyau_d20}")
    elif Path(csv_d20).exists():
        from collatz_recover_noyau import run_recover

        print("Recovering noyau_post_D20 from existing candidats CSV...")
        run_recover(
            previous_noyau=prev_noyau,
            candidats_csv=csv_d20,
            palier=34,
            output=noyau_d20,
            input_palier=32,
        )
    else:
        run_single_palier(
            horizon=20,
            palier=34,
            valeur=32,
            input_noyau=prev_noyau,
            output_csv=csv_d20,
            audit60_json=audit60_json,
            output_noyau_path=noyau_d20,
        )
    prev_noyau = noyau_d20

    # F16 at palier 2^35
    csv_f16 = str(out / "candidats" / "candidats_F16_palier2p35.csv")
    cert_f16 = str(out / "certificats" / "certificat_F16_palier2p35.json")
    run_fusion_pipeline(
        horizons=[16],
        palier=35,
        input_noyau=prev_noyau,
        output_csv=csv_f16,
        audit60_json=audit60_json,
        modulo=9,
    )
    run_scission(csv_f16, cert_f16)
    noyau_f16 = str(out / "noyaux" / "noyau_post_F16.json")
    run_update_noyau(cert_f16, prev_noyau, noyau_f16)
    prev_noyau = noyau_f16

    # D21 at palier 2^36
    run_single_palier(
        horizon=21,
        palier=36,
        valeur=34,
        input_noyau=prev_noyau,
        output_csv=str(out / "candidats" / "candidats_D21_palier2p36.csv"),
        audit60_json=audit60_json,
        output_noyau_path=str(out / "noyaux" / "noyau_post_D21.json"),
    )


def load_noyau(path: str) -> List[int]:
    """Load a noyau (list of residues) from a JSON file.

    Accepted shapes:
    - a JSON list of residues;
    - a JSON object holding a list under the first matching key among
      ``noyau``, ``residues``, ``uncovered``, ``R25_after``, ``R24_after``.

    Raises:
        ValueError: if no residue list is found.
    """
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    if isinstance(data, list):
        return [int(x) for x in data]
    if isinstance(data, dict):
        for key in ("noyau", "residues", "uncovered", "R25_after", "R24_after"):
            if key in data and isinstance(data[key], list):
                return [int(x) for x in data[key]]
    raise ValueError(f"Noyau JSON: no residue list in {path}")


def run_single_palier(
    horizon: int,
    palier: int,
    valeur: int,
    input_noyau: str,
    output_csv: str,
    audit60_json: str,
    output_noyau_path: str | None = None,
) -> None:
    """
    Run a single palier: load noyau, lift to 2^palier, extract D_k candidates with A_k=valeur.

    Args:
        horizon: the ``k`` passed to ``A_k`` / ``prefix_data`` / ``N0_D``.
        palier: target exponent ``m`` (residues are lifted towards 2^m).
        valeur: value of ``A_k`` that marks a residue as a candidate.
        input_noyau: noyau JSON file (see ``load_noyau``).
        output_csv: candidates CSV to write (parent directory is created).
        audit60_json: path of the "audit 60 states" JSON file.
        output_noyau_path: when given, the residual noyau (lifted residues
            minus candidates and their sisters) is written there as JSON.
    """
    residues = load_noyau(input_noyau)
    res_to_state, _ = load_state_map_60(audit60_json)

    # NOTE(review): the input palier is guessed from the bit length of the
    # largest residue, not read from the JSON "palier" field; it is therefore
    # underestimated when every residue is below half the real modulus.
    max_r = max(residues) if residues else 0
    input_palier = max_r.bit_length() if max_r else 0
    curr_shift = 1 << (palier - 1)

    if palier == 17:
        # Input is used as-is (a single "lift" with j = 0).
        prev_shift = 1 << 16
        lift_count = 1
    elif palier - input_palier >= 2:
        prev_shift = 1 << input_palier
        lift_count = 1 << (palier - input_palier)
    else:
        prev_shift = 1 << (palier - 1)
        lift_count = 2

    lifted: List[int] = [r + j * prev_shift for r in residues for j in range(lift_count)]

    cand = {n for n in lifted if A_k(n, horizon) == valeur}
    cover = cand | {n ^ curr_shift for n in cand}
    # delta is reported as 0 when 2^valeur does not exceed 3^horizon.
    delta = (1 << valeur) - (3**horizon) if (1 << valeur) > (3**horizon) else 0

    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    with Path(output_csv).open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        col_palier = f"classe_mod_2^{palier}"
        w.writerow([col_palier, "sœur", f"mot_a0..a{horizon-1}", f"A{horizon}", f"C{horizon}", "delta", "N0", f"U^{horizon}(n)", "etat_id", "base_mod_4096"])
        for n in sorted(cand):
            pref = prefix_data(n, horizon)
            N0 = N0_D(pref.C, pref.A, horizon) if delta > 0 else 0
            base = n % 4096
            # Unknown residues mod 4096 are reported with state id 0.
            etat = res_to_state.get(base, 0)
            w.writerow([n, n ^ curr_shift, " ".join(map(str, pref.word)), pref.A, pref.C, delta, N0, pref.y, etat, base])

    if output_noyau_path:
        residual = sorted(set(lifted) - cover)
        Path(output_noyau_path).parent.mkdir(parents=True, exist_ok=True)
        Path(output_noyau_path).write_text(
            json.dumps({"noyau": residual, "palier": palier}),
            encoding="utf-8",
        )
        print(f"Wrote noyau: {output_noyau_path} ({len(residual)} residues)")

    print(f"Wrote {output_csv}: {len(cand)} candidates, palier 2^{palier}")


def _build_arg_parser():
    """Create the command-line parser shared by the three run modes."""
    import argparse

    ap = argparse.ArgumentParser(description="Collatz pipeline: full run or single palier")
    ap.add_argument("--audit60", help="Audit 60 états JSON (for full run)")
    ap.add_argument("--m15m16", help="Complétion m15→m16 MD (for full run)")
    ap.add_argument("--d10", help="Candidats D10 MD (for full run)")
    ap.add_argument("--out", help="Output directory (for full run)")
    ap.add_argument("--horizon", type=int, help="Horizon k for single palier")
    ap.add_argument("--palier", type=int, help="Palier m (2^m) for single palier")
    ap.add_argument("--seuil", default="A_min", help="Seuil type (A_min)")
    ap.add_argument("--valeur", type=int, help="A_k target value for single palier")
    ap.add_argument("--input-noyau", help="Input noyau JSON for single palier")
    ap.add_argument("--output", help="Output CSV for single palier")
    ap.add_argument("--output-noyau", help="Output residual noyau JSON for next palier")
    ap.add_argument("--parallel", action="store_true", help="Use parallel mode (placeholder)")
    ap.add_argument("--threads", type=int, default=1, help="Thread count (placeholder)")
    ap.add_argument("--extend", action="store_true", help="Run extended D18-D21 pipeline (requires noyau_post_D17)")
    ap.add_argument("--resume-from", help="Resume from step (e.g. D20) - skip earlier steps")
    return ap


def main() -> None:
    """Command-line entry point.

    Three modes, tried in this order:
    - ``--extend``: run ``run_extended_D18_to_D21``;
    - ``--horizon --palier --valeur --output``: run one palier;
    - ``--audit60 --m15m16 --d10 --out``: run ``run_after_fusion_D16_D17``.

    Raises:
        SystemExit: on a missing audit file, missing single-palier input, or
            an unusable combination of options.
    """
    ap = _build_arg_parser()
    args = ap.parse_args()
    default_audit60 = str(Path(__file__).parent / "audit_60_etats_B12_mod4096_horizon7.json")

    if args.extend:
        audit60 = args.audit60 or default_audit60
        out_dir = args.out or str(Path(__file__).parent / "out")
        if not Path(audit60).exists():
            raise SystemExit(f"Audit60 not found: {audit60}")
        run_extended_D18_to_D21(
            audit60_json=audit60,
            out_dir=out_dir,
            resume_from=args.resume_from,
        )
        return

    if args.horizon is not None and args.palier is not None and args.valeur is not None and args.output:
        audit60 = args.audit60 or default_audit60
        is_d10 = args.horizon == 10 and args.palier == 17
        if is_d10 and args.m15m16:
            residues = build_R17_from_completion(args.m15m16)
        elif args.input_noyau:
            residues = load_noyau(args.input_noyau)
        else:
            raise SystemExit("--input-noyau or --m15m16 (for D10) required for single palier mode")
        if not Path(audit60).exists():
            raise SystemExit(f"Audit60 not found: {audit60}")

        if not is_d10:
            # The input file is passed straight through; no temp file is needed.
            run_single_palier(
                horizon=args.horizon,
                palier=args.palier,
                valeur=args.valeur,
                input_noyau=args.input_noyau,
                output_csv=args.output,
                audit60_json=audit60,
                output_noyau_path=args.output_noyau,
            )
            return

        # D10 case: hand the residues to run_single_palier through a temp
        # JSON file, which is always removed afterwards.
        tmp_noyau = None
        try:
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".json", delete=False, encoding="utf-8"
            ) as tf:
                tmp_noyau = tf.name
                json.dump(residues, tf)
            run_single_palier(
                horizon=args.horizon,
                palier=args.palier,
                valeur=args.valeur,
                input_noyau=tmp_noyau,
                output_csv=args.output,
                audit60_json=audit60,
                output_noyau_path=args.output_noyau,
            )
        finally:
            if tmp_noyau is not None:
                Path(tmp_noyau).unlink(missing_ok=True)
    elif args.audit60 and args.m15m16 and args.d10 and args.out:
        run_after_fusion_D16_D17(args.audit60, args.m15m16, args.d10, args.out)
    else:
        ap.error("Use either (--audit60 --m15m16 --d10 --out) or (--horizon --palier --valeur --input-noyau --output)")


if __name__ == "__main__":
    main()