**Motivations:** - Implémenter le workflow complet de démonstration Collatz (commandes.md) - Permettre la reprise après interruption au palier D20 **Evolutions:** - Scripts 01-12 et run-full-workflow alignés sur commandes.md sections 1-10 - collatz_recover_noyau.py : recréation de noyau_post_D20 à partir du CSV candidats - Option --resume-from D20 dans collatz_k_pipeline pour reprendre sans recalculer D18-D19-F15 - Détection automatique : si candidats_D20 existe sans noyau_post_D20, récupération puis poursuite - Filtres --cible=critique et --modulo dans collatz_fusion_pipeline - ROOT par défaut = collatz_k_scripts (plus data/source vide) **Pages affectées:** - .gitignore (__pycache__, out/) - applications/collatz/collatz_k_scripts/*.py - applications/collatz/scripts/*.sh - applications/collatz/scripts/README.md
142 lines
5.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
collatz_fusion_pipeline.py
|
|
|
|
Pipeline de fusion F(t) sur un noyau donné.
|
|
Charge le noyau JSON, appelle build_fusion_clauses pour chaque horizon,
|
|
et fusionne les sorties en un seul CSV.
|
|
|
|
CLI: --horizons 11,12,14 --palier 25 --input-noyau PATH --output CSV_PATH [--audit60 PATH]
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
import argparse
|
|
import csv
|
|
import json
|
|
import tempfile
|
|
|
|
from collatz_k_fusion import build_fusion_clauses
|
|
from collatz_k_pipeline import load_state_map_60
|
|
|
|
|
|
def load_noyau(path: str) -> list[int]:
    """Read a noyau JSON file and return its residues as ``list[int]``.

    Two shapes are accepted: a bare JSON list of residues, or a dict that
    stores the residue list under one of the known keys (checked in order:
    R25_after, R24_after, noyau, residues, uncovered).

    Raises:
        ValueError: if the JSON is a dict with none of the known keys, or
            is neither a list nor a dict.
    """
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    if isinstance(data, list):
        return [int(value) for value in data]
    if isinstance(data, dict):
        for key in ("R25_after", "R24_after", "noyau", "residues", "uncovered"):
            values = data.get(key)
            if isinstance(values, list):
                return [int(value) for value in values]
        raise ValueError(f"Noyau JSON: no known key (R25_after, noyau, residues, uncovered) in {list(data.keys())}")
    raise ValueError("Noyau JSON must be a list or dict with residue list")
|
|
|
|
|
|
def _filter_residues_critique(residues: list[int], res_to_state: dict[int, int]) -> list[int]:
|
|
"""Filter residues to those in states with highest count (critical coverage)."""
|
|
state_counts: Counter[int] = Counter()
|
|
for r in residues:
|
|
base = r % 4096
|
|
sid = res_to_state.get(base, 0)
|
|
state_counts[sid] += 1
|
|
if not state_counts:
|
|
return residues
|
|
threshold = max(state_counts.values()) * 0.5
|
|
critical_states = {s for s, c in state_counts.items() if c >= threshold}
|
|
return [r for r in residues if res_to_state.get(r % 4096, 0) in critical_states]
|
|
|
|
|
|
def run_fusion_pipeline(
    horizons: list[int],
    palier: int,
    input_noyau: str,
    output_csv: str,
    audit60_json: str,
    cible: str | None = None,
    modulo: int | None = None,
) -> None:
    """Run build_fusion_clauses for each horizon and merge all rows into one CSV.

    Args:
        horizons: horizons t to process; each contributes its rows to the merge.
        palier: modulus power forwarded to build_fusion_clauses (e.g. 25 for 2^25).
        input_noyau: path to the noyau JSON (see load_noyau for accepted shapes).
        output_csv: destination path for the merged CSV (parents created).
        audit60_json: path to the audit-60-states JSON read by load_state_map_60.
        cible: if "critique", keep only residues in the most-populated states.
        modulo: if given, keep only residues divisible by this value.
    """
    residues = load_noyau(input_noyau)
    res_to_state, state_mot7 = load_state_map_60(audit60_json)

    # Optional pre-filters; each prints the surviving residue count for audit.
    if modulo is not None:
        residues = [r for r in residues if r % modulo == 0]
        print(f"Modulo {modulo} filter: {len(residues)} residues")
    if cible == "critique":
        residues = _filter_residues_critique(residues, res_to_state)
        print(f"Cible critique filter: {len(residues)} residues")

    out_path = Path(output_csv)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    all_rows: list[dict] = []
    for t in horizons:
        # build_fusion_clauses writes to file paths, so reserve two throwaway
        # temp files; the `with` closes them immediately and only the names
        # are kept (delete=False so the paths stay valid after closing).
        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f_csv:
            tmp_csv = f_csv.name
        with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f_md:
            tmp_md = f_md.name

        try:
            build_fusion_clauses(
                residues,
                t,
                res_to_state,
                state_mot7,
                tmp_md,
                tmp_csv,
                palier,
            )
            # Re-read the per-horizon CSV and tag every row with its horizon.
            # A zero-byte file (no clauses produced) contributes nothing.
            with Path(tmp_csv).open("r", encoding="utf-8") as f:
                if Path(tmp_csv).stat().st_size > 0:
                    reader = csv.DictReader(f)
                    for row in reader:
                        row["horizon_t"] = t
                        all_rows.append(row)
        finally:
            # Always remove the temp files, even if build_fusion_clauses raised.
            Path(tmp_csv).unlink(missing_ok=True)
            Path(tmp_md).unlink(missing_ok=True)

    with out_path.open("w", newline="", encoding="utf-8") as f:
        if all_rows:
            # horizon_t first, then the columns in the order of the first row.
            fieldnames = ["horizon_t"] + [k for k in all_rows[0].keys() if k != "horizon_t"]
            w = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
            w.writeheader()
            for row in all_rows:
                w.writerow(row)
        else:
            # No rows at all: still emit a header so downstream readers see a
            # valid CSV. NOTE(review): header assumed to match what
            # build_fusion_clauses emits — confirm if its columns change.
            f.write("horizon_t,classe_mod_2^m,m,t,a,A_t,mot_a0..,C_t,y,y_mod_3,DeltaF,Nf,preimage_m,etat_id,base_mod_4096\n")

    print(f"Wrote merged fusion CSV: {out_path} ({len(all_rows)} rows)")
|
|
|
|
|
|
def main() -> None:
    """CLI entry point: parse arguments and launch the fusion pipeline."""
    parser = argparse.ArgumentParser(description="Fusion pipeline: build fusion clauses and merge to CSV")
    parser.add_argument("--horizons", required=True, help="Comma-separated horizons, e.g. 11,12,14")
    parser.add_argument("--palier", type=int, required=True, help="Modulus power (e.g. 25 for 2^25)")
    parser.add_argument("--input-noyau", required=True, help="Path to noyau JSON (list of residues or R*_after)")
    parser.add_argument("--output", required=True, help="Path to output merged CSV")
    parser.add_argument("--audit60", default="audit_60_etats_B12_mod4096_horizon7.json", help="Path to audit 60 états JSON (residue_to_state, state_table)")
    parser.add_argument("--cible", help="Target filter, e.g. critique")
    parser.add_argument("--modulo", type=int, help="Filter residues by modulo (e.g. 9)")
    args = parser.parse_args()

    run_fusion_pipeline(
        horizons=[int(token.strip()) for token in args.horizons.split(",")],
        palier=args.palier,
        input_noyau=args.input_noyau,
        output_csv=args.output,
        audit60_json=args.audit60,
        cible=args.cible,
        modulo=args.modulo,
    )
|
|
|
|
|
|
# Standard script guard: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|