Collatz: pipelines, scripts paliers, docs et fixKnowledge

**Motivations:**
- Conserver l'état des scripts Collatz k, pipelines et démonstration
- Documenter diagnostic D18/D21, errata, plan de preuve et correctif OOM paliers

**Root causes:**
- Consommation mémoire excessive (OOM) sur script paliers finale f16

**Correctifs:**
- Documentation du crash OOM paliers finale f16 et pistes de correction

**Evolutions:**
- Évolutions des pipelines fusion/k, recover/update noyau, script 08-paliers-finale
- Ajout de docs (diagnostic, errata, plan lemmes, fixKnowledge OOM)

**Pages affectées:**
- applications/collatz/collatz_k_scripts/*.py, note.md, requirements.txt
- applications/collatz/collatz_k_scripts/*.md (diagnostic, errata, plan)
- applications/collatz/scripts/08-paliers-finale.sh, README.md
- docs/fixKnowledge/crash_paliers_finale_f16_oom.md
This commit is contained in:
ncantu 2026-03-04 17:19:50 +01:00
parent 14ed1de36b
commit f05f2380ff
12 changed files with 776 additions and 104 deletions

View File

@ -12,6 +12,7 @@ CLI: --horizons 11,12,14 --palier 25 --input-noyau PATH --output CSV_PATH [--aud
from __future__ import annotations from __future__ import annotations
from collections import Counter from collections import Counter
from pathlib import Path from pathlib import Path
from typing import Iterator
import argparse import argparse
import csv import csv
import json import json
@ -34,6 +35,42 @@ def load_noyau(path: str) -> list[int]:
raise ValueError("Noyau JSON must be a list or dict with residue list") raise ValueError("Noyau JSON must be a list or dict with residue list")
def _stream_load_noyau_modulo(path: str, modulo: int) -> list[int]:
    """Incrementally parse a noyau JSON file, keeping residues divisible by *modulo*.

    The "noyau" array is streamed with ijson so the whole file is never
    materialised in memory (intended for very large inputs to avoid OOM).

    Raises FileNotFoundError when *path* does not exist.
    """
    import ijson

    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(path)
    with source.open("rb") as handle:
        return [
            residue
            for residue in (int(item) for item in ijson.items(handle, "noyau.item"))
            if residue % modulo == 0
        ]
def _stream_load_noyau_modulo_chunked(
    path: str, modulo: int, chunk_size: int = 800_000
) -> Iterator[list[int]]:
    """Yield batches of at most *chunk_size* residues divisible by *modulo*.

    Same streaming strategy as _stream_load_noyau_modulo, but matching
    residues are handed out in bounded batches so the caller can process a
    huge noyau file with a flat memory profile.

    Raises FileNotFoundError when *path* does not exist.
    """
    import ijson

    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(path)
    batch: list[int] = []
    with source.open("rb") as handle:
        for item in ijson.items(handle, "noyau.item"):
            value = int(item)
            if value % modulo != 0:
                continue
            batch.append(value)
            if len(batch) >= chunk_size:
                yield batch
                batch = []
    # Flush the final, possibly partial, batch.
    if batch:
        yield batch
def _filter_residues_critique(residues: list[int], res_to_state: dict[int, int]) -> list[int]: def _filter_residues_critique(residues: list[int], res_to_state: dict[int, int]) -> list[int]:
"""Filter residues to those in states with highest count (critical coverage).""" """Filter residues to those in states with highest count (critical coverage)."""
state_counts: Counter[int] = Counter() state_counts: Counter[int] = Counter()
@ -48,6 +85,50 @@ def _filter_residues_critique(residues: list[int], res_to_state: dict[int, int])
return [r for r in residues if res_to_state.get(r % 4096, 0) in critical_states] return [r for r in residues if res_to_state.get(r % 4096, 0) in critical_states]
def _run_fusion_chunked(
    input_noyau: str,
    modulo: int,
    horizons: list[int],
    palier: int,
    res_to_state: dict[int, int],
    state_mot7: dict[int, str],
    out_csv_path: Path,
) -> int:
    """Run fusion pipeline over streamed chunks; write rows directly to out_csv_path. Returns total row count. Used when noyau file is very large."""
    # Column layout of the merged CSV; extrasaction="ignore" silently drops
    # any extra columns the per-chunk CSVs may carry.
    fieldnames = ["horizon_t", "classe_mod_2^m", "m", "t", "a", "A_t", "mot_a0..", "C_t", "y", "y_mod_3", "DeltaF", "Nf", "preimage_m", "etat_id", "base_mod_4096"]
    total_rows = 0
    with out_csv_path.open("w", newline="", encoding="utf-8") as out_f:
        writer = csv.DictWriter(out_f, fieldnames=fieldnames, extrasaction="ignore")
        writer.writeheader()
        # Stream the noyau in bounded chunks so the full residue list is never
        # held in memory (the OOM fix for the large-file fusion path).
        for chunk in _stream_load_noyau_modulo_chunked(input_noyau, modulo):
            for t in horizons:
                # build_fusion_clauses writes to file paths, so route its
                # output through throwaway temp files; the `with` blocks close
                # the handles immediately and only the names are kept.
                with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f_csv:
                    tmp_csv = f_csv.name
                with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f_md:
                    tmp_md = f_md.name
                try:
                    build_fusion_clauses(
                        chunk,
                        t,
                        res_to_state,
                        state_mot7,
                        tmp_md,
                        tmp_csv,
                        palier,
                    )
                    with Path(tmp_csv).open("r", encoding="utf-8") as f:
                        # Skip empty outputs (no candidate rows for this chunk).
                        if Path(tmp_csv).stat().st_size > 0:
                            reader = csv.DictReader(f)
                            for row in reader:
                                # Tag each row with the horizon it came from
                                # before merging into the single output CSV.
                                row["horizon_t"] = t
                                writer.writerow(row)
                                total_rows += 1
                finally:
                    # Always remove the temp files, even on failure.
                    Path(tmp_csv).unlink(missing_ok=True)
                    Path(tmp_md).unlink(missing_ok=True)
    return total_rows
def run_fusion_pipeline( def run_fusion_pipeline(
horizons: list[int], horizons: list[int],
palier: int, palier: int,
@ -57,15 +138,40 @@ def run_fusion_pipeline(
cible: str | None = None, cible: str | None = None,
modulo: int | None = None, modulo: int | None = None,
) -> None: ) -> None:
residues = load_noyau(input_noyau) input_path = Path(input_noyau)
size_mb = input_path.stat().st_size / (1024 * 1024) if input_path.exists() else 0
if modulo is not None and size_mb > 500:
if cible == "critique":
raise ValueError("Chunked stream path does not support cible=critique (needs full residue set)")
print(f"F16 chunked path: file {size_mb:.0f} MB, modulo {modulo}", flush=True)
res_to_state, state_mot7 = load_state_map_60(audit60_json) res_to_state, state_mot7 = load_state_map_60(audit60_json)
print("F16 chunked path: state map loaded, starting stream chunks", flush=True)
out_path = Path(output_csv)
out_path.parent.mkdir(parents=True, exist_ok=True)
total_rows = _run_fusion_chunked(
input_noyau=input_noyau,
modulo=modulo,
horizons=horizons,
palier=palier,
res_to_state=res_to_state,
state_mot7=state_mot7,
out_csv_path=out_path,
)
print(f"Stream-loaded noyau (modulo {modulo}), chunked: {total_rows} rows (file size {size_mb:.0f} MB)", flush=True)
print(f"Wrote merged fusion CSV: {out_path} ({total_rows} rows)", flush=True)
return
if modulo is not None:
residues = _stream_load_noyau_modulo(input_noyau, modulo)
print(f"Stream-loaded noyau (modulo {modulo}): {len(residues)} residues (file size {size_mb:.0f} MB)", flush=True)
else:
residues = load_noyau(input_noyau)
if modulo is not None: if modulo is not None:
residues = [r for r in residues if r % modulo == 0] residues = [r for r in residues if r % modulo == 0]
print(f"Modulo {modulo} filter: {len(residues)} residues") print(f"Modulo {modulo} filter: {len(residues)} residues", flush=True)
res_to_state, state_mot7 = load_state_map_60(audit60_json)
if cible == "critique": if cible == "critique":
residues = _filter_residues_critique(residues, res_to_state) residues = _filter_residues_critique(residues, res_to_state)
print(f"Cible critique filter: {len(residues)} residues") print(f"Cible critique filter: {len(residues)} residues", flush=True)
out_path = Path(output_csv) out_path = Path(output_csv)
out_path.parent.mkdir(parents=True, exist_ok=True) out_path.parent.mkdir(parents=True, exist_ok=True)
@ -107,7 +213,7 @@ def run_fusion_pipeline(
else: else:
f.write("horizon_t,classe_mod_2^m,m,t,a,A_t,mot_a0..,C_t,y,y_mod_3,DeltaF,Nf,preimage_m,etat_id,base_mod_4096\n") f.write("horizon_t,classe_mod_2^m,m,t,a,A_t,mot_a0..,C_t,y,y_mod_3,DeltaF,Nf,preimage_m,etat_id,base_mod_4096\n")
print(f"Wrote merged fusion CSV: {out_path} ({len(all_rows)} rows)") print(f"Wrote merged fusion CSV: {out_path} ({len(all_rows)} rows)", flush=True)
def main() -> None: def main() -> None:

View File

@ -20,14 +20,56 @@ from pathlib import Path
import csv import csv
import json import json
import re import re
import sys
import tempfile import tempfile
import time
from collections import Counter from collections import Counter
from typing import List, Set, Dict, Tuple, Iterable from typing import List, Set, Dict, Tuple, Iterable, Optional
from collatz_k_core import A_k, prefix_data, N0_D from collatz_k_core import A_k, prefix_data, N0_D
from collatz_k_utils import parse_markdown_table_to_rows, write_text from collatz_k_utils import parse_markdown_table_to_rows, write_text
from collatz_k_fusion import build_fusion_clauses from collatz_k_fusion import build_fusion_clauses
# When set by run_extended_D18_to_D21, steps log to this file (flush after each line).
_pipeline_log_path: Optional[Path] = None
_original_excepthook: Optional[object] = None
def _get_memory_str() -> str:
    """Return max RSS in MB (Unix) as "rss_max_mb=N". Empty string if unavailable."""
    try:
        import resource

        peak = getattr(resource.getrusage(resource.RUSAGE_SELF), "ru_maxrss", 0)
    except (ImportError, OSError, AttributeError):
        # No resource module (e.g. Windows) or the call failed: report nothing.
        return ""
    if not peak:
        return ""
    # ru_maxrss is reported in bytes on macOS but in kilobytes elsewhere.
    divisor = 1024 * 1024 if sys.platform == "darwin" else 1024
    return f"rss_max_mb={peak / divisor:.0f}"
def _log_step(msg: str, out_dir: Optional[Path] = None, memory: bool = False) -> None:
    """Print a timestamped progress line and mirror it to the pipeline log file.

    When *memory* is true, the peak-RSS string is appended to the message.
    The log file is *out_dir* when it is a Path, otherwise the module-level
    _pipeline_log_path. All output is flushed immediately so a later crash
    (e.g. an OOM kill) still leaves a trace of the last completed step.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    text = msg
    if memory:
        rss_info = _get_memory_str()
        if rss_info:
            text = f"{text} {rss_info}"
    entry = f"[{stamp}] {text}"
    print(entry, flush=True)
    target = out_dir if isinstance(out_dir, Path) else _pipeline_log_path
    if target is None:
        return
    try:
        with target.open("a", encoding="utf-8") as log_file:
            log_file.write(entry + "\n")
            log_file.flush()
    except OSError:
        # Logging must never take the pipeline down.
        pass
def load_state_map_60(audit60_json_path: str) -> Tuple[Dict[int, int], Dict[int, str]]: def load_state_map_60(audit60_json_path: str) -> Tuple[Dict[int, int], Dict[int, str]]:
import json import json
@ -342,6 +384,7 @@ def run_extended_D18_to_D21(
resume_from: str | None = None, resume_from: str | None = None,
) -> None: ) -> None:
"""Continue from D17 to D18, D19, F15, D20, F16, D21. resume_from='D20' skips to D20.""" """Continue from D17 to D18, D19, F15, D20, F16, D21. resume_from='D20' skips to D20."""
global _pipeline_log_path
from collatz_fusion_pipeline import run_fusion_pipeline from collatz_fusion_pipeline import run_fusion_pipeline
from collatz_scission import run_scission from collatz_scission import run_scission
from collatz_update_noyau import run_update_noyau from collatz_update_noyau import run_update_noyau
@ -352,18 +395,42 @@ def run_extended_D18_to_D21(
(out / "candidats").mkdir(exist_ok=True) (out / "candidats").mkdir(exist_ok=True)
(out / "certificats").mkdir(exist_ok=True) (out / "certificats").mkdir(exist_ok=True)
log_file = out / "pipeline_extend.log"
_pipeline_log_path = log_file
_log_step(f"START extend pipeline out_dir={out_dir} resume_from={resume_from!r} log={log_file}", memory=True)
def _extend_excepthook(etype: type, value: BaseException, tb: object) -> None:
mem = _get_memory_str()
try:
if _pipeline_log_path and _pipeline_log_path.exists():
with _pipeline_log_path.open("a", encoding="utf-8") as f:
f.write(f"[CRASH] {etype.__name__}: {value} {mem}\n")
f.flush()
except OSError:
pass
sys.__excepthook__(etype, value, tb)
global _original_excepthook
_original_excepthook = sys.excepthook
sys.excepthook = _extend_excepthook # type: ignore[assignment]
if resume_from == "D20": if resume_from == "D20":
prev_noyau = str(out / "noyaux" / "noyau_post_F15.json") prev_noyau = str(out / "noyaux" / "noyau_post_F15.json")
if not Path(prev_noyau).exists(): if not Path(prev_noyau).exists():
_log_step(f"ERROR: Resume D20 requires {prev_noyau}")
raise FileNotFoundError(f"Resume D20 requires {prev_noyau}") raise FileNotFoundError(f"Resume D20 requires {prev_noyau}")
_log_step("Resume from D20: using noyau_post_F15.json", memory=True)
else: else:
noyau_d17 = noyau_post_D17_path or str(out / "noyaux" / "noyau_post_D17.json") noyau_d17 = noyau_post_D17_path or str(out / "noyaux" / "noyau_post_D17.json")
if not Path(noyau_d17).exists(): if not Path(noyau_d17).exists():
_log_step(f"ERROR: Run full pipeline first to produce {noyau_d17}")
raise FileNotFoundError(f"Run full pipeline first to produce {noyau_d17}") raise FileNotFoundError(f"Run full pipeline first to produce {noyau_d17}")
prev_noyau = noyau_d17 prev_noyau = noyau_d17
if resume_from != "D20": if resume_from != "D20":
for horizon, palier, valeur, label in [(18, 30, 29, "D18"), (19, 32, 31, "D19")]: for horizon, palier, valeur, label in [(18, 30, 29, "D18"), (19, 32, 31, "D19")]:
_log_step(f"STEP start {label} horizon={horizon} palier=2^{palier} valeur={valeur} input={prev_noyau}", memory=True)
try:
run_single_palier( run_single_palier(
horizon=horizon, horizon=horizon,
palier=palier, palier=palier,
@ -373,10 +440,16 @@ def run_extended_D18_to_D21(
audit60_json=audit60_json, audit60_json=audit60_json,
output_noyau_path=str(out / "noyaux" / f"noyau_post_{label}.json"), output_noyau_path=str(out / "noyaux" / f"noyau_post_{label}.json"),
) )
except Exception as e:
_log_step(f"STEP FAILED {label}: {type(e).__name__}: {e}")
raise
prev_noyau = str(out / "noyaux" / f"noyau_post_{label}.json") prev_noyau = str(out / "noyaux" / f"noyau_post_{label}.json")
_log_step(f"STEP done {label} next_noyau={prev_noyau}", memory=True)
_log_step("STEP start F15 fusion palier=2^32", memory=True)
csv_f15 = str(out / "candidats" / "candidats_F15_palier2p32.csv") csv_f15 = str(out / "candidats" / "candidats_F15_palier2p32.csv")
cert_f15 = str(out / "certificats" / "certificat_F15_palier2p32.json") cert_f15 = str(out / "certificats" / "certificat_F15_palier2p32.json")
try:
run_fusion_pipeline( run_fusion_pipeline(
horizons=[15], horizons=[15],
palier=32, palier=32,
@ -388,16 +461,20 @@ def run_extended_D18_to_D21(
run_scission(csv_f15, cert_f15) run_scission(csv_f15, cert_f15)
noyau_f15 = str(out / "noyaux" / "noyau_post_F15.json") noyau_f15 = str(out / "noyaux" / "noyau_post_F15.json")
run_update_noyau(cert_f15, prev_noyau, noyau_f15) run_update_noyau(cert_f15, prev_noyau, noyau_f15)
prev_noyau = noyau_f15 except Exception as e:
_log_step(f"STEP FAILED F15: {type(e).__name__}: {e}")
raise
prev_noyau = str(out / "noyaux" / "noyau_post_F15.json")
_log_step("STEP done F15", memory=True)
csv_d20 = str(out / "candidats" / "candidats_D20_palier2p34.csv") csv_d20 = str(out / "candidats" / "candidats_D20_palier2p34.csv")
noyau_d20 = str(out / "noyaux" / "noyau_post_D20.json") noyau_d20 = str(out / "noyaux" / "noyau_post_D20.json")
if Path(noyau_d20).exists(): if Path(noyau_d20).exists():
print(f"Using existing {noyau_d20}") _log_step(f"Using existing {noyau_d20}", memory=True)
elif Path(csv_d20).exists(): elif Path(csv_d20).exists():
from collatz_recover_noyau import run_recover from collatz_recover_noyau import run_recover
print("Recovering noyau_post_D20 from existing candidats CSV...") _log_step("Recovering noyau_post_D20 from existing candidats CSV...", memory=True)
run_recover( run_recover(
previous_noyau=prev_noyau, previous_noyau=prev_noyau,
candidats_csv=csv_d20, candidats_csv=csv_d20,
@ -406,6 +483,8 @@ def run_extended_D18_to_D21(
input_palier=32, input_palier=32,
) )
else: else:
_log_step(f"STEP start D20 palier=2^34 input={prev_noyau}", memory=True)
try:
run_single_palier( run_single_palier(
horizon=20, horizon=20,
palier=34, palier=34,
@ -415,10 +494,16 @@ def run_extended_D18_to_D21(
audit60_json=audit60_json, audit60_json=audit60_json,
output_noyau_path=noyau_d20, output_noyau_path=noyau_d20,
) )
except Exception as e:
_log_step(f"STEP FAILED D20: {type(e).__name__}: {e}")
raise
_log_step("STEP done D20", memory=True)
prev_noyau = noyau_d20 prev_noyau = noyau_d20
_log_step("STEP start F16 fusion palier=2^35", memory=True)
csv_f16 = str(out / "candidats" / "candidats_F16_palier2p35.csv") csv_f16 = str(out / "candidats" / "candidats_F16_palier2p35.csv")
cert_f16 = str(out / "certificats" / "certificat_F16_palier2p35.json") cert_f16 = str(out / "certificats" / "certificat_F16_palier2p35.json")
try:
run_fusion_pipeline( run_fusion_pipeline(
horizons=[16], horizons=[16],
palier=35, palier=35,
@ -430,8 +515,14 @@ def run_extended_D18_to_D21(
run_scission(csv_f16, cert_f16) run_scission(csv_f16, cert_f16)
noyau_f16 = str(out / "noyaux" / "noyau_post_F16.json") noyau_f16 = str(out / "noyaux" / "noyau_post_F16.json")
run_update_noyau(cert_f16, prev_noyau, noyau_f16) run_update_noyau(cert_f16, prev_noyau, noyau_f16)
except Exception as e:
_log_step(f"STEP FAILED F16: {type(e).__name__}: {e}")
raise
prev_noyau = noyau_f16 prev_noyau = noyau_f16
_log_step("STEP done F16", memory=True)
_log_step("STEP start D21 palier=2^36 (final)", memory=True)
try:
run_single_palier( run_single_palier(
horizon=21, horizon=21,
palier=36, palier=36,
@ -441,6 +532,12 @@ def run_extended_D18_to_D21(
audit60_json=audit60_json, audit60_json=audit60_json,
output_noyau_path=str(out / "noyaux" / "noyau_post_D21.json"), output_noyau_path=str(out / "noyaux" / "noyau_post_D21.json"),
) )
except Exception as e:
_log_step(f"STEP FAILED D21: {type(e).__name__}: {e}")
raise
_log_step("STEP done D21 - extend pipeline complete", memory=True)
sys.excepthook = _original_excepthook # type: ignore[assignment]
_pipeline_log_path = None
def load_noyau(path: str) -> List[int]: def load_noyau(path: str) -> List[int]:
@ -455,6 +552,98 @@ def load_noyau(path: str) -> List[int]:
raise ValueError(f"Noyau JSON: no residue list in {path}") raise ValueError(f"Noyau JSON: no residue list in {path}")
def _stream_noyau_items(path: str) -> Iterable[int]:
    """Lazily yield each residue of a noyau JSON file.

    Walks the "noyau" array incrementally with ijson so arbitrarily large
    files can be traversed without loading them fully into memory.

    Raises FileNotFoundError when *path* does not exist (on first iteration,
    since this is a generator).
    """
    import ijson

    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(path)
    with source.open("rb") as handle:
        for raw in ijson.items(handle, "noyau.item"):
            yield int(raw)
def _run_single_palier_stream(
    horizon: int,
    palier: int,
    valeur: int,
    input_noyau: str,
    output_csv: str,
    output_noyau_path: Optional[str],
    audit60_json: str,
) -> None:
    """Stream-based single palier for large noyau files (>500 MB). Three passes: max_r, cand/cover, residual write."""
    # Pass 1: one streaming scan to find the largest residue; its bit length
    # gives the palier of the input noyau without loading the file.
    _log_step(" stream path: pass 1 (max_r)", memory=True)
    max_r = 0
    n_res = 0
    for r in _stream_noyau_items(input_noyau):
        max_r = max(max_r, r)
        n_res += 1
    _log_step(f" stream max_r done n_res={n_res} max_r={max_r}", memory=True)
    input_palier = max_r.bit_length() if max_r else 0
    curr_shift = 1 << (palier - 1)
    # Decide how each input residue is lifted to classes mod 2^palier.
    if palier == 17:
        # Special case for the first extended palier.
        prev_shift = 1 << 16
        lift_count = 1
    elif palier - input_palier >= 2:
        # Input lags by two or more bits: lift across the full gap.
        prev_shift = 1 << input_palier
        lift_count = 1 << (palier - input_palier)
    else:
        # Normal case: two lifts from the previous palier.
        prev_shift = 1 << (palier - 1)
        lift_count = 2
    # Pass 2: stream again, keeping only lifted values n with A_k(n) == valeur.
    _log_step(" stream path: pass 2 (cand/cover)", memory=True)
    cand: Set[int] = set()
    for r in _stream_noyau_items(input_noyau):
        for j in range(lift_count):
            n = r + j * prev_shift
            if A_k(n, horizon) == valeur:
                cand.add(n)
    # Each candidate covers itself and its "sœur" class (high bit flipped).
    cover = cand | {n ^ curr_shift for n in cand}
    _log_step(f" cand/cover done len(cand)={len(cand)} len(cover)={len(cover)}", memory=True)
    res_to_state, _ = load_state_map_60(audit60_json)
    # delta is positive only when 2^valeur exceeds 3^horizon.
    delta = (1 << valeur) - (3**horizon) if (1 << valeur) > (3**horizon) else 0
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
    _log_step(f" writing CSV {output_csv}")
    with Path(output_csv).open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        col_palier = f"classe_mod_2^{palier}"
        w.writerow([col_palier, "sœur", f"mot_a0..a{horizon-1}", f"A{horizon}", f"C{horizon}", "delta", "N0", f"U^{horizon}(n)", "etat_id", "base_mod_4096"])
        for n in sorted(cand):
            pref = prefix_data(n, horizon)
            # N0 is only meaningful when delta > 0 — presumably a lower bound
            # tied to the drift; see N0_D in collatz_k_core (TODO confirm).
            N0 = N0_D(pref.C, pref.A, horizon) if delta > 0 else 0
            base = n % 4096
            etat = res_to_state.get(base, 0)
            w.writerow([n, n ^ curr_shift, " ".join(map(str, pref.word)), pref.A, pref.C, delta, N0, pref.y, etat, base])
    _log_step(" CSV written")
    # Free the candidate set before pass 3; only `cover` is needed below.
    del cand
    if output_noyau_path:
        # Pass 3: stream a third time, writing every lifted value NOT covered
        # straight to the output JSON so the residual never lives in memory.
        _log_step(" stream path: pass 3 (residual write)", memory=True)
        Path(output_noyau_path).parent.mkdir(parents=True, exist_ok=True)
        n_residual = 0
        with Path(output_noyau_path).open("w", encoding="utf-8") as f:
            f.write('{"noyau": [')
            first = True
            for r in _stream_noyau_items(input_noyau):
                for j in range(lift_count):
                    n = r + j * prev_shift
                    if n not in cover:
                        if not first:
                            f.write(",")
                        f.write(str(n))
                        first = False
                        n_residual += 1
            f.write(f'], "palier": {palier}}}')
        _log_step(f" noyau written ({n_residual} residues)", memory=True)
        print(f"Wrote noyau: {output_noyau_path} ({n_residual} residues)", flush=True)
    print(f"Wrote {output_csv}: {len(cover) // 2} candidates, palier 2^{palier}", flush=True)
def run_single_palier( def run_single_palier(
horizon: int, horizon: int,
palier: int, palier: int,
@ -466,8 +655,28 @@ def run_single_palier(
) -> None: ) -> None:
""" """
Run a single palier: load noyau, lift to 2^palier, extract D_k candidates with A_k=valeur. Run a single palier: load noyau, lift to 2^palier, extract D_k candidates with A_k=valeur.
Memory-optimized: no full lifted list; two passes over residues (cand then residual); stream-write noyau JSON.
""" """
p_in = Path(input_noyau)
file_size_mb = p_in.stat().st_size / (1024 * 1024) if p_in.exists() else 0
_log_step(f"run_single_palier k={horizon} palier=2^{palier} valeur={valeur} input={input_noyau} size_mb={file_size_mb:.1f}", memory=True)
if file_size_mb > 500:
_run_single_palier_stream(
horizon=horizon,
palier=palier,
valeur=valeur,
input_noyau=input_noyau,
output_csv=output_csv,
output_noyau_path=output_noyau_path,
audit60_json=audit60_json,
)
return
residues = load_noyau(input_noyau) residues = load_noyau(input_noyau)
n_res = len(residues)
_log_step(f" load_noyau done len(residues)={n_res}", memory=True)
res_to_state, _ = load_state_map_60(audit60_json) res_to_state, _ = load_state_map_60(audit60_json)
max_r = max(residues) if residues else 0 max_r = max(residues) if residues else 0
@ -483,16 +692,20 @@ def run_single_palier(
prev_shift = 1 << (palier - 1) prev_shift = 1 << (palier - 1)
lift_count = 2 lift_count = 2
lifted: List[int] = [] # Pass 1: build cand (and cover) without storing full lifted list
cand: Set[int] = set()
for r in residues: for r in residues:
for j in range(lift_count): for j in range(lift_count):
lifted.append(r + j * prev_shift) n = r + j * prev_shift
if A_k(n, horizon) == valeur:
cand = set(n for n in lifted if A_k(n, horizon) == valeur) cand.add(n)
cover = cand | {n ^ curr_shift for n in cand} cover = cand | {n ^ curr_shift for n in cand}
_log_step(f" cand/cover done len(cand)={len(cand)} len(cover)={len(cover)}", memory=True)
delta = (1 << valeur) - (3**horizon) if (1 << valeur) > (3**horizon) else 0 delta = (1 << valeur) - (3**horizon) if (1 << valeur) > (3**horizon) else 0
Path(output_csv).parent.mkdir(parents=True, exist_ok=True) Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
_log_step(f" writing CSV {output_csv}")
with Path(output_csv).open("w", newline="", encoding="utf-8") as f: with Path(output_csv).open("w", newline="", encoding="utf-8") as f:
w = csv.writer(f) w = csv.writer(f)
col_palier = f"classe_mod_2^{palier}" col_palier = f"classe_mod_2^{palier}"
@ -503,17 +716,32 @@ def run_single_palier(
base = n % 4096 base = n % 4096
etat = res_to_state.get(base, 0) etat = res_to_state.get(base, 0)
w.writerow([n, n ^ curr_shift, " ".join(map(str, pref.word)), pref.A, pref.C, delta, N0, pref.y, etat, base]) w.writerow([n, n ^ curr_shift, " ".join(map(str, pref.word)), pref.A, pref.C, delta, N0, pref.y, etat, base])
_log_step(f" CSV written")
del cand # free before building residual
if output_noyau_path: if output_noyau_path:
residual = sorted(set(lifted) - cover) _log_step(f" computing residual (second pass over residues)")
residual: List[int] = []
for r in residues:
for j in range(lift_count):
n = r + j * prev_shift
if n not in cover:
residual.append(n)
residual.sort()
n_residual = len(residual)
_log_step(f" residual len={n_residual} writing noyau {output_noyau_path}", memory=True)
Path(output_noyau_path).parent.mkdir(parents=True, exist_ok=True) Path(output_noyau_path).parent.mkdir(parents=True, exist_ok=True)
Path(output_noyau_path).write_text( with Path(output_noyau_path).open("w", encoding="utf-8") as f:
json.dumps({"noyau": residual, "palier": palier}), f.write('{"noyau": [')
encoding="utf-8", for i, r in enumerate(residual):
) if i > 0:
print(f"Wrote noyau: {output_noyau_path} ({len(residual)} residues)") f.write(",")
f.write(str(r))
f.write(f'], "palier": {palier}}}')
_log_step(f" noyau written ({n_residual} residues)", memory=True)
print(f"Wrote noyau: {output_noyau_path} ({n_residual} residues)", flush=True)
print(f"Wrote {output_csv}: {len(cand)} candidates, palier 2^{palier}") print(f"Wrote {output_csv}: {len(cover) // 2} candidates, palier 2^{palier}", flush=True)
def main() -> None: def main() -> None:

View File

@ -57,6 +57,16 @@ def load_covered_from_csv(csv_path: str, palier: int) -> set[int]:
return covered return covered
def infer_input_palier(noyau_path: str) -> int:
    """Determine the palier of a noyau file.

    Prefers the explicit "palier" key of the JSON payload; otherwise falls
    back to the bit length of the largest residue. Returns 0 for an empty
    noyau. Note: this reads the whole file into memory.
    """
    payload = json.loads(Path(noyau_path).read_text(encoding="utf-8"))
    if isinstance(payload, dict) and "palier" in payload:
        return int(payload["palier"])
    residues = load_noyau(noyau_path)
    if not residues:
        return 0
    return max(residues).bit_length()
def lift_residues(residues: list[int], from_palier: int, to_palier: int) -> list[int]: def lift_residues(residues: list[int], from_palier: int, to_palier: int) -> list[int]:
"""Lift residues from 2^from_palier to 2^to_palier.""" """Lift residues from 2^from_palier to 2^to_palier."""
prev_shift = 1 << from_palier prev_shift = 1 << from_palier
@ -68,16 +78,6 @@ def lift_residues(residues: list[int], from_palier: int, to_palier: int) -> list
return lifted return lifted
def infer_input_palier(noyau_path: str) -> int:
"""Infer palier from noyau JSON or max residue."""
data = json.loads(Path(noyau_path).read_text(encoding="utf-8"))
if isinstance(data, dict) and "palier" in data:
return int(data["palier"])
residues = load_noyau(noyau_path)
max_r = max(residues) if residues else 0
return max_r.bit_length() if max_r else 0
def run_recover( def run_recover(
previous_noyau: str, previous_noyau: str,
candidats_csv: str, candidats_csv: str,
@ -85,20 +85,31 @@ def run_recover(
output: str, output: str,
input_palier: int | None = None, input_palier: int | None = None,
) -> None: ) -> None:
"""Recover noyau from interrupted run_single_palier.""" """Recover noyau from interrupted run_single_palier. Memory-optimized: no full lifted list; stream-write JSON."""
residues = load_noyau(previous_noyau) residues = load_noyau(previous_noyau)
from_p = input_palier if input_palier is not None else infer_input_palier(previous_noyau) from_p = input_palier if input_palier is not None else infer_input_palier(previous_noyau)
covered = load_covered_from_csv(candidats_csv, palier) covered = load_covered_from_csv(candidats_csv, palier)
lifted = lift_residues(residues, from_p, palier) prev_shift = 1 << from_p
residual = sorted(set(lifted) - covered) lift_count = 1 << (palier - from_p)
residual: list[int] = []
for r in residues:
for j in range(lift_count):
n = r + j * prev_shift
if n not in covered:
residual.append(n)
residual.sort()
out_path = Path(output) out_path = Path(output)
out_path.parent.mkdir(parents=True, exist_ok=True) out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text( with out_path.open("w", encoding="utf-8") as f:
json.dumps({"noyau": residual, "palier": palier}, indent=2), f.write('{"noyau": [')
encoding="utf-8", for i, r in enumerate(residual):
) if i > 0:
print(f"Recovered noyau: {len(residual)} residues (from {len(lifted)} lifted, {len(covered)} covered)") f.write(",")
f.write(str(r))
f.write(f'], "palier": {palier}}}')
n_lifted = len(residues) * lift_count
print(f"Recovered noyau: {len(residual)} residues (from {n_lifted} lifted, {len(covered)} covered)")
print(f"Wrote: {out_path}") print(f"Wrote: {out_path}")

View File

@ -14,6 +14,7 @@ from pathlib import Path
import argparse import argparse
import csv import csv
import json import json
import re
def load_noyau(path: str) -> list[int]: def load_noyau(path: str) -> list[int]:
@ -72,25 +73,80 @@ def load_covered_classes(path: str) -> set[int]:
def _get_palier(path: str) -> int | None: def _get_palier(path: str) -> int | None:
"""Extract palier from noyau JSON if present.""" """Extract palier from noyau JSON if present (full read; use _get_palier_from_tail for large files)."""
data = json.loads(Path(path).read_text(encoding="utf-8")) data = json.loads(Path(path).read_text(encoding="utf-8"))
if isinstance(data, dict) and "palier" in data: if isinstance(data, dict) and "palier" in data:
return int(data["palier"]) return int(data["palier"])
return None return None
def _get_palier_from_tail(path: str) -> int | None:
"""Extract palier from end of noyau JSON file without loading full content. Expects ...\"palier\": N}."""
p = Path(path)
if not p.exists():
return None
with p.open("rb") as f:
f.seek(max(0, p.stat().st_size - 128))
tail = f.read().decode("utf-8", errors="ignore")
m = re.search(r'"palier"\s*:\s*(\d+)', tail)
return int(m.group(1)) if m else None
def _stream_update_noyau(previous_noyau: str, covered: set[int], output_path: Path, palier: int | None) -> int:
    """Copy a noyau JSON file while dropping every residue present in *covered*.

    The previous noyau is parsed incrementally with ijson and surviving
    residues are written straight to *output_path*, so neither file is ever
    held fully in memory. Returns the number of residues written. The
    "palier" key is appended only when *palier* is provided.

    Raises FileNotFoundError when *previous_noyau* does not exist.
    """
    import ijson

    source = Path(previous_noyau)
    if not source.exists():
        raise FileNotFoundError(previous_noyau)
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    written = 0
    with source.open("rb") as f_in, destination.open("w", encoding="utf-8") as f_out:
        f_out.write('{"noyau": [')
        for item in ijson.items(f_in, "noyau.item"):
            residue = int(item)
            if residue in covered:
                continue
            if written:
                f_out.write(",")
            f_out.write(str(residue))
            written += 1
        if palier is not None:
            f_out.write(f'], "palier": {palier}}}')
        else:
            f_out.write("]}")
    return written
def run_update_noyau(fusion_cert: str, previous_noyau: str, output: str) -> None: def run_update_noyau(fusion_cert: str, previous_noyau: str, output: str) -> None:
noyau = set(load_noyau(previous_noyau)) p_prev = Path(previous_noyau)
size_mb = p_prev.stat().st_size / (1024 * 1024) if p_prev.exists() else 0
covered = load_covered_classes(fusion_cert) covered = load_covered_classes(fusion_cert)
new_noyau = sorted(noyau - covered)
if size_mb > 500:
palier = _get_palier_from_tail(previous_noyau)
count = _stream_update_noyau(previous_noyau, covered, Path(output), palier)
print(f"Stream update: covered {len(covered)}, new noyau {count} residues (previous file {size_mb:.0f} MB)", flush=True)
print(f"Wrote: {output}", flush=True)
return
noyau = set(load_noyau(previous_noyau))
palier = _get_palier(previous_noyau) palier = _get_palier(previous_noyau)
new_noyau = sorted(noyau - covered)
out_path = Path(output) out_path = Path(output)
out_path.parent.mkdir(parents=True, exist_ok=True) out_path.parent.mkdir(parents=True, exist_ok=True)
payload: list[int] | dict = new_noyau
if palier is not None: if palier is not None:
payload = {"noyau": new_noyau, "palier": palier} with out_path.open("w", encoding="utf-8") as f:
out_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") f.write('{"noyau": [')
for i, r in enumerate(new_noyau):
if i > 0:
f.write(",")
f.write(str(r))
f.write(f'], "palier": {palier}}}')
else:
out_path.write_text(json.dumps(new_noyau), encoding="utf-8")
print(f"Previous noyau: {len(noyau)}, covered: {len(covered)}, new noyau: {len(new_noyau)}") print(f"Previous noyau: {len(noyau)}, covered: {len(covered)}, new noyau: {len(new_noyau)}")
print(f"Wrote: {out_path}") print(f"Wrote: {out_path}")

View File

@ -0,0 +1,70 @@
# Diagnostic du run D18→D21 (avec F15/F16) et statut logique des affirmations « extinction / saturation »
## Introduction
Ce document formalise lécart entre :
- les artefacts computationnels rapportés pour le run D18→D21 (avec F15/F16), dont la sortie annoncée inclut un noyau résiduel final massif ;
- et certaines formulations dun texte de preuve (mentionné comme « démonstration collatz.md ») qui conclut à une extinction et à une saturation complète dun registre fini \(\mathcal{K}\).
Lobjectif est de verrouiller ce qui est réellement établi par les artefacts, ce qui reste conditionnel, et ce qui doit être réécrit pour rester académiquement standard.
## Éléments factuels issus du run (résumé fourni)
Résumé (tel que fourni) :
- F16 : chemin chunked + stream update → CSV 259 766 lignes, noyau_post_F16 ≈ 155,7 millions de résidus.
- D21 : chemin stream (pass 1 max_r, pass 2 cand/cover, pass 3 écriture résiduel) → 16,5 millions de candidats, noyau_post_D21 = 590 062 326 résidus (RSS ≈ 7,7 Go).
- Audits / scissions D18–D21, F15, F16 exécutés.
- Une étape « Verify both extinction » annoncée, sans artefact de sortie explicite dans lextrait (pas de fichier cité).
Conséquence immédiate :
- il ny a pas extinction au palier final rapporté : le noyau résiduel final est non nul et très grand.
## Incompatibilité avec une conclusion « Collatz démontrée » fondée sur extinction
Lorsque le texte de preuve affirme :
- extinction du sous-ensemble persistant à un palier fini \(2^M\),
- puis saturation (au sens dune identité de type Kraft/Haar) \(\sum_{c\in \mathcal{K}} 2^{-m_c} = 1\),
- et conclut « la conjecture de Collatz est ainsi démontrée »,
alors cette conclusion exige au minimum lun des deux verrous suivants :
- extinction effective : \(|R_M|=0\) pour le noyau résiduel au module \(2^M\),
- ou lemme analytique indépendant transformant une tendance en extinction universelle sur \(\mathbb{N}\).
Or le run rapporté donne \(|R_M|\neq 0\) (590 062 326 résidus). Donc, tel quel, ce run ne valide pas les passages « extinction » ni « saturation = 1 » sils sont écrits comme des faits établis.
## Statut logique correct du texte « démonstration collatz.md »
Le texte doit être requalifié en schéma conditionnel.
Forme standard conseillée :
- Théorème conditionnel :
« Si un registre fini \(\mathcal{K}\) de clauses (D/F) est complet sur \(\mathbb{N}\) au sens quil ferme toutes les classes modulo \(2^M\) pour un certain \(M\), alors toute trajectoire Collatz termine. »
- Corollaire :
« Si laudit aboutit à \(|R_M|=0\), alors Collatz est démontrée. »
Dans ce cadre, les artefacts D18→D21 sont des données daudit et non une clôture.
## Cohérence avec « conjoncture_collatz.md »
Les extraits cités pour « conjoncture_collatz.md » sont cohérents avec le run :
- le lemme global (couverture totale à un palier fini, ou contraction uniforme) nest pas encore établi ;
- les coefficients de survie \(q_m\) rapportés autour de 0,88–0,91 ne satisfont pas la borne \(q_m\le \lambda < 0,5\) qui donnerait une extinction par contraction uniforme.
## Point méthodologique : « Verify both extinction » sans artefact exploitable
Pour quune étape « Verify both extinction » soit citable dans un texte de preuve, il faut :
- un fichier de sortie attestant \(|R_M|=0\) ou attestant léchec (\(|R_M|>0\)) ;
- un protocole de vérification reproductible (script + hash/empreinte).
Sans un artefact explicite, cette étape napporte pas (en létat) un énoncé vérifié exploitable, seulement une intention.
## Conclusion
Le run D18→D21 (F15/F16) augmente laudit computationnel et la matière sur les coefficients de survie, mais ne clôt pas une preuve par extinction puisque \(|R_M|\neq 0\) au palier final rapporté.
Conséquence directe :
- tout texte concluant « Collatz est démontrée » via « extinction » doit être réécrit en théorème conditionnel,
- la preuve complète reste concentrée sur le lemme manquant : extinction à palier fini ou contraction uniforme suffisante.

View File

@ -0,0 +1,51 @@
# Errata proposé pour « démonstration collatz.md » : remplacer une conclusion affirmative par une conclusion conditionnelle
## Introduction
Ce document propose une correction minimale, compatible avec un standard académique, lorsque les artefacts de calcul nétablissent pas une extinction finale.
Le principe : conserver la structure, mais remplacer les affirmations de type « fait établi » par des implications conditionnelles explicitant le lemme manquant.
## Remplacement recommandé des passages « extinction / saturation / conclusion »
### Remplacer « extinction » par une hypothèse nommée
Définir une hypothèse formelle :
Hypothèse H_ext(M)
Il existe un entier \(M\) tel que le noyau résiduel \(R_M\) (classes survivantes modulo \(2^M\) après application de \(\mathcal{K}\)) soit vide :
\[
R_M = \varnothing.
\]
### Reformuler la section « saturation »
Au lieu décrire
\[
\sum_{c \in \mathcal{K}} 2^{-m_c} = 1
\]
comme identité impliquant directement la complétude sur \(\mathbb{N}\), écrire :
- (Kraft) : si les clauses sont préfixes et couvrent toutes les suites binaires, alors légalité est une condition de complétude dans lespace des suites ;
- pont arithmétique requis : pour conclure sur \(\mathbb{N}\), il faut un lemme supplémentaire reliant les suites effectivement réalisées par les entiers à cette couverture.
### Remplacer la conclusion « Collatz est démontrée » par un théorème conditionnel
Théorème (conditionnel).
Si H_ext(M) est vraie pour un certain \(M\), alors pour tout entier \(n\ge 1\), lorbite Collatz de \(n\) atteint \(1\).
Preuve (schéma).
La vacuité de \(R_M\) signifie : toute classe modulo \(2^M\) est fermée par une clause (descente ou fusion) menant à un strictement plus petit. Par bon ordre de \(\mathbb{N}\), aucune trajectoire ne peut échapper indéfiniment à une réduction, donc terminaison.
### Ajouter une section « statut expérimental »
Ajouter explicitement :
- « Les audits D18–D21 montrent que H_ext(M) n'est pas encore satisfaite au dernier palier audité ; un noyau résiduel non nul subsiste. »
## Conclusion
Avec ces corrections, le texte devient mathématiquement standard :
- il formule un théorème correct,
- il isole lhypothèse manquante,
- il intègre les artefacts computationnels comme preuves partielles (ou contre-indications) sans sur-annoncer.

View File

@ -32,6 +32,8 @@ Reconstruction : Reprend les données de $D_{10}$ (palier $2^{17}$).
Expansion : Génère les candidats pour $D_{16}$ (palier $2^{27}$) et $D_{17}$ (palier $2^{28}$). Expansion : Génère les candidats pour $D_{16}$ (palier $2^{27}$) et $D_{17}$ (palier $2^{28}$).
Optimisations mémoire : run_single_palier et run_recover n'allouent pas la liste « lifted » complète ; deux passes sur les résidus ; écriture JSON du noyau en flux. run_update_noyau écrit aussi le noyau en flux.
1.4. collatz_k_utils.py 1.4. collatz_k_utils.py
Fournit les outils de parsing pour extraire les entiers et les tables depuis les fichiers Markdown existants, assurant la continuité avec les rapports précédents. Fournit les outils de parsing pour extraire les entiers et les tables depuis les fichiers Markdown existants, assurant la continuité avec les rapports précédents.

View File

@ -0,0 +1,62 @@
# Plan de preuve : lemme manquant, objectifs formels et protocole daudit
## Introduction
Ce document formalise lobjectif unique restant pour transformer une trajectoire daudit en preuve complète : obtenir un lemme global transformant le registre \(\mathcal{K}\) en couverture universelle de \(\mathbb{N}\), soit par extinction finie à un palier \(2^M\), soit par contraction uniforme.
## Cadre
- \(U\) : application accélérée impairs → impairs.
- \(R_m\) : ensemble des résidus survivants modulo \(2^m\) après application des clauses \(\mathcal{K}\).
- \(q_m\) : coefficient de survie
\[
q_m = \frac{|R_{m+1}|}{2|R_m|}.
\]
## Objectif 1 : extinction à palier fini (certificat total)
Énoncé cible
Il existe \(M\) tel que \(R_M=\varnothing\).
Éléments nécessaires
- définition formelle de \(R_m\) et de lopérateur « appliquer \(\mathcal{K}\) » ;
- preuve que la procédure de construction de \(\mathcal{K}\) est correcte (chaque clause est valide sur sa classe et mène à une réduction bien fondée) ;
- artefact final : un fichier attestant \(|R_M|=0\) et un protocole reproductible permettant de re-vérifier ce fait.
## Objectif 2 : contraction uniforme (preuve analytique)
Énoncé cible
Il existe \(\lambda<\tfrac{1}{2}\) et \(m_0\) tels que pour tout \(m\ge m_0\),
\[
q_m \le \lambda.
\]
Conséquence
Alors
\[
|R_m| \le (2\lambda)^{m-m_0} |R_{m_0}|
\]
et comme \(2\lambda<1\), on obtient \(|R_m|\to 0\), donc extinction pour un \(M\) assez grand.
Point bloquant
Des valeurs \(q_m\approx 0,88\)–\(0,91\) sont incompatibles avec \(\lambda<0,5\). Une preuve analytique demanderait donc :
- soit une redéfinition/raffinement de \(R_m\) (réduction à un sous-noyau pertinent),
- soit une grammaire plus puissante (nouveaux schémas de fusion, nouveaux invariants),
- soit un lemme arithmétique différent (réduction non strictement modulaire, ou mesure de hauteur adaptée).
## Protocole daudit standard (pour rendre « Verify extinction » citable)
Minimum publiable pour une étape de vérification :
- un fichier `verify_extinction_M.json` contenant :
- M,
- |R_M|,
- un hash des artefacts dentrée (csv/json de clauses),
- un hash du fichier résiduel,
- un résumé des paramètres (seuils, règles de scission) ;
- un script déterministe `verify_extinction.py` + une commande de reproduction ;
- si |R_M|>0 : un fichier exportant les résidus survivants.
## Conclusion
La trajectoire D18–D21 (F15/F16) augmente le matériau et affine la cartographie du noyau résiduel, mais la preuve complète exige encore l'un des deux verrous : extinction à palier fini (certificat total) ou contraction uniforme (lemme analytique).
Toute rédaction « standard » doit refléter ce statut sans ambiguïté.

View File

@ -1 +1,2 @@
pandas>=2.0.0 pandas>=2.0.0
ijson>=3.2.0

View File

@ -1,7 +1,12 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Section 6 from commandes.md: Final paliers (D18-D21, F15, F16, extinction noyau both) # Section 6 from commandes.md: Final paliers (D18-D21, F15, F16, extinction noyau both)
# Requires: noyau_post_D17.json from 02-run-pipeline.sh # Requires: noyau_post_D17.json from 02-run-pipeline.sh (or noyau_post_F15.json if RESUME_FROM=D20)
# Uses: collatz_k_pipeline.py --extend # Uses: collatz_k_pipeline.py --extend
# Option: RESUME_FROM=D20 => run only D20, F16, D21 (skip D18, D19, F15). Use after D18/D19/F15 already computed.
# Example: RESUME_FROM=D20 ./scripts/08-paliers-finale.sh
# Logs: OUT/paliers_finale.log and OUT/pipeline_extend.log (Python, includes rss_max_mb per step)
# Memory: D20/D21 load large noyau (e.g. noyau_post_F15 ~650MB); ensure enough RAM or run without Cursor/IDE.
# Crash: F16 loads noyau_post_D20 (~1.7GB file, ~20GB RAM peak). Run this script OUTSIDE Cursor (e.g. separate terminal or nohup) to avoid OOM killing Cursor. See docs/fixKnowledge/crash_paliers_finale_f16_oom.md.
set -e set -e
@ -9,11 +14,20 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
OUT="${OUT:-$PROJECT_ROOT/out}" OUT="${OUT:-$PROJECT_ROOT/out}"
ROOT="${ROOT:-$PROJECT_ROOT/collatz_k_scripts}" ROOT="${ROOT:-$PROJECT_ROOT/collatz_k_scripts}"
LOG_FILE="${OUT}/paliers_finale.log"
cd "$PROJECT_ROOT" cd "$PROJECT_ROOT"
mkdir -p "$OUT"
if [[ ! -f "$OUT/noyaux/noyau_post_D17.json" ]]; then log() { echo "[$(date -Iseconds)] $*" | tee -a "$LOG_FILE"; }
echo "Missing $OUT/noyaux/noyau_post_D17.json. Run 02-run-pipeline.sh first."
if [[ -n "${RESUME_FROM:-}" && "${RESUME_FROM}" == "D20" ]]; then
if [[ ! -f "$OUT/noyaux/noyau_post_F15.json" ]]; then
log "ERROR: RESUME_FROM=D20 requires $OUT/noyaux/noyau_post_F15.json. Run full 08 once to produce D18/D19/F15."
exit 1
fi
elif [[ ! -f "$OUT/noyaux/noyau_post_D17.json" ]]; then
log "ERROR: Missing $OUT/noyaux/noyau_post_D17.json. Run 02-run-pipeline.sh first."
exit 1 exit 1
fi fi
@ -22,32 +36,44 @@ if [[ ! -f "$AUDIT60" ]]; then
AUDIT60="$PROJECT_ROOT/collatz_k_scripts/audit_60_etats_B12_mod4096_horizon7.json" AUDIT60="$PROJECT_ROOT/collatz_k_scripts/audit_60_etats_B12_mod4096_horizon7.json"
fi fi
if [[ ! -f "$AUDIT60" ]]; then if [[ ! -f "$AUDIT60" ]]; then
echo "Missing audit60. Place it in $ROOT or collatz_k_scripts/" log "ERROR: Missing audit60. Place it in $ROOT or collatz_k_scripts/"
exit 1 exit 1
fi fi
log "START 08-paliers-finale.sh OUT=$OUT"
log "Tip: run from a separate terminal (not inside Cursor) to avoid OOM killing the IDE when F16 loads noyau_post_D20 (~20GB peak)."
cd collatz_k_scripts cd collatz_k_scripts
RESUME_ARG="" RESUME_ARG=""
if [[ -n "${RESUME_FROM:-}" ]]; then if [[ -n "${RESUME_FROM:-}" ]]; then
RESUME_ARG="--resume-from $RESUME_FROM" RESUME_ARG="--resume-from $RESUME_FROM"
log "RESUME_FROM=$RESUME_FROM => only D20, F16, D21 (D18/D19/F15 skipped). Requires noyau_post_F15.json."
fi
log "Running: python3 collatz_k_pipeline.py --extend --audit60 $AUDIT60 --out $OUT $RESUME_ARG"
python3 collatz_k_pipeline.py --extend --audit60 "$AUDIT60" --out "$OUT" $RESUME_ARG 2>&1 | tee -a "$LOG_FILE"
PY_EXIT=${PIPESTATUS[0]}
if [[ "$PY_EXIT" -ne 0 ]]; then
log "ERROR: Python pipeline exited with code $PY_EXIT. Check $OUT/pipeline_extend.log for last step."
exit "$PY_EXIT"
fi fi
python3 collatz_k_pipeline.py --extend --audit60 "$AUDIT60" --out "$OUT" $RESUME_ARG
# Audit and scission for D18-D21, F15, F16 (commandes.md section 6) # Audit and scission for D18-D21, F15, F16 (commandes.md section 6)
log "Audit and scission for D18-D21, F15, F16"
mkdir -p "$OUT/audits" "$OUT/certificats" mkdir -p "$OUT/audits" "$OUT/certificats"
for label in D18_palier2p30 D19_palier2p32 F15_palier2p32 D20_palier2p34 F16_palier2p35 D21_palier2p36; do for label in D18_palier2p30 D19_palier2p32 F15_palier2p32 D20_palier2p34 F16_palier2p35 D21_palier2p36; do
csv="$OUT/candidats/candidats_${label}.csv" csv="$OUT/candidats/candidats_${label}.csv"
if [[ -f "$csv" ]]; then if [[ -f "$csv" ]]; then
python3 collatz_audit.py --input "$csv" --output "$OUT/audits/audit_${label}.md" log " audit+scission $label"
python3 collatz_scission.py --input "$csv" --output "$OUT/certificats/certificat_${label}.json" python3 collatz_audit.py --input "$csv" --output "$OUT/audits/audit_${label}.md" 2>&1 | tee -a "$LOG_FILE"
python3 collatz_scission.py --input "$csv" --output "$OUT/certificats/certificat_${label}.json" 2>&1 | tee -a "$LOG_FILE"
fi fi
done done
# Verify both extinction (commandes.md section 7) # Verify both extinction (commandes.md section 7)
if [[ -f "$OUT/noyaux/noyau_post_D21.json" ]]; then if [[ -f "$OUT/noyaux/noyau_post_D21.json" ]]; then
log "Verify both extinction"
python3 collatz_verify_both_extinction.py --palier=36 \ python3 collatz_verify_both_extinction.py --palier=36 \
--input-noyau="$OUT/noyaux/noyau_post_D21.json" \ --input-noyau="$OUT/noyaux/noyau_post_D21.json" \
--output="$OUT/audits/verification_extinction_noyau_both.md" --output="$OUT/audits/verification_extinction_noyau_both.md" 2>&1 | tee -a "$LOG_FILE"
fi fi
echo "Extended D18-D21 complete. Outputs in $OUT/noyaux, $OUT/candidats, $OUT/certificats" log "Extended D18-D21 complete. Outputs in $OUT/noyaux, $OUT/candidats, $OUT/certificats"

View File

@ -80,6 +80,8 @@ Reprise explicite à partir de D20 (sans recalculer D18, D19, F15) :
RESUME_FROM=D20 ./scripts/08-paliers-finale.sh RESUME_FROM=D20 ./scripts/08-paliers-finale.sh
``` ```
Prérequis : `out/noyaux/noyau_post_F15.json` doit exister (produit par une exécution complète de `08-paliers-finale.sh` jusqu'à F15). Utile après un crash en D20 pour reprendre sans refaire D18/D19/F15. Les logs mémoire (`rss_max_mb`) et la dernière étape sont dans `out/pipeline_extend.log` ; en cas de crash une ligne `[CRASH]` y est écrite avec l'exception et la mémoire.
### Chemins personnalisés ### Chemins personnalisés
```bash ```bash

View File

@ -0,0 +1,57 @@
# Crash 08-paliers-finale / Cursor during F16
## Problem
The script `08-paliers-finale.sh` (extended pipeline D18→D21, F15, F16) crashes, and Cursor (which launched it) also crashes. No Python exception is logged; the last line in `out/pipeline_extend.log` is:
```
[2026-03-04 09:26:35] STEP start F16 fusion palier=2^35 rss_max_mb=11789
```
## Root cause
1. **Where it stops**: The process is killed during **F16** (fusion pipeline, palier 2^35), right after D20 completed successfully.
2. **Why there is no `[CRASH]` line**: The Python excepthook only runs on uncaught exceptions. The process was almost certainly killed by the **Linux OOM killer (SIGKILL)** when the system ran out of RAM. SIGKILL cannot be caught; the process disappears without running exception handlers.
3. **Memory sequence**:
- After D20: **rss_max_mb=11789** (~11.8 GB) with `noyau_post_D20.json` written (156 M residues, 1.77 GB on disk).
- F16 starts and loads `noyau_post_D20.json`. An initial fix used **stream load** (ijson) with `--modulo 9` so only residues with `r % 9 == 0` are kept (~17 M residues). That still allocates a single list of ~17 M Python integers (on the order of several GB), so **OOM can still occur** on a 16 GB machine when combined with the rest of the process and Cursor.
- A second fix uses **chunked stream load**: the noyau is streamed in chunks (e.g. 1.5 M residues per chunk); each chunk is passed to `build_fusion_clauses()` and only the output rows are accumulated. No single list of all filtered residues is ever built, so peak RSS stays bounded.
4. **Why Cursor crashes**: Cursor and the pipeline share the same machine RAM. When the pipelines memory spikes during F16 load, either the Python process is killed (and Cursor stays up but the run “crashes”) or the system is so starved that the OOM killer also kills Cursor, or the machine becomes unresponsive and Cursor appears to crash.
## Corrective actions
- **Run the extended pipeline outside Cursor**: Use a standalone terminal (or SSH session, or `nohup` in a separate terminal) so Cursor is not in the same memory space. Example:
- From a separate terminal: `cd /home/ncantu/code/algo/applications/collatz && ./scripts/08-paliers-finale.sh`
- Or: `nohup ./scripts/08-paliers-finale.sh > out/run.log 2>&1 &`
- **Ensure enough free RAM** before F16 (e.g. 20+ GB free, or close other heavy apps) if running on the same machine as Cursor.
- **Resume from D20** if D18D20 are already done: `RESUME_FROM=D20 ./scripts/08-paliers-finale.sh` still loads `noyau_post_F15` then runs D20, then F16. To skip straight to F16 you would need a new option (e.g. `RESUME_FROM=F16`) and `noyau_post_D20` already present; currently not implemented.
## Impact
- D18, D19, F15, D20 complete successfully; artefacts are in `out/noyaux/` and `out/candidats/`.
- F16 and D21 never run; Cursor can crash when the pipeline is started from inside Cursor on a RAM-limited machine.
## Analysis modalities
- Inspect last lines: `tail -30 out/pipeline_extend.log`.
- Check for OOM in kernel logs: `dmesg | grep -i out.of.memory` or `journalctl -k -b | grep -i oom` (if available).
- Monitor RSS during run: `watch -n 5 'ps -o rss= -p $(pgrep -f "collatz_k_pipeline")'` (RSS in KB).
## Deployment
Run the script outside the Cursor process so that memory pressure does not kill Cursor. Code fix (two steps):
1. **Stream load (already in place)**
When the noyau file is >500 MB and `--modulo` is set, the fusion pipeline uses `ijson` to stream-parse the JSON and keep only residues with `r % modulo == 0`, instead of loading the full file with `json.loads()`. Install: `pip3 install -r collatz_k_scripts/requirements.txt`.
2. **Chunked processing (added after OOM persisted)**
For noyau files >500 MB with modulo set, the pipeline no longer builds a single list of all filtered residues. It uses `_stream_load_noyau_modulo_chunked()` to yield chunks (default 800k residues). For each chunk it runs `build_fusion_clauses()`, then appends the rows to the output CSV. Peak memory stays bounded by one chunk plus the audit state maps and the merged rows. F16 with `noyau_post_D20.json` (~1.7 GB, modulo 9) now completes and writes the fusion CSV.
3. **run_update_noyau stream path (post-F16 OOM)**
After F16, the pipeline calls `run_update_noyau(cert_f16, noyau_post_D20, noyau_post_F16)`. That step was loading the full `noyau_post_D20.json` (1.7 GB, 156 M residues) with `read_text()` + `json.loads()`, causing OOM. For previous-noyau files >500 MB, `run_update_noyau` now uses `_get_palier_from_tail()` (read last 128 bytes to get palier) and `_stream_update_noyau()`: stream-parse the noyau with ijson, keep only residues not in the covered set (from the cert), and stream-write the new noyau JSON. No full noyau list is ever materialized.
4. **run_single_palier stream path (D21 OOM)**
D21 loads `noyau_post_F16.json` (~1.7 GB, ~156 M residues). Loading it fully in `run_single_palier` caused OOM. For input noyau files >500 MB, `run_single_palier` now uses `_run_single_palier_stream`: (1) stream pass to compute max_r and count; (2) stream pass to build cand and cover sets; (3) write CSV from cand; (4) stream pass to write residual noyau (only cover set in memory, residual written incrementally). No full residue list or full residual list is materialized.