algo/v0/collatz_k_scripts/collatz_k_fusion.py
Nicolas Cantu 2b99e8ff02 Skills document-improvement et scripts Collatz
**Motivations:**
- Ajout skill pour amélioration de documents en background
- Scripts et documentation Collatz

**Evolutions:**
- .cursor/skills/document-improvement/ (SKILL, reference, examples)
- v0/collatz_k_scripts/ (core, fusion, pipeline, utils, reproduce)
- v0/journal.md, v0/log.md, v0/README collatz

**Pages affectées:**
- .cursor/skills/document-improvement/
- v0/collatz_k_scripts/
- v0/journal.md, v0/log.md
2026-02-27 16:23:25 +01:00

145 lines
4.3 KiB
Python

# -*- coding: utf-8 -*-
"""
collatz_k_fusion.py
Construction de clauses de fusion (F) sur un ensemble de classes modulo 2^m.
Sorties:
- CSV exhaustif
- Markdown audit (résumé + impact par état)
"""
from __future__ import annotations
from pathlib import Path
import csv
from collections import Counter
from typing import Iterable, Dict, Tuple
from collatz_k_core import prefix_data, fusion_choice_a, delta_F, Nf_F, preimage_m
from collatz_k_utils import write_text
# Column order of the exhaustive CSV. Declared explicitly (instead of being
# derived from the first retained row) so that the header can still be written
# when no clause is retained.
_FUSION_CSV_FIELDS = (
    "classe_mod_2^m",
    "m",
    "t",
    "a",
    "A_t",
    "mot_a0..",
    "C_t",
    "y",
    "y_mod_3",
    "DeltaF",
    "Nf",
    "preimage_m",
    "etat_id",
    "base_mod_4096",
)


def _write_fusion_csv(out_csv_p: Path, hits: list) -> None:
    """Write the retained clauses to *out_csv_p*, creating parent directories.

    The header row is always written, so a run without any retained clause
    produces a header-only CSV rather than a zero-byte file.
    """
    out_csv_p.parent.mkdir(parents=True, exist_ok=True)
    with out_csv_p.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=_FUSION_CSV_FIELDS)
        writer.writeheader()
        writer.writerows(hits)


def _distribution_lines(title: str, label: str, dist: Counter, limit=None) -> list:
    """Return the Markdown lines of one distribution section.

    Keys are listed in ascending order. When *limit* is given, only the first
    *limit* keys are listed and a trailing bullet reports how many were left
    out. The section always ends with an empty line.
    """
    keys = sorted(dist)
    shown = keys if limit is None else keys[:limit]
    section = [title]
    section.extend(f"- {label} = {k} : {dist[k]}" for k in shown)
    if limit is not None and len(keys) > limit:
        section.append(f"- (… {len(keys) - limit} valeurs supplémentaires)")
    section.append("")
    return section


def _render_fusion_markdown(
    hits: list,
    domain_size: int,
    t: int,
    modulus_power: int,
    tot_state: Counter,
    state_mot7: Dict[int, str],
    csv_name: str,
) -> str:
    """Build the Markdown audit report (global summary + per-state table)."""
    # Every distribution is a projection of the retained rows, so it is
    # computed here rather than being maintained alongside the filtering loop.
    ymod_dist = Counter(row["y_mod_3"] for row in hits)
    a_dist = Counter(row["a"] for row in hits)
    A_dist = Counter(row["A_t"] for row in hits)
    Nf_dist = Counter(row["Nf"] for row in hits)
    hit_state = Counter(row["etat_id"] for row in hits)

    # Guard against an empty domain to avoid a division by zero.
    frac = (len(hits) / domain_size) if domain_size else 0.0

    lines = [
        f"# Clauses de fusion F(t={t}) au palier 2^{modulus_power}",
        "",
        "## Introduction",
        "",
        "Audit de clauses de fusion (F) sur U (impairs → impairs).",
        "",
        "## Résultats globaux",
        "",
        f"- Taille du domaine analysé : {domain_size}",
        f"- Clauses valides : {len(hits)}",
        f"- Fraction couverte : {frac}",
        "",
    ]
    lines += _distribution_lines("Distribution de y mod 3 :", "y mod 3", ymod_dist)
    lines += _distribution_lines("Distribution de a :", "a", a_dist)
    lines += _distribution_lines("Distribution de A_t :", "A_t", A_dist)
    # Only the 40 smallest N_F values are listed to keep the report readable.
    lines += _distribution_lines(
        "Distribution de N_F (premières valeurs) :", "N_F", Nf_dist, limit=40
    )

    lines.append("## Impact par état (60 états base B12)")
    lines.append("")
    header = ["état_id", "mot_7", "effectif", "touchés", "fraction"]
    lines.append("| " + " | ".join(header) + " |")
    lines.append("| " + " | ".join(["---"] * len(header)) + " |")
    # The table always lists state ids 1..60, including states that have no
    # residue in the analysed domain (reported with zero counts).
    for sid in range(1, 61):
        eff = tot_state.get(sid, 0)
        hit = hit_state.get(sid, 0)
        frac_s = (hit / eff) if eff else 0.0
        mot = state_mot7.get(sid, "")
        lines.append(f"| {sid} | {mot} | {eff} | {hit} | {frac_s} |")
    lines.append("")
    lines.append("## Fichier exhaustif")
    lines.append("")
    lines.append(f"- {csv_name}")
    return "\n".join(lines) + "\n"


def build_fusion_clauses(
    residues: Iterable[int],
    t: int,
    state_id_of_residue4096: Dict[int, int],
    state_mot7: Dict[int, str],
    out_md: str,
    out_csv: str,
    modulus_power: int,
) -> Tuple[int, int]:
    """Select the residues that yield a fusion clause and write the audit files.

    For each residue ``n0`` the function computes ``prefix_data(n0, t)`` and
    keeps the residue only when all of the following hold:

    * ``fusion_choice_a(pref.y)`` returns a value ``a`` that is not ``None``;
    * ``delta_F(pref.A, t, a)`` is strictly positive;
    * ``n0`` is at least ``Nf_F(pref.C, pref.A, t, a)``;
    * ``preimage_m(pref.y, a)`` is strictly smaller than ``n0``.

    Two files are produced: an exhaustive CSV with one row per retained
    residue (the header is written even when nothing is retained) and a
    Markdown report with global counts, value distributions and a per-state
    table for state ids 1 to 60.

    Args:
        residues: Integers to analyse. The iterable is materialised once.
        t: Value forwarded to ``prefix_data``, ``delta_F`` and ``Nf_F``; also
            reported in both outputs.
        state_id_of_residue4096: Maps ``n0 % 4096`` to a state id. Every
            residue must have an entry.
        state_mot7: Maps a state id to the text shown in the ``mot_7`` column
            of the report; missing ids are rendered as an empty cell.
        out_md: Path of the Markdown report, written through ``write_text``.
        out_csv: Path of the CSV file. Missing parent directories are created.
        modulus_power: Only used for reporting (CSV column ``m`` and report
            title).

    Returns:
        ``(number of retained clauses, number of residues analysed)``.

    Raises:
        KeyError: If ``n0 % 4096`` is absent from ``state_id_of_residue4096``.
    """
    residues = list(residues)
    hits = []
    tot_state = Counter()

    for n0 in residues:
        base = n0 % 4096
        sid = state_id_of_residue4096[base]
        # Counted before any filter so the per-state table can report the
        # fraction of each state's residues that are retained.
        tot_state[sid] += 1

        pref = prefix_data(n0, t)
        a = fusion_choice_a(pref.y)
        if a is None:
            continue
        dF = delta_F(pref.A, t, a)
        if dF <= 0:
            continue
        Nf = Nf_F(pref.C, pref.A, t, a)
        if n0 < Nf:
            continue
        m = preimage_m(pref.y, a)
        if m >= n0:
            continue

        hits.append(
            {
                "classe_mod_2^m": n0,
                "m": modulus_power,
                "t": t,
                "a": a,
                "A_t": pref.A,
                "mot_a0..": " ".join(map(str, pref.word)),
                "C_t": pref.C,
                "y": pref.y,
                "y_mod_3": pref.y % 3,
                "DeltaF": dF,
                "Nf": Nf,
                "preimage_m": m,
                "etat_id": sid,
                "base_mod_4096": base,
            }
        )

    out_csv_p = Path(out_csv)
    _write_fusion_csv(out_csv_p, hits)

    report = _render_fusion_markdown(
        hits,
        len(residues),
        t,
        modulus_power,
        tot_state,
        state_mot7,
        out_csv_p.name,
    )
    write_text(out_md, report)
    return len(hits), len(residues)