algo/applications/collatz/collatz_k_scripts/collatz_analyze_prefix_diversity.py
ncantu a8d234c072 collatz: extend paliers to 2^24, hensel shifted, phase reports and analyses
**Motivations:**
- Extend terminal and minorated clauses to paliers 2^19–2^24.
- Add hensel chain leaves shifted variant and refinement bundles mod2p21/mod2p24.
- Document grammar extension phases, y_mod3 and prefix diversity analyses.

**Root causes:**
- N/A (evolutions)

**Correctifs:**
- N/A

**Evolutions:**
- Add collatz_analyze_prefix_diversity.py, collatz_analyze_y_mod3_distribution.py.
- Add collatz_build_hensel_chain_leaves_shifted.py, collatz_extract_residues_from_clauses_json.py.
- Extend terminal_clauses_over_Sm and minorated_clauses_over_Sm to palier2p19–2p24.
- Add refinement bundles bundle_mod2p15_to2p21, bundle_mod2p15_to2p24, hensel_shifted variant.
- Add phase reports (option_a_extension_m24, grammar_extensions, y_mod3, prefix_diversity, clause_D_partielle).
- Update README and feature docs.

**Pages affectées:**
- applications/collatz/collatz_k_scripts/README.md
- applications/collatz/collatz_k_scripts/*.py
- docs/artefacts/collatz/**
- docs/features/*.md
2026-03-10 12:36:59 +01:00

159 lines
4.4 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
collatz_analyze_prefix_diversity.py
Phase 4 experiment: for each no/no residue r modulo 2^m (default m = 16), sample
n = r + j*2^m for j = 0..sample_size-1 and count the distinct prefix words of
length k_prefix among the positive odd n. If the number of distinct prefixes is
bounded, a disjunction of D_minor clauses (one per prefix) might be decidable.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from collatz_k_core import prefix_data
def _read_tracked_roots(path: Path) -> list[int]:
    """Parse a tracked-roots text file into a list of positive odd integers.

    Each line is expected to hold one base-10 integer, optionally followed by
    a ``#`` comment. Blank lines, comment-only lines, lines that do not parse
    as an integer, and values that are non-positive or even are skipped
    silently. File order is preserved and duplicates are kept.

    Args:
        path: UTF-8 text file to read (decoded strictly).

    Returns:
        The accepted integers, in the order they appear in the file.
    """
    accepted: list[int] = []
    text = path.read_text(encoding="utf-8", errors="strict")
    for raw_line in text.splitlines():
        # Everything from the first '#' onward is a comment.
        token = raw_line.partition("#")[0].strip()
        if not token:
            continue
        try:
            value = int(token, 10)
        except ValueError:
            # Best-effort parsing: ignore lines that are not plain integers.
            continue
        if value > 0 and value % 2 == 1:
            accepted.append(value)
    return accepted
def _collect_residues_no_no(tracked_roots: list[int], root_palier: int) -> list[int]:
    """Expand every tracked root into its two lifts one bit above ``root_palier``.

    For each root ``r0`` the candidates ``r0`` and ``r0 + 2**root_palier`` are
    produced, in that order. A value that has already been produced is not
    repeated, so the result keeps first-occurrence order.

    Args:
        tracked_roots: Roots to lift.
        root_palier: Exponent of the power of two added for the upper lift.

    Returns:
        The de-duplicated list of lifted residues.
    """
    offset = 1 << root_palier
    lifted = (
        candidate
        for root in tracked_roots
        for candidate in (root, root + offset)
    )
    # dict keys keep insertion order, giving an order-preserving de-duplication.
    return list(dict.fromkeys(lifted))
def _analyze_residue(
    residue: int,
    m: int,
    k_prefix: int,
    sample_size: int,
) -> dict[str, object]:
    """Count the distinct prefix words seen along one residue class.

    The candidates ``n = residue + j * 2**m`` for ``j`` in
    ``range(sample_size)`` are enumerated; only positive odd ``n`` are kept.
    For each kept ``n`` the ``word`` attribute of ``prefix_data(n, k_prefix)``
    is collected into a set.

    Args:
        residue: Starting value of the arithmetic progression.
        m: Exponent of the power-of-two step between samples.
        k_prefix: Prefix length forwarded to ``prefix_data``.
        sample_size: Number of candidates enumerated (before filtering).

    Returns:
        A JSON-serialisable dict with the input parameters, the number of
        distinct words, and the sorted words themselves as lists.
    """
    modulus = 1 << m
    candidates = (residue + j * modulus for j in range(sample_size))
    # NOTE(review): `.word` is assumed to be a hashable, sortable tuple of ints
    # (it is stored in a set and sorted below) — confirm against collatz_k_core.
    distinct_words: set[tuple[int, ...]] = {
        prefix_data(n, k_prefix).word
        for n in candidates
        if n > 0 and n % 2 == 1
    }
    ordered_words = sorted(distinct_words)
    return {
        "residue_mod_2p": residue,
        "k_prefix": k_prefix,
        "sample_size": sample_size,
        "distinct_prefix_count": len(distinct_words),
        "prefixes": list(map(list, ordered_words)),
    }
def run(
    tracked_roots_file: Path,
    m: int,
    k_prefix: int,
    sample_size: int,
    output_dir: Path,
    max_residues: int,
) -> None:
    """Run the prefix-diversity analysis and write a JSON report.

    Tracked roots are read from ``tracked_roots_file``, lifted with
    ``_collect_residues_no_no(roots, m - 1)``, truncated with
    ``[:max_residues]``, and each remaining residue is analysed with
    ``_analyze_residue``. The report is written to
    ``output_dir / "prefix_diversity_m{m}_k{k_prefix}_sample{sample_size}.json"``;
    ``output_dir`` is created if missing.

    When no residue is selected (empty roots file or ``max_residues == 0``)
    the report is still written, with ``max_distinct_prefixes`` and
    ``min_distinct_prefixes`` both set to 0, instead of failing on an empty
    ``max()`` / ``min()``.

    Args:
        tracked_roots_file: Text file of tracked roots, one integer per line.
        m: Exponent of the modulus ``2**m`` used for sampling.
        k_prefix: Prefix length passed to the per-residue analysis.
        sample_size: Number of candidates enumerated per residue.
        output_dir: Directory receiving the JSON report.
        max_residues: Upper bound on the number of residues analysed.
    """
    roots = _read_tracked_roots(tracked_roots_file)
    # NOTE(review): presumably the tracked roots are residues modulo 2**(m-1),
    # so adding 2**(m-1) yields both lifts modulo 2**m — confirm with the
    # producer of the roots file.
    residues = _collect_residues_no_no(roots, m - 1)[:max_residues]
    rows: list[dict[str, object]] = [
        _analyze_residue(r, m, k_prefix, sample_size) for r in residues
    ]
    distinct_counts = [row["distinct_prefix_count"] for row in rows]
    obj = {
        "domain": {"m": m, "k_prefix": k_prefix, "sample_size": sample_size},
        "counts": {
            "residues_analyzed": len(residues),
            # default=0 keeps the empty case from raising ValueError.
            "max_distinct_prefixes": max(distinct_counts, default=0),
            "min_distinct_prefixes": min(distinct_counts, default=0),
        },
        "rows": rows,
    }
    out_path = output_dir / f"prefix_diversity_m{m}_k{k_prefix}_sample{sample_size}.json"
    output_dir.mkdir(parents=True, exist_ok=True)
    out_path.write_text(
        json.dumps(obj, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )
def main() -> None:
    """Command-line entry point: parse options, resolve paths, call ``run``.

    When ``--tracked-roots-file`` or ``--output-dir`` is empty (or only
    whitespace), the corresponding path defaults to a location under
    ``<repo-root>/docs/artefacts/collatz/refinement_K/palier2p15``.
    """
    parser = argparse.ArgumentParser(
        description="Analyze prefix diversity for no/no residues (Phase 4 experiment)"
    )
    parser.add_argument("--tracked-roots-file", default="")
    parser.add_argument("--repo-root", default=".")
    parser.add_argument("--m", type=int, default=16)
    parser.add_argument(
        "--k-prefix",
        type=int,
        default=11,
        help="Prefix length (k-1 for D_minor at horizon k)",
    )
    parser.add_argument("--sample-size", type=int, default=256)
    parser.add_argument("--output-dir", default="")
    parser.add_argument("--max-residues", type=int, default=64)
    opts = parser.parse_args()

    repo_root = Path(opts.repo_root).resolve()
    # Both default locations share this base directory.
    palier_dir = repo_root.joinpath(
        "docs", "artefacts", "collatz", "refinement_K", "palier2p15"
    )

    if opts.tracked_roots_file.strip():
        tracked_roots_file = Path(opts.tracked_roots_file).resolve()
    else:
        tracked_roots_file = (
            palier_dir
            / "incremental_D_minor"
            / "tracked_roots_lb_any_top200_mod2p15_to2p18.txt"
        )

    if opts.output_dir.strip():
        output_dir = Path(opts.output_dir).resolve()
    else:
        output_dir = palier_dir / "phase4_prefix_diversity"

    # argparse already yields ints for the numeric options (type=int).
    run(
        tracked_roots_file=tracked_roots_file,
        m=opts.m,
        k_prefix=opts.k_prefix,
        sample_size=opts.sample_size,
        output_dir=output_dir,
        max_residues=opts.max_residues,
    )
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()