**Motivations:** - Extend terminal and minorated clauses to paliers 2^19–2^24. - Add hensel chain leaves shifted variant and refinement bundles mod2p21/mod2p24. - Document grammar extension phases, y_mod3 and prefix diversity analyses. **Root causes:** - N/A (evolutions) **Correctifs:** - N/A **Evolutions:** - Add collatz_analyze_prefix_diversity.py, collatz_analyze_y_mod3_distribution.py. - Add collatz_build_hensel_chain_leaves_shifted.py, collatz_extract_residues_from_clauses_json.py. - Extend terminal_clauses_over_Sm and minorated_clauses_over_Sm to palier2p19–2p24. - Add refinement bundles bundle_mod2p15_to2p21, bundle_mod2p15_to2p24, hensel_shifted variant. - Add phase reports (option_a_extension_m24, grammar_extensions, y_mod3, prefix_diversity, clause_D_partielle). - Update README and feature docs. **Pages affectées:** - applications/collatz/collatz_k_scripts/README.md - applications/collatz/collatz_k_scripts/*.py - docs/artefacts/collatz/** - docs/features/*.md
159 lines
4.4 KiB
Python
159 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
collatz_analyze_prefix_diversity.py
|
|
|
|
Phase 4 experiment: For no/no residues at 2^16, sample n = r + k*2^16 and count
|
|
distinct prefix words of length k_prefix. If the number of distinct prefixes is
|
|
bounded, a disjunction of D_minor clauses (one per prefix) might be decidable.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from collatz_k_core import prefix_data
|
|
|
|
|
|
def _read_tracked_roots(path: Path) -> list[int]:
    """Load the positive odd integers listed in a text file, one per line.

    Blank lines, full-line ``#`` comments and trailing ``# ...`` comments are
    ignored. Lines whose remaining text is not a base-10 integer are skipped
    silently, as are values that are non-positive or even.

    Args:
        path: UTF-8 text file to read.

    Returns:
        The accepted integers in file order (duplicates are kept).
    """
    text = path.read_text(encoding="utf-8", errors="strict")
    kept: list[int] = []
    for raw_line in text.splitlines():
        # Everything from the first '#' onwards is a comment.
        token = raw_line.partition("#")[0].strip()
        if not token:
            continue
        try:
            value = int(token, 10)
        except ValueError:
            # Best-effort parsing: non-numeric lines are simply ignored.
            continue
        if value > 0 and value % 2 == 1:
            kept.append(value)
    return kept
|
|
|
|
|
|
def _collect_residues_no_no(tracked_roots: list[int], root_palier: int) -> list[int]:
    """Expand each root ``r0`` into the pair ``r0`` and ``r0 + 2**root_palier``.

    Args:
        tracked_roots: Root values to expand, processed in order.
        root_palier: Exponent of the power of two added to form the second
            member of each pair.

    Returns:
        All generated values in order of first appearance, without duplicates.
    """
    offset = 1 << root_palier
    candidates = (
        value
        for root in tracked_roots
        for value in (root, root + offset)
    )
    # dict preserves insertion order, so this is an order-stable de-duplication.
    return list(dict.fromkeys(candidates))
|
|
|
|
|
|
def _analyze_residue(
    residue: int,
    m: int,
    k_prefix: int,
    sample_size: int,
) -> dict[str, object]:
    """Count the distinct prefix words seen along ``residue + i * 2**m``.

    For ``i`` in ``range(sample_size)`` the value ``n = residue + i * 2**m`` is
    formed; only positive odd ``n`` are passed to ``prefix_data(n, k_prefix)``
    and the ``.word`` attribute of each result is collected into a set.

    Args:
        residue: Starting value of the arithmetic progression.
        m: Exponent of the power-of-two step between samples.
        k_prefix: Prefix length forwarded to ``prefix_data``.
        sample_size: Number of progression terms to try (skipped terms count).

    Returns:
        A JSON-serialisable dict holding the inputs, the number of distinct
        words, and the sorted words themselves as lists.
    """
    modulus = 1 << m
    sampled = (residue + i * modulus for i in range(sample_size))
    # The filter runs before prefix_data, so non-positive/even n never reach it.
    words = {
        prefix_data(n, k_prefix).word
        for n in sampled
        if n > 0 and n % 2 != 0
    }
    ordered_words = sorted(words)
    return {
        "residue_mod_2p": residue,
        "k_prefix": k_prefix,
        "sample_size": sample_size,
        "distinct_prefix_count": len(words),
        "prefixes": [list(word) for word in ordered_words],
    }
|
|
|
|
|
|
def run(
    tracked_roots_file: Path,
    m: int,
    k_prefix: int,
    sample_size: int,
    output_dir: Path,
    max_residues: int,
) -> None:
    """Analyze prefix diversity for the selected residues and write a JSON report.

    The roots read from ``tracked_roots_file`` are expanded with a step of
    ``2**(m - 1)``, truncated to the first ``max_residues`` entries, and each
    residue is analyzed with ``_analyze_residue``. The report is written to
    ``output_dir / "prefix_diversity_m{m}_k{k_prefix}_sample{sample_size}.json"``;
    ``output_dir`` is created if it does not exist.

    Args:
        tracked_roots_file: Text file of roots, one integer per line.
        m: Exponent of the power-of-two modulus used for sampling.
        k_prefix: Prefix length forwarded to the per-residue analysis.
        sample_size: Number of samples tried per residue.
        output_dir: Directory receiving the JSON report.
        max_residues: Upper bound on the number of residues analyzed
            (applied as a slice on the expanded residue list).

    Returns:
        None. The result is the file written to ``output_dir``.
    """
    roots = _read_tracked_roots(tracked_roots_file)
    # Roots are expanded one level below m: each r0 yields r0 and r0 + 2**(m-1).
    residues = _collect_residues_no_no(roots, m - 1)[:max_residues]

    rows: list[dict[str, object]] = [
        _analyze_residue(r, m, k_prefix, sample_size) for r in residues
    ]
    distinct_counts = [row["distinct_prefix_count"] for row in rows]

    obj = {
        "domain": {"m": m, "k_prefix": k_prefix, "sample_size": sample_size},
        "counts": {
            "residues_analyzed": len(residues),
            # default=0 keeps the report writable when no residue was selected
            # (empty roots file or max_residues == 0) instead of raising
            # ValueError on an empty sequence.
            "max_distinct_prefixes": max(distinct_counts, default=0),
            "min_distinct_prefixes": min(distinct_counts, default=0),
        },
        "rows": rows,
    }

    output_dir.mkdir(parents=True, exist_ok=True)
    out_path = output_dir / f"prefix_diversity_m{m}_k{k_prefix}_sample{sample_size}.json"
    out_path.write_text(
        json.dumps(obj, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )
|
|
|
|
|
|
def main() -> None:
    """Parse command-line options and launch the prefix-diversity analysis.

    When ``--tracked-roots-file`` or ``--output-dir`` is empty (or only
    whitespace), a default location under
    ``<repo-root>/docs/artefacts/collatz/refinement_K/palier2p15`` is used.
    """
    parser = argparse.ArgumentParser(
        description="Analyze prefix diversity for no/no residues (Phase 4 experiment)"
    )
    parser.add_argument("--tracked-roots-file", default="")
    parser.add_argument("--repo-root", default=".")
    parser.add_argument("--m", type=int, default=16)
    parser.add_argument(
        "--k-prefix",
        type=int,
        default=11,
        help="Prefix length (k-1 for D_minor at horizon k)",
    )
    parser.add_argument("--sample-size", type=int, default=256)
    parser.add_argument("--output-dir", default="")
    parser.add_argument("--max-residues", type=int, default=64)
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve()
    # Both default locations share this base directory.
    palier_dir = repo_root / "docs" / "artefacts" / "collatz" / "refinement_K" / "palier2p15"

    if args.tracked_roots_file.strip():
        tracked_roots_file = Path(args.tracked_roots_file).resolve()
    else:
        tracked_roots_file = (
            palier_dir
            / "incremental_D_minor"
            / "tracked_roots_lb_any_top200_mod2p15_to2p18.txt"
        )

    if args.output_dir.strip():
        output_dir = Path(args.output_dir).resolve()
    else:
        output_dir = palier_dir / "phase4_prefix_diversity"

    # argparse already converted the numeric options through type=int.
    run(
        tracked_roots_file=tracked_roots_file,
        m=args.m,
        k_prefix=args.k_prefix,
        sample_size=args.sample_size,
        output_dir=output_dir,
        max_residues=args.max_residues,
    )
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|