#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
collatz_analyze_prefix_diversity.py

Phase 4 experiment: For no/no residues at 2^16, sample n = r + k*2^16
and count distinct prefix words of length k_prefix. If the number of distinct
prefixes is bounded, a disjunction of D_minor clauses (one per prefix) might
be decidable.

The modulus exponent (16 by default) is configurable through ``--m``; the
results are written as a JSON report under the output directory.
"""

from __future__ import annotations

import argparse
import json
from pathlib import Path

from collatz_k_core import prefix_data


def _read_tracked_roots(path: Path) -> list[int]:
    """Return the positive odd integers listed in *path*, in file order.

    The file is decoded as strict UTF-8 and read one candidate per line.
    Everything from the first ``#`` to the end of a line is a comment.

    Parsing is deliberately best-effort: blank lines, lines that are not a
    base-10 integer, and values that are even or not positive are skipped
    silently.  Duplicate values are kept.

    Raises:
        OSError: if the file cannot be read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    roots: list[int] = []
    for line in path.read_text(encoding="utf-8", errors="strict").splitlines():
        # Dropping the comment first also covers full-line comments: they
        # leave an empty token behind.
        token = line.split("#", 1)[0].strip()
        if not token:
            continue
        try:
            value = int(token, 10)
        except ValueError:
            # Not an integer: ignore the line rather than abort the run.
            continue
        if value > 0 and value % 2 == 1:
            roots.append(value)
    return roots


def _collect_residues_no_no(tracked_roots: list[int], root_palier: int) -> list[int]:
    """Lift every root ``r0`` to the pair ``r0`` and ``r0 + 2**root_palier``.

    The result is de-duplicated and keeps first-occurrence order: for each
    root, the root itself comes first, followed by its shifted companion.

    Raises:
        ValueError: if *root_palier* is negative (invalid shift count).
    """
    step = 1 << root_palier
    lifted = (r for r0 in tracked_roots for r in (r0, r0 + step))
    # dict keys are unique and preserve insertion order.
    return list(dict.fromkeys(lifted))


def _analyze_residue(
    residue: int,
    m: int,
    k_prefix: int,
    sample_size: int,
) -> dict[str, object]:
    """Count the distinct prefix words seen along ``residue + k * 2**m``.

    Samples ``k = 0 .. sample_size - 1``; candidates that are even or not
    positive are skipped.  For each remaining ``n`` the word is taken from
    ``prefix_data(n, k_prefix).word``.

    NOTE(review): ``prefix_data`` lives in ``collatz_k_core``; its ``word``
    attribute is assumed to be a hashable, orderable sequence (it is stored
    in a set, sorted, and converted with ``list``) -- confirm there.

    Returns:
        A JSON-serialisable row with the residue, the parameters used, the
        number of distinct words and the sorted list of those words.
    """
    mod = 1 << m
    candidates = (residue + k * mod for k in range(sample_size))
    prefixes = {
        prefix_data(n, k_prefix).word
        for n in candidates
        if n > 0 and n % 2 == 1
    }
    return {
        "residue_mod_2p": residue,
        "k_prefix": k_prefix,
        "sample_size": sample_size,
        "distinct_prefix_count": len(prefixes),
        "prefixes": [list(p) for p in sorted(prefixes)],
    }


def run(
    tracked_roots_file: Path,
    m: int,
    k_prefix: int,
    sample_size: int,
    output_dir: Path,
    max_residues: int,
) -> None:
    """Analyze the tracked residues and write the JSON report.

    Roots are read from *tracked_roots_file*, lifted with palier ``m - 1``,
    truncated to the first *max_residues* entries and analyzed one by one.
    The report is written to
    ``output_dir / prefix_diversity_m{m}_k{k_prefix}_sample{sample_size}.json``
    (the directory is created if needed).

    When no residue is analyzed, the report is still written, with
    ``residues_analyzed`` equal to 0 and both distinct-prefix extrema
    reported as 0.

    Raises:
        ValueError: if ``m < 1`` (the lift needs a shift by ``m - 1``).
        OSError: if the roots file cannot be read or the report not written.
    """
    if m < 1:
        # Would otherwise surface later as "negative shift count".
        raise ValueError(f"m must be >= 1, got {m}")

    roots = _read_tracked_roots(tracked_roots_file)
    residues = _collect_residues_no_no(roots, m - 1)[:max_residues]
    rows = [_analyze_residue(r, m, k_prefix, sample_size) for r in residues]

    distinct_counts = [row["distinct_prefix_count"] for row in rows]
    obj = {
        "domain": {"m": m, "k_prefix": k_prefix, "sample_size": sample_size},
        "counts": {
            "residues_analyzed": len(residues),
            # default=0: max()/min() raise ValueError on an empty sequence.
            "max_distinct_prefixes": max(distinct_counts, default=0),
            "min_distinct_prefixes": min(distinct_counts, default=0),
        },
        "rows": rows,
    }

    out_path = output_dir / f"prefix_diversity_m{m}_k{k_prefix}_sample{sample_size}.json"
    output_dir.mkdir(parents=True, exist_ok=True)
    out_path.write_text(
        json.dumps(obj, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )


def main() -> None:
    """Parse the command line and run the experiment.

    An empty ``--tracked-roots-file`` or ``--output-dir`` falls back to the
    default location below ``--repo-root``.
    """
    ap = argparse.ArgumentParser(
        description="Analyze prefix diversity for no/no residues (Phase 4 experiment)"
    )
    ap.add_argument("--tracked-roots-file", default="")
    ap.add_argument("--repo-root", default=".")
    ap.add_argument("--m", type=int, default=16)
    ap.add_argument(
        "--k-prefix",
        type=int,
        default=11,
        help="Prefix length (k-1 for D_minor at horizon k)",
    )
    ap.add_argument("--sample-size", type=int, default=256)
    ap.add_argument("--output-dir", default="")
    ap.add_argument("--max-residues", type=int, default=64)
    args = ap.parse_args()

    repo_root = Path(args.repo_root).resolve()
    palier_dir = (
        repo_root / "docs" / "artefacts" / "collatz" / "refinement_K" / "palier2p15"
    )

    if args.tracked_roots_file.strip():
        tracked_roots_file = Path(args.tracked_roots_file).resolve()
    else:
        tracked_roots_file = (
            palier_dir
            / "incremental_D_minor"
            / "tracked_roots_lb_any_top200_mod2p15_to2p18.txt"
        )

    if args.output_dir.strip():
        output_dir = Path(args.output_dir).resolve()
    else:
        output_dir = palier_dir / "phase4_prefix_diversity"

    # argparse already converted the numeric options via type=int.
    run(
        tracked_roots_file=tracked_roots_file,
        m=args.m,
        k_prefix=args.k_prefix,
        sample_size=args.sample_size,
        output_dir=output_dir,
        max_residues=args.max_residues,
    )


if __name__ == "__main__":
    main()