#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
collatz_conjoncture_rationalize.py

Deterministic, idempotent rationalization for
`applications/collatz/conjoncture_collatz.md`.

Goals:
- keep one canonical trunk (first H1 block)
- move duplicated trunks (other H1 blocks) into an annex file
- move the imported long block starting at the line
  "La raison mathématique n'est pas connue."
  up to the first "## Branche ..." heading into the annex file
- make generic repeated headings unique by numbering:
  - "## Conclusion de la section précédente" -> "... (CSP-XXX)"
  - "## Conclusion de l'étape" -> "... (CE-XXX)"

The transformation preserves all text by moving it to:
`applications/collatz/conjoncture_collatz_annexes.md`.
"""

from __future__ import annotations

import argparse
import re
from pathlib import Path


def _read_lines(path: Path) -> list[str]:
    """Return the content of *path* as a list of lines without line endings.

    The file is decoded as strict UTF-8, so invalid bytes raise
    ``UnicodeDecodeError`` instead of being silently replaced.
    """
    return path.read_text(encoding="utf-8", errors="strict").splitlines()


def _write_lines(path: Path, lines: list[str]) -> None:
    """Write *lines* to *path* as UTF-8, newline-joined, with a final newline.

    Missing parent directories are created first.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")


def _find_line_eq(lines: list[str], needle: str) -> int | None:
    """Return the 1-based number of the first line equal to *needle*.

    Lines are compared after stripping surrounding whitespace. Returns
    ``None`` when no line matches.
    """
    for i, line in enumerate(lines, start=1):
        if line.strip() == needle:
            return i
    return None


def _find_lines_prefix(lines: list[str], prefix: str) -> list[int]:
    """Return the 1-based numbers of every line starting with *prefix*."""
    return [i for i, line in enumerate(lines, start=1) if line.startswith(prefix)]


def _find_first_match(lines: list[str], pattern: str) -> int | None:
    """Return the 1-based number of the first line matching *pattern*.

    The regular expression is anchored at the start of the line
    (``re.match`` semantics). Returns ``None`` when no line matches.
    """
    rx = re.compile(pattern)
    for i, line in enumerate(lines, start=1):
        if rx.match(line):
            return i
    return None


def _slice(lines: list[str], start_line: int, end_line: int) -> list[str]:
    """Return lines *start_line*..*end_line* (1-based, both inclusive).

    Raises:
        ValueError: if *start_line* is below 1 or *end_line* precedes it.
    """
    if start_line < 1 or end_line < start_line:
        raise ValueError("Invalid slice bounds")
    return lines[start_line - 1 : end_line]


def _replace_csp_ce_headings(lines: list[str]) -> list[str]:
    """Return a copy of *lines* with the generic headings numbered.

    Each bare "## Conclusion de la section précédente" heading receives a
    "(CSP-NNN)" suffix and each bare "## Conclusion de l'étape" heading
    (ASCII or typographic apostrophe) receives a "(CE-NNN)" suffix, counted
    separately in order of appearance. A heading that already carries a
    suffix no longer equals the bare text and is left untouched, so applying
    the function twice gives the same result as applying it once.
    """
    csp_idx = 0
    ce_idx = 0
    out: list[str] = []
    for line in lines:
        stripped = line.strip()
        if stripped == "## Conclusion de la section précédente":
            csp_idx += 1
            out.append(f"## Conclusion de la section précédente (CSP-{csp_idx:03d})")
            continue
        if stripped in ("## Conclusion de l'étape", "## Conclusion de l’étape"):
            ce_idx += 1
            out.append(f"## Conclusion de l'étape (CE-{ce_idx:03d})")
            continue
        out.append(line)
    return out


def run(*, input_path: Path, annex_path: Path) -> None:
    """Rationalize *input_path* in place and write moved blocks to *annex_path*.

    Only the part of the file located before the first "## Branche ..."
    heading is reorganized; everything from that heading to the end of the
    file is kept verbatim (apart from heading numbering):

    - the canonical trunk (line 1 up to the line before the second H1, the
      imported block or the first branch heading, whichever comes first)
      stays in the main file;
    - every further "# Conjecture de Collatz:" block located before the
      imported block / first branch heading is moved to the annex;
    - the imported block (marker line up to the line before the first branch
      heading) is moved to the annex, including any H1 it contains.

    When there is nothing to move, only the heading numbering is applied and
    the annex file is not touched.

    Args:
        input_path: Markdown file to rationalize (rewritten in place).
        annex_path: Annex file receiving the moved blocks (overwritten).

    Raises:
        ValueError: if blocks must be moved but the file has no
            "# Conjecture de Collatz:" heading or no "## Branche ..." heading.
    """
    lines = _read_lines(input_path)

    h1_conj = _find_lines_prefix(lines, "# Conjecture de Collatz:")
    vulg_start = _find_line_eq(lines, "La raison mathématique n'est pas connue.")
    branche_first = _find_first_match(lines, r"^##\s+Branche\b")

    # The imported block only counts when its marker precedes the first
    # branch heading (or when no branch heading exists, which is reported as
    # an error below).
    imported_start: int | None = None
    if vulg_start is not None and (branche_first is None or vulg_start < branche_first):
        imported_start = vulg_start

    # H1 headings at or after the first branch heading belong to the part of
    # the file that is kept verbatim, so they are not trunks to move.
    if branche_first is None:
        trunk_h1 = h1_conj
    else:
        trunk_h1 = [h for h in h1_conj if h < branche_first]

    # Already rationalized: at most one trunk and no imported block to move.
    if len(trunk_h1) <= 1 and imported_start is None:
        _write_lines(input_path, _replace_csp_ce_headings(lines))
        return

    if not h1_conj:
        raise ValueError("Cannot find canonical H1 '# Conjecture de Collatz:'")
    if branche_first is None:
        raise ValueError("Cannot find first '## Branche ...' heading (needed to bound imported block)")

    # Boundaries for rationalization.
    pre_branch_end = branche_first - 1
    # Trunks live before the imported block when there is one, otherwise
    # before the first branch heading.
    trunk_region_end = pre_branch_end if imported_start is None else imported_start - 1
    trunk_heads = [h for h in trunk_h1 if h <= trunk_region_end]
    canonical_end = (trunk_heads[1] - 1) if len(trunk_heads) >= 2 else trunk_region_end

    # Collect moved blocks (by 1-based inclusive line ranges).
    moved: list[tuple[str, int, int]] = []
    duplicate_heads = trunk_heads[1:]
    for j, start in enumerate(duplicate_heads, start=1):
        # A duplicated trunk runs up to the next duplicated trunk; the last
        # one runs up to the end of the trunk region so that no text is lost.
        end = (duplicate_heads[j] - 1) if j < len(duplicate_heads) else trunk_region_end
        moved.append((f"Duplicated trunk #{j}", start, end))

    if imported_start is not None:
        moved.append(("Imported block (Futurs Accessibles / non-formal)", imported_start, pre_branch_end))

    # Build annex file (overwrite deterministically).
    annex: list[str] = [
        "**Auteur** : Équipe 4NK",
        "",
        "# Annexes — `conjoncture_collatz.md` (contenu déplacé)",
        "",
        "Ce fichier contient des blocs déplacés de `applications/collatz/conjoncture_collatz.md` lors de la rationalisation.",
        "",
    ]
    for idx, (label, start, end) in enumerate(moved, start=1):
        annex.append(f"## Annexe {idx} — {label} (lignes {start}–{end})")
        annex.append("")
        annex.extend(_slice(lines, start, end))
        annex.append("")
        annex.append("---")
        annex.append("")
    _write_lines(annex_path, annex)

    # Build new main file:
    # - keep the canonical trunk (1..canonical_end)
    # - insert a pointer section listing what was moved to the annex
    # - append everything from the first branch heading to EOF
    kept: list[str] = []
    if canonical_end >= 1:  # empty when the imported marker is on line 1
        kept.extend(_slice(lines, 1, canonical_end))
    kept.append("")
    kept.append("## Annexes (contenu déplacé)")
    kept.append("")
    kept.append("Des blocs ont été déplacés dans un fichier annexe afin de rendre le document indexable et d’éliminer les duplications de tronc.")
    kept.append("")
    kept.append(f"- annexes : `{annex_path.name}`")
    for idx, (label, start, end) in enumerate(moved, start=1):
        kept.append(f" - Annexe {idx} : {label} (lignes {start}–{end})")
    kept.append("")
    kept.append("---")
    kept.append("")
    kept.extend(_slice(lines, branche_first, len(lines)))

    kept = _replace_csp_ce_headings(kept)
    _write_lines(input_path, kept)


def main() -> None:
    """Parse the command-line arguments and run the rationalization."""
    ap = argparse.ArgumentParser(description="Rationalize conjoncture_collatz.md into a canonical file + annexes")
    ap.add_argument(
        "--input",
        default="applications/collatz/conjoncture_collatz.md",
        help="Path to conjoncture_collatz.md",
    )
    ap.add_argument(
        "--annex",
        default="applications/collatz/conjoncture_collatz_annexes.md",
        help="Path to annex file to write (overwritten deterministically)",
    )
    args = ap.parse_args()
    run(input_path=Path(args.input).resolve(), annex_path=Path(args.annex).resolve())


if __name__ == "__main__":
    main()