**Motivations:**
- Make `conjoncture_collatz.md` indexable and avoid duplicated trunks.

**Root causes:**
- Multiple full copies of the formal trunk and a large non-formal imported block prevented unambiguous references.

**Correctifs:**
- Move duplicated trunks and the imported non-formal block to `conjoncture_collatz_annexes.md`.
- Make generic repeated headings unique via deterministic numbering (CSP/CE).

**Evolutions:**
- Add deterministic audit + rationalization scripts and versioned audit artefacts.

**Pages affectées:**
- applications/collatz/conjoncture_collatz.md
- applications/collatz/conjoncture_collatz_annexes.md
- applications/collatz/collatz_k_scripts/collatz_conjoncture_audit.py
- applications/collatz/collatz_k_scripts/collatz_conjoncture_rationalize.py
- docs/artefacts/collatz/conjoncture_rationalisation/*
- docs/features/collatz_conjoncture_rationalization_tooling.md
- docs/collatz_conjoncture_collatz_cartographie.md
180 lines
6.6 KiB
Python
180 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
collatz_conjoncture_rationalize.py
|
||
|
||
Deterministic, idempotent rationalization for
|
||
`applications/collatz/conjoncture_collatz.md`.
|
||
|
||
Goals:
|
||
- keep one canonical trunk (first H1 block)
|
||
- move duplicated trunks (other H1 blocks) into an annex file
|
||
- move the imported long block starting at the line "La raison mathématique n'est pas connue."
|
||
up to the first "## Branche ..." heading into the annex file
|
||
- make generic repeated headings unique by numbering:
|
||
- "## Conclusion de la section précédente" -> "... (CSP-XXX)"
|
||
- "## Conclusion de l'étape" -> "... (CE-XXX)"
|
||
|
||
The transformation preserves all text by moving it to:
|
||
`applications/collatz/conjoncture_collatz_annexes.md`.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import re
|
||
from pathlib import Path
|
||
|
||
|
||
def _read_lines(path: Path) -> list[str]:
|
||
return path.read_text(encoding="utf-8", errors="strict").splitlines()
|
||
|
||
|
||
def _write_lines(path: Path, lines: list[str]) -> None:
|
||
path.parent.mkdir(parents=True, exist_ok=True)
|
||
path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||
|
||
|
||
def _find_line_eq(lines: list[str], needle: str) -> int | None:
|
||
for i, line in enumerate(lines, start=1):
|
||
if line.strip() == needle:
|
||
return i
|
||
return None
|
||
|
||
|
||
def _find_lines_prefix(lines: list[str], prefix: str) -> list[int]:
|
||
out: list[int] = []
|
||
for i, line in enumerate(lines, start=1):
|
||
if line.startswith(prefix):
|
||
out.append(i)
|
||
return out
|
||
|
||
|
||
def _find_first_match(lines: list[str], pattern: str) -> int | None:
|
||
rx = re.compile(pattern)
|
||
for i, line in enumerate(lines, start=1):
|
||
if rx.match(line):
|
||
return i
|
||
return None
|
||
|
||
|
||
def _slice(lines: list[str], start_line: int, end_line: int) -> list[str]:
|
||
if start_line < 1 or end_line < start_line:
|
||
raise ValueError("Invalid slice bounds")
|
||
return lines[start_line - 1 : end_line]
|
||
|
||
|
||
def _replace_csp_ce_headings(lines: list[str]) -> list[str]:
|
||
csp_idx = 0
|
||
ce_idx = 0
|
||
out: list[str] = []
|
||
for line in lines:
|
||
if line.strip() == "## Conclusion de la section précédente":
|
||
csp_idx += 1
|
||
out.append(f"## Conclusion de la section précédente (CSP-{csp_idx:03d})")
|
||
continue
|
||
if line.strip() in ("## Conclusion de l'étape", "## Conclusion de l’étape"):
|
||
ce_idx += 1
|
||
out.append(f"## Conclusion de l'étape (CE-{ce_idx:03d})")
|
||
continue
|
||
out.append(line)
|
||
return out
|
||
|
||
|
||
def _plan_moved_blocks(
    h1_conj: list[int],
    vulg_start: int | None,
    branche_first: int,
) -> tuple[int, list[tuple[str, int, int]]]:
    """Compute which line ranges leave the main file.

    All numbers are 1-based. The region removed from the main file is the
    span between the canonical trunk and the line just before the first
    ``## Branche`` heading. The returned ranges partition that span exactly,
    so every removed line is written to the annex once and only once.

    Args:
        h1_conj: Ascending line numbers of the trunk H1 headings (non-empty).
        vulg_start: Line of the imported-block marker, or ``None``.
        branche_first: Line of the first ``## Branche`` heading.

    Returns:
        ``(canonical_end, moved)`` where *moved* is a list of
        ``(label, start, end)`` inclusive ranges in ascending order and
        *canonical_end* is the last line of the canonical trunk. When *moved*
        is empty, *canonical_end* is 0 and must not be used.
    """
    first_h1 = h1_conj[0]
    cut_end = branche_first - 1  # last line that may be removed from the main file

    # The imported block only counts when its marker sits between the
    # canonical H1 and the first branch heading.
    imported_start = (
        vulg_start
        if vulg_start is not None and first_h1 < vulg_start <= cut_end
        else None
    )
    # Duplicated trunks stop where the imported block starts (or at cut_end).
    trunk_limit = cut_end if imported_start is None else imported_start - 1
    # H1 headings past trunk_limit are either inside the imported block
    # (moved with it) or after the first branch (left in the main file).
    dup_starts = [n for n in h1_conj[1:] if n <= trunk_limit]

    moved: list[tuple[str, int, int]] = []
    for j, start in enumerate(dup_starts, start=1):
        # A trunk runs up to the next duplicated H1; the last one runs up to
        # trunk_limit. (Using the canonical end here made the last trunk an
        # empty range, so it was dropped from the main file without being
        # copied to the annex.)
        end = dup_starts[j] - 1 if j < len(dup_starts) else trunk_limit
        moved.append((f"Duplicated trunk #{j}", start, end))
    if imported_start is not None:
        moved.append(("Imported block (Futurs Accessibles / non-formal)", imported_start, cut_end))

    canonical_end = moved[0][1] - 1 if moved else 0
    return canonical_end, moved


def run(*, input_path: Path, annex_path: Path) -> None:
    """Rationalize *input_path* in place and write moved blocks to *annex_path*.

    The main file keeps the canonical trunk (from line 1 up to the line
    before the first moved block), gains an "Annexes" section listing what
    was moved, and continues with everything from the first ``## Branche``
    heading to the end of the file. Duplicated trunks and the imported block
    located in between are written to the annex file, which is overwritten.
    Generic conclusion headings of the resulting main file are numbered.

    If the file already looks rationalized (at most one trunk H1 and no
    imported-block marker before the first branch heading), or if nothing
    lies between the canonical trunk and the first branch heading, only the
    heading numbering is applied and the annex file is left untouched.

    Args:
        input_path: Markdown file to rewrite in place.
        annex_path: Annex file to (over)write with the moved blocks.

    Raises:
        ValueError: If the canonical H1 or the first ``## Branche`` heading
            cannot be found when a rationalization is required.
    """
    lines = _read_lines(input_path)

    h1_conj = _find_lines_prefix(lines, "# Conjecture de Collatz:")
    vulg_start = _find_line_eq(lines, "La raison mathématique n'est pas connue.")
    branche_first = _find_first_match(lines, r"^##\s+Branche\b")

    # Already rationalized if: single H1 and no vulg marker before branches.
    if len(h1_conj) <= 1 and (vulg_start is None or (branche_first is not None and vulg_start > branche_first)):
        _write_lines(input_path, _replace_csp_ce_headings(lines))
        return

    if not h1_conj:
        raise ValueError("Cannot find canonical H1 '# Conjecture de Collatz:'")
    if branche_first is None:
        raise ValueError("Cannot find first '## Branche ...' heading (needed to bound imported block)")

    canonical_end, moved = _plan_moved_blocks(h1_conj, vulg_start, branche_first)

    if not moved:
        # Nothing to relocate (e.g. extra H1 headings only after the first
        # branch). Rebuilding the file would merely duplicate content, so
        # only the heading numbering is applied.
        _write_lines(input_path, _replace_csp_ce_headings(lines))
        return

    # Build annex file (overwrite deterministically).
    # NOTE(review): any previous annex content is replaced — confirm this is
    # acceptable when the script is re-run on a file that gained new blocks.
    annex: list[str] = [
        "**Auteur** : Équipe 4NK",
        "",
        "# Annexes — `conjoncture_collatz.md` (contenu déplacé)",
        "",
        "Ce fichier contient des blocs déplacés de `applications/collatz/conjoncture_collatz.md` lors de la rationalisation.",
        "",
    ]
    for idx, (label, start, end) in enumerate(moved, start=1):
        # Line numbers in the heading refer to the original (pre-rewrite) file.
        annex.append(f"## Annexe {idx} — {label} (lignes {start}–{end})")
        annex.append("")
        annex.extend(_slice(lines, start, end))
        annex.append("")
        annex.append("---")
        annex.append("")
    _write_lines(annex_path, annex)

    # Build new main file:
    # - canonical trunk (1..canonical_end), which stops right before the
    #   first moved block so nothing moved is also kept
    # - a short section pointing at the annex file
    # - everything from the first branch heading to EOF
    kept: list[str] = []
    kept.extend(_slice(lines, 1, canonical_end))
    kept.append("")
    kept.append("## Annexes (contenu déplacé)")
    kept.append("")
    kept.append("Des blocs ont été déplacés dans un fichier annexe afin de rendre le document indexable et d’éliminer les duplications de tronc.")
    kept.append("")
    kept.append(f"- annexes : `{annex_path.name}`")
    for idx, (label, start, end) in enumerate(moved, start=1):
        kept.append(f" - Annexe {idx} : {label} (lignes {start}–{end})")
    kept.append("")
    kept.append("---")
    kept.append("")
    kept.extend(_slice(lines, branche_first, len(lines)))

    kept = _replace_csp_ce_headings(kept)
    _write_lines(input_path, kept)
|
||
|
||
|
||
def main() -> None:
    """Parse the command line and run the rationalization on resolved paths."""
    parser = argparse.ArgumentParser(
        description="Rationalize conjoncture_collatz.md into a canonical file + annexes"
    )
    # (flag, default, help) for each path option, registered in this order.
    option_specs = (
        (
            "--input",
            "applications/collatz/conjoncture_collatz.md",
            "Path to conjoncture_collatz.md",
        ),
        (
            "--annex",
            "applications/collatz/conjoncture_collatz_annexes.md",
            "Path to annex file to write (overwritten deterministically)",
        ),
    )
    for flag, default_value, help_text in option_specs:
        parser.add_argument(flag, default=default_value, help=help_text)

    cli = parser.parse_args()
    source_md = Path(cli.input).resolve()
    annex_md = Path(cli.annex).resolve()
    run(input_path=source_md, annex_path=annex_md)
|
||
|
||
|
||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||
|