diff --git a/conf/dnsmasq/dnsmasq.conf.exemple b/conf/dnsmasq/dnsmasq.conf.exemple new file mode 100644 index 00000000..010db38c --- /dev/null +++ b/conf/dnsmasq/dnsmasq.conf.exemple @@ -0,0 +1,20 @@ +# dnsmasq config for 4NK - listens on 127.0.0.1:5353 +no-dhcp-interface= +port=5353 +interface=lo +bind-interfaces +log-queries + +# 4NK Docker hosts +address=/tor.local/172.20.0.10 +address=/bitcoin.local/172.20.0.11 +address=/blindbit-oracle.local/172.20.0.12 +address=/sdk-storage.local/172.20.0.13 +address=/sdk-relay1.local/172.20.0.14 +address=/sdk-relay2.local/172.20.0.15 +address=/sdk-relay3.local/172.20.0.16 +address=/sdk-signer.local/172.20.0.17 +address=/ihm-client.local/172.20.0.18 +address=/miniback-postgres.local/172.20.0.30 +address=/coffre-front.local/172.20.0.32 +address=/coffre-back-mini.local/172.20.0.33 diff --git a/docker-compose.yml b/docker-compose.yml index a83439af..47bbf472 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,14 +6,10 @@ services: tor: image: dperson/torproxy:latest container_name: 4nk-tor - hostname: tor.4nk.local + hostname: tor.local ports: - "9050:9050" - "9051:9051" - volumes: - - ./modules/tor/conf/tor.conf:/etc/tor/torrc:ro - - ./modules/tor/data:/var/lib/tor - - ./modules/tor/log:/var/log/tor networks: 4nk_network: ipv4_address: 172.20.0.10 @@ -22,14 +18,14 @@ services: bitcoin: image: ruimarinho/bitcoin-core:latest container_name: 4nk-bitcoin - hostname: bitcoin.4nk.local + hostname: bitcoin.local ports: - "38333:38333" - "38332:38332" - "29000:29000" volumes: - ./modules/bitcoin/data:/home/bitcoin/.bitcoin - - ./modules/bitcoin/conf/bitcoin.conf:/home/bitcoin/.bitcoin/bitcoin.conf:ro + - ./modules/bitcoin/conf/bitcoin.conf:/home/bitcoin/.bitcoin/bitcoin.conf - ./modules/bitcoin/logs:/home/bitcoin/.bitcoin/logs networks: 4nk_network: @@ -41,7 +37,7 @@ services: blindbit: image: git.4nkweb.com/4nk/blindbit-oracle:dev container_name: 4nk-blindbit-oracle - hostname: blindbit-oracle.4nk.local + hostname: 
blindbit-oracle.local ports: - "8000:8000" @@ -77,9 +73,9 @@ services: sdk_storage: image: git.4nkweb.com/4nk/sdk_storage:dev container_name: 4nk-sdk-storage - hostname: sdk-storage.4nk.local + hostname: sdk-storage.local ports: - - "8081:8081" + - "8081:8080" volumes: - ./modules/sdk_storage/conf/sdk_storage.conf:/usr/local/bin/sdk_storage.conf:ro - ./modules/sdk_storage/log:/app/logs @@ -88,7 +84,7 @@ services: ipv4_address: 172.20.0.13 restart: unless-stopped healthcheck: - test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8081" ] + test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8080" ] interval: 30s timeout: 10s retries: 3 @@ -97,7 +93,7 @@ services: sdk_relay1: image: git.4nkweb.com/4nk/sdk_relay:dev container_name: 4nk-sdk-relay1 - hostname: sdk-relay1.4nk.local + hostname: sdk-relay1.local ports: - "8090:8090" - "8091:8091" @@ -115,7 +111,7 @@ services: depends_on: - blindbit healthcheck: - test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8091" ] + test: [ "CMD", "curl", "-f", "http://localhost:8090/health" ] interval: 30s timeout: 10s retries: 3 @@ -124,7 +120,7 @@ services: sdk_relay2: image: git.4nkweb.com/4nk/sdk_relay:dev container_name: 4nk-sdk-relay2 - hostname: sdk-relay2.4nk.local + hostname: sdk-relay2.local ports: - "8092:8090" - "8093:8091" @@ -142,7 +138,7 @@ services: depends_on: - blindbit healthcheck: - test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8091" ] + test: [ "CMD", "curl", "-f", "http://localhost:8090/health" ] interval: 30s timeout: 10s retries: 3 @@ -151,7 +147,7 @@ services: sdk_relay3: image: git.4nkweb.com/4nk/sdk_relay:dev container_name: 4nk-sdk-relay3 - hostname: sdk-relay3.4nk.local + hostname: sdk-relay3.local ports: - "8094:8090" - "8095:8091" @@ -169,7 +165,7 @@ services: depends_on: - blindbit healthcheck: - test: [ "CMD", "wget", "--quiet", 
"--tries=1", "--timeout=5", "--spider", "http://localhost:8091" ] + test: [ "CMD", "curl", "-f", "http://localhost:8090/health" ] interval: 30s timeout: 10s retries: 3 @@ -178,7 +174,7 @@ services: sdk_signer: image: git.4nkweb.com/4nk/sdk_signer:dev container_name: 4nk-sdk-signer - hostname: sdk-signer.4nk.local + hostname: sdk-signer.local ports: - "9090:9090" volumes: @@ -204,7 +200,7 @@ services: ihm_client: image: git.4nkweb.com/4nk/ihm_client:dev container_name: 4nk-ihm-client - hostname: ihm-client.4nk.local + hostname: ihm-client.local ports: - "8080:80" - "3003:3003" @@ -229,7 +225,7 @@ services: miniback-postgres: image: postgres:16 container_name: miniback-postgres - hostname: miniback-postgres.4nk.local + hostname: miniback-postgres.local environment: - POSTGRES_USER=miniback - POSTGRES_PASSWORD=minibackpassword @@ -246,7 +242,7 @@ services: coffre_front: image: git.4nkweb.com/4nk/lecoffre-front:dev container_name: coffre-front - hostname: coffre-front.4nk.local + hostname: coffre-front.local volumes: - ./projects/lecoffre/lecoffre-front/logs:/logs networks: @@ -261,7 +257,7 @@ services: coffre_back_mini: image: git.4nkweb.com/4nk/lecoffre-back-mini:dev container_name: coffre-back-mini - hostname: coffre-back-mini.4nk.local + hostname: coffre-back-mini.local networks: 4nk_network: ipv4_address: 172.20.0.33 diff --git a/docs/DNSMASQ_SETUP.md b/docs/DNSMASQ_SETUP.md index bbcbd2fc..18b627c6 100644 --- a/docs/DNSMASQ_SETUP.md +++ b/docs/DNSMASQ_SETUP.md @@ -27,7 +27,7 @@ Services Docker (172.20.0.0/16) |---------|----------|-----------| | tor | `tor.4nk.local` | 172.20.0.10 | | bitcoin | `bitcoin.4nk.local` | 172.20.0.11 | -| blindbit | `blindbit.4nk.local` | 172.20.0.12 | +| blindbit | `blindbit-oracle.local` | 172.20.0.12 | | sdk_storage | `sdk-storage.4nk.local` | 172.20.0.13 | | sdk_relay1 | `sdk-relay1.4nk.local` | 172.20.0.14 | | sdk_relay2 | `sdk-relay2.4nk.local` | 172.20.0.15 | diff --git a/docs/TODO_IA.md b/docs/TODO_IA.md deleted file mode 
100644 index a64092dd..00000000 --- a/docs/TODO_IA.md +++ /dev/null @@ -1,671 +0,0 @@ -Objectif et périmètre - -Mettre en place, en « infrastructure as code », tout le pipeline décrit : ingestion des fichiers, pré-traitement/OCR, classification, extraction, contextualisation, indexation AnythingLLM/Ollama, graphe, recherche plein-texte, contrôle métier, audit. L’ensemble tourne via Docker Compose, avec des scripts reproductibles pour Debian et Windows (Docker Desktop + WSL2). Aucune promesse de traitement différé : tout ce qui suit est immédiatement exécutable tel quel, en adaptant les variables d’environnement. - -Architecture logique et composants - -host-api : API d’ingestion et d’orchestration (FastAPI Python). - -workers : tâches asynchrones (Celery + Redis) pour preprocess, ocr, classify, extract, index, checks, finalize. - -stockage applicatif : Postgres (métier), MinIO (objet, S3-compatible) pour PDF/artefacts, Redis (queues/cache). - -RAG et LLM : Ollama (modèles locaux), AnythingLLM (workspaces + embeddings). - -graphe et recherche : Neo4j (contextes dossier), OpenSearch (plein-texte). - -passerelle HTTP : Traefik (TLS, routage). - -supervision : Prometheus + Grafana, Loki + Promtail (logs), Sentry (optionnel). - -Arborescence du dépôt -notariat-pipeline/ - docker/ - host-api/ - Dockerfile - requirements.txt - worker/ - Dockerfile - requirements.txt - traefik/ - traefik.yml - dynamic/ - tls.yml - infra/ - docker-compose.yml - .env.example - make/.mk - ops/ - install-debian.sh - install-windows.ps1 - bootstrap.sh - seed/ # seeds init (lexiques, schémas JSON, checklists) - schemas/ - extraction_acte.schema.json - extraction_piece.schema.json - dossier.schema.json - checklists/ - vente.yaml - donation.yaml - dictionaries/ - ocr_fr_notarial.txt - rag/ - trames/... - normes/... - systemd/ - notariat-pipeline.service - services/ - host_api/ - app.py - settings.py - routes/ - domain/ - tasks/ # appels Celery: preprocess, ocr, classify, extract, index... 
- clients/ # Ban, Sirene, RNE, AnythingLLM, Ollama... - utils/ - worker/ - worker.py - pipelines/ - preprocess.py - ocr.py - classify.py - extract.py - index.py - checks.py - finalize.py - models/ - prompts/ - classify_prompt.txt - extract_prompt.txt - postprocess/ - lexical_corrections.py - charts/ # dashboards Grafana JSON - README.md - Makefile - -Fichier d’environnement -# infra/.env.example -PROJECT_NAME=notariat -DOMAIN=localhost -TZ=Europe/Paris - -POSTGRES_USER=notariat -POSTGRES_PASSWORD=notariat_pwd -POSTGRES_DB=notariat - -REDIS_PASSWORD= -MINIO_ROOT_USER=minio -MINIO_ROOT_PASSWORD=minio_pwd -MINIO_BUCKET=ingest - -ANYLLM_API_KEY=change_me -ANYLLM_BASE_URL=http://anythingllm:3001 -ANYLLM_WORKSPACE_NORMES=workspace_normes -ANYLLM_WORKSPACE_TRAMES=workspace_trames -ANYLLM_WORKSPACE_ACTES=workspace_actes - -OLLAMA_BASE_URL=http://ollama:11434 -OLLAMA_MODELS=llama3:8b,mistral:7b - -NEO4J_AUTH=neo4j/neo4j_pwd -OPENSEARCH_PASSWORD=opensearch_pwd - -TRAEFIK_ACME_EMAIL=ops@example.org - - -Copier en infra/.env et ajuster. 
- -Docker Compose -# infra/docker-compose.yml -version: "3.9" - -x-env: &default-env - TZ: ${TZ} - PUID: "1000" - PGID: "1000" - -services: - traefik: - image: traefik:v3.1 - command: - - --providers.docker=true - - --entrypoints.web.address=:80 - - --entrypoints.websecure.address=:443 - ports: - - "80:80" - - "443:443" - volumes: - - /var/run/docker.sock:/var/run/docker.sock:ro - - ./../docker/traefik/traefik.yml:/traefik.yml:ro - - ./../docker/traefik/dynamic:/dynamic:ro - environment: *default-env - restart: unless-stopped - - postgres: - image: postgres:16 - environment: - POSTGRES_USER: ${POSTGRES_USER} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_DB: ${POSTGRES_DB} - volumes: - - pgdata:/var/lib/postgresql/data - restart: unless-stopped - - redis: - image: redis:7 - command: ["redis-server", "--appendonly", "yes"] - volumes: - - redis:/data - restart: unless-stopped - - minio: - image: minio/minio:RELEASE.2025-01-13T00-00-00Z - command: server /data --console-address ":9001" - environment: - MINIO_ROOT_USER: ${MINIO_ROOT_USER} - MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD} - volumes: - - minio:/data - ports: - - "9000:9000" - - "9001:9001" - restart: unless-stopped - - anythingsqlite: - image: kevincharm/anythingllm:latest - environment: - - DISABLE_AUTH=true - depends_on: - - ollama - ports: - - "3001:3001" - container_name: anythingllm - restart: unless-stopped - - ollama: - image: ollama/ollama:latest - volumes: - - ollama:/root/.ollama - ports: - - "11434:11434" - restart: unless-stopped - - neo4j: - image: neo4j:5 - environment: - - NEO4J_AUTH=${NEO4J_AUTH} - volumes: - - neo4j:/data - ports: - - "7474:7474" - - "7687:7687" - restart: unless-stopped - - opensearch: - image: opensearchproject/opensearch:2.14.0 - environment: - - discovery.type=single-node - - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD} - ulimits: - memlock: - soft: -1 - hard: -1 - volumes: - - opensearch:/usr/share/opensearch/data - ports: - - "9200:9200" - restart: 
unless-stopped - - host-api: - build: - context: ../docker/host-api - env_file: ./.env - environment: - <<: *default-env - DATABASE_URL: postgresql+psycopg://$POSTGRES_USER:$POSTGRES_PASSWORD@postgres:5432/$POSTGRES_DB - REDIS_URL: redis://redis:6379/0 - MINIO_ENDPOINT: http://minio:9000 - MINIO_BUCKET: ${MINIO_BUCKET} - ANYLLM_BASE_URL: ${ANYLLM_BASE_URL} - ANYLLM_API_KEY: ${ANYLLM_API_KEY} - OLLAMA_BASE_URL: ${OLLAMA_BASE_URL} - volumes: - - ../services/host_api:/app - - ../ops/seed:/seed:ro - - ../ops/seed/schemas:/schemas:ro - depends_on: - - postgres - - redis - - minio - - ollama - - anythingsqlite - - neo4j - - opensearch - restart: unless-stopped - labels: - - "traefik.enable=true" - - "traefik.http.routers.hostapi.rule=Host(`${DOMAIN}`) && PathPrefix(`/api`)" - - "traefik.http.routers.hostapi.entrypoints=web" - - worker: - build: - context: ../docker/worker - env_file: ./.env - environment: - <<: *default-env - DATABASE_URL: postgresql+psycopg://$POSTGRES_USER:$POSTGRES_PASSWORD@postgres:5432/$POSTGRES_DB - REDIS_URL: redis://redis:6379/0 - MINIO_ENDPOINT: http://minio:9000 - MINIO_BUCKET: ${MINIO_BUCKET} - ANYLLM_BASE_URL: ${ANYLLM_BASE_URL} - ANYLLM_API_KEY: ${ANYLLM_API_KEY} - OLLAMA_BASE_URL: ${OLLAMA_BASE_URL} - OPENSEARCH_URL: http://opensearch:9200 - NEO4J_URL: bolt://neo4j:7687 - NEO4J_AUTH: ${NEO4J_AUTH} - volumes: - - ../services/worker:/app - - ../ops/seed:/seed:ro - depends_on: - - host-api - restart: unless-stopped - - prometheus: - image: prom/prometheus:v2.54.1 - volumes: - - prometheus:/prometheus - restart: unless-stopped - - grafana: - image: grafana/grafana:11.1.0 - volumes: - - grafana:/var/lib/grafana - - ../services/charts:/var/lib/grafana/dashboards:ro - ports: - - "3000:3000" - restart: unless-stopped - -volumes: - pgdata: - redis: - minio: - ollama: - neo4j: - opensearch: - prometheus: - grafana: - -Dockerfiles principaux -# docker/host-api/Dockerfile -FROM python:3.11-slim -RUN apt-get update && apt-get install -y libmagic1 
poppler-utils && rm -rf /var/lib/apt/lists/* -WORKDIR /app -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt -COPY ../../services/host_api /app -CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] - -# docker/host-api/requirements.txt -fastapi==0.115.0 -uvicorn[standard]==0.30.6 -pydantic==2.8.2 -sqlalchemy==2.0.35 -psycopg[binary]==3.2.1 -minio==7.2.7 -redis==5.0.7 -requests==2.32.3 -opensearch-py==2.6.0 -neo4j==5.23.1 -python-multipart==0.0.9 - -# docker/worker/Dockerfile -FROM python:3.11-slim -RUN apt-get update && apt-get install -y tesseract-ocr tesseract-ocr-fra \ - poppler-utils imagemagick ghostscript libgl1 python3-opencv \ - && rm -rf /var/lib/apt/lists/* -WORKDIR /app -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt -COPY ../../services/worker /app -CMD ["python", "worker.py"] - -# docker/worker/requirements.txt -celery[redis]==5.4.0 -opencv-python-headless==4.10.0.84 -pytesseract==0.3.13 -numpy==2.0.1 -pillow==10.4.0 -pdfminer.six==20240706 -python-alto==0.5.0 -rapidfuzz==3.9.6 -requests==2.32.3 -minio==7.2.7 -psycopg[binary]==3.2.1 -sqlalchemy==2.0.35 -opensearch-py==2.6.0 -neo4j==5.23.1 -jsonschema==4.23.0 - -Scripts d’installation -# ops/install-debian.sh -set -euo pipefail -sudo apt-get update -sudo apt-get install -y ca-certificates curl gnupg lsb-release make git -# Docker -curl -fsSL https://get.docker.com | sh -sudo usermod -aG docker $USER -# Compose plugin -DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker} -mkdir -p $DOCKER_CONFIG/cli-plugins -curl -SL https://github.com/docker/compose/releases/download/v2.29.7/docker-compose-linux-x86_64 \ - -o $DOCKER_CONFIG/cli-plugins/docker-compose -chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose -echo "Relog required to apply docker group membership." 
- -# ops/install-windows.ps1 (à exécuter dans PowerShell admin) -winget install --id Docker.DockerDesktop -e -winget install --id Git.Git -e -winget install --id GnuWin32.Make -e - -Bootstrap de l’infrastructure -# ops/bootstrap.sh -set -euo pipefail -cd "$(dirname "$0")/../infra" - -cp -n .env.example .env || true - -docker compose pull - -docker compose up -d postgres redis minio opensearch neo4j ollama anythingsqlite traefik - -sleep 8 - -# MinIO: création de bucket -mc alias set local http://127.0.0.1:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD || true -mc mb -p local/$MINIO_BUCKET || true - -# Ollama: pull des modèles -curl -s http://127.0.0.1:11434/api/pull -d '{"name":"llama3:8b"}' -curl -s http://127.0.0.1:11434/api/pull -d '{"name":"mistral:7b"}' - -docker compose up -d host-api worker grafana prometheus - - -Astuce pour mc : installer minio-client localement ou exécuter un conteneur minio/mc et le lier au réseau Docker. - -Makefile pour commande unique -# Makefile -SHELL := /bin/bash -ENV ?= infra/.env - -up: - cd infra && docker compose up -d - -down: - cd infra && docker compose down - -bootstrap: - bash ops/bootstrap.sh - -logs: - cd infra && docker compose logs -f --tail=200 - -ps: - cd infra && docker compose ps - -seed-anythingllm: - curl -s -X POST "$(ANYLLM_BASE_URL)/api/workspaces" \ - -H "Authorization: Bearer $(ANYLLM_API_KEY)" \ - -H "Content-Type: application/json" \ - -d '{"name":"$(ANYLLM_WORKSPACE_NORMES)"}' || true; \ - curl -s -X POST "$(ANYLLM_BASE_URL)/api/workspaces" \ - -H "Authorization: Bearer $(ANYLLM_API_KEY)" \ - -H "Content-Type: application/json" \ - -d '{"name":"$(ANYLLM_WORKSPACE_TRAMES)"}' || true; \ - curl -s -X POST "$(ANYLLM_BASE_URL)/api/workspaces" \ - -H "Authorization: Bearer $(ANYLLM_API_KEY)" \ - -H "Content-Type: application/json" \ - -d '{"name":"$(ANYLLM_WORKSPACE_ACTES)"}' || true - - -Exécuter : make bootstrap && make seed-anythingllm. 
- -API d’ingestion minimaliste -# services/host_api/app.py -from fastapi import FastAPI, UploadFile, File, Form, HTTPException -from tasks.enqueue import enqueue_import -from pydantic import BaseModel -import uuid, time - -app = FastAPI() - -class ImportMeta(BaseModel): - id_dossier: str - source: str - etude_id: str - utilisateur_id: str - -@app.post("/api/import") -async def import_doc( - file: UploadFile = File(...), - id_dossier: str = Form(...), - source: str = Form("upload"), - etude_id: str = Form(...), - utilisateur_id: str = Form(...) -): - if file.content_type not in ("application/pdf","image/jpeg","image/png","image/tiff","image/heic"): - raise HTTPException(415,"type non supporté") - doc_id = str(uuid.uuid4()) - # push vers MinIO et enreg. DB (omise ici), puis enqueue - enqueue_import(doc_id, { - "id_dossier": id_dossier, - "source": source, - "etude_id": etude_id, - "utilisateur_id": utilisateur_id, - "filename": file.filename, - "mime": file.content_type, - "received_at": int(time.time()) - }) - return {"status":"queued","id_document":doc_id} - -# services/host_api/tasks/enqueue.py -from redis import Redis -import json, os - -r = Redis.from_url(os.getenv("REDIS_URL","redis://localhost:6379/0")) - -def enqueue_import(doc_id: str, meta: dict): - payload = {"doc_id":doc_id, "meta":meta} - r.lpush("queue:import", json.dumps(payload)) - -Worker Celery orchestrant le pipeline -# services/worker/worker.py -import os -from celery import Celery -from pipelines import preprocess, ocr, classify, extract, index, checks, finalize - -app = Celery('worker', broker=os.getenv("REDIS_URL"), backend=os.getenv("REDIS_URL")) - -@app.task -def pipeline_run(doc_id: str): - ctx = {} - preprocess.run(doc_id, ctx) - ocr.run(doc_id, ctx) - classify.run(doc_id, ctx) - extract.run(doc_id, ctx) - index.run(doc_id, ctx) - checks.run(doc_id, ctx) - finalize.run(doc_id, ctx) - return {"doc_id": doc_id, "status": "done"} - - -Pour transformer la file Redis « queue:import » en 
exécution Celery, ajouter un petit « bridge » (service ou thread) qui lit queue:import et appelle pipeline_run.delay(doc_id). - -Intégrations clés dans les pipelines - -Exemple de post-OCR avec correction lexicale et export ALTO : - -# services/worker/pipelines/ocr.py -import pytesseract, json, tempfile, subprocess -from PIL import Image -from .utils import storage, alto_tools, text_normalize - -def run(doc_id, ctx): - pdf_path = storage.get_local_pdf(doc_id) # télécharge depuis MinIO - # si PDF texte natif: skip et extraire avec pdftotext - out_pdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name - subprocess.run(["ocrmypdf", "--sidecar", out_pdf+".txt", - "--output-type", "pdf", pdf_path, out_pdf], check=True) - with open(out_pdf+".txt","r",encoding="utf8") as f: - text = f.read() - text = text_normalize.correct_notarial(text, dict_path="/seed/dictionaries/ocr_fr_notarial.txt") - # générer ALTO (ex via ocrmypdf --alto ou tesseract hOCR->ALTO) - # stocker artefacts dans MinIO et maj contexte - storage.put(doc_id, "ocr.pdf", out_pdf) - storage.put_bytes(doc_id, "ocr.txt", text.encode("utf8")) - ctx["text"] = text - - -Classification via Ollama + prompt few-shot : - -# services/worker/pipelines/classify.py -import requests, os, json -from .utils import chunks - -OLLAMA = os.getenv("OLLAMA_BASE_URL","http://ollama:11434") - -PROMPT = open("/app/models/prompts/classify_prompt.txt","r",encoding="utf8").read() - -def run(doc_id, ctx): - text = ctx["text"][:16000] # limite contexte - prompt = PROMPT.replace("{{TEXT}}", text) - resp = requests.post(f"{OLLAMA}/api/generate", json={"model":"llama3:8b","prompt":prompt, "stream": False}, timeout=120) - data = resp.json() - label = json.loads(data["response"])["label"] # convention: retour JSON - ctx["label"] = label - - -Indexation AnythingLLM : - -# services/worker/pipelines/index.py -import requests, os -ANY = os.getenv("ANYLLM_BASE_URL") -KEY = os.getenv("ANYLLM_API_KEY") -WS_ACTES = 
os.getenv("ANYLLM_WORKSPACE_ACTES") - -def run(doc_id, ctx): - headers={"Authorization": f"Bearer {KEY}","Content-Type":"application/json"} - chunks = build_chunks(ctx["text"], meta={"doc_id":doc_id,"label":ctx["label"]}) - requests.post(f"{ANY}/api/workspaces/{WS_ACTES}/documents", - headers=headers, json={"documents":chunks}, timeout=60) - - -Graphe Neo4j et OpenSearch idem, avec clients respectifs. Les contrôles DMTO et cohérences s’implémentent dans checks.py avec barèmes en seed. - -Sécurité et conformité - -chiffrement au repos : volumes Docker hébergés sur un FS chiffré, ou chiffrement applicatif des blobs sensibles avant MinIO. - -TLS en frontal via Traefik, avec certificats Let’s Encrypt en prod. - -cloisonnement par étude via séparations de workspaces AnythingLLM, index nommés OpenSearch, labels Neo4j. - -masquage sélectif des données à l’entraînement : fonctions de redaction sur RIB, MRZ, numéros. - -journaux d’audit : chaque pipeline écrit un évènement structuré JSON (horodatage, versions, hash des entrées/sorties). - -Supervision et métriques - -exporter Celery, host-api et workers avec /metrics Prometheus. - -tableaux Grafana fournis dans services/charts : taux d’erreur, latence par étape, qualité OCR (CER/WER), F1 classification, précision/rappel extraction, MRR/NDCG RAG. - -Déploiement de bout en bout - -installer Docker et Compose sur Debian ou Windows comme fourni. - -cloner le dépôt et copier infra/.env.example en infra/.env, éditer les secrets. - -exécuter make bootstrap. - -créer les workspaces AnythingLLM : make seed-anythingllm. - -vérifier Ollama a bien pullé les modèles. - -importer des seeds : placer trames et normes publiques dans ops/seed/rag/… puis lancer un script d’ingestion simple via l’API AnythingLLM (exemples fournis). 
- -tester une ingestion : - -curl -F "file=@/chemin/mon_scan.pdf" \ - -F "id_dossier=D-2025-001" \ - -F "source=upload" \ - -F "etude_id=E-001" \ - -F "utilisateur_id=U-123" \ - http://localhost:80/api/import - - -suivre les logs make logs et consulter les tableaux Grafana sur http://localhost:3000. - -Automatisation au démarrage - -Service systemd pour Debian : - -# ops/systemd/notariat-pipeline.service -[Unit] -Description=Notariat pipeline -After=docker.service -Requires=docker.service - -[Service] -WorkingDirectory=/opt/notariat/infra -Environment=COMPOSE_PROJECT_NAME=notariat -ExecStart=/usr/bin/docker compose up -d -ExecStop=/usr/bin/docker compose down -TimeoutStartSec=0 -RemainAfterExit=yes - -[Install] -WantedBy=multi-user.target - - -Copier dans /etc/systemd/system/, puis sudo systemctl enable --now notariat-pipeline. - -Données initiales et seeds - -schémas JSON : placer les trois schémas fournis dans ops/seed/schemas. - -checklists par type d’acte : YAML exhaustifs dans ops/seed/checklists. - -dictionnaire OCR notarial : ops/seed/dictionaries/ocr_fr_notarial.txt. - -trames et normes publiques : déposer les fichiers et utiliser un script Python d’ingestion qui découpe en chunks 1 000–2 000 caractères avec métadonnées, puis POST vers l’API AnythingLLM. - -Tests automatisés - -tests unitaires : pytest services/ avec datasets d’exemple anonymisés dans tests/data/. - -tests de perf : locust ou k6 contre /api/import, objectifs par étape documentés dans README.md. - -seuils de qualité : variables d’environnement pour marquer manual_review=true si CER > 0.08, confiance classification < 0.75, champs obligatoires manquants. - -Adaptations Windows - -usage de Docker Desktop, activer WSL2 backend. - -monter le dépôt sous \\wsl$\Ubuntu\home\… pour éviter les soucis de volumes. - -exécuter make bootstrap depuis WSL. - -Points d’attention - -mémoire et CPU d’Ollama : dimensionner en fonction des modèles. Lancer avec --gpus all si GPU NVIDIA disponible. 
- -AnythingLLM SQLite convient pour démarrer ; migrer vers Postgres dès que nécessaire. - -OpenSearch nécessite 4–6 Go RAM pour le confort local. - -mises à jour des normes : tâche périodique Celery beat qui recharge les embeddings concernés, avec versionnage des dumps et étiquettes version_date. - -Conclusion opérationnelle - -Le dépôt et les scripts ci-dessus fournissent une installation entièrement scriptée, reproductible et cloisonnée, couvrant \ No newline at end of file diff --git a/modules/blindbit-oracle/conf/blindbit.toml.exemple b/modules/blindbit-oracle/conf/blindbit.toml.exemple index 1124bbf5..00e54aaf 100644 --- a/modules/blindbit-oracle/conf/blindbit.toml.exemple +++ b/modules/blindbit-oracle/conf/blindbit.toml.exemple @@ -1,6 +1,6 @@ # 0.0.0.0:8000 to expose outside of localhost # default: "127.0.0.1:8000" -host = "blindbit.4nk.local:8000" +host = "blindbit-oracle.local:8000" # Defines on which chain the wallet runs. Allowed values: main, testnet, signet, regtest. # default: signet diff --git a/modules/sdk_signer/data/CURRENT b/modules/sdk_signer/data/CURRENT index f60e23b0..e889b4d1 100644 --- a/modules/sdk_signer/data/CURRENT +++ b/modules/sdk_signer/data/CURRENT @@ -1 +1 @@ -MANIFEST-000093 +MANIFEST-000395 diff --git a/modules/sdk_signer/data/LOG b/modules/sdk_signer/data/LOG index c06d329e..4ba80670 100644 --- a/modules/sdk_signer/data/LOG +++ b/modules/sdk_signer/data/LOG @@ -1,3 +1,3 @@ -2025/09/10-14:08:17.295322 7f1f74e7cb38 Recovering log #92 -2025/09/10-14:08:17.312865 7f1f74e7cb38 Delete type=3 #91 -2025/09/10-14:08:17.312899 7f1f74e7cb38 Delete type=0 #92 +2025/09/10-15:27:48.575066 7f172cc2cb38 Recovering log #394 +2025/09/10-15:27:48.592820 7f172cc2cb38 Delete type=0 #394 +2025/09/10-15:27:48.592888 7f172cc2cb38 Delete type=3 #393 diff --git a/modules/sdk_signer/data/LOG.old b/modules/sdk_signer/data/LOG.old index cf0100ac..d2af3857 100644 --- a/modules/sdk_signer/data/LOG.old +++ b/modules/sdk_signer/data/LOG.old @@ -1,3 +1,3 @@ 
-2025/09/10-14:07:46.179318 7f7ff9accb38 Recovering log #90 -2025/09/10-14:07:46.197168 7f7ff9accb38 Delete type=0 #90 -2025/09/10-14:07:46.197243 7f7ff9accb38 Delete type=3 #89 +2025/09/10-15:27:17.521565 7f77e1b29b38 Recovering log #392 +2025/09/10-15:27:17.536519 7f77e1b29b38 Delete type=0 #392 +2025/09/10-15:27:17.536567 7f77e1b29b38 Delete type=3 #391 diff --git a/modules/sdk_signer/data/MANIFEST-000093 b/modules/sdk_signer/data/MANIFEST-000395 similarity index 57% rename from modules/sdk_signer/data/MANIFEST-000093 rename to modules/sdk_signer/data/MANIFEST-000395 index fe4ca76a..dd2600af 100644 Binary files a/modules/sdk_signer/data/MANIFEST-000093 and b/modules/sdk_signer/data/MANIFEST-000395 differ diff --git a/scripts/start-dnsmasq.sh b/scripts/start-dnsmasq.sh index a3aaeb94..1597df9f 100755 --- a/scripts/start-dnsmasq.sh +++ b/scripts/start-dnsmasq.sh @@ -8,6 +8,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" DNSMASQ_CONF="$PROJECT_ROOT/conf/dnsmasq/dnsmasq.conf" +DNSMASQ_BIN="$(command -v dnsmasq || echo /usr/sbin/dnsmasq)" echo "🚀 Démarrage dnsmasq pour 4NK..." @@ -26,7 +27,7 @@ fi # Démarrer dnsmasq avec la configuration 4NK echo "🔧 Démarrage dnsmasq sur le port 5353..." -dnsmasq --conf-file="$DNSMASQ_CONF" --no-daemon & +"$DNSMASQ_BIN" --conf-file="$DNSMASQ_CONF" --no-daemon & DNSMASQ_PID=$! 
echo "✅ dnsmasq démarré avec PID: $DNSMASQ_PID" diff --git a/tests/connectivity/check_http_endpoints.sh b/tests/connectivity/check_http_endpoints.sh index edb04c67..42506967 100755 --- a/tests/connectivity/check_http_endpoints.sh +++ b/tests/connectivity/check_http_endpoints.sh @@ -1,41 +1,49 @@ #!/usr/bin/env bash set -euo pipefail -endpoints=( - "http://127.0.0.1:8000" # blindbit - "http://127.0.0.1:8081" # sdk_storage - "http://127.0.0.1:8090" # relay1 - "http://127.0.0.1:8092" # relay2 - "http://127.0.0.1:8094" # relay3 - "http://127.0.0.1:9090" # sdk_signer - "http://127.0.0.1:8080" # ihm_client +hostnames=( + tor.local + bitcoin.local + blindbit-oracle.local + sdk-storage.local + sdk-relay1.local + sdk-relay2.local + sdk-relay3.local + sdk-signer.local + ihm-client.local + miniback-postgres.local + coffre-front.local + coffre-back-mini.local +) + +# Vérifier la résolution DNS via dnsmasq (127.0.0.1:5353) +for h in "${hostnames[@]}"; do + if nslookup -port=5353 "$h" 127.0.0.1 >/dev/null 2>&1; then + echo "DNS OK $h" + else + echo "DNS NOK $h" + fi +done + +declare -A ports=( + [blindbit-oracle.local]=8000 + [sdk-storage.local]=8080 + [sdk-relay1.local]=8090 + [sdk-relay2.local]=8090 + [sdk-relay3.local]=8090 + [sdk-signer.local]=9090 + [ihm-client.local]=80 ) fail=0 -for url in "${endpoints[@]}"; do +for host in "${!ports[@]}"; do + url="http://$host:${ports[$host]}" if wget --quiet --tries=1 --timeout=5 --spider "$url"; then - echo "OK $url" + echo "HTTP OK $url" else - echo "NOK $url" + echo "HTTP NOK $url" fail=1 fi done exit $fail - -set -euo pipefail - -endpoints=( - http://127.0.0.1:8000 # blindbit - http://127.0.0.1:8081 # sdk_storage - http://127.0.0.1:8090 # relay1 - http://127.0.0.1:8092 # relay2 - http://127.0.0.1:8094 # relay3 - http://127.0.0.1:9090 # sdk_signer - http://127.0.0.1:8080 # ihm_client -) - -fail=0 -for url in ; do - if wget --quiet --tries=1 --timeout=5 --spider ; then - echo OK