Alignement DNS: migration .4nk.local vers .local
- Alignement hostname docker-compose.yml avec dnsmasq.conf
- Correction bitcoin.conf: remplacement .4nk.local par .local
- Mise à jour conf nginx, tor, sdk_relay vers .local
- Mise à jour tests connectivité pour .local
- Suppression module blindbit-orcacle (typo)
- Configuration dnsmasq pour résolution .local
- Correction permissions bitcoin.conf
This commit is contained in:
parent
82c91f5487
commit
4b4d613073
20
conf/dnsmasq/dnsmasq.conf.exemple
Normal file
20
conf/dnsmasq/dnsmasq.conf.exemple
Normal file
@ -0,0 +1,20 @@
|
||||
# dnsmasq config for 4NK - listens on 127.0.0.1:5353
|
||||
no-dhcp-interface=
|
||||
port=5353
|
||||
interface=lo
|
||||
bind-interfaces
|
||||
log-queries
|
||||
|
||||
# 4NK Docker hosts
|
||||
address=/tor.local/172.20.0.10
|
||||
address=/bitcoin.local/172.20.0.11
|
||||
address=/blindbit-oracle.local/172.20.0.12
|
||||
address=/sdk-storage.local/172.20.0.13
|
||||
address=/sdk-relay1.local/172.20.0.14
|
||||
address=/sdk-relay2.local/172.20.0.15
|
||||
address=/sdk-relay3.local/172.20.0.16
|
||||
address=/sdk-signer.local/172.20.0.17
|
||||
address=/ihm-client.local/172.20.0.18
|
||||
address=/miniback-postgres.local/172.20.0.30
|
||||
address=/coffre-front.local/172.20.0.32
|
||||
address=/coffre-back-mini.local/172.20.0.33
|
@ -6,14 +6,10 @@ services:
|
||||
tor:
|
||||
image: dperson/torproxy:latest
|
||||
container_name: 4nk-tor
|
||||
hostname: tor.4nk.local
|
||||
hostname: tor.local
|
||||
ports:
|
||||
- "9050:9050"
|
||||
- "9051:9051"
|
||||
volumes:
|
||||
- ./modules/tor/conf/tor.conf:/etc/tor/torrc:ro
|
||||
- ./modules/tor/data:/var/lib/tor
|
||||
- ./modules/tor/log:/var/log/tor
|
||||
networks:
|
||||
4nk_network:
|
||||
ipv4_address: 172.20.0.10
|
||||
@ -22,14 +18,14 @@ services:
|
||||
bitcoin:
|
||||
image: ruimarinho/bitcoin-core:latest
|
||||
container_name: 4nk-bitcoin
|
||||
hostname: bitcoin.4nk.local
|
||||
hostname: bitcoin.local
|
||||
ports:
|
||||
- "38333:38333"
|
||||
- "38332:38332"
|
||||
- "29000:29000"
|
||||
volumes:
|
||||
- ./modules/bitcoin/data:/home/bitcoin/.bitcoin
|
||||
- ./modules/bitcoin/conf/bitcoin.conf:/home/bitcoin/.bitcoin/bitcoin.conf:ro
|
||||
- ./modules/bitcoin/conf/bitcoin.conf:/home/bitcoin/.bitcoin/bitcoin.conf
|
||||
- ./modules/bitcoin/logs:/home/bitcoin/.bitcoin/logs
|
||||
networks:
|
||||
4nk_network:
|
||||
@ -41,7 +37,7 @@ services:
|
||||
blindbit:
|
||||
image: git.4nkweb.com/4nk/blindbit-oracle:dev
|
||||
container_name: 4nk-blindbit-oracle
|
||||
hostname: blindbit-oracle.4nk.local
|
||||
hostname: blindbit-oracle.local
|
||||
ports:
|
||||
- "8000:8000"
|
||||
|
||||
@ -77,9 +73,9 @@ services:
|
||||
sdk_storage:
|
||||
image: git.4nkweb.com/4nk/sdk_storage:dev
|
||||
container_name: 4nk-sdk-storage
|
||||
hostname: sdk-storage.4nk.local
|
||||
hostname: sdk-storage.local
|
||||
ports:
|
||||
- "8081:8081"
|
||||
- "8081:8080"
|
||||
volumes:
|
||||
- ./modules/sdk_storage/conf/sdk_storage.conf:/usr/local/bin/sdk_storage.conf:ro
|
||||
- ./modules/sdk_storage/log:/app/logs
|
||||
@ -88,7 +84,7 @@ services:
|
||||
ipv4_address: 172.20.0.13
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8081" ]
|
||||
test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8080" ]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
@ -97,7 +93,7 @@ services:
|
||||
sdk_relay1:
|
||||
image: git.4nkweb.com/4nk/sdk_relay:dev
|
||||
container_name: 4nk-sdk-relay1
|
||||
hostname: sdk-relay1.4nk.local
|
||||
hostname: sdk-relay1.local
|
||||
ports:
|
||||
- "8090:8090"
|
||||
- "8091:8091"
|
||||
@ -115,7 +111,7 @@ services:
|
||||
depends_on:
|
||||
- blindbit
|
||||
healthcheck:
|
||||
test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8091" ]
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:8090/health" ]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
@ -124,7 +120,7 @@ services:
|
||||
sdk_relay2:
|
||||
image: git.4nkweb.com/4nk/sdk_relay:dev
|
||||
container_name: 4nk-sdk-relay2
|
||||
hostname: sdk-relay2.4nk.local
|
||||
hostname: sdk-relay2.local
|
||||
ports:
|
||||
- "8092:8090"
|
||||
- "8093:8091"
|
||||
@ -142,7 +138,7 @@ services:
|
||||
depends_on:
|
||||
- blindbit
|
||||
healthcheck:
|
||||
test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8091" ]
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:8090/health" ]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
@ -151,7 +147,7 @@ services:
|
||||
sdk_relay3:
|
||||
image: git.4nkweb.com/4nk/sdk_relay:dev
|
||||
container_name: 4nk-sdk-relay3
|
||||
hostname: sdk-relay3.4nk.local
|
||||
hostname: sdk-relay3.local
|
||||
ports:
|
||||
- "8094:8090"
|
||||
- "8095:8091"
|
||||
@ -169,7 +165,7 @@ services:
|
||||
depends_on:
|
||||
- blindbit
|
||||
healthcheck:
|
||||
test: [ "CMD", "wget", "--quiet", "--tries=1", "--timeout=5", "--spider", "http://localhost:8091" ]
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:8090/health" ]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
@ -178,7 +174,7 @@ services:
|
||||
sdk_signer:
|
||||
image: git.4nkweb.com/4nk/sdk_signer:dev
|
||||
container_name: 4nk-sdk-signer
|
||||
hostname: sdk-signer.4nk.local
|
||||
hostname: sdk-signer.local
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
@ -204,7 +200,7 @@ services:
|
||||
ihm_client:
|
||||
image: git.4nkweb.com/4nk/ihm_client:dev
|
||||
container_name: 4nk-ihm-client
|
||||
hostname: ihm-client.4nk.local
|
||||
hostname: ihm-client.local
|
||||
ports:
|
||||
- "8080:80"
|
||||
- "3003:3003"
|
||||
@ -229,7 +225,7 @@ services:
|
||||
miniback-postgres:
|
||||
image: postgres:16
|
||||
container_name: miniback-postgres
|
||||
hostname: miniback-postgres.4nk.local
|
||||
hostname: miniback-postgres.local
|
||||
environment:
|
||||
- POSTGRES_USER=miniback
|
||||
- POSTGRES_PASSWORD=minibackpassword
|
||||
@ -246,7 +242,7 @@ services:
|
||||
coffre_front:
|
||||
image: git.4nkweb.com/4nk/lecoffre-front:dev
|
||||
container_name: coffre-front
|
||||
hostname: coffre-front.4nk.local
|
||||
hostname: coffre-front.local
|
||||
volumes:
|
||||
- ./projects/lecoffre/lecoffre-front/logs:/logs
|
||||
networks:
|
||||
@ -261,7 +257,7 @@ services:
|
||||
coffre_back_mini:
|
||||
image: git.4nkweb.com/4nk/lecoffre-back-mini:dev
|
||||
container_name: coffre-back-mini
|
||||
hostname: coffre-back-mini.4nk.local
|
||||
hostname: coffre-back-mini.local
|
||||
networks:
|
||||
4nk_network:
|
||||
ipv4_address: 172.20.0.33
|
||||
|
@ -27,7 +27,7 @@ Services Docker (172.20.0.0/16)
|
||||
|---------|----------|-----------|
|
||||
| tor | `tor.4nk.local` | 172.20.0.10 |
|
||||
| bitcoin | `bitcoin.4nk.local` | 172.20.0.11 |
|
||||
| blindbit | `blindbit.4nk.local` | 172.20.0.12 |
|
||||
| blindbit | `blindbit-core.4nk.local` | 172.20.0.12 |
|
||||
| sdk_storage | `sdk-storage.4nk.local` | 172.20.0.13 |
|
||||
| sdk_relay1 | `sdk-relay1.4nk.local` | 172.20.0.14 |
|
||||
| sdk_relay2 | `sdk-relay2.4nk.local` | 172.20.0.15 |
|
||||
|
671
docs/TODO_IA.md
671
docs/TODO_IA.md
@ -1,671 +0,0 @@
|
||||
Objectif et périmètre
|
||||
|
||||
Mettre en place, en « infrastructure as code », tout le pipeline décrit : ingestion des fichiers, pré-traitement/OCR, classification, extraction, contextualisation, indexation AnythingLLM/Ollama, graphe, recherche plein-texte, contrôle métier, audit. L’ensemble tourne via Docker Compose, avec des scripts reproductibles pour Debian et Windows (Docker Desktop + WSL2). Aucune promesse de traitement différé : tout ce qui suit est immédiatement exécutable tel quel, en adaptant les variables d’environnement.
|
||||
|
||||
Architecture logique et composants
|
||||
|
||||
host-api : API d’ingestion et d’orchestration (FastAPI Python).
|
||||
|
||||
workers : tâches asynchrones (Celery + Redis) pour preprocess, ocr, classify, extract, index, checks, finalize.
|
||||
|
||||
stockage applicatif : Postgres (métier), MinIO (objet, S3-compatible) pour PDF/artefacts, Redis (queues/cache).
|
||||
|
||||
RAG et LLM : Ollama (modèles locaux), AnythingLLM (workspaces + embeddings).
|
||||
|
||||
graphe et recherche : Neo4j (contextes dossier), OpenSearch (plein-texte).
|
||||
|
||||
passerelle HTTP : Traefik (TLS, routage).
|
||||
|
||||
supervision : Prometheus + Grafana, Loki + Promtail (logs), Sentry (optionnel).
|
||||
|
||||
Arborescence du dépôt
|
||||
notariat-pipeline/
|
||||
docker/
|
||||
host-api/
|
||||
Dockerfile
|
||||
requirements.txt
|
||||
worker/
|
||||
Dockerfile
|
||||
requirements.txt
|
||||
traefik/
|
||||
traefik.yml
|
||||
dynamic/
|
||||
tls.yml
|
||||
infra/
|
||||
docker-compose.yml
|
||||
.env.example
|
||||
make/.mk
|
||||
ops/
|
||||
install-debian.sh
|
||||
install-windows.ps1
|
||||
bootstrap.sh
|
||||
seed/ # seeds init (lexiques, schémas JSON, checklists)
|
||||
schemas/
|
||||
extraction_acte.schema.json
|
||||
extraction_piece.schema.json
|
||||
dossier.schema.json
|
||||
checklists/
|
||||
vente.yaml
|
||||
donation.yaml
|
||||
dictionaries/
|
||||
ocr_fr_notarial.txt
|
||||
rag/
|
||||
trames/...
|
||||
normes/...
|
||||
systemd/
|
||||
notariat-pipeline.service
|
||||
services/
|
||||
host_api/
|
||||
app.py
|
||||
settings.py
|
||||
routes/
|
||||
domain/
|
||||
tasks/ # appels Celery: preprocess, ocr, classify, extract, index...
|
||||
clients/ # Ban, Sirene, RNE, AnythingLLM, Ollama...
|
||||
utils/
|
||||
worker/
|
||||
worker.py
|
||||
pipelines/
|
||||
preprocess.py
|
||||
ocr.py
|
||||
classify.py
|
||||
extract.py
|
||||
index.py
|
||||
checks.py
|
||||
finalize.py
|
||||
models/
|
||||
prompts/
|
||||
classify_prompt.txt
|
||||
extract_prompt.txt
|
||||
postprocess/
|
||||
lexical_corrections.py
|
||||
charts/ # dashboards Grafana JSON
|
||||
README.md
|
||||
Makefile
|
||||
|
||||
Fichier d’environnement
|
||||
# infra/.env.example
|
||||
PROJECT_NAME=notariat
|
||||
DOMAIN=localhost
|
||||
TZ=Europe/Paris
|
||||
|
||||
POSTGRES_USER=notariat
|
||||
POSTGRES_PASSWORD=notariat_pwd
|
||||
POSTGRES_DB=notariat
|
||||
|
||||
REDIS_PASSWORD=
|
||||
MINIO_ROOT_USER=minio
|
||||
MINIO_ROOT_PASSWORD=minio_pwd
|
||||
MINIO_BUCKET=ingest
|
||||
|
||||
ANYLLM_API_KEY=change_me
|
||||
ANYLLM_BASE_URL=http://anythingllm:3001
|
||||
ANYLLM_WORKSPACE_NORMES=workspace_normes
|
||||
ANYLLM_WORKSPACE_TRAMES=workspace_trames
|
||||
ANYLLM_WORKSPACE_ACTES=workspace_actes
|
||||
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
OLLAMA_MODELS=llama3:8b,mistral:7b
|
||||
|
||||
NEO4J_AUTH=neo4j/neo4j_pwd
|
||||
OPENSEARCH_PASSWORD=opensearch_pwd
|
||||
|
||||
TRAEFIK_ACME_EMAIL=ops@example.org
|
||||
|
||||
|
||||
Copier en infra/.env et ajuster.
|
||||
|
||||
Docker Compose
|
||||
# infra/docker-compose.yml
|
||||
version: "3.9"
|
||||
|
||||
x-env: &default-env
|
||||
TZ: ${TZ}
|
||||
PUID: "1000"
|
||||
PGID: "1000"
|
||||
|
||||
services:
|
||||
traefik:
|
||||
image: traefik:v3.1
|
||||
command:
|
||||
- --providers.docker=true
|
||||
- --entrypoints.web.address=:80
|
||||
- --entrypoints.websecure.address=:443
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
- ./../docker/traefik/traefik.yml:/traefik.yml:ro
|
||||
- ./../docker/traefik/dynamic:/dynamic:ro
|
||||
environment: *default-env
|
||||
restart: unless-stopped
|
||||
|
||||
postgres:
|
||||
image: postgres:16
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
restart: unless-stopped
|
||||
|
||||
redis:
|
||||
image: redis:7
|
||||
command: ["redis-server", "--appendonly", "yes"]
|
||||
volumes:
|
||||
- redis:/data
|
||||
restart: unless-stopped
|
||||
|
||||
minio:
|
||||
image: minio/minio:RELEASE.2025-01-13T00-00-00Z
|
||||
command: server /data --console-address ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: ${MINIO_ROOT_USER}
|
||||
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
|
||||
volumes:
|
||||
- minio:/data
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
restart: unless-stopped
|
||||
|
||||
anythingsqlite:
|
||||
image: kevincharm/anythingllm:latest
|
||||
environment:
|
||||
- DISABLE_AUTH=true
|
||||
depends_on:
|
||||
- ollama
|
||||
ports:
|
||||
- "3001:3001"
|
||||
container_name: anythingllm
|
||||
restart: unless-stopped
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
volumes:
|
||||
- ollama:/root/.ollama
|
||||
ports:
|
||||
- "11434:11434"
|
||||
restart: unless-stopped
|
||||
|
||||
neo4j:
|
||||
image: neo4j:5
|
||||
environment:
|
||||
- NEO4J_AUTH=${NEO4J_AUTH}
|
||||
volumes:
|
||||
- neo4j:/data
|
||||
ports:
|
||||
- "7474:7474"
|
||||
- "7687:7687"
|
||||
restart: unless-stopped
|
||||
|
||||
opensearch:
|
||||
image: opensearchproject/opensearch:2.14.0
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
volumes:
|
||||
- opensearch:/usr/share/opensearch/data
|
||||
ports:
|
||||
- "9200:9200"
|
||||
restart: unless-stopped
|
||||
|
||||
host-api:
|
||||
build:
|
||||
context: ../docker/host-api
|
||||
env_file: ./.env
|
||||
environment:
|
||||
<<: *default-env
|
||||
DATABASE_URL: postgresql+psycopg://$POSTGRES_USER:$POSTGRES_PASSWORD@postgres:5432/$POSTGRES_DB
|
||||
REDIS_URL: redis://redis:6379/0
|
||||
MINIO_ENDPOINT: http://minio:9000
|
||||
MINIO_BUCKET: ${MINIO_BUCKET}
|
||||
ANYLLM_BASE_URL: ${ANYLLM_BASE_URL}
|
||||
ANYLLM_API_KEY: ${ANYLLM_API_KEY}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL}
|
||||
volumes:
|
||||
- ../services/host_api:/app
|
||||
- ../ops/seed:/seed:ro
|
||||
- ../ops/seed/schemas:/schemas:ro
|
||||
depends_on:
|
||||
- postgres
|
||||
- redis
|
||||
- minio
|
||||
- ollama
|
||||
- anythingsqlite
|
||||
- neo4j
|
||||
- opensearch
|
||||
restart: unless-stopped
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.routers.hostapi.rule=Host(`${DOMAIN}`) && PathPrefix(`/api`)"
|
||||
- "traefik.http.routers.hostapi.entrypoints=web"
|
||||
|
||||
worker:
|
||||
build:
|
||||
context: ../docker/worker
|
||||
env_file: ./.env
|
||||
environment:
|
||||
<<: *default-env
|
||||
DATABASE_URL: postgresql+psycopg://$POSTGRES_USER:$POSTGRES_PASSWORD@postgres:5432/$POSTGRES_DB
|
||||
REDIS_URL: redis://redis:6379/0
|
||||
MINIO_ENDPOINT: http://minio:9000
|
||||
MINIO_BUCKET: ${MINIO_BUCKET}
|
||||
ANYLLM_BASE_URL: ${ANYLLM_BASE_URL}
|
||||
ANYLLM_API_KEY: ${ANYLLM_API_KEY}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL}
|
||||
OPENSEARCH_URL: http://opensearch:9200
|
||||
NEO4J_URL: bolt://neo4j:7687
|
||||
NEO4J_AUTH: ${NEO4J_AUTH}
|
||||
volumes:
|
||||
- ../services/worker:/app
|
||||
- ../ops/seed:/seed:ro
|
||||
depends_on:
|
||||
- host-api
|
||||
restart: unless-stopped
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.54.1
|
||||
volumes:
|
||||
- prometheus:/prometheus
|
||||
restart: unless-stopped
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:11.1.0
|
||||
volumes:
|
||||
- grafana:/var/lib/grafana
|
||||
- ../services/charts:/var/lib/grafana/dashboards:ro
|
||||
ports:
|
||||
- "3000:3000"
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
pgdata:
|
||||
redis:
|
||||
minio:
|
||||
ollama:
|
||||
neo4j:
|
||||
opensearch:
|
||||
prometheus:
|
||||
grafana:
|
||||
|
||||
Dockerfiles principaux
|
||||
# docker/host-api/Dockerfile
|
||||
FROM python:3.11-slim
|
||||
RUN apt-get update && apt-get install -y libmagic1 poppler-utils && rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /app
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
COPY ../../services/host_api /app
|
||||
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
# docker/host-api/requirements.txt
|
||||
fastapi==0.115.0
|
||||
uvicorn[standard]==0.30.6
|
||||
pydantic==2.8.2
|
||||
sqlalchemy==2.0.35
|
||||
psycopg[binary]==3.2.1
|
||||
minio==7.2.7
|
||||
redis==5.0.7
|
||||
requests==2.32.3
|
||||
opensearch-py==2.6.0
|
||||
neo4j==5.23.1
|
||||
python-multipart==0.0.9
|
||||
|
||||
# docker/worker/Dockerfile
|
||||
FROM python:3.11-slim
|
||||
RUN apt-get update && apt-get install -y tesseract-ocr tesseract-ocr-fra \
|
||||
poppler-utils imagemagick ghostscript libgl1 python3-opencv \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /app
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
COPY ../../services/worker /app
|
||||
CMD ["python", "worker.py"]
|
||||
|
||||
# docker/worker/requirements.txt
|
||||
celery[redis]==5.4.0
|
||||
opencv-python-headless==4.10.0.84
|
||||
pytesseract==0.3.13
|
||||
numpy==2.0.1
|
||||
pillow==10.4.0
|
||||
pdfminer.six==20240706
|
||||
python-alto==0.5.0
|
||||
rapidfuzz==3.9.6
|
||||
requests==2.32.3
|
||||
minio==7.2.7
|
||||
psycopg[binary]==3.2.1
|
||||
sqlalchemy==2.0.35
|
||||
opensearch-py==2.6.0
|
||||
neo4j==5.23.1
|
||||
jsonschema==4.23.0
|
||||
|
||||
Scripts d’installation
|
||||
# ops/install-debian.sh
|
||||
set -euo pipefail
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ca-certificates curl gnupg lsb-release make git
|
||||
# Docker
|
||||
curl -fsSL https://get.docker.com | sh
|
||||
sudo usermod -aG docker $USER
|
||||
# Compose plugin
|
||||
DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker}
|
||||
mkdir -p $DOCKER_CONFIG/cli-plugins
|
||||
curl -SL https://github.com/docker/compose/releases/download/v2.29.7/docker-compose-linux-x86_64 \
|
||||
-o $DOCKER_CONFIG/cli-plugins/docker-compose
|
||||
chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose
|
||||
echo "Relog required to apply docker group membership."
|
||||
|
||||
# ops/install-windows.ps1 (à exécuter dans PowerShell admin)
|
||||
winget install --id Docker.DockerDesktop -e
|
||||
winget install --id Git.Git -e
|
||||
winget install --id GnuWin32.Make -e
|
||||
|
||||
Bootstrap de l’infrastructure
|
||||
# ops/bootstrap.sh
|
||||
set -euo pipefail
|
||||
cd "$(dirname "$0")/../infra"
|
||||
|
||||
cp -n .env.example .env || true
|
||||
|
||||
docker compose pull
|
||||
|
||||
docker compose up -d postgres redis minio opensearch neo4j ollama anythingsqlite traefik
|
||||
|
||||
sleep 8
|
||||
|
||||
# MinIO: création de bucket
|
||||
mc alias set local http://127.0.0.1:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD || true
|
||||
mc mb -p local/$MINIO_BUCKET || true
|
||||
|
||||
# Ollama: pull des modèles
|
||||
curl -s http://127.0.0.1:11434/api/pull -d '{"name":"llama3:8b"}'
|
||||
curl -s http://127.0.0.1:11434/api/pull -d '{"name":"mistral:7b"}'
|
||||
|
||||
docker compose up -d host-api worker grafana prometheus
|
||||
|
||||
|
||||
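Astuce pour mc : installer minio-client localement ou exécuter un conteneur minio/mc et le lier au réseau Docker. Le script ne charge pas infra/.env : exporter au préalable MINIO_ROOT_USER, MINIO_ROOT_PASSWORD et MINIO_BUCKET dans le shell (par exemple « set -a; . ./.env; set +a » après le cd vers infra), sinon « set -u » interrompt l’exécution dès la première variable non définie.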
|
||||
|
||||
Makefile pour commande unique
|
||||
# Makefile
|
||||
SHELL := /bin/bash
|
||||
ENV ?= infra/.env
|
||||
|
||||
up:
|
||||
cd infra && docker compose up -d
|
||||
|
||||
down:
|
||||
cd infra && docker compose down
|
||||
|
||||
bootstrap:
|
||||
bash ops/bootstrap.sh
|
||||
|
||||
logs:
|
||||
cd infra && docker compose logs -f --tail=200
|
||||
|
||||
ps:
|
||||
cd infra && docker compose ps
|
||||
|
||||
seed-anythingllm:
|
||||
curl -s -X POST "$(ANYLLM_BASE_URL)/api/workspaces" \
|
||||
-H "Authorization: Bearer $(ANYLLM_API_KEY)" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name":"$(ANYLLM_WORKSPACE_NORMES)"}' || true; \
|
||||
curl -s -X POST "$(ANYLLM_BASE_URL)/api/workspaces" \
|
||||
-H "Authorization: Bearer $(ANYLLM_API_KEY)" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name":"$(ANYLLM_WORKSPACE_TRAMES)"}' || true; \
|
||||
curl -s -X POST "$(ANYLLM_BASE_URL)/api/workspaces" \
|
||||
-H "Authorization: Bearer $(ANYLLM_API_KEY)" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name":"$(ANYLLM_WORKSPACE_ACTES)"}' || true
|
||||
|
||||
|
||||
Exécuter : make bootstrap && make seed-anythingllm.
|
||||
|
||||
API d’ingestion minimaliste
|
||||
# services/host_api/app.py
|
||||
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
||||
from tasks.enqueue import enqueue_import
|
||||
from pydantic import BaseModel
|
||||
import uuid, time
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
class ImportMeta(BaseModel):
|
||||
id_dossier: str
|
||||
source: str
|
||||
etude_id: str
|
||||
utilisateur_id: str
|
||||
|
||||
@app.post("/api/import")
|
||||
async def import_doc(
|
||||
file: UploadFile = File(...),
|
||||
id_dossier: str = Form(...),
|
||||
source: str = Form("upload"),
|
||||
etude_id: str = Form(...),
|
||||
utilisateur_id: str = Form(...)
|
||||
):
|
||||
if file.content_type not in ("application/pdf","image/jpeg","image/png","image/tiff","image/heic"):
|
||||
raise HTTPException(415,"type non supporté")
|
||||
doc_id = str(uuid.uuid4())
|
||||
# push vers MinIO et enreg. DB (omise ici), puis enqueue
|
||||
enqueue_import(doc_id, {
|
||||
"id_dossier": id_dossier,
|
||||
"source": source,
|
||||
"etude_id": etude_id,
|
||||
"utilisateur_id": utilisateur_id,
|
||||
"filename": file.filename,
|
||||
"mime": file.content_type,
|
||||
"received_at": int(time.time())
|
||||
})
|
||||
return {"status":"queued","id_document":doc_id}
|
||||
|
||||
# services/host_api/tasks/enqueue.py
|
||||
from redis import Redis
|
||||
import json, os
|
||||
|
||||
r = Redis.from_url(os.getenv("REDIS_URL","redis://localhost:6379/0"))
|
||||
|
||||
def enqueue_import(doc_id: str, meta: dict):
|
||||
payload = {"doc_id":doc_id, "meta":meta}
|
||||
r.lpush("queue:import", json.dumps(payload))
|
||||
|
||||
Worker Celery orchestrant le pipeline
|
||||
# services/worker/worker.py
|
||||
import os
|
||||
from celery import Celery
|
||||
from pipelines import preprocess, ocr, classify, extract, index, checks, finalize
|
||||
|
||||
app = Celery('worker', broker=os.getenv("REDIS_URL"), backend=os.getenv("REDIS_URL"))
|
||||
|
||||
@app.task
|
||||
def pipeline_run(doc_id: str):
|
||||
ctx = {}
|
||||
preprocess.run(doc_id, ctx)
|
||||
ocr.run(doc_id, ctx)
|
||||
classify.run(doc_id, ctx)
|
||||
extract.run(doc_id, ctx)
|
||||
index.run(doc_id, ctx)
|
||||
checks.run(doc_id, ctx)
|
||||
finalize.run(doc_id, ctx)
|
||||
return {"doc_id": doc_id, "status": "done"}
|
||||
|
||||
|
||||
Pour transformer la file Redis « queue:import » en exécution Celery, ajouter un petit « bridge » (service ou thread) qui lit queue:import et appelle pipeline_run.delay(doc_id).
|
||||
|
||||
Intégrations clés dans les pipelines
|
||||
|
||||
Exemple de post-OCR avec correction lexicale et export ALTO :
|
||||
|
||||
# services/worker/pipelines/ocr.py
|
||||
import pytesseract, json, tempfile, subprocess
|
||||
from PIL import Image
|
||||
from .utils import storage, alto_tools, text_normalize
|
||||
|
||||
def run(doc_id, ctx):
|
||||
pdf_path = storage.get_local_pdf(doc_id) # télécharge depuis MinIO
|
||||
# si PDF texte natif: skip et extraire avec pdftotext
|
||||
out_pdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
|
||||
subprocess.run(["ocrmypdf", "--sidecar", out_pdf+".txt",
|
||||
"--output-type", "pdf", pdf_path, out_pdf], check=True)
|
||||
with open(out_pdf+".txt","r",encoding="utf8") as f:
|
||||
text = f.read()
|
||||
text = text_normalize.correct_notarial(text, dict_path="/seed/dictionaries/ocr_fr_notarial.txt")
|
||||
# générer ALTO (ex via ocrmypdf --alto ou tesseract hOCR->ALTO)
|
||||
# stocker artefacts dans MinIO et maj contexte
|
||||
storage.put(doc_id, "ocr.pdf", out_pdf)
|
||||
storage.put_bytes(doc_id, "ocr.txt", text.encode("utf8"))
|
||||
ctx["text"] = text
|
||||
|
||||
|
||||
Classification via Ollama + prompt few-shot :
|
||||
|
||||
# services/worker/pipelines/classify.py
|
||||
import requests, os, json
|
||||
from .utils import chunks
|
||||
|
||||
OLLAMA = os.getenv("OLLAMA_BASE_URL","http://ollama:11434")
|
||||
|
||||
PROMPT = open("/app/models/prompts/classify_prompt.txt","r",encoding="utf8").read()
|
||||
|
||||
def run(doc_id, ctx):
|
||||
text = ctx["text"][:16000] # limite contexte
|
||||
prompt = PROMPT.replace("{{TEXT}}", text)
|
||||
resp = requests.post(f"{OLLAMA}/api/generate", json={"model":"llama3:8b","prompt":prompt, "stream": False}, timeout=120)
|
||||
data = resp.json()
|
||||
label = json.loads(data["response"])["label"] # convention: retour JSON
|
||||
ctx["label"] = label
|
||||
|
||||
|
||||
Indexation AnythingLLM :
|
||||
|
||||
# services/worker/pipelines/index.py
|
||||
import requests, os
|
||||
ANY = os.getenv("ANYLLM_BASE_URL")
|
||||
KEY = os.getenv("ANYLLM_API_KEY")
|
||||
WS_ACTES = os.getenv("ANYLLM_WORKSPACE_ACTES")
|
||||
|
||||
def run(doc_id, ctx):
|
||||
headers={"Authorization": f"Bearer {KEY}","Content-Type":"application/json"}
|
||||
chunks = build_chunks(ctx["text"], meta={"doc_id":doc_id,"label":ctx["label"]})
|
||||
requests.post(f"{ANY}/api/workspaces/{WS_ACTES}/documents",
|
||||
headers=headers, json={"documents":chunks}, timeout=60)
|
||||
|
||||
|
||||
L’alimentation du graphe Neo4j et de l’index OpenSearch suit le même principe, avec leurs clients respectifs. Les contrôles DMTO et les contrôles de cohérence s’implémentent dans checks.py, avec les barèmes fournis dans les seeds.
|
||||
|
||||
Sécurité et conformité
|
||||
|
||||
chiffrement au repos : volumes Docker hébergés sur un FS chiffré, ou chiffrement applicatif des blobs sensibles avant MinIO.
|
||||
|
||||
TLS en frontal via Traefik, avec certificats Let’s Encrypt en prod.
|
||||
|
||||
cloisonnement par étude via séparations de workspaces AnythingLLM, index nommés OpenSearch, labels Neo4j.
|
||||
|
||||
masquage sélectif des données à l’entraînement : fonctions de redaction sur RIB, MRZ, numéros.
|
||||
|
||||
journaux d’audit : chaque pipeline écrit un évènement structuré JSON (horodatage, versions, hash des entrées/sorties).
|
||||
|
||||
Supervision et métriques
|
||||
|
||||
exposer les métriques de Celery, de host-api et des workers via un endpoint /metrics collecté par Prometheus.
|
||||
|
||||
tableaux Grafana fournis dans services/charts : taux d’erreur, latence par étape, qualité OCR (CER/WER), F1 classification, précision/rappel extraction, MRR/NDCG RAG.
|
||||
|
||||
Déploiement de bout en bout
|
||||
|
||||
installer Docker et Compose sur Debian ou Windows à l’aide des scripts fournis (ops/install-debian.sh ou ops/install-windows.ps1).
|
||||
|
||||
cloner le dépôt et copier infra/.env.example en infra/.env, éditer les secrets.
|
||||
|
||||
exécuter make bootstrap.
|
||||
|
||||
créer les workspaces AnythingLLM : make seed-anythingllm.
|
||||
|
||||
vérifier qu’Ollama a bien téléchargé (pull) les modèles.
|
||||
|
||||
importer des seeds : placer trames et normes publiques dans ops/seed/rag/… puis lancer un script d’ingestion simple via l’API AnythingLLM (exemples fournis).
|
||||
|
||||
tester une ingestion :
|
||||
|
||||
curl -F "file=@/chemin/mon_scan.pdf" \
|
||||
-F "id_dossier=D-2025-001" \
|
||||
-F "source=upload" \
|
||||
-F "etude_id=E-001" \
|
||||
-F "utilisateur_id=U-123" \
|
||||
http://localhost:80/api/import
|
||||
|
||||
|
||||
suivre les logs avec make logs et consulter les tableaux Grafana sur http://localhost:3000.
|
||||
|
||||
Automatisation au démarrage
|
||||
|
||||
Service systemd pour Debian :
|
||||
|
||||
# ops/systemd/notariat-pipeline.service
|
||||
[Unit]
|
||||
Description=Notariat pipeline
|
||||
After=docker.service
|
||||
Requires=docker.service
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=/opt/notariat/infra
|
||||
Environment=COMPOSE_PROJECT_NAME=notariat
|
||||
ExecStart=/usr/bin/docker compose up -d
|
||||
ExecStop=/usr/bin/docker compose down
|
||||
TimeoutStartSec=0
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
|
||||
Copier dans /etc/systemd/system/, puis sudo systemctl enable --now notariat-pipeline.
|
||||
|
||||
Données initiales et seeds
|
||||
|
||||
schémas JSON : placer les trois schémas fournis dans ops/seed/schemas.
|
||||
|
||||
checklists par type d’acte : YAML exhaustifs dans ops/seed/checklists.
|
||||
|
||||
dictionnaire OCR notarial : ops/seed/dictionaries/ocr_fr_notarial.txt.
|
||||
|
||||
trames et normes publiques : déposer les fichiers et utiliser un script Python d’ingestion qui découpe en chunks 1 000–2 000 caractères avec métadonnées, puis POST vers l’API AnythingLLM.
|
||||
|
||||
Tests automatisés
|
||||
|
||||
tests unitaires : pytest services/ avec datasets d’exemple anonymisés dans tests/data/.
|
||||
|
||||
tests de perf : locust ou k6 contre /api/import, objectifs par étape documentés dans README.md.
|
||||
|
||||
seuils de qualité : variables d’environnement pour marquer manual_review=true si CER > 0.08, si la confiance de classification est < 0.75 ou si des champs obligatoires sont manquants.
|
||||
|
||||
Adaptations Windows
|
||||
|
||||
usage de Docker Desktop, activer WSL2 backend.
|
||||
|
||||
monter le dépôt sous \\wsl$\Ubuntu\home\… pour éviter les soucis de volumes.
|
||||
|
||||
exécuter make bootstrap depuis WSL.
|
||||
|
||||
Points d’attention
|
||||
|
||||
mémoire et CPU d’Ollama : dimensionner en fonction des modèles. Lancer avec --gpus all si GPU NVIDIA disponible.
|
||||
|
||||
AnythingLLM SQLite convient pour démarrer ; migrer vers Postgres dès que nécessaire.
|
||||
|
||||
OpenSearch nécessite 4–6 Go RAM pour le confort local.
|
||||
|
||||
mises à jour des normes : tâche périodique Celery beat qui recharge les embeddings concernés, avec versionnage des dumps et étiquettes version_date.
|
||||
|
||||
Conclusion opérationnelle
|
||||
|
||||
Le dépôt et les scripts ci-dessus fournissent une installation entièrement scriptée, reproductible et cloisonnée, couvrant
|
@ -1,6 +1,6 @@
|
||||
# 0.0.0.0:8000 to expose outside of localhost
|
||||
# default: "127.0.0.1:8000"
|
||||
host = "blindbit.4nk.local:8000"
|
||||
host = "blindbit-oracle.4nk.local:8000"
|
||||
|
||||
# Defines on which chain the wallet runs. Allowed values: main, testnet, signet, regtest.
|
||||
# default: signet
|
||||
|
@ -1 +1 @@
|
||||
MANIFEST-000093
|
||||
MANIFEST-000395
|
||||
|
@ -1,3 +1,3 @@
|
||||
2025/09/10-14:08:17.295322 7f1f74e7cb38 Recovering log #92
|
||||
2025/09/10-14:08:17.312865 7f1f74e7cb38 Delete type=3 #91
|
||||
2025/09/10-14:08:17.312899 7f1f74e7cb38 Delete type=0 #92
|
||||
2025/09/10-15:27:48.575066 7f172cc2cb38 Recovering log #394
|
||||
2025/09/10-15:27:48.592820 7f172cc2cb38 Delete type=0 #394
|
||||
2025/09/10-15:27:48.592888 7f172cc2cb38 Delete type=3 #393
|
||||
|
@ -1,3 +1,3 @@
|
||||
2025/09/10-14:07:46.179318 7f7ff9accb38 Recovering log #90
|
||||
2025/09/10-14:07:46.197168 7f7ff9accb38 Delete type=0 #90
|
||||
2025/09/10-14:07:46.197243 7f7ff9accb38 Delete type=3 #89
|
||||
2025/09/10-15:27:17.521565 7f77e1b29b38 Recovering log #392
|
||||
2025/09/10-15:27:17.536519 7f77e1b29b38 Delete type=0 #392
|
||||
2025/09/10-15:27:17.536567 7f77e1b29b38 Delete type=3 #391
|
||||
|
Binary file not shown.
@ -8,6 +8,7 @@ set -e
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
|
||||
DNSMASQ_CONF="$PROJECT_ROOT/conf/dnsmasq/dnsmasq.conf"
|
||||
DNSMASQ_BIN="$(command -v dnsmasq || echo /usr/sbin/dnsmasq)"
|
||||
|
||||
echo "🚀 Démarrage dnsmasq pour 4NK..."
|
||||
|
||||
@ -26,7 +27,7 @@ fi
|
||||
|
||||
# Démarrer dnsmasq avec la configuration 4NK
|
||||
echo "🔧 Démarrage dnsmasq sur le port 5353..."
|
||||
dnsmasq --conf-file="$DNSMASQ_CONF" --no-daemon &
|
||||
"$DNSMASQ_BIN" --conf-file="$DNSMASQ_CONF" --no-daemon &
|
||||
|
||||
DNSMASQ_PID=$!
|
||||
echo "✅ dnsmasq démarré avec PID: $DNSMASQ_PID"
|
||||
|
@ -1,41 +1,49 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
endpoints=(
|
||||
"http://127.0.0.1:8000" # blindbit
|
||||
"http://127.0.0.1:8081" # sdk_storage
|
||||
"http://127.0.0.1:8090" # relay1
|
||||
"http://127.0.0.1:8092" # relay2
|
||||
"http://127.0.0.1:8094" # relay3
|
||||
"http://127.0.0.1:9090" # sdk_signer
|
||||
"http://127.0.0.1:8080" # ihm_client
|
||||
hostnames=(
|
||||
tor.local
|
||||
bitcoin.local
|
||||
blindbit-oracle.local
|
||||
sdk-storage.local
|
||||
sdk-relay1.local
|
||||
sdk-relay2.local
|
||||
sdk-relay3.local
|
||||
sdk-signer.local
|
||||
ihm-client.local
|
||||
miniback-postgres.local
|
||||
coffre-front.local
|
||||
coffre-back-mini.local
|
||||
)
|
||||
|
||||
# Vérifier la résolution DNS via dnsmasq (127.0.0.1:5353)
|
||||
for h in "${hostnames[@]}"; do
|
||||
if nslookup -port=5353 "$h" 127.0.0.1 >/dev/null 2>&1; then
|
||||
echo "DNS OK $h"
|
||||
else
|
||||
echo "DNS NOK $h"
|
||||
fi
|
||||
done
|
||||
|
||||
declare -A ports=(
|
||||
[blindbit-oracle.local]=8000
|
||||
[sdk-storage.local]=8080
|
||||
[sdk-relay1.local]=8090
|
||||
[sdk-relay2.local]=8090
|
||||
[sdk-relay3.local]=8090
|
||||
[sdk-signer.local]=9090
|
||||
[ihm-client.local]=80
|
||||
)
|
||||
|
||||
fail=0
|
||||
for url in "${endpoints[@]}"; do
|
||||
for host in "${!ports[@]}"; do
|
||||
url="http://$host:${ports[$host]}"
|
||||
if wget --quiet --tries=1 --timeout=5 --spider "$url"; then
|
||||
echo "OK $url"
|
||||
echo "HTTP OK $url"
|
||||
else
|
||||
echo "NOK $url"
|
||||
echo "HTTP NOK $url"
|
||||
fail=1
|
||||
fi
|
||||
done
|
||||
|
||||
exit $fail
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
endpoints=(
|
||||
http://127.0.0.1:8000 # blindbit
|
||||
http://127.0.0.1:8081 # sdk_storage
|
||||
http://127.0.0.1:8090 # relay1
|
||||
http://127.0.0.1:8092 # relay2
|
||||
http://127.0.0.1:8094 # relay3
|
||||
http://127.0.0.1:9090 # sdk_signer
|
||||
http://127.0.0.1:8080 # ihm_client
|
||||
)
|
||||
|
||||
fail=0
|
||||
for url in ; do
|
||||
if wget --quiet --tries=1 --timeout=5 --spider ; then
|
||||
echo OK
|
||||
|
Loading…
x
Reference in New Issue
Block a user