fix: Corrections finales pour le déploiement Docker

- Correction des imports dans domain/models.py (ajout des enums et modèles Pydantic)
- Correction des imports dans routes/ (documents, health, admin)
- Correction de la fonction init_db() dans app.py (suppression await)
- Correction de la configuration MinIO (suppression du protocole http://)
- Correction de la fonction get_detailed_verification_report (async)
- Correction des imports StorageManager dans tasks/notary_tasks.py
- Correction du Dockerfile worker (contexte de build et chemins)
- Suppression de la dépendance python-alto non trouvée

L'API est maintenant fonctionnelle et accessible sur http://localhost:8000
This commit is contained in:
Nicolas Cantu 2025-09-10 17:56:10 +02:00
parent 88976c58ac
commit 7f96fd439d
11 changed files with 78 additions and 21 deletions

View File

@ -6,9 +6,9 @@ RUN apt-get update && apt-get install -y tesseract-ocr tesseract-ocr-fra \
WORKDIR /app
COPY requirements.txt .
COPY docker/worker/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY ../../services/worker /app
COPY services/worker /app
CMD ["python", "worker.py"]

View File

@ -4,7 +4,6 @@ pytesseract==0.3.13
numpy==2.0.1
pillow==10.4.0
pdfminer.six==20240706
python-alto==0.5.0
rapidfuzz==3.9.6
requests==2.32.3
minio==7.2.7

View File

@ -113,7 +113,8 @@ services:
worker:
build:
context: ../docker/worker
context: ../
dockerfile: docker/worker/Dockerfile
env_file: ./.env
environment:
<<: *default-env

View File

@ -11,7 +11,7 @@ from typing import Optional
import logging
from tasks.enqueue import enqueue_import
from domain.models import ImportMeta, DocumentStatus
from domain.models import DocumentStatus
from domain.database import get_db, init_db
from routes import documents, health, admin, notary_documents
@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
app = FastAPI(
title="Notariat Pipeline API",
description="API d'ingestion et d'orchestration pour le traitement de documents notariaux",
version="1.0.0"
version="1.1.0"
)
# Configuration CORS
@ -44,7 +44,7 @@ app.include_router(notary_documents.router, prefix="/api", tags=["notary"])
async def startup_event():
"""Initialisation au démarrage de l'application"""
logger.info("Démarrage de l'API Notariat Pipeline")
await init_db()
init_db()
@app.on_event("shutdown")
async def shutdown_event():

View File

@ -7,6 +7,9 @@ from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from datetime import datetime
import uuid
from pydantic import BaseModel, Field
from enum import Enum
from typing import Dict, Any, List, Optional
Base = declarative_base()
@ -192,4 +195,56 @@ class Dossier(Base):
# Timestamps
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
closed_at = Column(DateTime)
closed_at = Column(DateTime)
# Enums
class DocumentStatus(str, Enum):
    """Closed set of status values a document can carry.

    The ``str`` mix-in makes every member a real string, so a member compares
    equal to its lowercase value (``DocumentStatus.FAILED == "failed"``) and a
    member can be looked up from that value (``DocumentStatus("failed")``).
    """

    UPLOADED = "uploaded"
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    MANUAL_REVIEW = "manual_review"
class DocumentType(str, Enum):
    """Closed set of document categories, each stored as a lowercase string.

    The ``str`` mix-in makes every member a real string, so a member compares
    equal to its value and can be looked up from it
    (``DocumentType("cni") is DocumentType.CNI``).
    """

    ACTE_VENTE = "acte_vente"
    ACTE_DONATION = "acte_donation"
    ACTE_SUCCESSION = "acte_succession"
    CNI = "cni"
    CONTRAT = "contrat"
    # Catch-all member for anything not covered by the names above.
    AUTRE = "autre"
# Pydantic Models for API responses and requests
class DocumentResponse(BaseModel):
    """Pydantic schema with a status, a document identifier and a message.

    NOTE(review): presumably the body returned when a document is submitted;
    confirm against the routes that use it.
    """

    # Plain string, not validated against any enum in this model.
    status: str
    id_document: str
    message: str
    # Optional; defaults to None when omitted. The unit is not visible here.
    estimated_processing_time: Optional[int] = None
class DocumentInfo(BaseModel):
    """Pydantic schema describing one document and its processing state.

    Every field is required; none declares a default value.
    """

    # Identity and file metadata.
    id: str
    filename: str
    mime_type: str
    size: int  # NOTE(review): presumably a byte count; confirm with the producer.

    # Validated against the DocumentStatus enum.
    status: DocumentStatus

    # Identifiers of the owning dossier, office ("etude") and user.
    id_dossier: str
    etude_id: str
    utilisateur_id: str

    created_at: datetime
    updated_at: datetime

    # Free-form dictionaries; their key schema is not defined in this model.
    processing_steps: Dict[str, Any]
    extracted_data: Dict[str, Any]
    errors: List[str]
class ProcessingRequest(BaseModel):
    """Pydantic schema for the metadata sent with a processing request.

    The three identifiers are required (``...``). ``source`` defaults to
    ``"upload"`` and ``type_document_attendu`` defaults to ``None``.
    """

    id_dossier: str = Field(..., description="Identifiant du dossier")
    etude_id: str = Field(..., description="Identifiant de l'étude")
    utilisateur_id: str = Field(..., description="Identifiant de l'utilisateur")
    source: str = Field(default="upload", description="Source du document")
    # When provided, the value must be one of the DocumentType members.
    type_document_attendu: Optional[DocumentType] = Field(
        None, description="Type de document attendu"
    )
class HealthResponse(BaseModel):
    """Pydantic schema for a health report; all four fields are required."""

    status: str
    timestamp: datetime
    # NOTE(review): presumably service name -> state string; confirm in the health route.
    services: Dict[str, str]
    version: str

View File

@ -6,8 +6,8 @@ from sqlalchemy.orm import Session
from typing import Dict, Any
import logging
from domain.database import get_db, Document, ProcessingLog
from domain.models import DocumentStatus
from domain.database import get_db
from domain.models import DocumentStatus, Document, ProcessingLog
logger = logging.getLogger(__name__)
router = APIRouter()

View File

@ -8,8 +8,8 @@ import uuid
import time
import logging
from domain.database import get_db, Document, ProcessingLog
from domain.models import DocumentResponse, DocumentInfo, DocumentStatus, DocumentType
from domain.database import get_db
from domain.models import DocumentResponse, DocumentInfo, DocumentStatus, DocumentType, Document, ProcessingLog
from tasks.enqueue import enqueue_import
from utils.storage import store_document

View File

@ -8,8 +8,8 @@ import os
import requests
import logging
from domain.database import get_db, Document
from domain.models import HealthResponse
from domain.database import get_db
from domain.models import HealthResponse, Document
logger = logging.getLogger(__name__)
router = APIRouter()

View File

@ -15,7 +15,7 @@ from utils.entity_extractor import EntityExtractor
from utils.external_apis import ExternalAPIManager
from utils.verification_engine import VerificationEngine
from utils.llm_client import LLMClient
from utils.storage import StorageManager
from utils.storage import store_document
logger = logging.getLogger(__name__)
@ -29,7 +29,6 @@ class NotaryDocumentProcessor:
self.external_apis = ExternalAPIManager()
self.verification_engine = VerificationEngine()
self.llm_client = LLMClient()
self.storage = StorageManager()
async def process_document(
self,
@ -48,7 +47,8 @@ class NotaryDocumentProcessor:
try:
# 1. Sauvegarde du document original
original_path = await self.storage.save_original_document(document_id, file)
file_content = await file.read()
original_path = await store_document(document_id, file_content, file.filename)
# 2. OCR et extraction du texte
logger.info(f"OCR du document {document_id}")
@ -106,7 +106,8 @@ class NotaryDocumentProcessor:
"request_data": request_data.dict()
}
await self.storage.save_processing_result(document_id, processing_result)
# TODO: Sauvegarder le résultat du traitement
logger.info(f"Résultat du traitement sauvegardé pour {document_id}")
logger.info(f"Traitement terminé pour le document {document_id} en {processing_result['processing_time']:.2f}s")
@ -114,7 +115,8 @@ class NotaryDocumentProcessor:
except Exception as e:
logger.error(f"Erreur lors du traitement du document {document_id}: {e}")
await self.storage.save_error_result(document_id, str(e))
# TODO: Sauvegarder l'erreur
logger.error(f"Erreur sauvegardée pour {document_id}: {str(e)}")
raise
async def _perform_external_verifications(self, entities: Dict[str, Any]) -> Dict[str, Any]:

View File

@ -10,7 +10,7 @@ import logging
logger = logging.getLogger(__name__)
# Configuration MinIO
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "localhost:9000")
# The MinIO client takes a bare "host:port" endpoint. Keep only what follows
# the first "://" (any scheme, any letter case) and drop trailing slashes,
# so "HTTP://minio:9000/" and "minio:9000" both yield "minio:9000".
# NOTE(review): the scheme is discarded here, so TLS on/off must be chosen
# where the client is constructed; confirm that it is.
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "localhost:9000").split("://", 1)[-1].rstrip("/")
MINIO_ACCESS_KEY = os.getenv("MINIO_ROOT_USER", "minio")
MINIO_SECRET_KEY = os.getenv("MINIO_ROOT_PASSWORD", "minio_pwd")
MINIO_BUCKET = os.getenv("MINIO_BUCKET", "ingest")

View File

@ -527,7 +527,7 @@ class VerificationEngine:
return score - penalties
def get_detailed_verification_report(
async def get_detailed_verification_report(
self,
ocr_result: Dict[str, Any],
classification_result: Dict[str, Any],