- Correction des imports dans domain/models.py (ajout des enums et modèles Pydantic)
- Correction des imports dans routes/ (documents, health, admin)
- Correction de la fonction init_db() dans app.py (suppression await)
- Correction de la configuration MinIO (suppression du protocole http://)
- Correction de la fonction get_detailed_verification_report (async)
- Correction des imports StorageManager dans tasks/notary_tasks.py
- Correction du Dockerfile worker (contexte de build et chemins)
- Suppression de la dépendance python-alto non trouvée

L'API est maintenant fonctionnelle et accessible sur http://localhost:8000
201 lines
7.7 KiB
Python
201 lines
7.7 KiB
Python
"""
|
|
Tâches de traitement des documents notariaux
|
|
"""
|
|
import asyncio
|
|
import logging
|
|
from typing import Dict, Any, Optional
|
|
from fastapi import UploadFile
|
|
import uuid
|
|
import time
|
|
|
|
from domain.models import ProcessingRequest
|
|
from utils.ocr_processor import OCRProcessor
|
|
from utils.document_classifier import DocumentClassifier
|
|
from utils.entity_extractor import EntityExtractor
|
|
from utils.external_apis import ExternalAPIManager
|
|
from utils.verification_engine import VerificationEngine
|
|
from utils.llm_client import LLMClient
|
|
from utils.storage import store_document
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class NotaryDocumentProcessor:
    """Main processor for notarial documents.

    ``process_document`` chains the pipeline stages: storage of the original
    file, OCR, classification, entity extraction, external verifications,
    credibility scoring and LLM synthesis.
    """

    # External checks to run for each entity list found in the extraction
    # result. Each check is (result key, name of the method on
    # ``self.external_apis``, entity fields passed positionally to it).
    _VERIFICATION_PLAN = (
        ("adresses", (
            ("cadastre", "verify_cadastre", ("adresse",)),
            ("georisques", "check_georisques", ("adresse",)),
        )),
        ("identites", (
            ("bodacc", "check_bodacc", ("nom", "prenom")),
            ("gel_avoirs", "check_gel_avoirs", ("nom", "prenom")),
        )),
        ("entreprises", (
            ("infogreffe", "check_infogreffe", ("nom",)),
            ("rbe", "check_rbe", ("nom",)),
        )),
    )

    def __init__(self):
        """Instantiate one collaborator per pipeline stage."""
        self.ocr_processor = OCRProcessor()
        self.classifier = DocumentClassifier()
        self.entity_extractor = EntityExtractor()
        self.external_apis = ExternalAPIManager()
        self.verification_engine = VerificationEngine()
        self.llm_client = LLMClient()

    async def process_document(
        self,
        document_id: str,
        file: UploadFile,
        request_data: ProcessingRequest,
        reprocess: bool = False,
        force_reclassification: bool = False,
        force_reverification: bool = False
    ) -> Dict[str, Any]:
        """Run the complete processing pipeline on one notarial document.

        Args:
            document_id: Identifier used for storage and in every log line.
            file: Uploaded file; its whole content is read into memory.
            request_data: Processing request. ``type_document_attendu`` is
                passed to the classifier as the expected type, and the whole
                request is serialized into the result.
            reprocess: Accepted for interface compatibility; currently not
                used by the pipeline.
            force_reclassification: Forwarded to the classifier.
            force_reverification: Accepted for interface compatibility;
                currently not used by the pipeline.

        Returns:
            A dict with the keys ``document_id``, ``processing_time``
            (elapsed seconds), ``ocr_result``, ``classification``,
            ``entities``, ``verifications``, ``credibility_score``,
            ``synthesis``, ``timestamp`` (local time, ``%Y-%m-%d %H:%M:%S``)
            and ``request_data``.

        Raises:
            Exception: Any error raised by a pipeline stage is logged with
                its traceback and re-raised unchanged. Failures of individual
                external checks do not raise; they are reported inside
                ``verifications["error"]``.
        """
        # Monotonic clock: the measured duration is immune to wall-clock
        # adjustments happening while the document is being processed.
        start_time = time.perf_counter()
        logger.info(f"Début du traitement du document {document_id}")

        try:
            # 1. Store the original document
            file_content = await file.read()
            original_path = await store_document(document_id, file_content, file.filename)

            # 2. OCR and text extraction
            logger.info(f"OCR du document {document_id}")
            ocr_result = await self.ocr_processor.process_document(original_path)

            # 3. Document classification
            logger.info(f"Classification du document {document_id}")
            classification_result = await self.classifier.classify_document(
                ocr_result["text"],
                expected_type=request_data.type_document_attendu,
                force_reclassification=force_reclassification
            )

            # 4. Entity extraction
            logger.info(f"Extraction des entités du document {document_id}")
            entities = await self.entity_extractor.extract_entities(
                ocr_result["text"],
                document_type=classification_result["type"]
            )

            # 5. External verifications
            logger.info(f"Vérifications externes du document {document_id}")
            verifications = await self._perform_external_verifications(entities)

            # 6. Credibility score
            logger.info(f"Calcul du score de vraisemblance du document {document_id}")
            credibility_score = await self.verification_engine.calculate_credibility_score(
                ocr_result,
                classification_result,
                entities,
                verifications
            )

            # 7. Synthesis generated by the LLM
            logger.info(f"Génération de l'avis de synthèse du document {document_id}")
            synthesis = await self.llm_client.generate_synthesis(
                document_type=classification_result["type"],
                extracted_text=ocr_result["text"],
                entities=entities,
                verifications=verifications,
                credibility_score=credibility_score
            )

            # 8. Assemble the result
            # Prefer model_dump() when the request model provides it
            # (Pydantic v2, where dict() is deprecated); otherwise use dict().
            dump_request = getattr(request_data, "model_dump", None) or request_data.dict
            processing_result = {
                "document_id": document_id,
                "processing_time": time.perf_counter() - start_time,
                "ocr_result": ocr_result,
                "classification": classification_result,
                "entities": entities,
                "verifications": verifications,
                "credibility_score": credibility_score,
                "synthesis": synthesis,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "request_data": dump_request()
            }

            # TODO: persist the processing result. Nothing is saved yet, so
            # no "saved" message is logged here.

            logger.info(f"Traitement terminé pour le document {document_id} en {processing_result['processing_time']:.2f}s")

            return processing_result

        except Exception as e:
            # logger.exception keeps the traceback, which a plain error log
            # of str(e) loses.
            logger.exception(f"Erreur lors du traitement du document {document_id}: {e}")
            # TODO: persist the error. Nothing is saved yet, so no "saved"
            # message is logged here.
            raise

    async def _perform_external_verifications(self, entities: Dict[str, Any]) -> Dict[str, Any]:
        """Run the external checks that apply to the extracted entities.

        Checks are taken from ``_VERIFICATION_PLAN`` and awaited one after
        the other, in plan order. Each check is isolated: when one fails
        (service error, missing entity field, ...) the failure is logged and
        the remaining checks still run.

        Args:
            entities: Extraction result. The lists under ``"adresses"``,
                ``"identites"`` and ``"entreprises"`` are read when present;
                a missing, ``None`` or empty list is skipped.

        Returns:
            A dict mapping each check key (``cadastre``, ``georisques``,
            ``bodacc``, ``gel_avoirs``, ``infogreffe``, ``rbe``) to the value
            returned by the corresponding API call. When at least one check
            failed, ``"error"`` holds the failure messages joined with
            ``"; "``.
        """
        verifications: Dict[str, Any] = {}
        if not entities:
            return verifications

        errors = []
        for entity_key, checks in self._VERIFICATION_PLAN:
            # `or ()` also covers a key that is present but set to None.
            for entity in entities.get(entity_key) or ():
                for result_key, method_name, fields in checks:
                    try:
                        check = getattr(self.external_apis, method_name)
                        # NOTE(review): each result key holds a single value,
                        # so with several entities of the same kind only the
                        # last result is kept. Kept as-is because the
                        # consumers of this dict are outside this block.
                        verifications[result_key] = await check(
                            *(entity[field] for field in fields)
                        )
                    except Exception as e:
                        logger.error(f"Erreur lors des vérifications externes: {e}")
                        errors.append(str(e))

        if errors:
            verifications["error"] = "; ".join(errors)

        return verifications
|
|
|
|
# Instance globale du processeur
|
|
# Module-level processor instance, created when this module is imported and
# used by process_notary_document for every call.
processor = NotaryDocumentProcessor()
|
|
|
|
async def process_notary_document(
    document_id: str,
    file: UploadFile,
    request_data: ProcessingRequest,
    reprocess: bool = False,
    force_reclassification: bool = False,
    force_reverification: bool = False
):
    """Entry point for processing one notarial document.

    Forwards every argument unchanged to ``processor.process_document`` and
    returns what it returns. Any exception is logged as fatal and re-raised
    to the caller.
    """
    pipeline_flags = {
        "reprocess": reprocess,
        "force_reclassification": force_reclassification,
        "force_reverification": force_reverification,
    }

    try:
        outcome = await processor.process_document(
            document_id=document_id,
            file=file,
            request_data=request_data,
            **pipeline_flags
        )
    except Exception as exc:
        logger.error(f"Erreur fatale lors du traitement du document {document_id}: {exc}")
        # TODO: notify the user of the error
        raise
    else:
        # TODO: notify the user that processing has finished
        # (via WebSocket or webhook)
        return outcome
|