212 lines
8.1 KiB
Python

"""
Tâches de traitement des documents notariaux
"""
import asyncio
import logging
from typing import Dict, Any, Optional
from fastapi import UploadFile
import uuid
import time
from domain.models import ProcessingRequest
from utils.ocr_processor import OCRProcessor
from utils.document_classifier import DocumentClassifier
from utils.entity_extractor import EntityExtractor
from utils.external_apis import ExternalAPIManager
from utils.verification_engine import VerificationEngine
from utils.llm_client import LLMClient
from utils.storage import StorageManager
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class NotaryDocumentProcessor:
    """Main processor for notarial documents.

    Holds one instance of each pipeline component (OCR, classifier, entity
    extractor, external-API manager, verification engine, LLM client and
    storage manager) and runs them in sequence for each document.
    """

    def __init__(self):
        """Instantiate every pipeline component with its default constructor."""
        self.ocr_processor = OCRProcessor()
        self.classifier = DocumentClassifier()
        self.entity_extractor = EntityExtractor()
        self.external_apis = ExternalAPIManager()
        self.verification_engine = VerificationEngine()
        self.llm_client = LLMClient()
        self.storage = StorageManager()

    async def process_document(
        self,
        document_id: str,
        file: Optional[UploadFile] = None,
        request_data: Optional[ProcessingRequest] = None,
        file_bytes: Optional[bytes] = None,
        filename: str = "upload.bin",
        reprocess: bool = False,
        force_reclassification: bool = False,
        force_reverification: bool = False,
    ) -> Dict[str, Any]:
        """Run the complete processing pipeline on one notarial document.

        Steps: store the original, OCR, classification, entity extraction,
        external verifications, credibility score, LLM synthesis, and
        persistence of the aggregated result.

        Args:
            document_id: Identifier used for logging and storage.
            file: Upload to read the content from when ``file_bytes`` is
                not supplied.
            request_data: Optional request; its ``type_document_attendu``
                is passed to the classifier and its ``dict()`` is stored
                with the result. ``None`` is tolerated.
            file_bytes: Raw document content; takes precedence over ``file``.
            filename: Name attached to the stored original. Replaced by
                ``file.filename`` when the content is read from ``file``
                and that attribute is non-empty.
            reprocess: Accepted for interface compatibility; not used by
                this method.
            force_reclassification: Forwarded to the classifier.
            force_reverification: Accepted for interface compatibility; not
                used by this method.

        Returns:
            A dict with the keys ``document_id``, ``processing_time``,
            ``ocr_result``, ``classification``, ``entities``,
            ``verifications``, ``credibility_score``, ``synthesis``,
            ``timestamp`` (local time) and ``request_data``.

        Raises:
            ValueError: If neither ``file_bytes`` nor ``file`` is provided.
            Exception: Any pipeline failure is logged, recorded through
                ``storage.save_error_result`` (best effort) and re-raised.
        """
        start_time = time.time()
        logger.info(f"Début du traitement du document {document_id}")
        try:
            # Read the content from file_bytes if given, else from the upload.
            if file_bytes is None and file is not None:
                file_bytes = await file.read()
                # Keep the default name when the upload carries none.
                filename = getattr(file, "filename", None) or filename
            if file_bytes is None:
                raise ValueError(
                    "Aucun contenu fourni : 'file' ou 'file_bytes' est requis"
                )

            # 1. Store the original. The storage layer receives a file-like
            # object exposing ``read`` and ``filename``; a real BytesIO gives
            # correct chunked-read / EOF semantics.
            from io import BytesIO

            upload_buffer = BytesIO(file_bytes)
            upload_buffer.filename = filename
            original_path = await self.storage.save_original_document(
                document_id,
                upload_buffer,
            )

            # 2. OCR and text extraction
            logger.info(f"OCR du document {document_id}")
            ocr_result = await self.ocr_processor.process_document(original_path)

            # 3. Document classification
            logger.info(f"Classification du document {document_id}")
            expected_type = (
                request_data.type_document_attendu
                if request_data is not None
                else None
            )
            classification_result = await self.classifier.classify_document(
                ocr_result["text"],
                expected_type=expected_type,
                force_reclassification=force_reclassification,
            )

            # 4. Entity extraction
            logger.info(f"Extraction des entités du document {document_id}")
            entities = await self.entity_extractor.extract_entities(
                ocr_result["text"],
                document_type=classification_result["type"],
            )

            # 5. External verifications
            logger.info(f"Vérifications externes du document {document_id}")
            verifications = await self._perform_external_verifications(entities)

            # 6. Credibility score
            logger.info(f"Calcul du score de vraisemblance du document {document_id}")
            credibility_score = await self.verification_engine.calculate_credibility_score(
                ocr_result,
                classification_result,
                entities,
                verifications,
            )

            # 7. Synthesis opinion generated by the LLM
            logger.info(f"Génération de l'avis de synthèse du document {document_id}")
            synthesis = await self.llm_client.generate_synthesis(
                document_type=classification_result["type"],
                extracted_text=ocr_result["text"],
                entities=entities,
                verifications=verifications,
                credibility_score=credibility_score,
            )

            # 8. Persist the aggregated result
            processing_result = {
                "document_id": document_id,
                "processing_time": time.time() - start_time,
                "ocr_result": ocr_result,
                "classification": classification_result,
                "entities": entities,
                "verifications": verifications,
                "credibility_score": credibility_score,
                "synthesis": synthesis,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "request_data": (
                    request_data.dict() if request_data is not None else None
                ),
            }
            await self.storage.save_processing_result(document_id, processing_result)
            logger.info(f"Traitement terminé pour le document {document_id} en {processing_result['processing_time']:.2f}s")
            return processing_result
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Erreur lors du traitement du document {document_id}: {e}")
            try:
                await self.storage.save_error_result(document_id, str(e))
            except Exception:
                # A failure to persist the error must not hide the original one.
                logger.exception(
                    f"Impossible d'enregistrer l'erreur du document {document_id}"
                )
            raise

    async def _perform_external_verifications(self, entities: Dict[str, Any]) -> Dict[str, Any]:
        """Run the external checks that apply to the extracted entities.

        Addresses (``entities["adresses"]``) are checked against the cadastre
        and Géorisques services, identities (``entities["identites"]``)
        against BODACC and the asset-freeze list, and companies
        (``entities["entreprises"]``) against Infogreffe and RBE. Missing or
        empty entity lists are skipped.

        Args:
            entities: Mapping of entity category to a list of entity dicts.

        Returns:
            A dict keyed by check name (``cadastre``, ``georisques``,
            ``bodacc``, ``gel_avoirs``, ``infogreffe``, ``rbe``). If any
            check raises, the remaining checks are skipped and the message
            is stored under ``error``; this method itself does not raise.
        """
        verifications: Dict[str, Any] = {}
        apis = self.external_apis
        try:
            # NOTE(review): each loop writes to a fixed key, so when several
            # entities of one kind exist only the last result is kept. Left
            # as-is because downstream consumers may rely on this shape.

            # Address checks
            for address in entities.get("adresses") or []:
                location = address["adresse"]
                verifications["cadastre"] = await apis.verify_cadastre(location)
                verifications["georisques"] = await apis.check_georisques(location)

            # Identity checks
            for identity in entities.get("identites") or []:
                last_name, first_name = identity["nom"], identity["prenom"]
                verifications["bodacc"] = await apis.check_bodacc(last_name, first_name)
                verifications["gel_avoirs"] = await apis.check_gel_avoirs(last_name, first_name)

            # Company checks (when present)
            for company in entities.get("entreprises") or []:
                company_name = company["nom"]
                verifications["infogreffe"] = await apis.check_infogreffe(company_name)
                verifications["rbe"] = await apis.check_rbe(company_name)
        except Exception as e:
            # Best effort: report the failure in the result instead of
            # aborting the whole document pipeline.
            logger.exception(f"Erreur lors des vérifications externes: {e}")
            verifications["error"] = str(e)
        return verifications
# Module-level processor instance, created at import time and used by
# process_notary_document.
processor = NotaryDocumentProcessor()
async def process_notary_document(
    document_id: str,
    file: UploadFile = None,
    request_data: ProcessingRequest = None,
    reprocess: bool = False,
    force_reclassification: bool = False,
    force_reverification: bool = False,
    file_bytes: bytes = None,
    filename: str = "upload.bin",
):
    """Process one notarial document through the module-level processor.

    Thin wrapper: every argument is forwarded unchanged, by keyword, to
    ``processor.process_document`` and that call's result is returned
    as-is. Any exception is logged and re-raised to the caller.
    """
    # Gather the forwarded keyword arguments once, ahead of the guarded call.
    forwarded = {
        "document_id": document_id,
        "file": file,
        "request_data": request_data,
        "file_bytes": file_bytes,
        "filename": filename,
        "reprocess": reprocess,
        "force_reclassification": force_reclassification,
        "force_reverification": force_reverification,
    }
    try:
        outcome = await processor.process_document(**forwarded)
    except Exception as exc:
        logger.error(f"Erreur fatale lors du traitement du document {document_id}: {exc}")
        # TODO: notify the user of the error
        raise
    # TODO: notify the user that processing has finished
    # (via WebSocket or webhook)
    return outcome