# Code-viewer header captured with the file (not part of the program):
# 212 lines, 8.1 KiB, Python
"""
Processing tasks for notarial documents.
"""
|
|
import asyncio
|
|
import logging
|
|
from typing import Dict, Any, Optional
|
|
from fastapi import UploadFile
|
|
import uuid
|
|
import time
|
|
|
|
from domain.models import ProcessingRequest
|
|
from utils.ocr_processor import OCRProcessor
|
|
from utils.document_classifier import DocumentClassifier
|
|
from utils.entity_extractor import EntityExtractor
|
|
from utils.external_apis import ExternalAPIManager
|
|
from utils.verification_engine import VerificationEngine
|
|
from utils.llm_client import LLMClient
|
|
from utils.storage import StorageManager
|
|
|
|
# Module-level logger, named after this module; used by the processor class
# and by process_notary_document for progress and error messages.
logger = logging.getLogger(__name__)
|
|
|
|
class NotaryDocumentProcessor:
    """Main processor for notarial documents.

    Runs one document through the steps visible in ``process_document``:
    storing the original, OCR, classification, entity extraction, external
    verifications, credibility scoring, LLM synthesis and persistence of the
    combined result.
    """

    class _InMemoryUpload:
        """Minimal upload-like object handed to the storage layer.

        Exposes a synchronous ``read`` returning the whole payload and a
        ``filename`` attribute.
        """

        def __init__(self, data, filename):
            self._data = data
            self.filename = filename

        def read(self, size=-1):
            # ``size`` is accepted for file-like compatibility but ignored:
            # the complete payload is always returned.
            # NOTE(review): read() is synchronous; confirm that
            # StorageManager.save_original_document does not await it.
            return self._data

    def __init__(self):
        """Instantiate every collaborator used by the pipeline."""
        self.ocr_processor = OCRProcessor()
        self.classifier = DocumentClassifier()
        self.entity_extractor = EntityExtractor()
        self.external_apis = ExternalAPIManager()
        self.verification_engine = VerificationEngine()
        self.llm_client = LLMClient()
        self.storage = StorageManager()

    async def process_document(
        self,
        document_id: str,
        file: Optional[UploadFile] = None,
        request_data: Optional[ProcessingRequest] = None,
        file_bytes: Optional[bytes] = None,
        filename: str = "upload.bin",
        reprocess: bool = False,
        force_reclassification: bool = False,
        force_reverification: bool = False
    ) -> Dict[str, Any]:
        """Run the full processing pipeline on one notarial document.

        Args:
            document_id: Identifier used for storage calls and log messages.
            file: Upload to read when ``file_bytes`` is not supplied.
            request_data: Processing options. May be omitted: the classifier
                then receives ``expected_type=None`` and the result stores
                ``None`` under ``"request_data"``.
            file_bytes: Raw document content; takes precedence over ``file``.
            filename: Name attached to the stored document. Replaced by the
                upload's own name when the content is read from ``file`` and
                that name is non-empty.
            reprocess: Accepted for interface compatibility; not used by
                this method.
            force_reclassification: Forwarded to the classifier.
            force_reverification: Accepted for interface compatibility; not
                used by this method.

        Returns:
            A dict with the keys ``document_id``, ``processing_time``
            (seconds), ``ocr_result``, ``classification``, ``entities``,
            ``verifications``, ``credibility_score``, ``synthesis``,
            ``timestamp`` and ``request_data``.

        Raises:
            Exception: Any failure of a pipeline step is logged, recorded
                through ``storage.save_error_result`` and re-raised unchanged.
        """
        # Monotonic clock: the elapsed time must not be affected by wall-clock
        # adjustments happening while the document is being processed.
        started = time.perf_counter()
        logger.info(f"Début du traitement du document {document_id}")

        try:
            # 1. Read the content from file_bytes or, failing that, from the upload.
            if file_bytes is None and file is not None:
                file_bytes = await file.read()
                # The upload's filename may be missing or empty; keep the
                # caller-supplied default in that case instead of None.
                filename = getattr(file, "filename", None) or filename

            # NOTE(review): when neither file_bytes nor file is given, the
            # wrapper below yields None from read(); confirm how the storage
            # layer is expected to behave for that call.
            original_path = await self.storage.save_original_document(
                document_id,
                self._InMemoryUpload(file_bytes, filename),
            )

            # 2. OCR and text extraction
            logger.info(f"OCR du document {document_id}")
            ocr_result = await self.ocr_processor.process_document(original_path)

            # 3. Document classification
            logger.info(f"Classification du document {document_id}")
            # request_data is optional: do not dereference it when absent.
            expected_type = getattr(request_data, "type_document_attendu", None)
            classification_result = await self.classifier.classify_document(
                ocr_result["text"],
                expected_type=expected_type,
                force_reclassification=force_reclassification,
            )

            # 4. Entity extraction
            logger.info(f"Extraction des entités du document {document_id}")
            entities = await self.entity_extractor.extract_entities(
                ocr_result["text"],
                document_type=classification_result["type"],
            )

            # 5. External verifications
            logger.info(f"Vérifications externes du document {document_id}")
            verifications = await self._perform_external_verifications(entities)

            # 6. Credibility score
            logger.info(f"Calcul du score de vraisemblance du document {document_id}")
            credibility_score = await self.verification_engine.calculate_credibility_score(
                ocr_result,
                classification_result,
                entities,
                verifications,
            )

            # 7. Synthesis generated by the LLM
            logger.info(f"Génération de l'avis de synthèse du document {document_id}")
            synthesis = await self.llm_client.generate_synthesis(
                document_type=classification_result["type"],
                extracted_text=ocr_result["text"],
                entities=entities,
                verifications=verifications,
                credibility_score=credibility_score,
            )

            # 8. Persist the results
            processing_result = {
                "document_id": document_id,
                "processing_time": time.perf_counter() - started,
                "ocr_result": ocr_result,
                "classification": classification_result,
                "entities": entities,
                "verifications": verifications,
                "credibility_score": credibility_score,
                "synthesis": synthesis,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "request_data": request_data.dict() if request_data is not None else None,
            }

            await self.storage.save_processing_result(document_id, processing_result)

            logger.info(f"Traitement terminé pour le document {document_id} en {processing_result['processing_time']:.2f}s")

            return processing_result

        except Exception as e:
            logger.exception(f"Erreur lors du traitement du document {document_id}: {e}")
            try:
                await self.storage.save_error_result(document_id, str(e))
            except Exception:
                # Recording the failure is best-effort: a storage error here
                # must not replace the pipeline error the caller needs to see.
                logger.exception(f"Impossible d'enregistrer l'erreur du document {document_id}")
            raise

    async def _perform_external_verifications(self, entities: Dict[str, Any]) -> Dict[str, Any]:
        """Run the external checks that apply to the extracted entities.

        Addresses (``"adresses"``) are checked against the cadastre and
        Géorisques services, identities (``"identites"``) against BODACC and
        the asset-freeze list, companies (``"entreprises"``) against
        Infogreffe and RBE. A key that is absent, or present with an empty or
        ``None`` value, is skipped.

        Args:
            entities: Mapping of entity category to a list of entity dicts.

        Returns:
            A dict holding the result of each check under its own key
            (``cadastre``, ``georisques``, ``bodacc``, ``gel_avoirs``,
            ``infogreffe``, ``rbe``). If any check raises, the error text is
            stored under ``"error"`` and the remaining checks are not run;
            the exception is not propagated.
        """
        verifications = {}

        # NOTE(review): each key is overwritten on every loop iteration, so
        # only the result for the LAST address / identity / company is kept.
        # Left as-is because consumers of this dict are not visible here;
        # confirm whether per-entity results should be accumulated instead.
        try:
            # Address checks
            for address in entities.get("adresses") or []:
                verifications["cadastre"] = await self.external_apis.verify_cadastre(address["adresse"])
                verifications["georisques"] = await self.external_apis.check_georisques(address["adresse"])

            # Identity checks
            for identity in entities.get("identites") or []:
                verifications["bodacc"] = await self.external_apis.check_bodacc(identity["nom"], identity["prenom"])
                verifications["gel_avoirs"] = await self.external_apis.check_gel_avoirs(identity["nom"], identity["prenom"])

            # Company checks (when present)
            for company in entities.get("entreprises") or []:
                verifications["infogreffe"] = await self.external_apis.check_infogreffe(company["nom"])
                verifications["rbe"] = await self.external_apis.check_rbe(company["nom"])

        except Exception as e:
            # Deliberately best-effort: verification failures are reported in
            # the result instead of failing the whole document.
            logger.exception(f"Erreur lors des vérifications externes: {e}")
            verifications["error"] = str(e)

        return verifications
|
|
|
|
# Instance globale du processeur
|
|
# Built once when this module is imported (which instantiates all of the
# processor's collaborators) and reused by process_notary_document for
# every call.
processor = NotaryDocumentProcessor()
|
|
|
|
async def process_notary_document(
    document_id: str,
    file: UploadFile = None,
    request_data: ProcessingRequest = None,
    reprocess: bool = False,
    force_reclassification: bool = False,
    force_reverification: bool = False,
    file_bytes: bytes = None,
    filename: str = "upload.bin",
):
    """Process one notarial document with the shared module-level processor.

    Every argument is forwarded unchanged, by keyword, to
    ``processor.process_document`` and its result is returned as-is. Any
    exception raised by the processor is logged and re-raised.
    """
    forwarded = {
        "document_id": document_id,
        "file": file,
        "request_data": request_data,
        "file_bytes": file_bytes,
        "filename": filename,
        "reprocess": reprocess,
        "force_reclassification": force_reclassification,
        "force_reverification": force_reverification,
    }

    try:
        # TODO: notify the user that processing has finished
        # (via WebSocket or webhook) before returning.
        return await processor.process_document(**forwarded)
    except Exception as e:
        logger.error(f"Erreur fatale lors du traitement du document {document_id}: {e}")
        # TODO: notify the user of the error
        raise
|