""" Tâches de traitement des documents notariaux """ import asyncio import logging from typing import Dict, Any, Optional from fastapi import UploadFile import uuid import time from domain.models import ProcessingRequest from utils.ocr_processor import OCRProcessor from utils.document_classifier import DocumentClassifier from utils.entity_extractor import EntityExtractor from utils.external_apis import ExternalAPIManager from utils.verification_engine import VerificationEngine from utils.llm_client import LLMClient from utils.storage import StorageManager logger = logging.getLogger(__name__) class NotaryDocumentProcessor: """Processeur principal pour les documents notariaux""" def __init__(self): self.ocr_processor = OCRProcessor() self.classifier = DocumentClassifier() self.entity_extractor = EntityExtractor() self.external_apis = ExternalAPIManager() self.verification_engine = VerificationEngine() self.llm_client = LLMClient() self.storage = StorageManager() async def process_document( self, document_id: str, file: UploadFile, request_data: ProcessingRequest, reprocess: bool = False, force_reclassification: bool = False, force_reverification: bool = False ) -> Dict[str, Any]: """ Traitement complet d'un document notarial """ start_time = time.time() logger.info(f"Début du traitement du document {document_id}") try: # 1. Sauvegarde du document original original_path = await self.storage.save_original_document(document_id, file) # 2. OCR et extraction du texte logger.info(f"OCR du document {document_id}") ocr_result = await self.ocr_processor.process_document(original_path) # 3. Classification du document logger.info(f"Classification du document {document_id}") classification_result = await self.classifier.classify_document( ocr_result["text"], expected_type=request_data.type_document_attendu, force_reclassification=force_reclassification ) # 4. Extraction des entités logger.info(f"Extraction des entités du document {document_id}") entities = await self.entity_extractor.extract_entities( ocr_result["text"], document_type=classification_result["type"] ) # 5. Vérifications externes logger.info(f"Vérifications externes du document {document_id}") verifications = await self._perform_external_verifications(entities) # 6. Calcul du score de vraisemblance logger.info(f"Calcul du score de vraisemblance du document {document_id}") credibility_score = await self.verification_engine.calculate_credibility_score( ocr_result, classification_result, entities, verifications ) # 7. Génération de l'avis de synthèse via LLM logger.info(f"Génération de l'avis de synthèse du document {document_id}") synthesis = await self.llm_client.generate_synthesis( document_type=classification_result["type"], extracted_text=ocr_result["text"], entities=entities, verifications=verifications, credibility_score=credibility_score ) # 8. Sauvegarde des résultats processing_result = { "document_id": document_id, "processing_time": time.time() - start_time, "ocr_result": ocr_result, "classification": classification_result, "entities": entities, "verifications": verifications, "credibility_score": credibility_score, "synthesis": synthesis, "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), "request_data": request_data.dict() } await self.storage.save_processing_result(document_id, processing_result) logger.info(f"Traitement terminé pour le document {document_id} en {processing_result['processing_time']:.2f}s") return processing_result except Exception as e: logger.error(f"Erreur lors du traitement du document {document_id}: {e}") await self.storage.save_error_result(document_id, str(e)) raise async def _perform_external_verifications(self, entities: Dict[str, Any]) -> Dict[str, Any]: """ Effectue les vérifications externes basées sur les entités extraites """ verifications = {} try: # Vérifications des adresses if "adresses" in entities: for address in entities["adresses"]: # Vérification Cadastre cadastre_result = await self.external_apis.verify_cadastre(address["adresse"]) verifications["cadastre"] = cadastre_result # Vérification Géorisques georisques_result = await self.external_apis.check_georisques(address["adresse"]) verifications["georisques"] = georisques_result # Vérifications des identités if "identites" in entities: for identity in entities["identites"]: # Vérification BODACC bodacc_result = await self.external_apis.check_bodacc(identity["nom"], identity["prenom"]) verifications["bodacc"] = bodacc_result # Vérification Gel des avoirs gel_result = await self.external_apis.check_gel_avoirs(identity["nom"], identity["prenom"]) verifications["gel_avoirs"] = gel_result # Vérifications des entreprises (si présentes) if "entreprises" in entities: for company in entities["entreprises"]: # Vérification Infogreffe infogreffe_result = await self.external_apis.check_infogreffe(company["nom"]) verifications["infogreffe"] = infogreffe_result # Vérification RBE rbe_result = await self.external_apis.check_rbe(company["nom"]) verifications["rbe"] = rbe_result except Exception as e: logger.error(f"Erreur lors des vérifications externes: {e}") verifications["error"] = str(e) return verifications # Instance globale du processeur processor = NotaryDocumentProcessor() async def process_notary_document( document_id: str, file: UploadFile, request_data: ProcessingRequest, reprocess: bool = False, force_reclassification: bool = False, force_reverification: bool = False ): """ Fonction principale de traitement d'un document notarial """ try: result = await processor.process_document( document_id=document_id, file=file, request_data=request_data, reprocess=reprocess, force_reclassification=force_reclassification, force_reverification=force_reverification ) # TODO: Notifier l'utilisateur de la fin du traitement # via WebSocket ou webhook return result except Exception as e: logger.error(f"Erreur fatale lors du traitement du document {document_id}: {e}") # TODO: Notifier l'utilisateur de l'erreur raise