"""Business verification and control checks for a processed document.

The entry point is :func:`run`, which reads the OCR metadata, the
classification and the extracted data from the pipeline context, executes a
series of checks and writes the results back into the same context.
"""

import datetime
import logging
import math
import os
import re
from collections import Counter
from typing import Any, Callable, Dict, List

logger = logging.getLogger(__name__)

# Confidence thresholds shared by the OCR and classification checks.
_CONFIDENCE_PASS = 0.8
_CONFIDENCE_WARN = 0.6
# OCR text shorter than this many characters is considered a failure.
_MIN_TEXT_LENGTH = 100
# More warnings than this (with no failure) still triggers a manual review.
_MAX_WARNINGS = 2

# D/M/Y with "/" or "-" separators; groups capture day, month and year.
_DATE_PATTERN = re.compile(r'^(\d{1,2})[/-](\d{1,2})[/-](\d{2,4})$')
# Either space-grouped thousands ("250 000") or plain digits ("250000"),
# optional 2-digit decimals after "." or ",", optional trailing euro sign.
_AMOUNT_PATTERN = re.compile(
    r'^(?:\d{1,3}(?:\s\d{3})*|\d+)(?:[.,]\d{2})?\s*€?$'
)


def run(doc_id: str, ctx: dict):
    """Run every check for one document and store the results in ``ctx``.

    Reads ``ctx["classification"]``, ``ctx["extracted_data"]`` and
    ``ctx["ocr_meta"]`` (each treated as an empty dict when missing or
    ``None``) and writes ``ctx["checks_results"]``, ``ctx["overall_status"]``
    and ``ctx["checks_meta"]``.

    Args:
        doc_id: Identifier of the document, used only for logging.
        ctx: Mutable pipeline context, updated in place.

    Raises:
        Exception: Any error raised by a check is logged with its traceback
            and re-raised unchanged.
    """
    logger.info("Vérifications du document %s", doc_id)

    try:
        # ``or {}`` also covers keys that are present but set to None.
        classification = ctx.get("classification") or {}
        extracted_data = ctx.get("extracted_data") or {}
        ocr_meta = ctx.get("ocr_meta") or {}

        checks_results = [
            _check_ocr_quality(ocr_meta),
            _check_classification(classification),
        ]
        checks_results.extend(
            _check_document_type(classification.get("label", ""), extracted_data)
        )
        checks_results.append(_check_data_consistency(extracted_data))

        overall_status = _determine_overall_status(checks_results)

        ctx["checks_results"] = checks_results
        ctx["overall_status"] = overall_status

        # A Counter yields 0 for statuses that never occurred.
        status_counts = Counter(check["status"] for check in checks_results)
        ctx["checks_meta"] = {
            "checks_completed": True,
            "total_checks": len(checks_results),
            "passed_checks": status_counts["passed"],
            "failed_checks": status_counts["failed"],
            "warnings": status_counts["warning"],
            "overall_status": overall_status,
        }

        logger.info(
            "Vérifications terminées pour le document %s: %s",
            doc_id,
            overall_status,
        )

    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(
            "Erreur lors des vérifications du document %s: %s", doc_id, e
        )
        raise


def _check_ocr_quality(ocr_meta: Dict[str, Any]) -> Dict[str, Any]:
    """Grade the OCR output from its confidence and text length.

    A confidence of at least 0.8 passes, at least 0.6 warns, anything lower
    fails. Independently of the confidence, a text shorter than 100
    characters always fails.
    """
    # Missing or None values fall back to the most pessimistic defaults.
    confidence = ocr_meta.get("confidence") or 0.0
    text_length = ocr_meta.get("text_length") or 0

    if confidence >= _CONFIDENCE_PASS:
        status = "passed"
        message = f"Qualité OCR excellente (confiance: {confidence:.2f})"
    elif confidence >= _CONFIDENCE_WARN:
        status = "warning"
        message = f"Qualité OCR acceptable (confiance: {confidence:.2f})"
    else:
        status = "failed"
        message = f"Qualité OCR insuffisante (confiance: {confidence:.2f})"

    if text_length < _MIN_TEXT_LENGTH:
        status = "failed"
        message += " - Texte trop court"

    return {
        "check_name": "ocr_quality",
        "status": status,
        "message": message,
        "details": {
            "confidence": confidence,
            "text_length": text_length,
        },
    }


def _check_classification(classification: Dict[str, Any]) -> Dict[str, Any]:
    """Grade the document classification from its confidence and label.

    Uses the same 0.8 / 0.6 thresholds as the OCR check. When the label is
    ``"document_inconnu"`` (or missing/empty) the result is always a warning,
    whatever the confidence.
    """
    confidence = classification.get("confidence") or 0.0
    label = classification.get("label") or "document_inconnu"

    if confidence >= _CONFIDENCE_PASS:
        status = "passed"
        message = f"Classification fiable ({label}, confiance: {confidence:.2f})"
    elif confidence >= _CONFIDENCE_WARN:
        status = "warning"
        message = f"Classification incertaine ({label}, confiance: {confidence:.2f})"
    else:
        status = "failed"
        message = f"Classification non fiable ({label}, confiance: {confidence:.2f})"

    # NOTE(review): this overrides a "failed" status too; kept as in the
    # original behavior - confirm it is intended.
    if label == "document_inconnu":
        status = "warning"
        message = "Type de document non identifié"

    return {
        "check_name": "classification",
        "status": status,
        "message": message,
        "details": {
            "label": label,
            "confidence": confidence,
        },
    }


def _check_document_type(document_type: str, extracted_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Run the checks registered for ``document_type``.

    Returns an empty list when the type has no dedicated checks.
    """
    if not isinstance(document_type, str):
        return []
    checker = _DOCUMENT_TYPE_CHECKERS.get(document_type)
    if checker is None:
        return []
    return checker(extracted_data)


def _check_required_fields(prefix: str, required_fields: List[str], data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build one presence check per required field.

    A field counts as missing when it is absent or falsy in ``data``. Check
    names follow the ``"<prefix>_<field>_present"`` convention.
    """
    checks = []
    for field in required_fields:
        if data.get(field):
            checks.append({
                "check_name": f"{prefix}_{field}_present",
                "status": "passed",
                "message": f"Champ {field} présent",
                "details": {"field": field, "value": data[field]},
            })
        else:
            checks.append({
                "check_name": f"{prefix}_{field}_present",
                "status": "failed",
                "message": f"Champ obligatoire manquant: {field}",
                "details": {"field": field},
            })
    return checks


def _check_vente_requirements(data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Checks for a deed of sale: required fields plus the price format."""
    checks = _check_required_fields(
        "vente", ["vendeur", "acheteur", "prix", "bien"], data
    )

    # A present but badly formatted price is only a warning, not a failure.
    prix = data.get("prix", "")
    if prix and not _is_valid_amount(prix):
        checks.append({
            "check_name": "vente_prix_format",
            "status": "warning",
            "message": f"Format de prix suspect: {prix}",
            "details": {"prix": prix},
        })

    return checks


def _check_achat_requirements(data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Checks for a deed of purchase: required fields only."""
    return _check_required_fields(
        "achat", ["vendeur", "acheteur", "prix", "bien"], data
    )


def _check_donation_requirements(data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Checks for a donation: required fields only."""
    return _check_required_fields(
        "donation", ["donateur", "donataire", "bien_donne"], data
    )


def _check_testament_requirements(data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Checks for a will: required fields only."""
    return _check_required_fields("testament", ["testateur"], data)


def _check_succession_requirements(data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Checks for a succession: required fields only."""
    return _check_required_fields("succession", ["defunt"], data)


# Maps a classification label to the function running its dedicated checks.
_DOCUMENT_TYPE_CHECKERS: Dict[str, Callable[[Dict[str, Any]], List[Dict[str, Any]]]] = {
    "acte_vente": _check_vente_requirements,
    "acte_achat": _check_achat_requirements,
    "donation": _check_donation_requirements,
    "testament": _check_testament_requirements,
    "succession": _check_succession_requirements,
}


def _check_data_consistency(data: Dict[str, Any]) -> Dict[str, Any]:
    """Validate every entry of ``data["dates"]`` and ``data["montants"]``.

    Returns a warning check listing each invalid entry (dates first, then
    amounts), or a passed check when everything is valid.
    """
    issues = [
        f"Date invalide: {value}"
        for value in (data.get("dates") or [])
        if not _is_valid_date(value)
    ]
    issues.extend(
        f"Montant invalide: {value}"
        for value in (data.get("montants") or [])
        if not _is_valid_amount(value)
    )

    if not issues:
        return {
            "check_name": "data_consistency",
            "status": "passed",
            "message": "Données cohérentes",
            "details": {},
        }

    return {
        "check_name": "data_consistency",
        "status": "warning",
        "message": f"Cohérence des données: {len(issues)} problème(s) détecté(s)",
        "details": {"issues": issues},
    }


def _determine_overall_status(checks_results: List[Dict[str, Any]]) -> str:
    """Return ``"manual_review"`` or ``"completed"`` for a list of checks.

    Any failed check, or more than two warnings, requires a manual review.
    """
    status_counts = Counter(check["status"] for check in checks_results)
    if status_counts["failed"] > 0 or status_counts["warning"] > _MAX_WARNINGS:
        return "manual_review"
    return "completed"


def _is_valid_date(date_str: str) -> bool:
    """Return True when ``date_str`` is a real D/M/Y calendar date.

    Accepts ``DD/MM/YYYY`` or ``DD-MM-YYYY`` (1-2 digit day and month, 2-4
    digit year). Unlike a purely syntactic check, impossible dates such as
    ``31/02/2023`` are rejected. Non-string input returns False instead of
    raising.
    """
    if not isinstance(date_str, str):
        return False
    match = _DATE_PATTERN.match(date_str)
    if match is None:
        return False

    day, month, year = (int(part) for part in match.groups())
    if len(match.group(3)) == 2:
        # Two-digit years are read as 20YY; this is the lenient choice for
        # leap days (2000 is a leap year, 1900 is not).
        year += 2000
    try:
        datetime.date(year, month, day)
    except ValueError:
        return False
    return True


def _is_valid_amount(amount_str: str) -> bool:
    """Return True when ``amount_str`` looks like a monetary amount.

    Accepted strings: digits either grouped by spaces (``"250 000"``) or
    ungrouped (``"250000"``), optional two decimals after ``.`` or ``,``,
    optional trailing euro sign. Finite non-negative ``int``/``float`` values
    are also accepted, so already-parsed prices do not raise. Any other type
    returns False.
    """
    if isinstance(amount_str, bool):
        return False
    if isinstance(amount_str, (int, float)):
        # The chained comparison is False for NaN, infinities and negatives.
        return 0 <= amount_str < math.inf
    if not isinstance(amount_str, str):
        return False
    return bool(_AMOUNT_PATTERN.match(amount_str))