
- API FastAPI complète pour le traitement de documents notariaux
- Pipeline OCR avec correction lexicale notariale
- Classification automatique des documents (règles + LLM)
- Extraction d'entités (identités, adresses, biens, montants)
- Intégration de 6 APIs externes (Cadastre, Géorisques, BODACC, etc.)
- Système de vérification et score de vraisemblance
- Analyse contextuelle via LLM (Ollama)
- Interface web moderne avec drag & drop
- Tests complets et documentation exhaustive
- Scripts de déploiement automatisés

Types de documents supportés :
- Acte de vente, donation, succession
- CNI avec détection du pays
- Contrats divers

Fonctionnalités :
- Upload et traitement asynchrone
- Vérifications externes automatiques
- Score de vraisemblance (0-1)
- Recommandations personnalisées
- Tableaux de bord et statistiques

Prêt pour la production avec démarrage en une commande.
598 lines
22 KiB
Python
598 lines
22 KiB
Python
"""
|
|
Gestionnaire des APIs externes pour la vérification des documents notariaux
|
|
"""
|
|
import asyncio
|
|
import logging
|
|
import aiohttp
|
|
import json
|
|
from typing import Dict, Any, Optional, List
|
|
from dataclasses import dataclass
|
|
import os
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
@dataclass
class VerificationResult:
    """Outcome of a single external verification call."""

    # Name of the external service that produced this result.
    service: str
    # One of: verified, error, not_found, restricted.
    status: str
    # Service-specific payload; empty when nothing usable was returned.
    data: Dict[str, Any]
    # Confidence score attached to the result by the caller.
    confidence: float
    # Human-readable failure reason; None when there is nothing to report.
    error_message: Optional[str] = None
|
|
|
|
class ExternalAPIManager:
    """Client for the external services used to verify notarial document data.

    Every ``verify_*`` / ``check_*`` coroutine returns a ``VerificationResult``
    and reports failures (HTTP errors, missing credentials, any ``Exception``)
    through its ``status`` / ``error_message`` fields instead of raising.

    The manager can be used as an async context manager: the shared HTTP
    session is opened on entry and closed on exit. Methods called outside an
    ``async with`` block open the session lazily; call ``close()`` when done.
    """

    def __init__(self):
        # The aiohttp session is created lazily, inside a running event loop.
        self.session = None
        self.api_configs = self._load_api_configs()
        self.timeout = aiohttp.ClientTimeout(total=30)

    def _load_api_configs(self) -> Dict[str, Dict[str, Any]]:
        """Return the per-service configuration.

        Returns:
            Mapping of service name to its settings (``base_url``,
            ``open_data``, ``rate_limit`` and, where needed, credentials read
            from the ``API_GOUV_KEY``, ``RBE_API_KEY``, ``GEOFONCIER_USERNAME``
            and ``GEOFONCIER_PASSWORD`` environment variables).

        Note:
            ``rate_limit`` is only reported by ``get_api_status``; nothing in
            this class enforces it.
        """
        return {
            "cadastre": {
                "base_url": "https://apicarto.ign.fr/api/cadastre",
                "open_data": True,
                "rate_limit": 100,  # requests per minute
            },
            "georisques": {
                "base_url": "https://www.georisques.gouv.fr/api",
                "open_data": True,
                "rate_limit": 50,
            },
            "bodacc": {
                "base_url": "https://bodacc-datadila.opendatasoft.com/api/records/1.0/search",
                "open_data": True,
                "rate_limit": 100,
            },
            "gel_avoirs": {
                "base_url": "https://gels-avoirs.dgtresor.gouv.fr/api",
                "open_data": True,
                "rate_limit": 50,
            },
            "infogreffe": {
                "base_url": "https://entreprise.api.gouv.fr/v2/infogreffe/rcs",
                "open_data": True,
                "rate_limit": 30,
                "api_key": os.getenv("API_GOUV_KEY"),
            },
            "rbe": {
                "base_url": "https://data.inpi.fr/api",
                "open_data": False,
                "rate_limit": 10,
                "api_key": os.getenv("RBE_API_KEY"),
            },
            "geofoncier": {
                "base_url": "https://api2.geofoncier.fr",
                "open_data": False,
                "rate_limit": 20,
                "username": os.getenv("GEOFONCIER_USERNAME"),
                "password": os.getenv("GEOFONCIER_PASSWORD"),
            },
        }

    # ------------------------------------------------------------------
    # Session lifecycle
    # ------------------------------------------------------------------

    def _ensure_session(self):
        """Return the shared HTTP session, (re)creating it if missing or closed."""
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession(timeout=self.timeout)
        return self.session

    async def close(self) -> None:
        """Close the shared HTTP session, if any. Safe to call repeatedly."""
        # Detach first so a later call never sees a closed session object.
        session, self.session = self.session, None
        if session is not None:
            await session.close()

    async def __aenter__(self):
        """Open the shared session (reusing a live one) and return the manager."""
        self._ensure_session()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the shared session; exceptions from the block are not suppressed."""
        await self.close()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _failure(service: str, message: str, status: str = "error") -> "VerificationResult":
        """Build a zero-confidence, empty-data result carrying ``message``."""
        return VerificationResult(
            service=service,
            status=status,
            data={},
            confidence=0.0,
            error_message=message,
        )

    async def _fetch_json(self, url: str, *, params=None, headers=None):
        """GET ``url`` and return ``(status_code, payload)``.

        ``payload`` is the decoded JSON body when the status is 200 and
        ``None`` otherwise. Network and decoding errors propagate to the caller.
        """
        session = self._ensure_session()
        async with session.get(url, params=params, headers=headers) as response:
            if response.status != 200:
                return response.status, None
            return response.status, await response.json()

    # ------------------------------------------------------------------
    # Verifications
    # ------------------------------------------------------------------

    async def verify_cadastre(self, address: str) -> "VerificationResult":
        """Look up ``address`` in the Cadastre API.

        Args:
            address: Free-text address sent as the ``q`` query parameter.

        Returns:
            ``verified`` (confidence 0.9) with the first returned feature's
            parcel data, ``not_found`` when no feature is returned, or
            ``error`` on a non-200 response or any exception.
        """
        service = "cadastre"
        try:
            # NOTE(review): confirm that the /parcelle endpoint accepts a
            # free-text "q" parameter — verify against the provider's API docs.
            status_code, payload = await self._fetch_json(
                f"{self.api_configs['cadastre']['base_url']}/parcelle",
                params={"q": address, "limit": 5},
            )
            if status_code != 200:
                return self._failure(service, f"Erreur API: {status_code}")

            features = payload.get("features")
            if not features:
                return self._failure(
                    service,
                    "Adresse non trouvée dans le cadastre",
                    status="not_found",
                )

            # Only the first match is reported.
            feature = features[0]
            properties = feature.get("properties", {})
            return VerificationResult(
                service=service,
                status="verified",
                data={
                    "parcelle": properties.get("id"),
                    "section": properties.get("section"),
                    "numero": properties.get("numero"),
                    "surface": properties.get("contenance"),
                    "geometry": feature.get("geometry"),
                },
                confidence=0.9,
            )
        except Exception as e:
            logger.error("Erreur lors de la vérification cadastre: %s", e)
            return self._failure(service, str(e))

    async def check_georisques(self, address: str) -> "VerificationResult":
        """Fetch the risks reported by the Géorisques API for ``address``.

        Args:
            address: Address sent as the ``adresse`` query parameter.

        Returns:
            ``verified`` (confidence 0.8) with the list of risks (possibly
            empty) and their count, or ``error`` on a non-200 response or any
            exception.
        """
        service = "georisques"
        try:
            # NOTE(review): endpoint path and "adresse" parameter should be
            # confirmed against the current Géorisques API documentation.
            status_code, payload = await self._fetch_json(
                f"{self.api_configs['georisques']['base_url']}/v1/risques",
                params={"adresse": address},
            )
            if status_code != 200:
                return self._failure(service, f"Erreur API: {status_code}")

            risks = [
                {
                    "type": risk.get("type"),
                    "niveau": risk.get("niveau"),
                    "description": risk.get("description"),
                }
                for risk in payload.get("risques") or []
            ]
            return VerificationResult(
                service=service,
                status="verified",
                data={"risques": risks, "total_risques": len(risks)},
                confidence=0.8,
            )
        except Exception as e:
            logger.error("Erreur lors de la vérification géorisques: %s", e)
            return self._failure(service, str(e))

    async def check_bodacc(self, nom: str, prenom: str) -> "VerificationResult":
        """Search BODACC commercial announcements for a person.

        Args:
            nom: Last name.
            prenom: First name.

        Returns:
            ``verified`` when at least one announcement matches, ``not_found``
            when none does (both with confidence 0.8 and the announcement
            list), or ``error`` on a non-200 response or any exception.
        """
        service = "bodacc"
        try:
            status_code, payload = await self._fetch_json(
                self.api_configs['bodacc']['base_url'],
                params={
                    "dataset": "annonces-commerciales",
                    "q": f"{nom} {prenom}",
                    "rows": 10,
                },
            )
            if status_code != 200:
                return self._failure(service, f"Erreur API: {status_code}")

            annonces = []
            for record in payload.get("records") or []:
                fields = record.get("fields", {})
                annonces.append({
                    "type": fields.get("type"),
                    "date": fields.get("date"),
                    "description": fields.get("description"),
                })
            return VerificationResult(
                service=service,
                status="verified" if annonces else "not_found",
                data={"annonces": annonces, "total": len(annonces)},
                confidence=0.8,
            )
        except Exception as e:
            logger.error("Erreur lors de la vérification BODACC: %s", e)
            return self._failure(service, str(e))

    async def check_gel_avoirs(self, nom: str, prenom: str) -> "VerificationResult":
        """Search the asset-freeze list for a person.

        Args:
            nom: Last name.
            prenom: First name.

        Returns:
            ``verified`` when at least one entry matches (i.e. the person
            appears in the returned results), ``not_found`` when none does
            (both with confidence 0.9), or ``error`` on a non-200 response or
            any exception.
        """
        service = "gel_avoirs"
        try:
            # NOTE(review): confirm the /search endpoint and its parameters
            # against the provider's published API.
            status_code, payload = await self._fetch_json(
                f"{self.api_configs['gel_avoirs']['base_url']}/search",
                params={"nom": nom, "prenom": prenom},
            )
            if status_code != 200:
                return self._failure(service, f"Erreur API: {status_code}")

            gels = [
                {
                    "nom": entry.get("nom"),
                    "prenom": entry.get("prenom"),
                    "date_gel": entry.get("date_gel"),
                    "motif": entry.get("motif"),
                }
                for entry in payload.get("results") or []
            ]
            return VerificationResult(
                service=service,
                status="verified" if gels else "not_found",
                data={"gels": gels, "total": len(gels)},
                confidence=0.9,
            )
        except Exception as e:
            logger.error("Erreur lors de la vérification gel des avoirs: %s", e)
            return self._failure(service, str(e))

    async def check_infogreffe(self, company_name: str) -> "VerificationResult":
        """Look up a company through the Infogreffe endpoint.

        Args:
            company_name: Company name sent as ``denomination``.

        Returns:
            ``restricted`` when no API key is configured, ``verified``
            (confidence 0.9) with the company details, ``not_found`` when the
            response has no ``entreprise`` entry, or ``error`` on a non-200
            response or any exception.
        """
        service = "infogreffe"
        try:
            # Check credentials before touching the network, like the other
            # restricted services do.
            api_key = self.api_configs['infogreffe'].get('api_key')
            if not api_key:
                return self._failure(service, "Clé API manquante", status="restricted")

            status_code, payload = await self._fetch_json(
                f"{self.api_configs['infogreffe']['base_url']}/extrait",
                params={"denomination": company_name, "token": api_key},
            )
            if status_code != 200:
                return self._failure(service, f"Erreur API: {status_code}")

            entreprise = payload.get("entreprise")
            if not entreprise:
                return self._failure(service, "Entreprise non trouvée", status="not_found")

            return VerificationResult(
                service=service,
                status="verified",
                data={
                    "siren": entreprise.get("siren"),
                    "siret": entreprise.get("siret"),
                    "denomination": entreprise.get("denomination"),
                    "adresse": entreprise.get("adresse"),
                    "statut": entreprise.get("statut"),
                },
                confidence=0.9,
            )
        except Exception as e:
            logger.error("Erreur lors de la vérification Infogreffe: %s", e)
            return self._failure(service, str(e))

    async def check_rbe(self, company_name: str) -> "VerificationResult":
        """Query the beneficial-owners register (RBE) for a company.

        Args:
            company_name: Company name sent as ``denomination``.

        Returns:
            ``restricted`` when no API key is configured, ``verified``
            (confidence 0.9) with the beneficial owners and their count,
            ``not_found`` when none is returned, or ``error`` on a non-200
            response or any exception.
        """
        service = "rbe"
        try:
            api_key = self.api_configs['rbe'].get('api_key')
            if not api_key:
                return self._failure(service, "Accès RBE non configuré", status="restricted")

            status_code, payload = await self._fetch_json(
                f"{self.api_configs['rbe']['base_url']}/search",
                params={"denomination": company_name},
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
            )
            if status_code != 200:
                return self._failure(service, f"Erreur API: {status_code}")

            beneficiaires = payload.get("beneficiaires")
            if not beneficiaires:
                return self._failure(
                    service,
                    "Aucun bénéficiaire effectif trouvé",
                    status="not_found",
                )

            return VerificationResult(
                service=service,
                status="verified",
                data={"beneficiaires": beneficiaires, "total": len(beneficiaires)},
                confidence=0.9,
            )
        except Exception as e:
            logger.error("Erreur lors de la vérification RBE: %s", e)
            return self._failure(service, str(e))

    async def check_geofoncier(self, address: str) -> "VerificationResult":
        """Look up ``address`` through Géofoncier (restricted access).

        Authenticates with the configured username/password, then queries the
        parcel endpoint with the returned bearer token.

        Args:
            address: Address sent as the ``adresse`` query parameter.

        Returns:
            ``restricted`` when credentials are missing, ``verified``
            (confidence 0.95) with the raw JSON response as ``data``, or
            ``error`` when authentication fails, no token is returned, the
            search returns a non-200 status, or any exception occurs.
        """
        service = "geofoncier"
        try:
            config = self.api_configs['geofoncier']
            username = config.get('username')
            password = config.get('password')
            if not username or not password:
                return self._failure(
                    service,
                    "Identifiants Géofoncier manquants",
                    status="restricted",
                )

            session = self._ensure_session()

            # Step 1: authenticate to obtain a bearer token.
            auth_url = f"{config['base_url']}/auth"
            credentials = {"username": username, "password": password}
            async with session.post(auth_url, json=credentials) as auth_response:
                if auth_response.status != 200:
                    return self._failure(
                        service,
                        f"Erreur authentification: {auth_response.status}",
                    )
                auth_result = await auth_response.json()

            token = auth_result.get("token")
            if not token:
                return self._failure(service, "Token d'authentification manquant")

            # Step 2: parcel search with the token.
            status_code, payload = await self._fetch_json(
                f"{config['base_url']}/parcelle",
                params={"adresse": address},
                headers={"Authorization": f"Bearer {token}"},
            )
            if status_code != 200:
                return self._failure(service, f"Erreur recherche: {status_code}")

            return VerificationResult(
                service=service,
                status="verified",
                data=payload,
                confidence=0.95,
            )
        except Exception as e:
            logger.error("Erreur lors de la vérification Géofoncier: %s", e)
            return self._failure(service, str(e))

    # ------------------------------------------------------------------
    # Batch / diagnostics
    # ------------------------------------------------------------------

    async def batch_verify_addresses(self, addresses: List[str]) -> Dict[str, "VerificationResult"]:
        """Verify several addresses against the Cadastre concurrently.

        Args:
            addresses: Addresses to check; duplicates are queried only once.

        Returns:
            Mapping of each distinct address to its ``VerificationResult``, in
            order of first appearance. An empty input yields an empty dict.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order.
        unique_addresses = list(dict.fromkeys(addresses))

        # gather() cancels the pending lookups if this coroutine is cancelled,
        # so no request is left running unattended.
        outcomes = await asyncio.gather(
            *(self.verify_cadastre(address) for address in unique_addresses),
            return_exceptions=True,
        )

        results = {}
        for address, outcome in zip(unique_addresses, outcomes):
            if isinstance(outcome, BaseException):
                # Defensive: verify_cadastre already converts Exception into
                # an error result.
                outcome = self._failure("cadastre", str(outcome))
            results[address] = outcome
        return results

    async def get_api_status(self) -> Dict[str, Dict[str, Any]]:
        """Probe each configured service with a plain GET on its base URL.

        Returns:
            Mapping of service name to a report with ``available`` (True when
            the status code is below 500), ``status_code`` or ``error``, plus
            the configured ``open_data`` and ``rate_limit`` values.
        """
        status = {}
        for service, config in self.api_configs.items():
            settings = {
                "open_data": config.get("open_data", False),
                "rate_limit": config.get("rate_limit", 0),
            }
            try:
                session = self._ensure_session()
                # Simple connectivity test; the response body is not read.
                async with session.get(config["base_url"]) as response:
                    status[service] = {
                        "available": response.status < 500,
                        "status_code": response.status,
                        **settings,
                    }
            except Exception as e:
                status[service] = {
                    "available": False,
                    "error": str(e),
                    **settings,
                }
        return status
|