
- API FastAPI complète pour le traitement de documents notariaux
- Pipeline OCR avec correction lexicale notariale
- Classification automatique des documents (règles + LLM)
- Extraction d'entités (identités, adresses, biens, montants)
- Intégration de 6 APIs externes (Cadastre, Géorisques, BODACC, etc.)
- Système de vérification et score de vraisemblance
- Analyse contextuelle via LLM (Ollama)
- Interface web moderne avec drag & drop
- Tests complets et documentation exhaustive
- Scripts de déploiement automatisés

Types de documents supportés :
- Acte de vente, donation, succession
- CNI avec détection du pays
- Contrats divers

Fonctionnalités :
- Upload et traitement asynchrone
- Vérifications externes automatiques
- Score de vraisemblance (0-1)
- Recommandations personnalisées
- Tableaux de bord et statistiques

Prêt pour la production avec démarrage en une commande.
427 lines
15 KiB
Python
427 lines
15 KiB
Python
"""
|
|
Tests complets pour l'API Notariale 4NK
|
|
"""
|
|
import pytest
|
|
import asyncio
|
|
import json
|
|
from fastapi.testclient import TestClient
|
|
from unittest.mock import Mock, patch, AsyncMock
|
|
import tempfile
|
|
import os
|
|
|
|
# Import the application under test.
import sys
# The entry below is a relative path, so it is resolved against the current
# working directory; it must be added before ``app`` is imported.
sys.path.append('services/host_api')
from app import app

# Shared HTTP test client used by the endpoint tests in this file.
client = TestClient(app)
|
|
|
|
class TestNotaryAPI:
    """Endpoint tests for the notary API, issued through the module-level ``client``.

    NOTE(review): the ``patch`` targets below use the dotted path
    ``services.host_api.routes.notary_documents``, whereas this file imports
    the application as ``app`` after appending ``services/host_api`` to
    ``sys.path`` — confirm both paths resolve to the same module object,
    otherwise the patches will not affect the routes under test.
    """

    @staticmethod
    def _post_upload(filename, content, content_type, form):
        """POST ``content`` as a multipart upload to ``/api/notary/upload``.

        The payload is written to a self-deleting temporary file and uploaded
        from that same handle, so the file is removed even when the request
        raises (the previous ``delete=False`` + ``os.unlink`` sequence leaked
        the file in that case).

        Args:
            filename: Name announced in the multipart part; its extension is
                reused as the temporary file suffix.
            content: Raw bytes to upload.
            content_type: MIME type announced for the uploaded part.
            form: Mapping of additional form fields sent with the upload.

        Returns:
            The response object returned by ``client.post``.
        """
        suffix = os.path.splitext(filename)[1]
        with tempfile.NamedTemporaryFile(suffix=suffix) as tmp_file:
            tmp_file.write(content)
            tmp_file.flush()
            tmp_file.seek(0)  # rewind so the upload reads from the first byte
            return client.post(
                "/api/notary/upload",
                files={"file": (filename, tmp_file, content_type)},
                data=form,
            )

    def test_health_check(self):
        """The health endpoint answers 200 with a ``healthy`` status."""
        response = client.get("/api/health")
        assert response.status_code == 200
        data = response.json()
        assert "status" in data
        assert data["status"] == "healthy"

    def test_upload_document_success(self):
        """A PDF upload with all form fields is accepted and queued."""
        response = self._post_upload(
            "test.pdf",
            b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n",
            "application/pdf",
            {
                "id_dossier": "TEST-001",
                "etude_id": "E-001",
                "utilisateur_id": "U-123",
                "source": "upload",
            },
        )

        assert response.status_code == 200
        data = response.json()
        assert "document_id" in data
        assert data["status"] == "queued"
        assert "message" in data

    def test_upload_document_invalid_type(self):
        """A plain-text upload is rejected with 415 and an explanatory detail."""
        response = self._post_upload(
            "test.txt",
            b"Ceci est un fichier texte",
            "text/plain",
            {
                "id_dossier": "TEST-001",
                "etude_id": "E-001",
                "utilisateur_id": "U-123",
            },
        )

        assert response.status_code == 415
        data = response.json()
        assert "Type de fichier non supporté" in data["detail"]

    def test_upload_document_missing_fields(self):
        """An upload lacking required form fields yields a validation error."""
        response = self._post_upload(
            "test.pdf",
            b"%PDF-1.4",
            "application/pdf",
            # etude_id and utilisateur_id are deliberately omitted.
            {"id_dossier": "TEST-001"},
        )

        assert response.status_code == 422  # Validation error

    def test_get_document_status(self):
        """The status endpoint is queried while ``get_document_status`` is patched."""
        # Stand-in for an existing document.
        with patch('services.host_api.routes.notary_documents.get_document_status') as mock_status:
            mock_status.return_value = {
                "document_id": "test-123",
                "status": "processing",
                "progress": 50,
                "current_step": "extraction_entites",
            }

            response = client.get("/api/notary/document/test-123/status")
            assert response.status_code == 200
            data = response.json()
            assert data["status"] == "processing"
            assert data["progress"] == 50

    def test_get_document_analysis(self):
        """The analysis endpoint is queried while ``get_document_analysis`` is patched."""
        # Stand-in for a complete analysis.
        with patch('services.host_api.routes.notary_documents.get_document_analysis') as mock_analysis:
            mock_analysis.return_value = {
                "document_id": "test-123",
                "type_detecte": "acte_vente",
                "confiance_classification": 0.95,
                "texte_extrait": "Texte de test",
                "entites_extraites": {
                    "identites": [
                        {"nom": "DUPONT", "prenom": "Jean", "type": "vendeur"}
                    ]
                },
                "verifications_externes": {
                    "cadastre": {"status": "verified", "confidence": 0.9}
                },
                "score_vraisemblance": 0.92,
                "avis_synthese": "Document cohérent",
                "recommandations": ["Vérifier l'identité"],
                "timestamp_analyse": "2025-01-09 10:30:00",
            }

            response = client.get("/api/notary/document/test-123/analysis")
            assert response.status_code == 200
            data = response.json()
            assert data["type_detecte"] == "acte_vente"
            assert data["score_vraisemblance"] == 0.92

    def test_list_documents(self):
        """The listing endpoint is queried while ``list_documents`` is patched."""
        with patch('services.host_api.routes.notary_documents.list_documents') as mock_list:
            mock_list.return_value = {
                "documents": [
                    {
                        "document_id": "test-123",
                        "filename": "test.pdf",
                        "status": "completed",
                        "created_at": "2025-01-09T10:00:00",
                    }
                ],
                "total": 1,
                "limit": 50,
                "offset": 0,
            }

            response = client.get("/api/notary/documents")
            assert response.status_code == 200
            data = response.json()
            assert len(data["documents"]) == 1
            assert data["total"] == 1

    def test_get_processing_stats(self):
        """The stats endpoint is queried while ``get_processing_stats`` is patched."""
        with patch('services.host_api.routes.notary_documents.get_processing_stats') as mock_stats:
            mock_stats.return_value = {
                "documents_traites": 100,
                "documents_en_cours": 5,
                "taux_reussite": 0.98,
                "temps_moyen_traitement": 90,
                "types_documents": {
                    "acte_vente": 50,
                    "acte_donation": 20,
                    "cni": 30,
                },
            }

            response = client.get("/api/notary/stats")
            assert response.status_code == 200
            data = response.json()
            assert data["documents_traites"] == 100
            assert data["taux_reussite"] == 0.98
|
|
|
|
class TestOCRProcessor:
    """Tests for the OCR processor."""

    @pytest.mark.asyncio
    async def test_ocr_processing(self):
        """Run ``process_document`` with the OpenCV and Tesseract calls patched."""
        from services.host_api.utils.ocr_processor import OCRProcessor

        ocr = OCRProcessor()

        with patch('cv2.imread') as fake_imread, \
                patch('pytesseract.image_to_string') as fake_to_string, \
                patch('pytesseract.image_to_data') as fake_to_data:
            # NOTE(review): the image loader is stubbed to return None rather
            # than pixel data — confirm the processor tolerates that value.
            fake_imread.return_value = None
            fake_to_string.return_value = "Texte extrait par OCR"
            fake_to_data.return_value = {
                'text': ['Texte', 'extrait'],
                'conf': [90, 85],
            }

            # The path does not exist on disk; loading is patched above.
            outcome = await ocr.process_document("test_image.jpg")

        assert "text" in outcome
        assert outcome["confidence"] > 0
|
|
|
|
class TestDocumentClassifier:
    """Tests for the document classifier."""

    @pytest.mark.asyncio
    async def test_classification_by_rules(self):
        """Rule-based classification labels a deed-of-sale text as ``acte_vente``."""
        from services.host_api.utils.document_classifier import DocumentClassifier

        # Sample text of a deed of sale.
        sample_text = """
        ACTE DE VENTE
        Entre les soussignés :
        VENDEUR : M. DUPONT Jean
        ACHETEUR : Mme MARTIN Marie
        Prix de vente : 250 000 euros
        """

        # Invoked as a plain call: nothing is awaited in this test.
        outcome = DocumentClassifier()._classify_by_rules(sample_text)

        assert outcome["type"] == "acte_vente"
        assert outcome["confidence"] > 0

    @pytest.mark.asyncio
    async def test_classification_by_llm(self):
        """LLM-based classification returns the type and confidence from the stubbed reply."""
        from services.host_api.utils.document_classifier import DocumentClassifier

        doc_classifier = DocumentClassifier()

        # Replace the LLM call with a canned JSON answer.
        with patch.object(doc_classifier.llm_client, 'generate_response') as fake_llm:
            fake_llm.return_value = '''
            {
                "type": "acte_vente",
                "confidence": 0.95,
                "reasoning": "Document contient vendeur, acheteur et prix",
                "key_indicators": ["vendeur", "acheteur", "prix"]
            }
            '''

            outcome = await doc_classifier._classify_by_llm("Test document", None)

            assert outcome["type"] == "acte_vente"
            assert outcome["confidence"] == 0.95
|
|
|
|
class TestEntityExtractor:
    """Tests for the entity extractor."""

    @pytest.mark.asyncio
    async def test_entity_extraction(self):
        """Extraction on a deed-of-sale text yields the expected entity groups."""
        from services.host_api.utils.entity_extractor import EntityExtractor

        sample_text = """
        VENDEUR : M. DUPONT Jean, né le 15/03/1980
        ACHETEUR : Mme MARTIN Marie
        Adresse : 123 rue de la Paix, 75001 Paris
        Prix : 250 000 euros
        """

        found = await EntityExtractor().extract_entities(sample_text, "acte_vente")

        # Each group must be present, checked in the same order as before.
        for group in ("identites", "adresses", "montants"):
            assert group in found
        assert len(found["identites"]) > 0
|
|
|
|
class TestExternalAPIs:
    """Tests for the external API integrations."""

    @pytest.mark.asyncio
    async def test_cadastre_verification(self):
        """Cadastre verification reports ``verified`` for a stubbed 200 response."""
        from services.host_api.utils.external_apis import ExternalAPIManager

        manager = ExternalAPIManager()

        # Canned JSON body returned by the stubbed HTTP response.
        parcel_payload = {
            "features": [
                {
                    "properties": {
                        "id": "1234",
                        "section": "A",
                        "numero": "1",
                    }
                }
            ]
        }

        with patch('aiohttp.ClientSession.get') as fake_get:
            fake_response = AsyncMock()
            fake_response.status = 200
            fake_response.json.return_value = parcel_payload
            # Entering the ``async with`` on the patched call yields the stub.
            fake_get.return_value.__aenter__.return_value = fake_response

            verification = await manager.verify_cadastre("123 rue de la Paix, Paris")

            assert verification.status == "verified"
            assert verification.confidence > 0
|
|
|
|
class TestVerificationEngine:
    """Tests for the verification engine."""

    def test_credibility_score_calculation(self):
        """The credibility score lies in [0, 1] and exceeds 0.5 for the sample inputs."""
        from services.host_api.utils.verification_engine import VerificationEngine

        engine = VerificationEngine()

        # Sample inputs, passed positionally in this order.
        sample_inputs = (
            {"confidence": 85, "word_count": 100},
            {"confidence": 0.9, "type": "acte_vente"},
            {
                "identites": [{"confidence": 0.8}],
                "adresses": [{"confidence": 0.9}],
            },
            {
                "cadastre": {"status": "verified", "confidence": 0.9}
            },
        )

        # The test function is synchronous, so the call result is driven to
        # completion with asyncio.run.
        pending = engine.calculate_credibility_score(*sample_inputs)
        score = asyncio.run(pending)

        assert 0 <= score <= 1
        assert score > 0.5  # Reasonable score for the sample data
|
|
|
|
class TestLLMClient:
    """Tests for the LLM client."""

    @pytest.mark.asyncio
    async def test_llm_generation(self):
        """``generate_response`` surfaces the text carried by the stubbed HTTP reply."""
        from services.host_api.utils.llm_client import LLMClient

        llm_client = LLMClient()

        # Canned JSON body returned by the stubbed HTTP response.
        reply_payload = {
            "response": "Réponse de test du LLM"
        }

        with patch('aiohttp.ClientSession.post') as fake_post:
            fake_response = AsyncMock()
            fake_response.status = 200
            fake_response.json.return_value = reply_payload
            # Entering the ``async with`` on the patched call yields the stub.
            fake_post.return_value.__aenter__.return_value = fake_response

            answer = await llm_client.generate_response("Test prompt")

            assert "Réponse de test du LLM" in answer
|
|
|
|
# Tests d'intégration
|
|
class TestIntegration:
    """Integration tests."""

    def test_full_pipeline_simulation(self):
        """Walk through upload, status and analysis with the lookups patched.

        The upload goes through the module-level ``client``; the status and
        analysis steps are requested while ``get_document_status`` and
        ``get_document_analysis`` are patched with canned values.
        """
        # 1. Upload. The temporary file deletes itself when the ``with`` block
        # exits, so it no longer leaks if the request raises.
        with tempfile.NamedTemporaryFile(suffix=".pdf") as tmp_file:
            tmp_file.write(b"%PDF-1.4")
            tmp_file.flush()
            tmp_file.seek(0)  # rewind so the upload reads from the first byte

            upload_response = client.post(
                "/api/notary/upload",
                files={"file": ("test.pdf", tmp_file, "application/pdf")},
                data={
                    "id_dossier": "INTEGRATION-001",
                    "etude_id": "E-001",
                    "utilisateur_id": "U-123",
                },
            )

        assert upload_response.status_code == 200
        document_id = upload_response.json()["document_id"]

        # 2. Status (simulated).
        with patch('services.host_api.routes.notary_documents.get_document_status') as mock_status:
            mock_status.return_value = {
                "document_id": document_id,
                "status": "completed",
                "progress": 100,
            }

            status_response = client.get(f"/api/notary/document/{document_id}/status")
            assert status_response.status_code == 200

        # 3. Analysis (simulated).
        with patch('services.host_api.routes.notary_documents.get_document_analysis') as mock_analysis:
            mock_analysis.return_value = {
                "document_id": document_id,
                "type_detecte": "acte_vente",
                "score_vraisemblance": 0.85,
                "avis_synthese": "Document analysé avec succès",
            }

            analysis_response = client.get(f"/api/notary/document/{document_id}/analysis")
            assert analysis_response.status_code == 200
|
|
|
|
if __name__ == "__main__":
    # Run this file's tests verbosely and hand pytest's exit code back to the
    # shell, so a failing run no longer exits with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|