- Mise à jour du README.md avec les nouvelles fonctionnalités - Documentation API mise à jour avec les intégrations externes - Guide d'installation avec bootstrap automatisé - Architecture mise à jour avec Celery et intégrations - CHANGELOG détaillé avec toutes les nouvelles fonctionnalités - Nouvelle documentation des fonctionnalités v1.2.0 Nouvelles sections documentées: - Pipeline de traitement asynchrone avec Celery - Intégrations avec APIs externes (Cadastre, Géorisques, BODACC, etc.) - Clients d'intégration (AnythingLLM, Neo4j, OpenSearch) - Configuration d'environnement centralisée - Script bootstrap automatisé - Monitoring et observabilité - Exemples d'utilisation et API
483 lines
19 KiB
Python
483 lines
19 KiB
Python
"""
|
|
Client pour l'intégration avec Neo4j
|
|
"""
|
|
import os
|
|
import logging
|
|
from typing import Dict, Any, List, Optional
|
|
from neo4j import GraphDatabase
|
|
import json
|
|
from datetime import datetime
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class Neo4jClient:
|
|
"""Client pour l'intégration avec Neo4j"""
|
|
|
|
def __init__(self):
    """Read the Neo4j connection settings from the environment and connect.

    ``NEO4J_URI``, ``NEO4J_USER`` and ``NEO4J_PASSWORD`` are read from the
    process environment; each one falls back to a built-in default when the
    variable is not set. ``self.driver`` starts as ``None`` and
    ``self._connect()`` is then called.
    """
    env = os.environ

    self.uri = env.get('NEO4J_URI', 'bolt://neo4j:7687')
    self.username = env.get('NEO4J_USER', 'neo4j')
    # NOTE(review): the fallback password is hard-coded here; confirm it is
    # only relied upon in local/dev deployments.
    self.password = env.get('NEO4J_PASSWORD', 'neo4j_pwd')

    self.driver = None
    self._connect()
|
|
|
|
def _connect(self):
    """Build the Neo4j driver from the stored URI and credentials.

    On success the driver is kept in ``self.driver`` and a success message
    is logged. Any exception raised while building it is logged and
    ``self.driver`` is reset to ``None``; the exception is not re-raised.
    """
    try:
        credentials = (self.username, self.password)
        self.driver = GraphDatabase.driver(self.uri, auth=credentials)
        # NOTE(review): this only shows the driver object was created;
        # confirm whether the server is actually contacted at this point.
        logger.info("✅ Connexion à Neo4j établie")
    except Exception as exc:
        logger.error(f"❌ Erreur de connexion à Neo4j: {exc}")
        self.driver = None
|
|
|
|
def close(self):
    """Close the Neo4j driver, if one is open.

    The driver reference is cleared afterwards, so calling ``close()`` a
    second time is a no-op instead of closing an already-closed driver.
    """
    driver = self.driver
    if not driver:
        return
    try:
        driver.close()
    finally:
        # Drop the reference even when close() raises, so nothing keeps
        # using a driver that is in an unknown state.
        self.driver = None
|
|
|
|
async def create_dossier_context(self, dossier_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Create or refresh the graph node that holds a dossier's context.

    The ``Dossier`` node is merged on ``dossier_id``. ``created_at`` is only
    written when the node is first created, so calling this again for an
    existing dossier refreshes its properties without overwriting the
    original creation timestamp.

    Args:
        dossier_id: ID of the dossier.
        metadata: Dossier metadata. ``etude_id``, ``utilisateur_id`` and
            ``status`` (default ``'active'``) are copied to dedicated
            properties; the whole dict is also stored as a JSON string.

    Returns:
        On success, a dict with ``status`` set to ``'created'``, the
        ``dossier_id`` and a ``created_at`` timestamp taken from the local
        clock (not the value stored in the graph). On failure, a dict with
        ``status`` set to ``'error'`` and an ``error`` message. Exceptions
        are logged and reported through the returned dict, not raised.
    """
    logger.info(f"📁 Création du contexte du dossier {dossier_id}")

    query = """
        MERGE (d:Dossier {id: $dossier_id})
        ON CREATE SET d.created_at = datetime()
        SET d.etude_id = $etude_id,
            d.utilisateur_id = $utilisateur_id,
            d.updated_at = datetime(),
            d.status = $status,
            d.metadata = $metadata
        RETURN d
    """

    try:
        if self.driver is None:
            # The driver is None when the initial connection attempt failed;
            # report that explicitly rather than through an AttributeError.
            raise RuntimeError('Connexion à Neo4j non disponible')

        with self.driver.session() as session:
            result = session.run(
                query,
                dossier_id=dossier_id,
                etude_id=metadata.get('etude_id'),
                utilisateur_id=metadata.get('utilisateur_id'),
                status=metadata.get('status', 'active'),
                metadata=json.dumps(metadata),
            )

            if not result.single():
                return {
                    'status': 'error',
                    'error': 'Impossible de créer le contexte du dossier',
                }

            logger.info(f"✅ Contexte du dossier {dossier_id} créé")
            return {
                'status': 'created',
                'dossier_id': dossier_id,
                'created_at': datetime.now().isoformat(),
            }

    except Exception as e:
        logger.error(f"❌ Erreur lors de la création du contexte du dossier {dossier_id}: {e}")
        return {
            'status': 'error',
            'error': str(e),
        }
|
|
|
|
async def add_document_to_dossier(self, dossier_id: str, doc_id: str,
                                  doc_metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Attach a document to an existing dossier.

    The ``Document`` node is merged on ``doc_id`` and linked to the dossier
    with a ``CONTAINS`` relationship. ``created_at`` is only written when
    the document node is first created, so re-adding a document refreshes
    its properties without overwriting the original creation timestamp.

    Args:
        dossier_id: ID of the dossier; the query matches an existing
            ``Dossier`` node and returns nothing when there is none.
        doc_id: ID of the document.
        doc_metadata: Document metadata. ``filename``, ``type`` and
            ``status`` (default ``'uploaded'``) are copied to dedicated
            properties; the whole dict is also stored as a JSON string.

    Returns:
        On success, a dict with ``status`` set to ``'added'``, the
        ``dossier_id``, the ``doc_id`` and an ``added_at`` timestamp taken
        from the local clock. On failure (including when the query returns
        no row), a dict with ``status`` set to ``'error'`` and an ``error``
        message. Exceptions are logged and reported through the returned
        dict, not raised.
    """
    logger.info(f"📄 Ajout du document {doc_id} au dossier {dossier_id}")

    query = """
        MATCH (d:Dossier {id: $dossier_id})
        MERGE (doc:Document {id: $doc_id})
        ON CREATE SET doc.created_at = datetime()
        SET doc.filename = $filename,
            doc.type = $type,
            doc.status = $status,
            doc.updated_at = datetime(),
            doc.metadata = $metadata
        MERGE (d)-[:CONTAINS]->(doc)
        RETURN doc
    """

    try:
        if self.driver is None:
            # The driver is None when the initial connection attempt failed;
            # report that explicitly rather than through an AttributeError.
            raise RuntimeError('Connexion à Neo4j non disponible')

        with self.driver.session() as session:
            result = session.run(
                query,
                dossier_id=dossier_id,
                doc_id=doc_id,
                filename=doc_metadata.get('filename'),
                type=doc_metadata.get('type'),
                status=doc_metadata.get('status', 'uploaded'),
                metadata=json.dumps(doc_metadata),
            )

            if not result.single():
                return {
                    'status': 'error',
                    'error': "Impossible d'ajouter le document au dossier",
                }

            logger.info(f"✅ Document {doc_id} ajouté au dossier {dossier_id}")
            return {
                'status': 'added',
                'dossier_id': dossier_id,
                'doc_id': doc_id,
                'added_at': datetime.now().isoformat(),
            }

    except Exception as e:
        logger.error(f"❌ Erreur lors de l'ajout du document {doc_id} au dossier {dossier_id}: {e}")
        return {
            'status': 'error',
            'error': str(e),
        }
|
|
|
|
async def add_entities_to_document(self, doc_id: str, entities: Dict[str, Any]) -> Dict[str, Any]:
    """
    Store the entities extracted from a document.

    Each key of ``entities`` is an entity category; recognised categories
    are handed to the matching private ``_add_*_entities`` coroutine and
    unrecognised ones are ignored.

    Args:
        doc_id: ID of the document.
        entities: Extracted entities, keyed by category (``personnes``,
            ``adresses``, ``biens``, ``montants``, ``dates``).

    Returns:
        On success, a dict with ``status`` set to ``'added'``, the
        ``doc_id``, ``entities_count`` (the number of keys in ``entities``)
        and an ``added_at`` timestamp. On failure, a dict with ``status``
        set to ``'error'`` and an ``error`` message.
    """
    logger.info(f"🏷️ Ajout des entités au document {doc_id}")

    # Entity category -> name of the coroutine that persists it. The method
    # is looked up only when its category is actually present.
    handler_names = {
        'personnes': '_add_person_entities',
        'adresses': '_add_address_entities',
        'biens': '_add_property_entities',
        'montants': '_add_amount_entities',
        'dates': '_add_date_entities',
    }

    try:
        with self.driver.session() as session:
            for category, payload in entities.items():
                handler_name = handler_names.get(category)
                if handler_name is None:
                    continue
                handler = getattr(self, handler_name)
                await handler(session, doc_id, payload)

            logger.info(f"✅ Entités ajoutées au document {doc_id}")
            outcome = {
                'status': 'added',
                'doc_id': doc_id,
                'entities_count': len(entities),
                'added_at': datetime.now().isoformat(),
            }
            return outcome

    except Exception as exc:
        logger.error(f"❌ Erreur lors de l'ajout des entités au document {doc_id}: {exc}")
        return {'status': 'error', 'error': str(exc)}
|
|
|
|
async def _add_person_entities(self, session, doc_id: str, persons: List[Dict[str, Any]]):
    """Store the person entities mentioned by a document.

    Each usable entry is merged as a ``Personne`` node keyed on
    ``(nom, prenom)`` and linked to the document with ``MENTIONS``.

    Entries are skipped when they are not dicts or when ``nom`` is missing
    or ``None``. A missing ``prenom`` is stored as an empty string, because
    Cypher refuses to MERGE on a null property value and one such entry
    would otherwise abort the whole batch.

    Args:
        session: Open Neo4j session used to run the queries.
        doc_id: ID of the document mentioning the persons.
        persons: Extracted persons; ``None`` is treated as an empty list.
    """
    # NOTE(review): SET assigns every optional property on each call, so an
    # entry lacking e.g. date_naissance passes null for it — confirm that
    # clearing a previously stored value is acceptable.
    query = """
        MATCH (doc:Document {id: $doc_id})
        MERGE (p:Personne {nom: $nom, prenom: $prenom})
        SET p.date_naissance = $date_naissance,
            p.lieu_naissance = $lieu_naissance,
            p.nationalite = $nationalite,
            p.adresse = $adresse,
            p.updated_at = datetime()
        MERGE (doc)-[:MENTIONS]->(p)
        RETURN p
    """

    for person in persons or ():
        if not isinstance(person, dict) or person.get('nom') is None:
            continue

        session.run(
            query,
            doc_id=doc_id,
            nom=person['nom'],
            prenom=person.get('prenom') or '',
            date_naissance=person.get('date_naissance'),
            lieu_naissance=person.get('lieu_naissance'),
            nationalite=person.get('nationalite'),
            adresse=person.get('adresse'),
        )
|
|
|
|
async def _add_address_entities(self, session, doc_id: str, addresses: List[Dict[str, Any]]):
    """Store the address entities mentioned by a document.

    Each usable entry is merged as an ``Adresse`` node keyed on ``adresse``
    and linked to the document with ``MENTIONS``. ``coordinates`` is stored
    as a JSON string (``[]`` when absent).

    Entries are skipped when they are not dicts or when ``adresse`` is
    missing or ``None``: Cypher refuses to MERGE on a null property value,
    and one such entry would otherwise abort the whole batch.

    Args:
        session: Open Neo4j session used to run the queries.
        doc_id: ID of the document mentioning the addresses.
        addresses: Extracted addresses; ``None`` is treated as an empty list.
    """
    query = """
        MATCH (doc:Document {id: $doc_id})
        MERGE (a:Adresse {adresse: $adresse})
        SET a.code_postal = $code_postal,
            a.ville = $ville,
            a.departement = $departement,
            a.region = $region,
            a.coordinates = $coordinates,
            a.updated_at = datetime()
        MERGE (doc)-[:MENTIONS]->(a)
        RETURN a
    """

    for address in addresses or ():
        if not isinstance(address, dict) or address.get('adresse') is None:
            continue

        session.run(
            query,
            doc_id=doc_id,
            adresse=address['adresse'],
            code_postal=address.get('code_postal'),
            ville=address.get('ville'),
            departement=address.get('departement'),
            region=address.get('region'),
            coordinates=json.dumps(address.get('coordinates', [])),
        )
|
|
|
|
async def _add_property_entities(self, session, doc_id: str, properties: List[Dict[str, Any]]):
    """Store the property ("bien") entities mentioned by a document.

    Each usable entry is merged as a ``Bien`` node keyed on ``adresse`` and
    linked to the document with ``MENTIONS``.

    Entries are skipped when they are not dicts or when ``adresse`` is
    missing or ``None``: Cypher refuses to MERGE on a null property value,
    and one such entry would otherwise abort the whole batch.

    Args:
        session: Open Neo4j session used to run the queries.
        doc_id: ID of the document mentioning the properties.
        properties: Extracted properties; ``None`` is treated as an empty
            list.
    """
    query = """
        MATCH (doc:Document {id: $doc_id})
        MERGE (b:Bien {adresse: $adresse})
        SET b.surface = $surface,
            b.prix = $prix,
            b.type_bien = $type_bien,
            b.reference_cadastrale = $reference_cadastrale,
            b.updated_at = datetime()
        MERGE (doc)-[:MENTIONS]->(b)
        RETURN b
    """

    for property_data in properties or ():
        if not isinstance(property_data, dict) or property_data.get('adresse') is None:
            continue

        session.run(
            query,
            doc_id=doc_id,
            adresse=property_data['adresse'],
            surface=property_data.get('surface'),
            prix=property_data.get('prix'),
            type_bien=property_data.get('type_bien'),
            reference_cadastrale=property_data.get('reference_cadastrale'),
        )
|
|
|
|
async def _add_amount_entities(self, session, doc_id: str, amounts: List[Dict[str, Any]]):
    """Store the amount entities mentioned by a document.

    Each usable entry is merged as a ``Montant`` node keyed on
    ``(montant, devise)`` and linked to the document with ``MENTIONS``.

    Entries are skipped when they are not dicts or when ``montant`` is
    missing or ``None`` (an amount of ``0`` is kept). ``devise`` falls back
    to ``'EUR'`` when absent, ``None`` or empty. Both rules exist because
    Cypher refuses to MERGE on a null property value, and one such entry
    would otherwise abort the whole batch.

    Args:
        session: Open Neo4j session used to run the queries.
        doc_id: ID of the document mentioning the amounts.
        amounts: Extracted amounts; ``None`` is treated as an empty list.
    """
    query = """
        MATCH (doc:Document {id: $doc_id})
        MERGE (m:Montant {montant: $montant, devise: $devise})
        SET m.type_montant = $type_montant,
            m.description = $description,
            m.updated_at = datetime()
        MERGE (doc)-[:MENTIONS]->(m)
        RETURN m
    """

    for amount in amounts or ():
        if not isinstance(amount, dict) or amount.get('montant') is None:
            continue

        session.run(
            query,
            doc_id=doc_id,
            montant=amount['montant'],
            devise=amount.get('devise') or 'EUR',
            type_montant=amount.get('type_montant'),
            description=amount.get('description'),
        )
|
|
|
|
async def _add_date_entities(self, session, doc_id: str, dates: List[Dict[str, Any]]):
    """Store the date entities mentioned by a document.

    Each usable entry is merged as a ``Date`` node keyed on ``date`` and
    linked to the document with ``MENTIONS``.

    Entries are skipped when they are not dicts or when ``date`` is missing
    or ``None``: Cypher refuses to MERGE on a null property value, and one
    such entry would otherwise abort the whole batch.

    Args:
        session: Open Neo4j session used to run the queries.
        doc_id: ID of the document mentioning the dates.
        dates: Extracted dates; ``None`` is treated as an empty list.
    """
    query = """
        MATCH (doc:Document {id: $doc_id})
        MERGE (d:Date {date: $date})
        SET d.type_date = $type_date,
            d.description = $description,
            d.updated_at = datetime()
        MERGE (doc)-[:MENTIONS]->(d)
        RETURN d
    """

    for date_data in dates or ():
        if not isinstance(date_data, dict) or date_data.get('date') is None:
            continue

        session.run(
            query,
            doc_id=doc_id,
            date=date_data['date'],
            type_date=date_data.get('type_date'),
            description=date_data.get('description'),
        )
|
|
|
|
async def find_related_documents(self, doc_id: str, max_depth: int = 2) -> Dict[str, Any]:
    """
    Find documents connected to a given document in the graph.

    Follows relationships of any type, in either direction, up to
    ``max_depth`` hops and returns at most 10 other ``Document`` nodes,
    nearest first. Each related document is reported once, with the length
    of the shortest path found to it.

    Args:
        doc_id: ID of the starting document.
        max_depth: Maximum number of hops to traverse; must convert to an
            integer >= 1.

    Returns:
        On success, a dict with ``status`` set to ``'completed'``, the
        ``doc_id``, ``related_documents`` (a list of dicts with ``doc_id``,
        ``filename``, ``type`` and ``distance``), ``count`` and a
        ``searched_at`` timestamp. On failure (including an invalid
        ``max_depth``), a dict with ``status`` set to ``'error'`` and an
        ``error`` message.
    """
    logger.info(f"🔗 Recherche de documents liés au document {doc_id}")

    try:
        depth = int(max_depth)
        if depth < 1:
            raise ValueError(f"max_depth doit être >= 1 (reçu: {max_depth!r})")

        if self.driver is None:
            # The driver is None when the initial connection attempt failed;
            # report that explicitly rather than through an AttributeError.
            raise RuntimeError('Connexion à Neo4j non disponible')

        # Cypher does not accept a parameter as a variable-length bound, so
        # the validated integer is inlined into the query text. The path is
        # named so that length() is applied to a path rather than to a list
        # of relationships.
        query = f"""
            MATCH path = (doc:Document {{id: $doc_id}})-[*1..{depth}]-(related:Document)
            WHERE doc <> related
            RETURN related, min(length(path)) AS distance
            ORDER BY distance
            LIMIT 10
        """

        with self.driver.session() as session:
            result = session.run(query, doc_id=doc_id)

            related_docs = [
                {
                    'doc_id': record['related']['id'],
                    'filename': record['related'].get('filename'),
                    'type': record['related'].get('type'),
                    'distance': record['distance'],
                }
                for record in result
            ]

            logger.info(f"✅ {len(related_docs)} documents liés trouvés")
            return {
                'status': 'completed',
                'doc_id': doc_id,
                'related_documents': related_docs,
                'count': len(related_docs),
                'searched_at': datetime.now().isoformat(),
            }

    except Exception as e:
        logger.error(f"❌ Erreur lors de la recherche de documents liés: {e}")
        return {
            'status': 'error',
            'error': str(e),
        }
|
|
|
|
async def get_dossier_summary(self, dossier_id: str) -> Dict[str, Any]:
    """
    Build a summary of a dossier from the graph.

    Two queries are run in the same session: one counts the dossier's
    documents and mentioned entities and collects the distinct document
    types; the other returns the ten most frequent entity labels.

    Args:
        dossier_id: ID of the dossier.

    Returns:
        On success, a dict with ``status`` set to ``'completed'``, the
        ``dossier_id``, a ``summary`` dict (``documents_count``,
        ``entities_count``, ``document_types``, ``entity_frequencies``) and
        a ``generated_at`` timestamp. On failure, a dict with ``status``
        set to ``'error'`` and an ``error`` message.
    """
    logger.info(f"📊 Génération du résumé du dossier {dossier_id}")

    # Overall counters for the dossier.
    stats_query = """
        MATCH (d:Dossier {id: $dossier_id})
        OPTIONAL MATCH (d)-[:CONTAINS]->(doc:Document)
        OPTIONAL MATCH (doc)-[:MENTIONS]->(entity)
        RETURN
        count(DISTINCT doc) as documents_count,
        count(DISTINCT entity) as entities_count,
        collect(DISTINCT doc.type) as document_types
    """

    # Most frequently mentioned entity labels.
    frequency_query = """
        MATCH (d:Dossier {id: $dossier_id})-[:CONTAINS]->(doc:Document)-[:MENTIONS]->(entity)
        RETURN labels(entity)[0] as entity_type, count(*) as frequency
        ORDER BY frequency DESC
        LIMIT 10
    """

    try:
        with self.driver.session() as session:
            stats = session.run(stats_query, dossier_id=dossier_id).single()

            frequency_rows = session.run(frequency_query, dossier_id=dossier_id)
            entity_frequencies = [
                {'type': row['entity_type'], 'frequency': row['frequency']}
                for row in frequency_rows
            ]

            summary = {
                'documents_count': stats['documents_count'],
                'entities_count': stats['entities_count'],
                'document_types': stats['document_types'],
                'entity_frequencies': entity_frequencies,
            }
            return {
                'status': 'completed',
                'dossier_id': dossier_id,
                'summary': summary,
                'generated_at': datetime.now().isoformat(),
            }

    except Exception as exc:
        logger.error(f"❌ Erreur lors de la génération du résumé du dossier {dossier_id}: {exc}")
        return {'status': 'error', 'error': str(exc)}
|
|
|
|
async def create_relationships_between_entities(self, doc_id: str,
                                                relationships: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Create relationships between entities on behalf of a document.

    Each relationship dict provides ``type`` (relationship type), ``from``
    and ``to`` (dicts with the entity ``type`` label and ``id``) and an
    optional ``properties`` dict, stored as a JSON string on the
    relationship. Entries missing ``type``, ``from`` or ``to`` are ignored.

    Labels and relationship types cannot be passed as query parameters, so
    they are inlined into the query text. To keep crafted input from
    altering the query, each one must be a plain identifier (letters,
    digits, underscore); entries that fail this check are skipped with a
    warning.

    Args:
        doc_id: ID of the document the relationships come from.
        relationships: List of relationships to create.

    Returns:
        On success, a dict with ``status`` set to ``'completed'``, the
        ``doc_id``, ``relations_created`` (number of queries that returned
        a row) and a ``created_at`` timestamp. On failure, a dict with
        ``status`` set to ``'error'`` and an ``error`` message.
    """
    logger.info(f"🔗 Création de relations pour le document {doc_id}")

    try:
        if self.driver is None:
            # The driver is None when the initial connection attempt failed;
            # report that explicitly rather than through an AttributeError.
            raise RuntimeError('Connexion à Neo4j non disponible')

        with self.driver.session() as session:
            created_relations = 0

            for rel in relationships:
                rel_type = rel.get('type')
                from_entity = rel.get('from')
                to_entity = rel.get('to')
                properties = rel.get('properties', {})

                if not (rel_type and from_entity and to_entity):
                    continue

                from_label = from_entity['type']
                to_label = to_entity['type']

                # Only plain identifiers may be inlined into the query text.
                inlined_names = (rel_type, from_label, to_label)
                if not all(isinstance(name, str) and name.isidentifier()
                           for name in inlined_names):
                    logger.warning(
                        f"⚠️ Relation ignorée (identifiant invalide): "
                        f"{rel_type!r} / {from_label!r} / {to_label!r}"
                    )
                    continue

                # NOTE(review): the entity helpers visible in this file
                # MERGE nodes on natural keys and do not set an `id`
                # property; confirm entity nodes get an `id` elsewhere,
                # otherwise these MATCH clauses will find nothing.
                result = session.run(
                    f"""
                    MATCH (doc:Document {{id: $doc_id}})
                    MATCH (src:`{from_label}` {{id: $from_id}})
                    MATCH (dst:`{to_label}` {{id: $to_id}})
                    MERGE (src)-[r:`{rel_type}`]->(dst)
                    SET r.doc_id = $doc_id,
                        r.created_at = datetime(),
                        r.properties = $properties
                    RETURN r
                    """,
                    doc_id=doc_id,
                    from_id=from_entity['id'],
                    to_id=to_entity['id'],
                    properties=json.dumps(properties),
                )

                if result.single():
                    created_relations += 1

            logger.info(f"✅ {created_relations} relations créées")
            return {
                'status': 'completed',
                'doc_id': doc_id,
                'relations_created': created_relations,
                'created_at': datetime.now().isoformat(),
            }

    except Exception as e:
        logger.error(f"❌ Erreur lors de la création des relations: {e}")
        return {
            'status': 'error',
            'error': str(e),
        }
|