4NK_IA_back/services/worker/tasks/classification_tasks.py
Nicolas Cantu f50481cc38
Some checks failed
publish-images / docker-build-and-push (push) Failing after 23s
chore(ci): ajout workflow publish + nettoyage services + conf hosts
2025-09-11 16:49:23 +02:00

118 lines
3.6 KiB
Python

"""
Tâches de classification des documents
"""
import logging
from typing import Dict, Any
from celery_app import app
logger = logging.getLogger(__name__)
# Document types the classifier can report; 'autre' is the fallback bucket.
_DOCUMENT_TYPES = (
    'acte_vente',
    'acte_donation',
    'acte_succession',
    'cni',
    'contrat',
    'autre',
)

# Duration of the simulated processing, in seconds. The same value is used
# for the sleep and for the 'processing_time' field so they cannot drift.
_SIMULATED_PROCESSING_SECONDS = 1.0


def _guess_document_type(text):
    """Return ``(predicted_type, confidence)`` from simple keyword heuristics.

    The text is lowercased once and tested against a fixed, ordered list of
    French keywords; the first matching rule wins. ``None`` or empty text
    matches no rule and yields ``('autre', 0.60)``.
    """
    import re

    lowered = (text or '').lower()

    if 'vente' in lowered or 'achat' in lowered:
        return 'acte_vente', 0.85
    # "don" must match as a whole word: a plain substring test also fires on
    # unrelated words such as "donc", "dont" or "abandon".
    if 'donation' in lowered or re.search(r'\bdons?\b', lowered):
        return 'acte_donation', 0.80
    if 'succession' in lowered or 'héritage' in lowered:
        return 'acte_succession', 0.75
    if 'carte' in lowered and 'identité' in lowered:
        return 'cni', 0.90
    return 'autre', 0.60


@app.task(bind=True, name='classification.classify_document')
def classify_document(self, doc_id: str, text: str, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Classify a single document.

    The current implementation is a placeholder: it sleeps for a fixed
    duration and picks a type from keyword heuristics on ``text``.

    Args:
        doc_id: Identifier of the document.
        text: Text extracted from the document. ``None`` or an empty string
            is classified as ``'autre'``.
        context: Processing context; not used by the simulated implementation.

    Returns:
        A dict with the keys ``doc_id``, ``status``, ``predicted_type``,
        ``confidence``, ``all_predictions`` (one score per known document
        type) and ``processing_time``.

    Raises:
        Exception: Any error raised during processing is logged with its
            traceback and re-raised unchanged.
    """
    import time

    try:
        logger.info(f"Début de la classification pour le document {doc_id}")

        # Publish an intermediate state before the processing starts.
        self.update_state(
            state='PROGRESS',
            meta={'current_step': 'classification_processing', 'progress': 0}
        )

        # TODO: implement the real classification with Ollama.
        # For now the processing is only simulated.
        time.sleep(_SIMULATED_PROCESSING_SECONDS)

        predicted_type, confidence = _guess_document_type(text)

        result = {
            'doc_id': doc_id,
            'status': 'completed',
            'predicted_type': predicted_type,
            'confidence': confidence,
            # The predicted type carries its confidence; every other type
            # gets a flat placeholder score of 0.1.
            'all_predictions': {
                doc_type: 0.1 if doc_type != predicted_type else confidence
                for doc_type in _DOCUMENT_TYPES
            },
            'processing_time': _SIMULATED_PROCESSING_SECONDS
        }

        logger.info(f"Classification terminée pour le document {doc_id}: {predicted_type} (confiance: {confidence})")
        return result
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"Erreur lors de la classification du document {doc_id}: {e}")
        raise
@app.task(name='classification.batch_classify')
def batch_classify_documents(doc_ids: list, texts: list) -> Dict[str, Any]:
    """
    Classify a batch of documents.

    Each document is classified in turn by running ``classify_document``
    inline in the current process. A failure on one document is logged and
    recorded in the results; it does not abort the rest of the batch.

    Args:
        doc_ids: List of document identifiers.
        texts: List of the corresponding texts, in the same order.

    Returns:
        A dict with ``batch_status``, ``total_documents`` and ``results``.
        ``results`` holds one entry per document: either the classification
        result or ``{'doc_id', 'status': 'failed', 'error'}``.

    Raises:
        ValueError: If ``doc_ids`` and ``texts`` have different lengths.
    """
    if len(doc_ids) != len(texts):
        raise ValueError("Le nombre de documents doit correspondre au nombre de textes")

    logger.info(f"Classification en lot de {len(doc_ids)} documents")

    results = []
    for doc_id, text in zip(doc_ids, texts):
        try:
            # Run the sub-task inline instead of `.delay(...).get()`: waiting
            # on another task's result from inside a task is rejected by
            # Celery ("Never call result.get() within a task!") and can
            # deadlock the worker pool. `throw=True` re-raises task errors
            # here so they reach the handler below.
            outcome = classify_document.apply(args=(doc_id, text, {}), throw=True)
            results.append(outcome.result)
        except Exception as e:
            logger.exception(f"Erreur lors de la classification en lot pour {doc_id}: {e}")
            results.append({
                'doc_id': doc_id,
                'status': 'failed',
                'error': str(e)
            })

    return {
        'batch_status': 'completed',
        'total_documents': len(doc_ids),
        'results': results
    }