184 lines
6.4 KiB
Python
184 lines
6.4 KiB
Python
"""
|
|
Utilitaires de stockage avec MinIO
|
|
"""
|
|
import os
|
|
import uuid
|
|
from minio import Minio
|
|
from minio.error import S3Error
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)

# MinIO connection settings; each one can be overridden through the environment.
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "localhost:9000")
MINIO_ACCESS_KEY = os.getenv("MINIO_ROOT_USER", "minio")
MINIO_SECRET_KEY = os.getenv("MINIO_ROOT_PASSWORD", "minio_pwd")
MINIO_BUCKET = os.getenv("MINIO_BUCKET", "ingest")
# TLS stays disabled unless MINIO_SECURE is set to 1/true/yes/on (case-insensitive),
# so the previous default (False) is preserved while production can opt in to HTTPS.
MINIO_SECURE = os.getenv("MINIO_SECURE", "false").strip().lower() in {"1", "true", "yes", "on"}

# Module-level MinIO client shared by every helper in this file.
minio_client = Minio(
    MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=MINIO_SECURE,
)
|
|
|
|
async def store_document(doc_id: str, content: bytes, filename: str) -> str:
    """Store a document's original bytes in MinIO and return its object name.

    The object is written to ``MINIO_BUCKET`` under
    ``"{doc_id}/original{ext}"``, where ``ext`` is the extension of
    *filename* (empty when *filename* is falsy). The bucket is created first
    when it does not exist yet.

    Storage is best-effort: if the bucket check/creation or the upload
    raises, a warning (with traceback) is logged and the object name is still
    returned even though nothing was stored.

    Args:
        doc_id: Identifier used as the object-name prefix.
        content: Raw bytes to upload.
        filename: Original file name; only its extension is used.

    Returns:
        The object name the document was (or would have been) stored under.

    Raises:
        Exception: Any error raised outside the two best-effort MinIO steps
            is logged and re-raised.
    """
    from io import BytesIO

    try:
        # Build the object name from the document id and the original extension
        file_extension = os.path.splitext(filename)[1] if filename else ""
        object_name = f"{doc_id}/original{file_extension}"

        # Create the bucket if it does not exist (tolerant of MinIO being absent)
        try:
            if not minio_client.bucket_exists(MINIO_BUCKET):
                minio_client.make_bucket(MINIO_BUCKET)
                # Only report a creation when one actually happened
                logger.info(f"Bucket {MINIO_BUCKET} créé")
        except Exception:
            # Best-effort: keep going without storage, but record the cause
            logger.warning("MinIO indisponible, stockage désactivé pour les tests", exc_info=True)
            return object_name

        # Upload the file content
        try:
            minio_client.put_object(
                MINIO_BUCKET,
                object_name,
                BytesIO(content),
                length=len(content),
                content_type="application/octet-stream",
            )
        except Exception:
            # Best-effort: the upload is skipped, but the cause is recorded
            logger.warning("MinIO indisponible, upload ignoré (tests)", exc_info=True)
            return object_name

        logger.info(f"Document {doc_id} stocké dans MinIO: {object_name}")
        return object_name

    except S3Error as e:
        logger.error(f"Erreur MinIO lors du stockage du document {doc_id}: {e}")
        raise
    except Exception as e:
        logger.error(f"Erreur lors du stockage du document {doc_id}: {e}")
        raise
|
|
|
|
def get_document(doc_id: str, object_name: str) -> bytes:
    """Fetch an object from ``MINIO_BUCKET`` and return its full content.

    Args:
        doc_id: Document identifier; used only in error log messages.
        object_name: Name of the object to read.

    Returns:
        The complete object body, read into memory.

    Raises:
        S3Error: Logged and re-raised when MinIO reports an error.
        Exception: Any other failure is logged and re-raised.
    """
    try:
        response = minio_client.get_object(MINIO_BUCKET, object_name)
        try:
            return response.read()
        finally:
            # Always close the response and hand the connection back to the
            # pool, otherwise each call leaks an HTTP connection.
            response.close()
            response.release_conn()
    except S3Error as e:
        logger.error(f"Erreur MinIO lors de la récupération du document {doc_id}: {e}")
        raise
    except Exception as e:
        logger.error(f"Erreur lors de la récupération du document {doc_id}: {e}")
        raise
|
|
|
|
def store_artifact(doc_id: str, artifact_name: str, content: bytes, content_type: str = "application/octet-stream") -> str:
    """Store a processing artifact for a document and return its object name.

    The artifact is uploaded to ``MINIO_BUCKET`` under
    ``"{doc_id}/artifacts/{artifact_name}"``. The upload is best-effort: when
    it raises, a warning is logged and the object name is returned anyway.

    Args:
        doc_id: Identifier of the owning document.
        artifact_name: Name of the artifact inside the document's
            ``artifacts/`` prefix.
        content: Raw bytes to upload.
        content_type: MIME type passed to the upload call.

    Returns:
        The object name the artifact was (or would have been) stored under.

    Raises:
        Exception: Errors raised outside the best-effort upload are logged
            and re-raised.
    """
    try:
        from io import BytesIO

        target = f"{doc_id}/artifacts/{artifact_name}"

        try:
            payload = BytesIO(content)
            size = len(content)
            minio_client.put_object(
                MINIO_BUCKET,
                target,
                payload,
                length=size,
                content_type=content_type,
            )
        except Exception:
            # Upload skipped: fall through and still hand back the name
            logger.warning("MinIO indisponible, store_artifact ignoré (tests)")
        else:
            logger.info(f"Artefact {artifact_name} stocké pour le document {doc_id}")

        return target

    except S3Error as e:
        logger.error(f"Erreur MinIO lors du stockage de l'artefact {artifact_name}: {e}")
        raise
    except Exception as e:
        logger.error(f"Erreur lors du stockage de l'artefact {artifact_name}: {e}")
        raise
|
|
|
|
def list_document_artifacts(doc_id: str) -> list:
    """Return the object names of the artifacts stored for a document.

    Lists every object in ``MINIO_BUCKET`` whose name starts with
    ``"{doc_id}/artifacts/"`` (recursively).

    Args:
        doc_id: Identifier of the document whose artifacts are listed.

    Returns:
        A list of object names; an empty list when listing fails. Failures
        are logged instead of being swallowed silently.
    """
    try:
        prefix = f"{doc_id}/artifacts/"
        objects = minio_client.list_objects(MINIO_BUCKET, prefix=prefix, recursive=True)
        # The iteration stays inside the try so errors raised while walking
        # the listing are handled the same way as errors from the call itself.
        return [obj.object_name for obj in objects]
    except S3Error as e:
        logger.error(f"Erreur MinIO lors de la liste des artefacts pour {doc_id}: {e}")
        return []
    except Exception as e:
        logger.error(f"Erreur lors de la liste des artefacts pour {doc_id}: {e}")
        return []
|
|
|
|
def delete_document_artifacts(doc_id: str):
    """Delete every object stored under a document's prefix.

    Removes all objects in ``MINIO_BUCKET`` whose name starts with
    ``"{doc_id}/"``. Note that this prefix is the whole document prefix, so
    it is not limited to names under ``"{doc_id}/artifacts/"``.

    Deletion is best-effort: if listing or removing raises, a warning (with
    traceback) is logged and the function returns normally. The success
    message is logged only when the whole loop completed without error.

    Args:
        doc_id: Identifier of the document whose objects are removed.

    Raises:
        Exception: Errors raised outside the best-effort deletion are logged
            and re-raised.
    """
    try:
        prefix = f"{doc_id}/"
        try:
            objects = minio_client.list_objects(MINIO_BUCKET, prefix=prefix, recursive=True)
            for obj in objects:
                minio_client.remove_object(MINIO_BUCKET, obj.object_name)
        except Exception:
            logger.warning("MinIO indisponible, suppression ignorée (tests)", exc_info=True)
        else:
            # Do not claim success when the deletion was skipped or interrupted
            logger.info(f"Artefacts supprimés pour le document {doc_id}")

    except S3Error as e:
        logger.error(f"Erreur MinIO lors de la suppression des artefacts pour {doc_id}: {e}")
        raise
    except Exception as e:
        logger.error(f"Erreur lors de la suppression des artefacts pour {doc_id}: {e}")
        raise
|
|
|
|
class StorageManager:
    """Object-oriented storage adapter used by the tasks."""

    async def save_original_document(self, document_id: str, file) -> str:
        """Read an uploaded file and store it as the document's original.

        Args:
            document_id: Identifier of the document being stored.
            file: Either raw ``bytes``/``bytearray``, or an object exposing a
                ``read()`` method (sync, or returning an awaitable) and
                optionally a ``filename`` attribute.

        Returns:
            The object name returned by ``store_document``.

        Raises:
            ValueError: If *file* is not bytes-like and has no ``read``
                attribute.
        """
        import inspect as _inspect

        if isinstance(file, (bytes, bytearray)):
            content = bytes(file)
        else:
            read_fn = getattr(file, 'read', None)
            if read_fn is None:
                raise ValueError("Objet fichier invalide")
            content = read_fn()
            # Await whatever read() returned if it is awaitable; this covers
            # ``async def read`` as well as sync callables returning awaitables.
            if _inspect.isawaitable(content):
                content = await content

        # The name handed to store_document is the object's own ``filename``
        # attribute, or '' when absent (always '' for raw bytes).
        object_name = await store_document(document_id, content, getattr(file, 'filename', ''))
        return object_name

    async def save_processing_result(self, document_id: str, result: dict) -> str:
        """Serialize *result* to UTF-8 JSON and store it as ``processing_result.json``.

        Returns:
            The object name returned by ``store_artifact``.
        """
        from json import dumps

        data = dumps(result, ensure_ascii=False).encode('utf-8')
        return store_artifact(document_id, "processing_result.json", data, content_type="application/json")

    async def save_error_result(self, document_id: str, error_message: str) -> str:
        """Store *error_message* as the UTF-8 text artifact ``error.txt``.

        Returns:
            The object name returned by ``store_artifact``.
        """
        data = error_message.encode('utf-8')
        return store_artifact(document_id, "error.txt", data, content_type="text/plain")
|