"""Storage utilities backed by MinIO.

Documents are stored under ``"{doc_id}/original{ext}"`` and processing
artifacts under ``"{doc_id}/artifacts/{artifact_name}"`` in ``MINIO_BUCKET``.

NOTE(review): the write helpers deliberately swallow MinIO failures (the
original comments say this is so tests can run without a MinIO server) and
still return the object name, so a caller cannot tell a successful upload
from a skipped one. Confirm this is acceptable outside of tests.
"""

import inspect
import json
import logging
import os
import uuid  # not referenced in the code visible here; kept so the file's imports are unchanged
from io import BytesIO

from minio import Minio
from minio.error import S3Error

logger = logging.getLogger(__name__)


def _env_flag(name: str, default: bool = False) -> bool:
    """Return the boolean value of environment variable *name*.

    "1", "true", "yes" and "on" (case-insensitive) are truthy; any other
    value is falsy. *default* is returned when the variable is unset.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}


# MinIO configuration
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "localhost:9000")
MINIO_ACCESS_KEY = os.getenv("MINIO_ROOT_USER", "minio")
MINIO_SECRET_KEY = os.getenv("MINIO_ROOT_PASSWORD", "minio_pwd")
MINIO_BUCKET = os.getenv("MINIO_BUCKET", "ingest")
# Must be true in production (HTTPS). Defaults to False, as before, but can
# now be enabled with MINIO_SECURE=true instead of editing the source.
MINIO_SECURE = _env_flag("MINIO_SECURE", False)

# MinIO client (created once at import time)
minio_client = Minio(
    MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=MINIO_SECURE,
)


async def store_document(doc_id: str, content: bytes, filename: str) -> str:
    """Store an original document in MinIO.

    The object name is ``"{doc_id}/original{ext}"`` where ``ext`` is the
    extension of *filename* (empty when *filename* is empty or ``None``).
    The bucket is created first when it does not exist.

    Args:
        doc_id: Document identifier, used as the object-name prefix.
        content: Raw bytes to upload.
        filename: Original file name; only its extension is used.

    Returns:
        The object name. It is returned even when the bucket check or the
        upload fails (best-effort mode); the failure is logged as a warning.

    Raises:
        Exception: Any error raised while building the object name is
            logged and re-raised.

    NOTE(review): the MinIO calls below are not awaited, so they run inline
    in the coroutine for their whole duration.
    """
    try:
        file_extension = os.path.splitext(filename)[1] if filename else ""
        object_name = f"{doc_id}/original{file_extension}"
    except Exception as e:
        logger.error(f"Erreur lors du stockage du document {doc_id}: {e}")
        raise

    # Create the bucket if it does not exist (tolerant for tests).
    try:
        if not minio_client.bucket_exists(MINIO_BUCKET):
            minio_client.make_bucket(MINIO_BUCKET)
            # Only log creation when a bucket was actually created.
            logger.info(f"Bucket {MINIO_BUCKET} créé")
    except Exception:
        # Without a reachable MinIO (e.g. in tests) storage becomes a no-op.
        # exc_info keeps the real cause visible in the logs.
        logger.warning(
            "MinIO indisponible, stockage désactivé pour les tests",
            exc_info=True,
        )
        return object_name

    # Upload the file.
    try:
        minio_client.put_object(
            MINIO_BUCKET,
            object_name,
            BytesIO(content),
            length=len(content),
            content_type="application/octet-stream",
        )
    except Exception:
        logger.warning(
            "MinIO indisponible, upload ignoré (tests)",
            exc_info=True,
        )
        return object_name

    logger.info(f"Document {doc_id} stocké dans MinIO: {object_name}")
    return object_name


def get_document(doc_id: str, object_name: str) -> bytes:
    """Fetch an object from MinIO and return its full content.

    Args:
        doc_id: Document identifier; used only in log messages.
        object_name: Name of the object to read from ``MINIO_BUCKET``.

    Returns:
        The object's bytes.

    Raises:
        S3Error: Raised by the MinIO client; logged and re-raised.
        Exception: Any other error; logged and re-raised.
    """
    try:
        response = minio_client.get_object(MINIO_BUCKET, object_name)
        try:
            return response.read()
        finally:
            # The MinIO client requires the response to be closed and its
            # connection released, otherwise the pooled connection leaks.
            response.close()
            response.release_conn()
    except S3Error as e:
        logger.error(f"Erreur MinIO lors de la récupération du document {doc_id}: {e}")
        raise
    except Exception as e:
        logger.error(f"Erreur lors de la récupération du document {doc_id}: {e}")
        raise


def store_artifact(
    doc_id: str,
    artifact_name: str,
    content: bytes,
    content_type: str = "application/octet-stream",
) -> str:
    """Store a processing artifact under ``"{doc_id}/artifacts/{artifact_name}"``.

    Args:
        doc_id: Document identifier, used as the object-name prefix.
        artifact_name: Name of the artifact within the document's folder.
        content: Raw bytes to upload.
        content_type: MIME type recorded on the object.

    Returns:
        The object name. It is returned even when the upload fails
        (best-effort mode); the failure is logged as a warning.
    """
    object_name = f"{doc_id}/artifacts/{artifact_name}"
    try:
        minio_client.put_object(
            MINIO_BUCKET,
            object_name,
            BytesIO(content),
            length=len(content),
            content_type=content_type,
        )
    except Exception:
        logger.warning(
            "MinIO indisponible, store_artifact ignoré (tests)",
            exc_info=True,
        )
        return object_name

    logger.info(f"Artefact {artifact_name} stocké pour le document {doc_id}")
    return object_name


def list_document_artifacts(doc_id: str) -> list:
    """List the object names stored under ``"{doc_id}/artifacts/"``.

    Args:
        doc_id: Document identifier.

    Returns:
        A list of object names, or an empty list when listing fails. The
        failure is logged instead of being silently discarded.
    """
    prefix = f"{doc_id}/artifacts/"
    try:
        objects = minio_client.list_objects(MINIO_BUCKET, prefix=prefix, recursive=True)
        return [obj.object_name for obj in objects]
    except S3Error as e:
        logger.error(f"Erreur MinIO lors de la liste des artefacts pour {doc_id}: {e}")
    except Exception as e:
        logger.error(f"Erreur lors de la liste des artefacts pour {doc_id}: {e}")
    return []


def delete_document_artifacts(doc_id: str):
    """Delete every object stored under ``"{doc_id}/"``.

    Despite the name, the prefix is ``"{doc_id}/"``, so the original
    document is removed together with its artifacts.

    Failures are logged as a warning and not raised (best-effort mode); the
    success message is only logged when every removal succeeded.

    Args:
        doc_id: Document identifier.
    """
    prefix = f"{doc_id}/"
    try:
        objects = minio_client.list_objects(MINIO_BUCKET, prefix=prefix, recursive=True)
        for obj in objects:
            minio_client.remove_object(MINIO_BUCKET, obj.object_name)
    except Exception:
        logger.warning(
            "MinIO indisponible, suppression ignorée (tests)",
            exc_info=True,
        )
        return

    logger.info(f"Artefacts supprimés pour le document {doc_id}")


class StorageManager:
    """Object-oriented adapter over the storage helpers, used by tasks."""

    async def save_original_document(self, document_id: str, file) -> str:
        """Store an uploaded document and return its object name.

        Args:
            document_id: Document identifier.
            file: Either ``bytes``/``bytearray``, or an object exposing a
                ``read()`` method (sync or async) and optionally a
                ``filename`` attribute.

        Returns:
            The object name returned by ``store_document``.

        Raises:
            ValueError: If *file* is not bytes-like and has no ``read``.
        """
        if isinstance(file, (bytes, bytearray)):
            content = bytes(file)
            filename = ""
        else:
            read_fn = getattr(file, "read", None)
            if read_fn is None:
                raise ValueError("Objet fichier invalide")
            content = read_fn()
            # Supports sync and async readers, including wrapped callables
            # that return an awaitable without being coroutine functions.
            if inspect.isawaitable(content):
                content = await content
            filename = getattr(file, "filename", "")
        return await store_document(document_id, content, filename)

    async def save_processing_result(self, document_id: str, result: dict) -> str:
        """Store *result* as UTF-8 JSON in ``processing_result.json``.

        Returns:
            The object name returned by ``store_artifact``.
        """
        data = json.dumps(result, ensure_ascii=False).encode("utf-8")
        return store_artifact(
            document_id,
            "processing_result.json",
            data,
            content_type="application/json",
        )

    async def save_error_result(self, document_id: str, error_message: str) -> str:
        """Store *error_message* as UTF-8 text in ``error.txt``.

        Returns:
            The object name returned by ``store_artifact``.
        """
        data = error_message.encode("utf-8")
        return store_artifact(document_id, "error.txt", data, content_type="text/plain")