4NK_IA_front/scripts/precache.js

114 lines
3.2 KiB
JavaScript

#!/usr/bin/env node
/*
Génère des JSON de cache minimaux pour tous les fichiers présents dans backend/uploads/<folderHash>
Usage: node scripts/precache.js <folderHash>
*/
const fs = require('fs')
const path = require('path')
const crypto = require('crypto')
// Lower-case file extension (leading dot included) -> MIME type.
// A Map is used instead of an object literal so that property names inherited
// from Object.prototype (e.g. "constructor", "__proto__") can never match.
const MIME_TYPE_BY_EXTENSION = new Map([
  ['.pdf', 'application/pdf'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.png', 'image/png'],
  ['.tiff', 'image/tiff'],
  ['.txt', 'text/plain'],
])

/**
 * Returns the MIME type associated with a file extension.
 *
 * @param {string} ext - Extension including its leading dot, in any letter
 *   case (e.g. ".PDF"), as returned by path.extname().
 * @returns {string} The mapped MIME type, or "application/octet-stream" when
 *   the extension is unknown, empty, or not a string.
 */
function getMimeTypeByExt(ext) {
  const fallback = 'application/octet-stream'
  if (typeof ext !== 'string') {
    return fallback
  }
  return MIME_TYPE_BY_EXTENSION.get(ext.toLowerCase()) || fallback
}
/**
 * Writes a minimal placeholder cache JSON for every regular file found in
 * backend/uploads/<folderHash>. Each entry is written to
 * backend/cache/<folderHash>/<sha256 of the file content>.json.
 *
 * Terminates the process with exit code 1 when folderHash is missing and with
 * exit code 2 when the uploads folder does not exist.
 *
 * @param {string} folderHash - Name of the sub-folder of backend/uploads to precache.
 * @returns {void}
 */
function precacheFolder(folderHash) {
  if (!folderHash) {
    console.error('Usage: node scripts/precache.js <folderHash>')
    process.exit(1)
  }

  // All paths are resolved from the parent of the directory holding this script.
  const repoRoot = path.resolve(__dirname, '..')
  const backendDir = path.join(repoRoot, 'backend')
  const uploadsDir = path.join(backendDir, 'uploads', folderHash)
  const cacheDir = path.join(backendDir, 'cache', folderHash)

  if (!fs.existsSync(uploadsDir)) {
    console.error(`Uploads introuvable: ${uploadsDir}`)
    process.exit(2)
  }
  fs.mkdirSync(cacheDir, { recursive: true })

  // Only regular files are precached; sub-directories are ignored.
  const files = fs
    .readdirSync(uploadsDir)
    .filter((f) => fs.statSync(path.join(uploadsDir, f)).isFile())

  // A single timestamp is shared by every entry produced during this run.
  const nowIso = new Date().toISOString()
  let written = 0

  for (const [index, fileName] of files.entries()) {
    const filePath = path.join(uploadsDir, fileName)
    const buffer = fs.readFileSync(filePath)
    // The cache file is named after the SHA-256 of the file content.
    const fileHash = crypto.createHash('sha256').update(buffer).digest('hex')
    const size = buffer.length
    const mime = getMimeTypeByExt(path.extname(fileName))

    // Placeholder text stored in place of a real extraction result.
    const placeholderText = `Préchargé: ${fileName}`
    // Derived from the text because file names may contain spaces.
    const wordCount = placeholderText.trim().split(/\s+/).length

    const json = {
      document: {
        // Date.now() alone repeats for files handled within the same
        // millisecond; the sequence index keeps ids unique within a run.
        id: `doc-preload-${Date.now()}-${index}`,
        fileName,
        fileSize: size,
        mimeType: mime,
        uploadTimestamp: nowIso,
      },
      classification: {
        documentType: 'Document',
        confidence: 0.6,
        subType: 'Document',
        language: 'fr',
        pageCount: 1,
      },
      extraction: {
        text: {
          raw: placeholderText,
          processed: placeholderText,
          wordCount,
          characterCount: placeholderText.length,
          confidence: 0.6,
        },
        entities: {
          persons: [],
          companies: [],
          addresses: [],
          financial: { amounts: [], totals: {}, payment: {} },
          dates: [],
          contractual: { clauses: [], signatures: [] },
          references: [],
        },
      },
      metadata: {
        processing: {
          engine: 'preload',
          version: '1',
          processingTime: '0ms',
          ocrEngine: 'preload',
          nerEngine: 'none',
          preprocessing: { applied: false, reason: 'preload' },
        },
        quality: {
          globalConfidence: 0.6,
          textExtractionConfidence: 0.6,
          entityExtractionConfidence: 0.6,
          classificationConfidence: 0.6,
        },
      },
      status: { success: true, errors: [], warnings: [], timestamp: nowIso },
    }

    const outPath = path.join(cacheDir, `${fileHash}.json`)
    // NOTE(review): an existing <hash>.json is overwritten without warning.
    // Confirm that replacing an entry that may come from a real processing
    // run with this placeholder is intended.
    fs.writeFileSync(outPath, JSON.stringify(json))
    written += 1
    console.log(`cache écrit: ${outPath}`)
  }

  console.log(`OK - ${written} fichiers précachés dans ${cacheDir}`)
}
// Script entry point: the folder hash is the first user-supplied CLI argument
// (process.argv[0] is the node binary, process.argv[1] is the script path).
const [, , folderHashArg] = process.argv
precacheFolder(folderHashArg)