118 lines
3.5 KiB
JavaScript
118 lines
3.5 KiB
JavaScript
#!/usr/bin/env node
|
|
/*
  Generates minimal cache JSON files for every file found in uploads/<folderHash>
  (repository root) or, failing that, backend/uploads/<folderHash>.
  Usage: node scripts/precache.cjs <folderHash>
*/
|
|
const fs = require('fs')
|
|
const path = require('path')
|
|
const crypto = require('crypto')
|
|
|
|
/**
 * Maps a file extension to its MIME type.
 *
 * The lookup is case-insensitive and uses a Map, so names inherited from
 * Object.prototype (e.g. "constructor", "__proto__") can never be returned
 * in place of a MIME string.
 *
 * @param {string} ext - Extension including the leading dot (e.g. ".pdf"),
 *   as produced by `path.extname`.
 * @returns {string} The matching MIME type, or "application/octet-stream"
 *   when the extension is unknown, empty, or not a string.
 */
function getMimeTypeByExt(ext) {
  const fallback = 'application/octet-stream'
  // Guard against non-string input instead of crashing on `.toLowerCase()`.
  if (typeof ext !== 'string') {
    return fallback
  }

  const mimeByExt = new Map([
    ['.pdf', 'application/pdf'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.png', 'image/png'],
    ['.tiff', 'image/tiff'],
    ['.txt', 'text/plain'],
  ])

  return mimeByExt.get(ext.toLowerCase()) || fallback
}
|
|
|
|
/**
 * Builds the placeholder cache record written for one preloaded file.
 *
 * The record carries no real extraction results: the text is a fixed
 * "Préchargé: <fileName>" marker, every entity list is empty, and every
 * confidence value is the constant 0.6.
 *
 * @param {object} info
 * @param {string} info.id - Document identifier stored in the record.
 * @param {string} info.fileName - Name of the uploaded file.
 * @param {number} info.fileSize - File size in bytes.
 * @param {string} info.mimeType - MIME type derived from the file extension.
 * @param {string} info.timestamp - ISO-8601 timestamp used for both the
 *   upload time and the status time.
 * @returns {object} A JSON-serialisable cache record.
 */
function buildPreloadCacheEntry({ id, fileName, fileSize, mimeType, timestamp }) {
  const text = `Préchargé: ${fileName}`
  return {
    document: {
      id,
      fileName,
      fileSize,
      mimeType,
      uploadTimestamp: timestamp,
    },
    classification: {
      documentType: 'Document',
      confidence: 0.6,
      subType: 'Document',
      language: 'fr',
      pageCount: 1,
    },
    extraction: {
      text: {
        raw: text,
        processed: text,
        wordCount: text.trim().split(/\s+/).filter(Boolean).length,
        characterCount: text.length,
        confidence: 0.6,
      },
      entities: {
        persons: [],
        companies: [],
        addresses: [],
        financial: { amounts: [], totals: {}, payment: {} },
        dates: [],
        contractual: { clauses: [], signatures: [] },
        references: [],
      },
    },
    metadata: {
      processing: {
        engine: 'preload',
        version: '1',
        processingTime: '0ms',
        ocrEngine: 'preload',
        nerEngine: 'none',
        preprocessing: { applied: false, reason: 'preload' },
      },
      quality: {
        globalConfidence: 0.6,
        textExtractionConfidence: 0.6,
        entityExtractionConfidence: 0.6,
        classificationConfidence: 0.6,
      },
    },
    status: { success: true, errors: [], warnings: [], timestamp },
  }
}

/**
 * Writes one placeholder cache JSON per regular file found in the uploads
 * directory of `folderHash`.
 *
 * The uploads directory is looked up first at `<repoRoot>/uploads/<folderHash>`
 * and then at `<repoRoot>/backend/uploads/<folderHash>`, where `repoRoot` is
 * the parent of this script's directory. The cache directory is the sibling
 * `cache/<folderHash>` of whichever location matched and is created if
 * missing. Each output file is named `<sha256 of file content>.json`, so two
 * uploads with identical content share (and overwrite) the same cache file.
 *
 * Exits the process with code 1 when `folderHash` is missing and with code 2
 * when neither uploads directory exists.
 *
 * @param {string} folderHash - Name of the uploads sub-folder to precache.
 * @returns {void}
 */
function precacheFolder(folderHash) {
  if (!folderHash) {
    console.error('Usage: node scripts/precache.cjs <folderHash>')
    process.exit(1)
  }

  const repoRoot = path.resolve(__dirname, '..')
  const backendDir = path.join(repoRoot, 'backend')
  // Probe order matters: the repo-root layout wins over the backend layout.
  const candidates = [
    { uploadsDir: path.join(repoRoot, 'uploads', folderHash), cacheDir: path.join(repoRoot, 'cache', folderHash) },
    { uploadsDir: path.join(backendDir, 'uploads', folderHash), cacheDir: path.join(backendDir, 'cache', folderHash) },
  ]
  const picked = candidates.find((c) => fs.existsSync(c.uploadsDir))
  if (!picked) {
    console.error(`Uploads introuvable (ni racine ni backend) pour ${folderHash}`)
    process.exit(2)
  }
  const { uploadsDir, cacheDir } = picked
  fs.mkdirSync(cacheDir, { recursive: true })

  // Only regular files are precached; sub-directories are skipped.
  const files = fs
    .readdirSync(uploadsDir)
    .filter((f) => fs.statSync(path.join(uploadsDir, f)).isFile())

  const nowIso = new Date().toISOString()
  let written = 0
  for (const fileName of files) {
    const buffer = fs.readFileSync(path.join(uploadsDir, fileName))
    const fileHash = crypto.createHash('sha256').update(buffer).digest('hex')

    const entry = buildPreloadCacheEntry({
      // Date.now() alone repeats for files handled within the same
      // millisecond; the per-run sequence number keeps ids distinct.
      id: `doc-preload-${Date.now()}-${written + 1}`,
      fileName,
      fileSize: buffer.length,
      mimeType: getMimeTypeByExt(path.extname(fileName)),
      timestamp: nowIso,
    })

    const outPath = path.join(cacheDir, `${fileHash}.json`)
    fs.writeFileSync(outPath, JSON.stringify(entry))
    written += 1
    console.log(`cache écrit: ${outPath}`)
  }

  console.log(`OK - ${written} fichiers précachés dans ${cacheDir}`)
}
|
|
|
|
// Script entry point: process.argv[2] is the first command-line argument, passed as <folderHash>.
precacheFolder(process.argv[2])
|