feat(front,back): ETag polling, Skeletons, Reselect; backend cache atomique, DELETE, métriques, ignore étendu, PM2 watch
This commit is contained in:
parent
6737865c43
commit
6333d6291d
@ -30,6 +30,28 @@ app.use((req, res, next) => {
|
||||
next()
|
||||
})
|
||||
|
||||
/**
 * Collects simple runtime metrics by scanning the cache directory.
 *
 * Every sub-directory of `cacheDir` is scanned one level deep: files whose
 * name ends in `.pending` are counted as pending, files whose name ends in
 * `.json` are counted as results. Entries of `cacheDir` that are not
 * directories are ignored.
 *
 * The scan is best-effort and never throws: a missing cache directory yields
 * zero counters, and a sub-directory that cannot be read (for example because
 * it was deleted while the scan was running) is skipped without discarding
 * the counts already gathered from the other folders.
 *
 * @param {string} [cacheDir='cache'] - Cache root; a relative path is resolved
 *   against the current working directory.
 * @returns {{ pending: number, results: number }} Counters for the whole cache.
 */
function collectMetrics(cacheDir = 'cache') {
  let pending = 0
  let results = 0

  let folders
  try {
    folders = fs.readdirSync(cacheDir)
  } catch (error) {
    // A cache directory that does not exist yet is a normal state, not an error.
    if (error.code !== 'ENOENT') {
      console.warn(`[METRICS] Lecture du cache impossible (${cacheDir}):`, error.message)
    }
    return { pending, results }
  }

  for (const folder of folders) {
    const folderPath = path.join(cacheDir, folder)
    try {
      if (!fs.statSync(folderPath).isDirectory()) continue
      // readdirSync either returns the full listing or throws, so a folder is
      // always counted entirely or not at all.
      for (const file of fs.readdirSync(folderPath)) {
        if (file.endsWith('.pending')) pending += 1
        else if (file.endsWith('.json')) results += 1
      }
    } catch (error) {
      // The folder vanished or is unreadable: skip it instead of resetting
      // every counter to zero.
      if (error.code !== 'ENOENT') {
        console.warn(`[METRICS] Dossier de cache ignoré (${folderPath}):`, error.message)
      }
    }
  }

  return { pending, results }
}
|
||||
|
||||
// Fonction pour calculer le hash d'un fichier
|
||||
function calculateFileHash(buffer) {
|
||||
return crypto.createHash('sha256').update(buffer).digest('hex')
|
||||
@ -128,9 +150,12 @@ function readFolderMeta(folderHash) {
|
||||
function saveJsonCacheInFolder(folderHash, fileHash, result) {
|
||||
const { cachePath } = createFolderStructure(folderHash)
|
||||
const cacheFile = path.join(cachePath, `${fileHash}.json`)
|
||||
const tempFile = path.join(cachePath, `${fileHash}.json.tmp`)
|
||||
|
||||
try {
|
||||
fs.writeFileSync(cacheFile, JSON.stringify(result, null, 2))
|
||||
// Écriture atomique: écrire dans un fichier temporaire puis renommer
|
||||
fs.writeFileSync(tempFile, JSON.stringify(result, null, 2))
|
||||
fs.renameSync(tempFile, cacheFile)
|
||||
console.log(`[CACHE] Résultat sauvegardé dans le dossier ${folderHash}: ${fileHash}`)
|
||||
|
||||
// Supprimer le flag pending si il existe
|
||||
@ -143,6 +168,7 @@ function saveJsonCacheInFolder(folderHash, fileHash, result) {
|
||||
return true
|
||||
} catch (error) {
|
||||
console.error(`[CACHE] Erreur lors de la sauvegarde dans le dossier ${folderHash}:`, error)
|
||||
try { if (fs.existsSync(tempFile)) fs.unlinkSync(tempFile) } catch {}
|
||||
return false
|
||||
}
|
||||
}
|
||||
@ -301,8 +327,14 @@ async function listFolderResults(folderHash) {
|
||||
for (const file of uploadFiles) {
|
||||
console.log(`[FOLDER] Traitement du fichier: ${file}`)
|
||||
|
||||
// Ignorer les fichiers de métadonnées
|
||||
if (file === 'folder.json' || file.endsWith('.meta')) {
|
||||
// Ignorer les fichiers de métadonnées et fichiers système
|
||||
if (
|
||||
file === 'folder.json' ||
|
||||
file.endsWith('.meta') ||
|
||||
file === '.DS_Store' ||
|
||||
file === 'Thumbs.db' ||
|
||||
file.startsWith('._')
|
||||
) {
|
||||
console.log(`[FOLDER] Fichier de métadonnées ignoré: ${file}`)
|
||||
continue
|
||||
}
|
||||
@ -1892,6 +1924,21 @@ app.get('/api/folders/:folderHash/results', async (req, res) => {
|
||||
const folderData = await listFolderResults(folderHash)
|
||||
const meta = readFolderMeta(folderHash)
|
||||
|
||||
// ETag basé sur le hash des contenus
|
||||
const etagPayload = JSON.stringify({
|
||||
name: meta?.name || null,
|
||||
results: folderData.results.map((r) => r.fileHash),
|
||||
pending: folderData.pending.map((p) => p.fileHash),
|
||||
count: folderData.results.length,
|
||||
pruned,
|
||||
})
|
||||
const etag = crypto.createHash('md5').update(etagPayload).digest('hex')
|
||||
res.setHeader('ETag', etag)
|
||||
const ifNoneMatch = req.headers['if-none-match']
|
||||
if (ifNoneMatch && ifNoneMatch === etag) {
|
||||
return res.status(304).end()
|
||||
}
|
||||
|
||||
console.log(
|
||||
`[FOLDER] Résultats récupérés pour le dossier ${folderHash}: ${folderData.results.length} fichiers, ${folderData.pending.length} en cours`,
|
||||
)
|
||||
@ -1958,6 +2005,38 @@ app.get('/api/folders/:folderHash/files/:fileHash', (req, res) => {
|
||||
}
|
||||
})
|
||||
|
||||
// Delete a single file from a folder: the uploaded original plus its cached
// JSON result and its pending flag.
//
// Responses:
//   200 { success: true, folderHash, fileHash }  file removed
//   400 { success: false, error }                malformed folder/file hash
//   404 { success: false, error }                folder or file not found
//   500 { success: false, error }                unexpected filesystem error
app.delete('/api/folders/:folderHash/files/:fileHash', (req, res) => {
  try {
    const { folderHash, fileHash } = req.params

    // Both values are used to build filesystem paths and a prefix match.
    // Route parameters are URL-decoded, so without validation a value such as
    // ".." would resolve outside uploads/, and a short prefix would delete
    // whichever file happens to match first. Accept hexadecimal digests only.
    const hashPattern = /^[a-f0-9]{32,128}$/i
    if (!hashPattern.test(folderHash) || !hashPattern.test(fileHash)) {
      return res.status(400).json({ success: false, error: 'Identifiant invalide' })
    }

    const folderPath = path.join('uploads', folderHash)
    const cachePath = path.join('cache', folderHash)

    if (!fs.existsSync(folderPath)) {
      return res.status(404).json({ success: false, error: 'Dossier non trouvé' })
    }

    const targetFile = fs.readdirSync(folderPath).find((file) => file.startsWith(fileHash))

    if (!targetFile) {
      return res.status(404).json({ success: false, error: 'Fichier non trouvé' })
    }

    // Remove the uploaded original.
    fs.unlinkSync(path.join(folderPath, targetFile))

    // Remove the cached JSON result and the pending flag. Either may
    // legitimately be absent, so a missing file is not an error; anything else
    // is logged but does not fail the request, since the upload is already gone.
    for (const extension of ['json', 'pending']) {
      const cacheFile = path.join(cachePath, `${fileHash}.${extension}`)
      try {
        fs.unlinkSync(cacheFile)
      } catch (error) {
        if (error.code !== 'ENOENT') {
          console.warn(`[FOLDER] Nettoyage du cache impossible (${cacheFile}):`, error.message)
        }
      }
    }

    return res.json({ success: true, folderHash, fileHash })
  } catch (error) {
    console.error('[FOLDER] Erreur suppression fichier:', error)
    return res.status(500).json({ success: false, error: error.message })
  }
})
|
||||
|
||||
// Route pour vider le cache d'un dossier (supprime *.json et *.pending)
|
||||
app.delete('/api/folders/:folderHash/cache', (req, res) => {
|
||||
try {
|
||||
@ -2182,10 +2261,12 @@ app.get('/api/folders/:folderHash/meta', (req, res) => {
|
||||
})
|
||||
|
||||
// Health check: reports liveness together with the cache counters returned
// by collectMetrics().
app.get('/api/health', (_req, res) => {
  const cacheCounters = collectMetrics()
  const now = new Date()

  const payload = {
    status: 'OK',
    timestamp: now.toISOString(),
    version: '1.0.0',
    metrics: cacheCounters,
  }

  res.json(payload)
})
|
||||
|
||||
|
||||
4
docs/pm2.md
Normal file
4
docs/pm2.md
Normal file
@ -0,0 +1,4 @@
|
||||
## PM2
|
||||
|
||||
- watch backend (backend/*), ignore uploads/cache
|
||||
- restart on changes, logs dans log/
|
||||
58
docs/traitement_images.md
Normal file
58
docs/traitement_images.md
Normal file
@ -0,0 +1,58 @@
|
||||
## Objet
|
||||
|
||||
Décrire le pipeline de traitement des images et comment diagnostiquer un blocage.
|
||||
|
||||
### Contexte
|
||||
|
||||
- Backend: Node.js/Express, OCR: tesseract.js, prétraitement: sharp, PDF: pdf-parse.
|
||||
- Répertoires: `uploads/<folderHash>/` (entrées) et `cache/<folderHash>/` (résultats JSON).
|
||||
- Métadonnées et fichiers système ignorés: `folder.json`, `*.meta`, `.DS_Store`, `Thumbs.db`, `._*`.
|
||||
|
||||
### Pipeline
|
||||
|
||||
1. Découverte des fichiers (ignore métadonnées)
|
||||
2. Prétraitement image (grayscale, normalisation, contraste, débruitage)
|
||||
3. OCR (multi-pass `ocrb+eng`, fallback `eng`)
|
||||
4. Extraction PDF (pdf-parse ou OCR si scanné)
|
||||
5. NER/Classification (règles personnes/entreprises/adresses, type doc)
|
||||
6. Écriture JSON dans `cache/<hash>/<fileHash>.json`
|
||||
|
||||
### Délais attendus
|
||||
|
||||
- JPEG ~1 Mo: 45–120 s (par image)
|
||||
- PDF texte: 0.2–2 s; PDF scanné: 30–90 s/page
|
||||
- 2 images: 3–6 min au total (normal)
|
||||
|
||||
### Vérifications rapides
|
||||
|
||||
- Santé backend: `GET /api/health` → `{ status: "OK", timestamp, version, metrics: { pending, results } }`
|
||||
- État dossier: `GET /api/folders/<hash>/results`
|
||||
- `hasPending` true si traitements restants
|
||||
- `pending[].timestamp` récent
|
||||
- `results[].document.fileName` présent
|
||||
- Fichiers cache: apparition de `cache/<hash>/<fileHash>.json`
|
||||
|
||||
### Signes de blocage
|
||||
|
||||
- `hasPending: true` > 10 min sans nouveaux JSON dans `cache/<hash>/`
|
||||
- Logs erreurs répétées (ex: type de fichier non supporté)
|
||||
- Port 3001 occupé (EADDRINUSE)
|
||||
|
||||
### Actions correctives
|
||||
|
||||
- Redémarrage simple:
|
||||
- `pkill -9 -f 'node.*backend/server.js' || true`
|
||||
- `nohup node backend/server.js > backend.log 2>&1 &`
|
||||
- Vérifier l’ignorance des métadonnées dans `backend/server.js`
|
||||
- la condition `if (...) { continue }` doit couvrir `folder.json`, `*.meta`, `.DS_Store`, `Thumbs.db` et les fichiers `._*`
|
||||
- Recalcul pending: relancer l’endpoint results puis vérifier `pending`
|
||||
|
||||
### Qualité OCR CNI (note)
|
||||
|
||||
- Multi-pass Tesseract, amélioration d’image, regex adresse renforcée, MRZ si présent
|
||||
- Cas cible: détecter `CANTU` (nom) et `Nicolas` (prénom) selon qualité du scan
|
||||
|
||||
### État courant (vérifié)
|
||||
|
||||
- Backend UP; dossier `7d99a85daf66a0081a0e881630e6b39b`
|
||||
- `results`: 1 PDF traité; `pending`: 2 JPEG en cours (normal)
|
||||
@ -16,6 +16,10 @@ module.exports = {
|
||||
error_file: './log/backend.err.log',
|
||||
log_date_format: 'YYYY-MM-DD HH:mm:ss',
|
||||
time: true,
|
||||
watch: ['backend'],
|
||||
ignore_watch: ['uploads', 'cache', 'node_modules', 'log'],
|
||||
watch_delay: 1000,
|
||||
exp_backoff_restart_delay: 200,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
@ -16,6 +16,39 @@ function getApiBaseUrl(): string {
|
||||
|
||||
const API_BASE_URL = getApiBaseUrl()
|
||||
|
||||
// Lightweight per-folder cache kept in localStorage: the last ETag received
// for a folder and the response body that ETag describes. The two entries
// must stay in sync, because a 304 answer is served from the stored body.
const ETAG_PREFIX = '4nk:etag:'
const RESULT_PREFIX = '4nk:results:'

/**
 * Returns the ETag stored for a folder.
 *
 * @param folderHash - Folder identifier used to build the storage key.
 * @returns The stored ETag, or null when none is stored or storage is unavailable.
 */
function getStoredEtag(folderHash: string): string | null {
  try {
    return localStorage.getItem(ETAG_PREFIX + folderHash)
  } catch {
    return null
  }
}

/**
 * Stores the ETag for a folder, or clears it when `etag` is null or empty so
 * that an outdated validator is never sent with a later request.
 *
 * @param folderHash - Folder identifier used to build the storage key.
 * @param etag - New ETag, or null to forget the current one.
 */
function setStoredEtag(folderHash: string, etag: string | null) {
  try {
    if (etag) localStorage.setItem(ETAG_PREFIX + folderHash, etag)
    else localStorage.removeItem(ETAG_PREFIX + folderHash)
  } catch {
    // Best effort: storage may be unavailable or full.
  }
}

/**
 * Returns the folder response stored for a folder.
 *
 * @param folderHash - Folder identifier used to build the storage key.
 * @returns The parsed response, or null when nothing is stored, the stored
 *   value is not valid JSON, or storage is unavailable.
 */
function getStoredResults(folderHash: string): FolderResponse | null {
  try {
    const raw = localStorage.getItem(RESULT_PREFIX + folderHash)
    return raw ? (JSON.parse(raw) as FolderResponse) : null
  } catch {
    return null
  }
}

/**
 * Stores the folder response for a folder.
 *
 * If the write fails (for example because the storage quota is exceeded),
 * both the stored response and the stored ETag are dropped. Otherwise a newer
 * ETag could remain paired with an older body, and the next 304 answer would
 * be served from stale data.
 *
 * @param folderHash - Folder identifier used to build the storage key.
 * @param data - Response body to cache.
 */
function setStoredResults(folderHash: string, data: FolderResponse) {
  try {
    localStorage.setItem(RESULT_PREFIX + folderHash, JSON.stringify(data))
  } catch {
    try {
      localStorage.removeItem(RESULT_PREFIX + folderHash)
      localStorage.removeItem(ETAG_PREFIX + folderHash)
    } catch {
      // Storage is unavailable altogether: nothing is cached, nothing to clean.
    }
  }
}
|
||||
|
||||
export interface FolderResult {
|
||||
fileHash: string
|
||||
document: {
|
||||
@ -123,28 +156,29 @@ export async function getDefaultFolder(): Promise<CreateFolderResponse> {
|
||||
return response.json()
|
||||
}
|
||||
|
||||
// Récupérer les résultats d'un dossier
|
||||
// Récupérer les résultats d'un dossier (avec ETag)
|
||||
export async function getFolderResults(folderHash: string): Promise<FolderResponse> {
|
||||
console.log(`[API] Appel getFolderResults pour le dossier: ${folderHash}`)
|
||||
console.log(`[API] API_BASE_URL: ${API_BASE_URL}`)
|
||||
console.log(`[API] URL complète: ${API_BASE_URL}/folders/${folderHash}/results`)
|
||||
|
||||
try {
|
||||
// Créer un AbortController pour gérer le timeout
|
||||
const controller = new AbortController()
|
||||
const timeoutId = setTimeout(() => {
|
||||
console.log(`[API] Timeout après 10 secondes`)
|
||||
controller.abort()
|
||||
}, 10000)
|
||||
|
||||
console.log(`[API] Début de la requête fetch...`)
|
||||
const url = `${API_BASE_URL}/folders/${folderHash}/results?t=${Date.now()}`
|
||||
console.log(`[API] URL finale: ${url}`)
|
||||
|
||||
const etag = getStoredEtag(folderHash)
|
||||
const response = await fetch(url, {
|
||||
signal: controller.signal,
|
||||
headers: {
|
||||
Accept: 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
...(etag ? { 'If-None-Match': etag } : {}),
|
||||
},
|
||||
})
|
||||
|
||||
@ -152,6 +186,26 @@ export async function getFolderResults(folderHash: string): Promise<FolderRespon
|
||||
console.log(`[API] Réponse reçue:`, response.status, response.statusText)
|
||||
console.log(`[API] Headers:`, Object.fromEntries(response.headers.entries()))
|
||||
|
||||
// Gestion 304: retourner le cache local si disponible
|
||||
if (response.status === 304) {
|
||||
const cached = getStoredResults(folderHash)
|
||||
if (cached) {
|
||||
console.log('[API] 304 Not Modified - utilisation du cache local')
|
||||
return cached
|
||||
}
|
||||
// Aucun cache disponible: tomber en repli en forçant une nouvelle requête sans ETag
|
||||
console.warn('[API] 304 sans cache: nouvelle requête sans If-None-Match')
|
||||
const fallback = await fetch(`${API_BASE_URL}/folders/${folderHash}/results`, {
|
||||
headers: { Accept: 'application/json', 'Content-Type': 'application/json' },
|
||||
})
|
||||
if (!fallback.ok) throw new Error(`Erreur backend: ${fallback.statusText}`)
|
||||
const data = (await fallback.json()) as FolderResponse
|
||||
const newEtag = fallback.headers.get('ETag')
|
||||
if (newEtag) setStoredEtag(folderHash, newEtag)
|
||||
setStoredResults(folderHash, data)
|
||||
return data
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
console.error(`[API] Erreur HTTP:`, response.status, response.statusText)
|
||||
throw new Error(
|
||||
@ -159,10 +213,11 @@ export async function getFolderResults(folderHash: string): Promise<FolderRespon
|
||||
)
|
||||
}
|
||||
|
||||
console.log(`[API] Début du parsing JSON...`)
|
||||
const data = await response.json()
|
||||
console.log(`[API] Données reçues:`, data)
|
||||
console.log(`[API] Nombre de résultats:`, data.results?.length || 0)
|
||||
const data = (await response.json()) as FolderResponse
|
||||
|
||||
const newEtag = response.headers.get('ETag')
|
||||
if (newEtag) setStoredEtag(folderHash, newEtag)
|
||||
setStoredResults(folderHash, data)
|
||||
|
||||
return data
|
||||
} catch (error) {
|
||||
|
||||
15
src/store/selectors.ts
Normal file
15
src/store/selectors.ts
Normal file
@ -0,0 +1,15 @@
|
||||
import { createSelector } from '@reduxjs/toolkit'
|
||||
|
||||
import type { RootState } from '.'
|
||||
|
||||
// Plain input selectors over the `document` slice.
export const selectDocuments = (state: RootState) => state.document.documents
export const selectHasPending = (state: RootState) => state.document.hasPending
export const selectPendingFiles = (state: RootState) => state.document.pendingFiles
export const selectCurrentFolderName = (state: RootState) => state.document.currentFolderName
export const selectCurrentFolderHash = (state: RootState) => state.document.currentFolderHash

// Hash of the folder that is displayed as "Dossier par défaut" when it has no name.
const DEFAULT_FOLDER_HASH = '7d99a85daf66a0081a0e881630e6b39b'

// Kept under its original name for existing callers. It returns the state
// value as-is: wrapping an identity function in createSelector memoizes
// nothing, so the plain input selector is used directly.
export const memoizedDocumentsSelector = selectDocuments

/**
 * Display name of the current folder.
 *
 * Resolution order: the stored folder name when it is non-empty; otherwise
 * 'Aucun dossier sélectionné' when no folder hash is set; otherwise
 * 'Dossier par défaut' for the default folder hash and 'Dossier sans nom'
 * for any other folder.
 */
export const folderNameSelector = createSelector(
  [selectCurrentFolderName, selectCurrentFolderHash],
  (name, hash) => {
    if (name) return name
    // Without this branch the selector always returned a non-empty label and
    // the "no folder selected" state could never be shown.
    if (!hash) return 'Aucun dossier sélectionné'
    return hash === DEFAULT_FOLDER_HASH ? 'Dossier par défaut' : 'Dossier sans nom'
  },
)
|
||||
@ -5,6 +5,7 @@ import {
|
||||
Typography,
|
||||
Paper,
|
||||
CircularProgress,
|
||||
Skeleton,
|
||||
Alert,
|
||||
Button,
|
||||
Chip,
|
||||
@ -36,6 +37,7 @@ import {
|
||||
ContentCopy,
|
||||
} from '@mui/icons-material'
|
||||
import { useAppDispatch, useAppSelector } from '../store'
|
||||
import { memoizedDocumentsSelector, folderNameSelector, selectPendingFiles } from '../store/selectors'
|
||||
import {
|
||||
uploadFileToFolderThunk,
|
||||
loadFolderResults,
|
||||
@ -47,9 +49,9 @@ import { FilePreview } from '../components/FilePreview'
|
||||
import type { Document } from '../types'
|
||||
|
||||
// Composant mémorisé pour les items de la liste
|
||||
const DocumentListItem = memo(({ doc, index, onPreview, onDelete, totalCount }: {
|
||||
doc: Document,
|
||||
index: number,
|
||||
const DocumentListItem = memo(({ doc, index, onPreview, onDelete, totalCount }: {
|
||||
doc: Document,
|
||||
index: number,
|
||||
onPreview: (doc: Document) => void,
|
||||
onDelete: (id: string) => void,
|
||||
totalCount: number
|
||||
@ -175,13 +177,13 @@ const DocumentListItem = memo(({ doc, index, onPreview, onDelete, totalCount }:
|
||||
|
||||
export default function UploadView() {
|
||||
const dispatch = useAppDispatch()
|
||||
const { documents, error, currentFolderHash, currentFolderName, loading, bootstrapped } = useAppSelector((state) => state.document)
|
||||
const { error, currentFolderHash, loading, bootstrapped, hasPending } = useAppSelector((state) => state.document)
|
||||
const currentFolderName = useAppSelector(folderNameSelector)
|
||||
const documents = useAppSelector(memoizedDocumentsSelector)
|
||||
const pendingFiles = useAppSelector(selectPendingFiles)
|
||||
|
||||
// Mémoriser la liste des documents pour éviter les re-renders inutiles
|
||||
const memoizedDocuments = useMemo(() => {
|
||||
console.log('🏠 [UPLOAD_VIEW] Recalcul de la liste des documents:', documents.length)
|
||||
return documents
|
||||
}, [documents])
|
||||
const memoizedDocuments = documents
|
||||
|
||||
console.log('🏠 [UPLOAD_VIEW] Component loaded, documents count:', memoizedDocuments.length)
|
||||
const [previewDocument, setPreviewDocument] = useState<Document | null>(null)
|
||||
@ -356,12 +358,7 @@ export default function UploadView() {
|
||||
fontSize: '0.875rem',
|
||||
}}
|
||||
>
|
||||
{(() => {
|
||||
if (currentFolderName && currentFolderName.length > 0) return currentFolderName
|
||||
if (currentFolderHash === '7d99a85daf66a0081a0e881630e6b39b') return 'Dossier par défaut'
|
||||
if (!currentFolderHash) return 'Aucun dossier sélectionné'
|
||||
return 'Dossier sans nom'
|
||||
})()}
|
||||
{currentFolderName || (currentFolderHash ? 'Dossier sans nom' : 'Aucun dossier sélectionné')}
|
||||
</Typography>
|
||||
{currentFolderHash && (
|
||||
<Tooltip title="Copier le hash du dossier">
|
||||
@ -428,7 +425,7 @@ export default function UploadView() {
|
||||
)}
|
||||
|
||||
{/* Liste des documents */}
|
||||
{memoizedDocuments.length > 0 && (
|
||||
{(memoizedDocuments.length > 0 || hasPending) && (
|
||||
<Box sx={{ mt: 3 }}>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
Documents analysés ({memoizedDocuments.length})
|
||||
@ -446,6 +443,37 @@ export default function UploadView() {
|
||||
totalCount={memoizedDocuments.length}
|
||||
/>
|
||||
))}
|
||||
{hasPending && (
|
||||
<>
|
||||
{(pendingFiles.length > 0 ? pendingFiles : new Array(2).fill(null)).map((p, i) => (
|
||||
<div key={`sk-${i}`}>
|
||||
<ListItem>
|
||||
<ListItemIcon>
|
||||
<Skeleton variant="circular" width={24} height={24} />
|
||||
</ListItemIcon>
|
||||
<ListItemText
|
||||
primary={
|
||||
<Box>
|
||||
<Box display="flex" alignItems="center" gap={1} mb={1}>
|
||||
<Skeleton variant="rounded" width={180} height={18} />
|
||||
</Box>
|
||||
<Box display="flex" gap={1}>
|
||||
<Skeleton variant="rounded" width={60} height={24} />
|
||||
<Skeleton variant="rounded" width={100} height={24} />
|
||||
</Box>
|
||||
</Box>
|
||||
}
|
||||
/>
|
||||
<Box display="flex" gap={1} flexDirection={{ xs: 'column', sm: 'row' }}>
|
||||
<Skeleton variant="rounded" width={84} height={32} />
|
||||
<Skeleton variant="rounded" width={84} height={32} />
|
||||
</Box>
|
||||
</ListItem>
|
||||
<Divider />
|
||||
</div>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
</List>
|
||||
</Card>
|
||||
</Box>
|
||||
|
||||
5
tests/pm2_watch.md
Normal file
5
tests/pm2_watch.md
Normal file
@ -0,0 +1,5 @@
|
||||
## Test PM2
|
||||
|
||||
1. pm2 start ecosystem.config.cjs
|
||||
2. touch backend/server.js (vérifier restart)
|
||||
3. vérifier logs
|
||||
20
tests/traitement_images_check.md
Normal file
20
tests/traitement_images_check.md
Normal file
@ -0,0 +1,20 @@
|
||||
## Test de vérification du traitement d'images
|
||||
|
||||
### Préconditions
|
||||
- Backend démarré (`GET /api/health` répond `status: "OK"`).
|
||||
- Dossier cible: `<folderHash>` (ex. `7d99a85daf66a0081a0e881630e6b39b`).
|
||||
|
||||
### Étapes
|
||||
1. Appeler `GET /api/folders/<folderHash>/results`.
|
||||
2. Vérifier que `hasPending` est booléen.
|
||||
3. Si `hasPending` est `true`, vérifier que `pending[]` contient les hashes et que `pending[].timestamp` est récent (< 10 min).
|
||||
4. Surveiller `cache/<folderHash>/` et constater l’apparition de `<fileHash>.json` à la fin d’un traitement.
|
||||
5. Confirmer que `results[].document.fileName` correspond aux fichiers traités.
|
||||
|
||||
### Critères de succès
|
||||
- Au moins un `<fileHash>.json` généré dans `cache/<folderHash>/` pour chaque fichier terminé.
|
||||
- Diminution progressive de `pending.length` jusqu’à `0`.
|
||||
|
||||
### Diagnostic en cas d’échec
|
||||
- Si aucun JSON n’apparaît > 10 min: redémarrer backend (voir doc `docs/traitement_images.md`).
|
||||
- Si erreurs répétées dans `log/backend.err.log`: corriger la cause (type MIME, permissions, etc.).
|
||||
Loading…
x
Reference in New Issue
Block a user