- Déduplication déterministe des identités, adresses, dates, entreprises, signatures et références
- Implémentation dans src/services/ruleNer.ts et src/services/backendApi.ts
- Clés de normalisation : prénom+nom, rue+CP+ville, nom+SIRET, type+valeur
- Test ciblé tests/deduplication.test.ts pour valider la fonctionnalité
- Documentation complète dans docs/deduplication_entites.md
- Correction des tests existants (supertest, extractEntitiesFromText)
- Compilation validée et services opérationnels
99 lines
3.5 KiB
JavaScript
99 lines
3.5 KiB
JavaScript
/**
 * Tests OCR et extraction de texte
 */
|
|
|
|
import { describe, it, expect, beforeEach } from 'vitest'
|
|
|
|
let extractTextFromImageEnhanced, extractEntitiesFromText
|
|
beforeEach(async () => {
|
|
const enhancedOcrModule = await import('../backend/enhancedOcr.js')
|
|
const serverModule = await import('../backend/server.js')
|
|
extractTextFromImageEnhanced = enhancedOcrModule.extractTextFromImageEnhanced
|
|
// La fonction extractEntitiesFromText n'est pas exportée, on utilise runRuleNER à la place
|
|
const { runRuleNER } = await import('../src/services/ruleNer.ts')
|
|
extractEntitiesFromText = (text) => {
|
|
const result = runRuleNER('test-doc', text)
|
|
return {
|
|
persons: result.identities,
|
|
addresses: result.addresses,
|
|
companies: result.companies || []
|
|
}
|
|
}
|
|
})
|
|
|
|
describe('OCR et extraction de texte', () => {
|
|
describe('Extraction de texte améliorée', () => {
|
|
it('devrait extraire du texte d\'une image', async () => {
|
|
// Test avec une image de test (à créer)
|
|
const testImagePath = 'tests/fixtures/test-image.jpg'
|
|
|
|
try {
|
|
const result = await extractTextFromImageEnhanced(testImagePath)
|
|
|
|
expect(result).toHaveProperty('text')
|
|
expect(result).toHaveProperty('confidence')
|
|
expect(result).toHaveProperty('method')
|
|
|
|
expect(typeof result.text).toBe('string')
|
|
expect(typeof result.confidence).toBe('number')
|
|
expect(result.confidence).toBeGreaterThanOrEqual(0)
|
|
expect(result.confidence).toBeLessThanOrEqual(100)
|
|
} catch (error) {
|
|
// Si l'image de test n'existe pas, on skip le test
|
|
console.warn('Image de test non trouvée, test ignoré')
|
|
}
|
|
})
|
|
|
|
it('devrait gérer les erreurs d\'OCR', async () => {
|
|
const result = await extractTextFromImageEnhanced('fichier-inexistant.jpg')
|
|
|
|
expect(result).toHaveProperty('text', '')
|
|
expect(result).toHaveProperty('confidence', 0)
|
|
})
|
|
})
|
|
|
|
describe('Extraction d\'entités', () => {
|
|
it('devrait extraire des personnes d\'un texte', () => {
|
|
const text = 'Monsieur Jean DUPONT habite à Paris. Madame Marie MARTIN est directrice.'
|
|
|
|
const entities = extractEntitiesFromText(text)
|
|
|
|
expect(entities).toHaveProperty('persons')
|
|
expect(Array.isArray(entities.persons)).toBe(true)
|
|
expect(entities.persons.length).toBeGreaterThan(0)
|
|
|
|
const firstPerson = entities.persons[0]
|
|
expect(firstPerson).toHaveProperty('firstName')
|
|
expect(firstPerson).toHaveProperty('lastName')
|
|
})
|
|
|
|
it('devrait extraire des adresses d\'un texte', () => {
|
|
const text = 'Adresse: 1 rue de la Paix, 75001 Paris, France'
|
|
|
|
const entities = extractEntitiesFromText(text)
|
|
|
|
expect(entities).toHaveProperty('addresses')
|
|
expect(Array.isArray(entities.addresses)).toBe(true)
|
|
expect(entities.addresses.length).toBeGreaterThan(0)
|
|
|
|
const firstAddress = entities.addresses[0]
|
|
expect(firstAddress).toHaveProperty('street')
|
|
expect(firstAddress).toHaveProperty('city')
|
|
expect(firstAddress).toHaveProperty('postalCode')
|
|
})
|
|
|
|
it('devrait extraire des entreprises d\'un texte', () => {
|
|
const text = 'La société MICROSOFT FRANCE est située à Paris.'
|
|
|
|
const entities = extractEntitiesFromText(text)
|
|
|
|
expect(entities).toHaveProperty('companies')
|
|
expect(Array.isArray(entities.companies)).toBe(true)
|
|
expect(entities.companies.length).toBeGreaterThan(0)
|
|
|
|
const firstCompany = entities.companies[0]
|
|
expect(firstCompany).toHaveProperty('name')
|
|
})
|
|
})
|
|
})
|