const fs = require('fs') const path = require('path') function loadCsvNames(filePath) { try { const raw = fs.readFileSync(filePath, 'utf8') const lines = raw.split(/\r?\n/).map((l) => l.trim()).filter(Boolean) const names = [] for (const line of lines) { const parts = line.split(/[;,\t]/).map((p) => p.trim()).filter(Boolean) for (const p of parts) { if (/^[A-Za-zÀ-ÖØ-öø-ÿ'\-\s]{2,}$/.test(p)) names.push(p) } } return names } catch { return [] } } function buildNameSets() { const baseDir = path.join(__dirname, 'data', 'names') const firstNames = new Set() const lastNames = new Set() try { if (!fs.existsSync(baseDir)) return { firstNames, lastNames } // Prioriser les fichiers unifiés légers (références finales) const preferredOrder = [ 'firstnames_all.csv', 'lastnames_all.csv', ] const files = fs .readdirSync(baseDir) .sort((a, b) => preferredOrder.indexOf(a) - preferredOrder.indexOf(b)) for (const f of files) { const fp = path.join(baseDir, f) if (!fs.statSync(fp).isFile()) continue // N'utiliser que les deux références finales si présentes const isFirst = /^(firstnames_all\.|first|prenom|given)/i.test(f) const isLast = /^(lastnames_all\.|last|nom|surname|family)/i.test(f) if (!isFirst && !isLast) continue const list = loadCsvNames(fp) for (const n of list) { const norm = n.normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase() if (isFirst) firstNames.add(norm) if (isLast) lastNames.add(norm) } } } catch {} return { firstNames, lastNames } } let cache = null function getNameDirectory() { if (!cache) cache = buildNameSets() return cache } function nameConfidenceBoost(firstName, lastName) { try { const dir = getNameDirectory() const f = (firstName || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase() const l = (lastName || '').normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase() let boost = 0 if (f && dir.firstNames.has(f)) boost += 0.05 if (l && dir.lastNames.has(l)) boost += 0.05 return boost } catch { return 0 } } module.exports = { getNameDirectory, nameConfidenceBoost }