74 lines
2.2 KiB
JavaScript
74 lines
2.2 KiB
JavaScript
const fs = require('fs')
|
|
const path = require('path')
|
|
|
|
/**
 * Read a delimited text file and collect every cell that looks like a personal name.
 *
 * Each line is split on `;`, `,` or a tab. A cell is kept when, after trimming and
 * removing one pair of wrapping double quotes, it is at least two characters long and
 * contains only ASCII/Latin-1 letters, apostrophes, hyphens and whitespace.
 *
 * Limitation: this is not a full CSV parser — a delimiter inside a quoted field still
 * splits the field.
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @returns {string[]} Accepted cells in file order, duplicates included; an empty
 *   array when the file cannot be read.
 */
function loadCsvNames(filePath) {
  const namePattern = /^[A-Za-zÀ-ÖØ-öø-ÿ'\-\s]{2,}$/
  const wrappingQuotes = /^"(.*)"$/

  let raw
  try {
    raw = fs.readFileSync(filePath, 'utf8')
  } catch {
    // Deliberate best effort: an unreadable or missing file contributes no names.
    return []
  }

  const names = []
  for (const line of raw.split(/\r?\n/)) {
    for (const cell of line.split(/[;,\t]/)) {
      // Quoted exports ("Jean";"Dupont") would otherwise fail the pattern because of
      // the quote characters, so unwrap one pair before validating.
      const value = cell.trim().replace(wrappingQuotes, '$1').trim()
      // Empty cells never match: the pattern requires at least two characters.
      if (namePattern.test(value)) names.push(value)
    }
  }
  return names
}
|
|
|
|
/**
 * Build the first-name and last-name lookup sets from the files found in
 * `<__dirname>/data/names`.
 *
 * A file feeds the first-name set when its name starts with `firstnames_all.`, `first`,
 * `prenom` or `given`, and the last-name set when it starts with `lastnames_all.`,
 * `last`, `nom`, `surname` or `family` (case-insensitive). Every name returned by
 * `loadCsvNames` is stored with combining accents removed and lower-cased.
 *
 * The function never throws: a missing or unreadable directory gives two empty sets,
 * and a single unreadable entry is skipped without affecting the others.
 *
 * NOTE(review): an earlier comment said that only the two unified files should be used
 * when they are present, but every matching file has always been loaded — confirm the
 * intended behaviour before restricting the list.
 *
 * @returns {{ firstNames: Set<string>, lastNames: Set<string> }} The two lookup sets.
 */
function buildNameSets() {
  const baseDir = path.join(__dirname, 'data', 'names')
  const firstNames = new Set()
  const lastNames = new Set()

  // Lightweight unified reference files are read first.
  const preferredOrder = ['firstnames_all.csv', 'lastnames_all.csv']
  // Unlisted files must rank after the preferred ones; a raw indexOf() gives them -1,
  // which would sort them to the front instead.
  const rank = (fileName) => {
    const position = preferredOrder.indexOf(fileName)
    return position === -1 ? preferredOrder.length : position
  }

  let files
  try {
    if (!fs.existsSync(baseDir)) return { firstNames, lastNames }
    files = fs.readdirSync(baseDir).sort((a, b) => rank(a) - rank(b))
  } catch {
    // Directory exists but cannot be listed: behave as if it held no name files.
    return { firstNames, lastNames }
  }

  for (const f of files) {
    const isFirst = /^(firstnames_all\.|first|prenom|given)/i.test(f)
    const isLast = /^(lastnames_all\.|last|nom|surname|family)/i.test(f)
    if (!isFirst && !isLast) continue

    // Guard each entry separately so one failing stat (for example a dangling symlink)
    // cannot silently discard all the files that come after it.
    try {
      const fp = path.join(baseDir, f)
      if (!fs.statSync(fp).isFile()) continue

      for (const n of loadCsvNames(fp)) {
        const norm = n.normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase()
        if (isFirst) firstNames.add(norm)
        if (isLast) lastNames.add(norm)
      }
    } catch {
      continue
    }
  }

  return { firstNames, lastNames }
}
|
|
|
|
// Holds the result of the first buildNameSets() call; null until then.
let cache = null

/**
 * Return the name directory, building it on the first call and reusing the stored
 * result on later calls.
 *
 * @returns {{ firstNames: Set<string>, lastNames: Set<string> }} The value produced by
 *   buildNameSets().
 */
function getNameDirectory() {
  if (cache) {
    return cache
  }
  cache = buildNameSets()
  return cache
}
|
|
|
|
/**
 * Compute a confidence bonus for a person whose first and/or last name appears in the
 * name directory.
 *
 * Both inputs are trimmed, stripped of combining accents and lower-cased before the
 * lookup. Directory entries are stored trimmed, so untrimmed input could otherwise never
 * match.
 *
 * @param {string} [firstName] - Candidate first name; falsy values are treated as absent.
 * @param {string} [lastName] - Candidate last name; falsy values are treated as absent.
 * @returns {number} 0.05 for each name found in its set (so 0, 0.05 or 0.1); 0 when
 *   anything throws during the lookup.
 */
function nameConfidenceBoost(firstName, lastName) {
  // Single normaliser so both names are keyed exactly the same way.
  const toKey = (value) =>
    (value || '').trim().normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase()

  try {
    const { firstNames, lastNames } = getNameDirectory()
    const firstKey = toKey(firstName)
    const lastKey = toKey(lastName)

    let boost = 0
    if (firstKey && firstNames.has(firstKey)) boost += 0.05
    if (lastKey && lastNames.has(lastKey)) boost += 0.05
    return boost
  } catch {
    // Best effort: the boost is optional, so any failure simply means "no bonus".
    return 0
  }
}
|
|
|
|
// Public API of this module: the lazily built name directory accessor and the
// confidence-boost helper. loadCsvNames and buildNameSets are not exported.
module.exports = { getNameDirectory, nameConfidenceBoost }
|