129 lines
4.3 KiB
Python
129 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Temporary script that translates the JSON seed files from French to English.
Rewrites data/seeds/wastes-seeds.json and data/seeds/regulators-seeds.json in place.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
|
|
# Translation dictionary for common terms
|
|
# French -> English glossary used when translating the seed files.
# NOTE: several keys are substrings of other keys ("porc" / "porcelet",
# "usine" / "usine de margarine", "lait" / "usine de produits laitiers"),
# so any consumer doing substring replacement must apply longer phrases
# before shorter ones.
translations = {
    # Livestock slurry sources
    "lisier": "slurry",
    "vache": "cow",
    "porc": "pig",
    "porcelet": "piglet",
    "poules pondeuses": "laying hens",
    "poulets de chair": "broiler chickens",

    # Oils and where they come from
    "huiles": "oils",
    "restaurant à fort volume": "high volume restaurant",
    "cantine scolaire": "school canteen",
    "ménages": "households",
    "usine": "plant",
    "usine de margarine": "margarine plant",

    # Green waste
    "déchets verts": "green waste",
    "feuilles": "leaves",
    "tiges tendres": "tender stems",
    "hectare de production d'herbes": "hectare of herb production",
    "arbre urbain": "urban tree",
    "branches": "branches",
    "arbre élagué": "pruned tree",

    # Olive by-products
    "hectare d'oliviers": "hectare of olive trees",
    "grignons d'olive": "olive pomace",
    "margines": "olive mill wastewater",

    # Urban sludge and wastewater treatment
    "boues urbaines": "urban sludge",
    "station d'épuration urbaine": "urban wastewater treatment plant",
    "flottats graisseux urbains": "urban fatty floatables",
    "graisses de station d'épuration": "wastewater treatment plant fats",

    # Seaweed
    "sargasses": "sargassum",
    "laminaires": "laminaria",
    "gracilaria": "gracilaria",
    "ulves": "ulva",

    # Fruit and vegetables
    "bananes mûres": "ripe bananas",
    "petit marché local": "small local market",
    "mangues mûres invendues": "unsold ripe mangoes",
    "usine agroalimentaire": "food processing plant",
    "légumes frais humides": "fresh wet vegetables",
    "marché de gros": "wholesale market",
    "résidus de légumes transformés": "processed vegetable residues",
    "site industriel agroalimentaire": "food industrial site",

    # Cooked food and meat
    "restes de repas cuisinés": "cooked food leftovers",
    "viande avariée": "spoiled meat",
    "abattoir": "slaughterhouse",

    # Dairy
    "lait": "milk",
    "yaourt": "yogurt",
    "crème périmés": "expired cream",
    "usine de produits laitiers": "dairy plant",

    # Bread
    "pain rassis": "stale bread",
    "boulangerie": "bakery",

    # Fish
    "poissons avariés": "spoiled fish",
    "marché aux poissons": "fish market",

    # Mixed biowaste
    "biodéchets mixtes": "mixed biowaste",
    "déchets alimentaires ménagers": "household food waste",
}
|
|
|
|
def translate_text(text, terms=None):
    """Translate known French terms in *text* to English.

    Args:
        text: String to translate. Falsy values (``None``, ``""``) and
            non-string values are returned unchanged.
        terms: Optional French -> English mapping. When omitted, the
            module-level ``translations`` dictionary is used.

    Returns:
        The text with every occurrence of a known term replaced, the spacing
        after ``pH:`` normalised to a single space, and the standalone words
        ``à`` / ``selon`` rendered as ``to`` / ``depending on``.
    """
    if not text or not isinstance(text, str):
        return text

    if terms is None:
        terms = translations

    result = text

    # Replace glossary terms in a single pass, trying longer phrases first.
    # Sequential str.replace in insertion order let short terms corrupt longer
    # ones ("porcelet" -> "pigelet", "usine de margarine" -> "plant de
    # margarine"). A single pass also never re-translates English output.
    # Matching stays substring-based so plurals such as "vaches" still work.
    french_terms = sorted((fr for fr in terms if fr), key=len, reverse=True)
    if french_terms:
        pattern = re.compile('|'.join(re.escape(fr) for fr in french_terms))
        # A function replacement avoids backslash interpretation in values.
        result = pattern.sub(lambda match: terms[match.group(0)], result)

    # Normalise "pH:6,5" / "pH:   6,5" to "pH: 6,5" (digits are left as-is).
    result = re.sub(r'pH:\s*([0-9,\.]+)', r'pH: \1', result)

    # Only translate the standalone words; a bare replace('à', 'to') also
    # rewrote the letter inside words such as "déjà".
    result = re.sub(r'\bà\b', 'to', result)
    result = re.sub(r'\bselon\b', 'depending on', result)

    return result
|
|
|
|
def translate_wastes():
    """Translate the text fields of every entry in wastes-seeds.json.

    Loads the seed file, runs ``translate_text`` over the ``name``,
    ``originSubType`` and ``notes`` fields of each item under the top-level
    ``wastes`` key (fields that are absent are skipped), and writes the
    result back to the same path.
    """
    seed_path = 'data/seeds/wastes-seeds.json'
    translatable_fields = ('name', 'originSubType', 'notes')

    with open(seed_path, 'r', encoding='utf-8') as src:
        payload = json.load(src)

    for entry in payload['wastes']:
        for field in translatable_fields:
            if field in entry:
                entry[field] = translate_text(entry[field])

    # ensure_ascii=False keeps accented characters readable in the output.
    with open(seed_path, 'w', encoding='utf-8') as dst:
        json.dump(payload, dst, indent=2, ensure_ascii=False)
|
|
|
|
def translate_regulators():
    """Filter and translate the entries of regulators-seeds.json.

    Loads the seed file, drops every item under the top-level ``regulators``
    key whose ``type`` is ``"unknown"`` or whose ``name`` is one of three
    known invalid names, runs ``translate_text`` over the ``name`` and
    ``applicationConditions`` fields of the remaining items, and writes the
    result back to the same path.
    """
    seed_path = 'data/seeds/regulators-seeds.json'
    # Names treated as invalid entries (they read like section headings
    # rather than regulator names).
    rejected_names = ['Logique de symbiose :', 'Effets requis :', 'Cohabitation recommandée :']
    translatable_fields = ('name', 'applicationConditions')

    with open(seed_path, 'r', encoding='utf-8') as src:
        payload = json.load(src)

    kept = []
    for entry in payload['regulators']:
        if entry.get('type') == 'unknown' or entry.get('name') in rejected_names:
            continue

        for field in translatable_fields:
            if field in entry:
                entry[field] = translate_text(entry[field])

        kept.append(entry)

    payload['regulators'] = kept

    # ensure_ascii=False keeps accented characters readable in the output.
    with open(seed_path, 'w', encoding='utf-8') as dst:
        json.dump(payload, dst, indent=2, ensure_ascii=False)
|
|
|
|
if __name__ == '__main__':
    # Run each seed-file pass in turn, then report completion.
    for run_step in (translate_wastes, translate_regulators):
        run_step()
    print("Translation complete!")
|