DeepL quality_optimized translations covering 2,897 keys across all pages including researcher, leader, implementer, architecture, home-ai, values, koha, faq, gdpr, privacy, and all remaining locale files. Completes MI from 21% to 100% coverage. Also adds reusable translation script. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
260 lines
8.5 KiB
JavaScript
260 lines
8.5 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Batch translate EN locale JSON files to Te Reo Māori (MI) using the DeepL API.
 * Preserves JSON structure and translates all leaf string values.
 *
 * Usage: node scripts/translate-to-mi.js [filename.json ...] [--all]
 *
 * Uses the DeepL Pro API endpoint. The quality_optimized model is stated to be
 * required for MI; note that the request itself does not set a model_type
 * parameter, so model selection is left to the API's default.
 * Batches requests to stay within API limits.
 */
|
|
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const https = require('https');
|
|
|
|
// The API key is read from the environment only. A secret must never be
// committed as a fallback value; any key that was previously hard-coded here
// should be treated as leaked and rotated.
const DEEPL_API_KEY = process.env.DEEPL_API_KEY;
const DEEPL_API_URL = 'https://api.deepl.com/v2/translate';

// Source (EN) and target (MI) locale directories, resolved relative to this script.
const EN_DIR = path.join(__dirname, '..', 'public', 'locales', 'en');
const MI_DIR = path.join(__dirname, '..', 'public', 'locales', 'mi');

const BATCH_SIZE = 50; // DeepL allows up to 50 texts per request
const DELAY_MS = 200; // Rate limiting between batches

// Files to skip (already complete or special)
const SKIP_FILES = ['faq.json.backup-1761644843'];
|
|
|
|
/**
 * Collect every string leaf of a nested JSON value together with its path.
 *
 * Paths use dot notation for object keys and `[i]` for array indices
 * (e.g. `hero.items[2].title`). Non-string leaves (numbers, booleans, null)
 * are ignored. Keys that themselves contain `.` or `[n]` produce ambiguous
 * paths, because the encoding has no escaping.
 *
 * @param {*} obj - Parsed JSON value to walk.
 * @param {string} [prefix=''] - Path of `obj` within the root value.
 * @returns {{path: string, value: string}[]} Leaves in document order.
 */
function extractLeafStrings(obj, prefix = '') {
  if (typeof obj === 'string') {
    return [{ path: prefix, value: obj }];
  }

  // flatMap is used instead of push(...spread) so that very large subtrees
  // cannot exceed the engine's argument-count limit.
  if (Array.isArray(obj)) {
    return obj.flatMap((item, i) => extractLeafStrings(item, `${prefix}[${i}]`));
  }

  if (obj && typeof obj === 'object') {
    return Object.entries(obj).flatMap(([key, val]) => {
      const childPath = prefix ? `${prefix}.${key}` : key;
      return extractLeafStrings(val, childPath);
    });
  }

  return [];
}
|
|
|
|
/**
 * Set a value at a dot/bracket path in a nested object, mutating `obj`.
 *
 * The path format is `a.b[0].c`: dots separate object keys and `[n]` selects
 * an array index. Missing intermediate containers are created on the way:
 * an array when the next segment is a numeric index, otherwise an object.
 *
 * Limitations: the path encoding has no escaping, so keys that contain `.`
 * or `[n]` cannot be addressed correctly.
 *
 * @param {Object|Array} obj - Root container to modify in place.
 * @param {string} pathStr - Path to the target location.
 * @param {*} value - Value to store at the path.
 * @returns {void}
 */
function setNestedValue(obj, pathStr, value) {
  // Tokenise the path into string keys and numeric array indices.
  const segments = [];
  for (const chunk of pathStr.split(/\.(?![^\[]*\])/)) {
    let rest = chunk;
    while (rest) {
      const hit = rest.match(/^([^\[]*)\[(\d+)\](.*)/);
      if (!hit) {
        // No bracket left: the remainder is a plain key.
        segments.push(rest);
        break;
      }
      const [, name, index, tail] = hit;
      if (name) segments.push(name);
      segments.push(Number.parseInt(index, 10));
      rest = tail.startsWith('.') ? tail.slice(1) : tail;
    }
  }

  // Walk down to the parent of the final segment, creating containers as needed.
  let cursor = obj;
  for (let i = 0; i < segments.length - 1; i++) {
    const segment = segments[i];
    if (cursor[segment] === undefined) {
      cursor[segment] = typeof segments[i + 1] === 'number' ? [] : {};
    }
    cursor = cursor[segment];
  }

  cursor[segments[segments.length - 1]] = value;
}
|
|
|
|
/**
 * Translate a batch of texts from EN to MI using the DeepL API.
 *
 * Sends a single form-encoded POST to DEEPL_API_URL with one `text` field per
 * input string, authenticated with DEEPL_API_KEY.
 *
 * @param {string[]} texts - Source strings to translate in one request.
 * @returns {Promise<string[]>} Translations in the same order as `texts`.
 *   Rejects with an Error on a non-200 status, a network/stream failure, an
 *   unparsable body, or a response whose translation count does not match
 *   the number of inputs.
 */
function translateBatch(texts) {
  return new Promise((resolve, reject) => {
    // Nothing to translate: skip the network round trip entirely.
    if (texts.length === 0) {
      resolve([]);
      return;
    }

    const params = new URLSearchParams();
    texts.forEach(t => params.append('text', t));
    params.append('source_lang', 'EN');
    params.append('target_lang', 'MI');
    // NOTE(review): the file header says the quality_optimized model is required
    // for MI, but no model_type parameter is sent here — confirm whether it
    // should be set explicitly.

    const postData = params.toString();
    const url = new URL(DEEPL_API_URL);

    const options = {
      hostname: url.hostname,
      port: 443,
      path: url.pathname,
      method: 'POST',
      headers: {
        'Authorization': `DeepL-Auth-Key ${DEEPL_API_KEY}`,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': Buffer.byteLength(postData)
      }
    };

    const req = https.request(options, (res) => {
      // Collect raw Buffers and decode once at the end. Decoding chunk by chunk
      // would corrupt multi-byte UTF-8 characters (e.g. macronised vowels) that
      // happen to be split across two chunks.
      const chunks = [];
      res.on('data', chunk => chunks.push(chunk));
      res.on('error', reject);
      res.on('end', () => {
        const data = Buffer.concat(chunks).toString('utf8');

        if (res.statusCode !== 200) {
          reject(new Error(`DeepL API error ${res.statusCode}: ${data}`));
          return;
        }

        let result;
        try {
          result = JSON.parse(data);
        } catch (e) {
          reject(new Error(`Parse error: ${e.message}`));
          return;
        }

        // Callers map results back to inputs by index, so the count must match.
        const translations = result && result.translations;
        if (!Array.isArray(translations) || translations.length !== texts.length) {
          const got = Array.isArray(translations) ? translations.length : 'none';
          reject(new Error(`Parse error: expected ${texts.length} translations, got ${got}`));
          return;
        }

        resolve(translations.map(t => t.text));
      });
    });

    req.on('error', reject);
    req.write(postData);
    req.end();
  });
}
|
|
|
|
/**
 * Pause for a given duration.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) after the delay.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Translate one locale JSON file from EN to MI and write it to MI_DIR.
 *
 * The output starts as a copy of the EN structure. String leaves that already
 * exist at the same path in the current MI file are kept; all other string
 * leaves are sent to DeepL in batches of BATCH_SIZE with DELAY_MS between
 * batches. Nothing is written unless every batch succeeds.
 *
 * @param {string} filename - File name relative to EN_DIR / MI_DIR.
 * @returns {Promise<boolean>} true if the file is (now) fully translated,
 *   false if the EN file is missing or unreadable, a batch failed, the
 *   translations do not line up with the inputs, or the write failed.
 */
async function translateFile(filename) {
  const enPath = path.join(EN_DIR, filename);
  const miPath = path.join(MI_DIR, filename);

  if (!fs.existsSync(enPath)) {
    console.error(` ✗ EN file not found: ${filename}`);
    return false;
  }

  // Load existing MI file if it exists (to preserve existing translations)
  let existingMi = {};
  if (fs.existsSync(miPath)) {
    try {
      existingMi = JSON.parse(fs.readFileSync(miPath, 'utf8'));
    } catch (e) {
      console.warn(` ⚠ Could not parse existing MI file, will overwrite`);
    }
  }

  // A broken EN file is reported as a per-file failure so that one bad file
  // does not abort the whole run.
  let enData;
  try {
    enData = JSON.parse(fs.readFileSync(enPath, 'utf8'));
  } catch (e) {
    console.error(` ✗ Could not read or parse EN file ${filename}: ${e.message}`);
    return false;
  }

  const leaves = extractLeafStrings(enData);
  const existingLeaves = extractLeafStrings(existingMi);
  const existingMap = new Map(existingLeaves.map(l => [l.path, l.value]));

  // Identify which leaves need translation (not already in MI)
  const needsTranslation = leaves.filter(l => !existingMap.has(l.path));
  const alreadyTranslated = leaves.filter(l => existingMap.has(l.path));

  console.log(` ${leaves.length} total keys, ${alreadyTranslated.length} already translated, ${needsTranslation.length} to translate`);

  if (needsTranslation.length === 0) {
    console.log(` ✓ Already fully translated`);
    return true;
  }

  // Build output object starting with EN structure, overlaying existing MI translations
  const output = JSON.parse(JSON.stringify(enData)); // Deep clone EN structure

  // Apply existing MI translations
  for (const leaf of alreadyTranslated) {
    setNestedValue(output, leaf.path, existingMap.get(leaf.path));
  }

  // Translate in batches
  const textsToTranslate = needsTranslation.map(l => l.value);
  const translatedTexts = [];
  const totalBatches = Math.ceil(textsToTranslate.length / BATCH_SIZE);

  for (let i = 0; i < textsToTranslate.length; i += BATCH_SIZE) {
    const batch = textsToTranslate.slice(i, i + BATCH_SIZE);
    const batchNum = Math.floor(i / BATCH_SIZE) + 1;
    process.stdout.write(` Translating batch ${batchNum}/${totalBatches} (${batch.length} texts)...`);

    try {
      const results = await translateBatch(batch);
      translatedTexts.push(...results);
      console.log(' ✓');
    } catch (e) {
      console.log(` ✗ ${e.message}`);
      return false;
    }

    // Pause between batches, but not after the last one.
    if (i + BATCH_SIZE < textsToTranslate.length) {
      await sleep(DELAY_MS);
    }
  }

  // Translations are matched to leaves by index. A short or malformed result
  // would store undefined values, which JSON.stringify silently drops.
  const allStrings = translatedTexts.every(t => typeof t === 'string');
  if (translatedTexts.length !== needsTranslation.length || !allStrings) {
    console.error(` ✗ Expected ${needsTranslation.length} translated strings, received ${translatedTexts.length}`);
    return false;
  }

  // Apply translations to output
  for (let i = 0; i < needsTranslation.length; i++) {
    setNestedValue(output, needsTranslation[i].path, translatedTexts[i]);
  }

  // Write output
  try {
    fs.writeFileSync(miPath, JSON.stringify(output, null, 2) + '\n', 'utf8');
  } catch (e) {
    console.error(` ✗ Could not write ${miPath}: ${e.message}`);
    return false;
  }
  console.log(` ✓ Written: ${miPath}`);
  return true;
}
|
|
|
|
/**
 * CLI entry point: choose the files to translate, translate them one after
 * another, and print a summary.
 *
 * With `--all`, every `.json` file in EN_DIR except those in SKIP_FILES is
 * processed in sorted order. Otherwise every non-flag argument is treated as
 * a file name. With no usable arguments, usage is printed and the process
 * exits with status 1.
 *
 * The process exit code is set to 1 when at least one file fails, so callers
 * (CI, shell scripts) can detect a partial failure.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);

  // Ensure MI directory exists
  if (!fs.existsSync(MI_DIR)) {
    fs.mkdirSync(MI_DIR, { recursive: true });
  }

  let files;
  if (args.includes('--all')) {
    files = fs.readdirSync(EN_DIR)
      .filter(f => f.endsWith('.json') && !SKIP_FILES.includes(f))
      .sort();
  } else if (args.length > 0 && !args[0].startsWith('--')) {
    files = args.filter(f => !f.startsWith('--'));
  } else {
    console.log('Usage: node scripts/translate-to-mi.js [filename.json ...] [--all]');
    console.log(' --all Translate all EN files');
    process.exit(1);
  }

  // Fail fast with a clear message instead of one API auth error per batch.
  if (!DEEPL_API_KEY) {
    console.error('DEEPL_API_KEY is not set; export it in the environment before running this script.');
    process.exit(1);
  }

  console.log(`\n═══════════════════════════════════════════════════════════`);
  console.log(` DEEPL TRANSLATION: EN → MI (Te Reo Māori)`);
  console.log(`═══════════════════════════════════════════════════════════\n`);
  console.log(`Files to translate: ${files.length}\n`);

  let success = 0;
  let failed = 0;

  // Files are processed sequentially.
  for (const file of files) {
    console.log(`\n📄 ${file}`);
    const result = await translateFile(file);
    if (result) success++;
    else failed++;
  }

  console.log(`\n═══════════════════════════════════════════════════════════`);
  console.log(` TRANSLATION COMPLETE`);
  console.log(`═══════════════════════════════════════════════════════════`);
  console.log(` ✓ Success: ${success}`);
  if (failed > 0) console.log(` ✗ Failed: ${failed}`);
  console.log(` Total: ${files.length}\n`);

  // Report partial failure through the exit status without cutting output short.
  if (failed > 0) {
    process.exitCode = 1;
  }
}
|
|
|
|
// Run the CLI; any error that escapes main() is logged and exits with status 1.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
|