tractatus/scripts/translate-to-mi.js
TheFlow 28cb1139a9 feat: Add complete Te Reo Māori (MI) translations for all 19 locale files
DeepL quality_optimized translations covering 2,897 keys across all pages
including researcher, leader, implementer, architecture, home-ai, values,
koha, faq, gdpr, privacy, and all remaining locale files. Completes MI
from 21% to 100% coverage. Also adds reusable translation script.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 08:23:58 +13:00

260 lines
8.5 KiB
JavaScript

#!/usr/bin/env node
/**
* Batch translate EN locale JSON files to Te Reo Māori (MI) using DeepL API.
* Preserves JSON structure, translates all leaf string values.
*
* Usage: node scripts/translate-to-mi.js [filename.json ...] [--all]
*
* Uses DeepL Pro API with quality_optimized model (required for MI).
* Batches requests to stay within API limits.
*/
const fs = require('fs');
const path = require('path');
const https = require('https');
// The DeepL key is read from the environment only. A key must never be
// embedded in source control: anyone with read access to the repository could
// use it, and rotating it would require a code change.
const DEEPL_API_KEY = process.env.DEEPL_API_KEY;
if (!DEEPL_API_KEY) {
  console.error('DEEPL_API_KEY environment variable is not set; export your DeepL API key before running this script.');
  process.exit(1);
}
const DEEPL_API_URL = 'https://api.deepl.com/v2/translate';
// Source (EN) and target (MI) locale directories, resolved relative to this script.
const EN_DIR = path.join(__dirname, '..', 'public', 'locales', 'en');
const MI_DIR = path.join(__dirname, '..', 'public', 'locales', 'mi');
const BATCH_SIZE = 50; // DeepL allows up to 50 texts per request
const DELAY_MS = 200; // Rate limiting between batches
// Files to skip (already complete or special); only consulted in --all mode
const SKIP_FILES = ['faq.json.backup-1761644843'];
/**
 * Walk a parsed JSON value depth-first and list every string leaf.
 *
 * Paths use dot notation for object keys and `[n]` for array indices
 * (e.g. `hero.items[2].title`). Non-string leaves (numbers, booleans, null)
 * produce no entry.
 *
 * @param {*} obj - Value to walk: string, array, object, or any other JSON value.
 * @param {string} [prefix=''] - Path accumulated so far.
 * @returns {{path: string, value: string}[]} String leaves in traversal order.
 */
function extractLeafStrings(obj, prefix = '') {
  if (typeof obj === 'string') {
    return [{ path: prefix, value: obj }];
  }
  if (Array.isArray(obj)) {
    return obj.flatMap((element, index) => extractLeafStrings(element, `${prefix}[${index}]`));
  }
  if (obj !== null && typeof obj === 'object') {
    return Object.keys(obj).flatMap((key) => {
      // The first path segment has no leading dot.
      const childPath = prefix ? `${prefix}.${key}` : key;
      return extractLeafStrings(obj[key], childPath);
    });
  }
  return [];
}
/**
 * Set a value at a dot/bracket path in a nested object, creating missing
 * intermediate containers along the way.
 *
 * The path syntax is the one produced by extractLeafStrings: object keys are
 * joined with `.`, array indices are written as `[n]` (e.g. `a.b[0].c`).
 * A missing container is created as an array when the next path part is a
 * numeric index, and as a plain object otherwise.
 *
 * @param {object|Array} obj - Root container; mutated in place.
 * @param {string} pathStr - Dot/bracket path of the leaf to write.
 * @param {*} value - Value stored at the leaf.
 */
function setNestedValue(obj, pathStr, value) {
  // Split on dots that are not inside a [...] index, then break every piece
  // into its key part and any numeric index parts.
  const parts = pathStr.split(/\.(?![^\[]*\])/).flatMap(part => {
    // An empty segment is a real empty-string key (extractLeafStrings emits
    // `a.` for {a: {'': ...}} and `` for a root '' key). Keep it instead of
    // dropping it, otherwise the write lands on the parent or on "undefined".
    if (part === '') return [''];
    const matches = [];
    let remaining = part;
    while (remaining) {
      const bracketMatch = remaining.match(/^([^\[]*)\[(\d+)\](.*)/);
      if (bracketMatch) {
        // Key before the bracket is absent for chained indices such as `[0][1]`.
        if (bracketMatch[1]) matches.push(bracketMatch[1]);
        matches.push(Number.parseInt(bracketMatch[2], 10));
        remaining = bracketMatch[3];
        if (remaining.startsWith('.')) remaining = remaining.slice(1);
      } else {
        matches.push(remaining);
        remaining = '';
      }
    }
    return matches;
  });
  let current = obj;
  for (let i = 0; i < parts.length - 1; i++) {
    const part = parts[i];
    const nextPart = parts[i + 1];
    if (current[part] === undefined) {
      current[part] = typeof nextPart === 'number' ? [] : {};
    }
    current = current[part];
  }
  current[parts[parts.length - 1]] = value;
}
/**
 * Translate a batch of texts from English to Māori with a single DeepL API call.
 *
 * @param {string[]} texts - Source strings; each is sent as a `text` form field.
 * @returns {Promise<string[]>} Translated strings, one per input, in input order.
 *   Rejects with an Error on a network failure, a non-200 status, an
 *   unparseable body, or a body whose `translations` array does not contain
 *   exactly one entry per input text.
 */
function translateBatch(texts) {
  return new Promise((resolve, reject) => {
    // Nothing to translate: avoid a request that carries no `text` field.
    if (texts.length === 0) {
      resolve([]);
      return;
    }
    const params = new URLSearchParams();
    texts.forEach(t => params.append('text', t));
    params.append('source_lang', 'EN');
    params.append('target_lang', 'MI');
    // NOTE(review): no `model_type` field is sent, although the file header
    // says the quality_optimized model is required for MI - confirm that the
    // API default selects it.
    const postData = params.toString();
    const url = new URL(DEEPL_API_URL);
    const options = {
      hostname: url.hostname,
      port: 443,
      path: url.pathname,
      method: 'POST',
      headers: {
        'Authorization': `DeepL-Auth-Key ${DEEPL_API_KEY}`,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': Buffer.byteLength(postData)
      }
    };
    const req = https.request(options, (res) => {
      // Decode at the stream level: concatenating raw Buffer chunks as strings
      // corrupts any multi-byte UTF-8 character (e.g. macron vowels) that is
      // split across two chunks.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => {
        if (res.statusCode !== 200) {
          reject(new Error(`DeepL API error ${res.statusCode}: ${data}`));
          return;
        }
        let result;
        try {
          result = JSON.parse(data);
        } catch (e) {
          reject(new Error(`Parse error: ${e.message}`));
          return;
        }
        // Callers pair results with inputs by index, so a missing or short
        // array must fail loudly rather than misalign translations.
        const translations = result && result.translations;
        if (!Array.isArray(translations) || translations.length !== texts.length) {
          const got = Array.isArray(translations) ? translations.length : 'none';
          reject(new Error(`Unexpected DeepL response: expected ${texts.length} translations, got ${got}`));
          return;
        }
        resolve(translations.map(t => t.text));
      });
    });
    req.on('error', reject);
    req.write(postData);
    req.end();
  });
}
/**
 * Pause helper for async code.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Promise fulfilled once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Translate one locale JSON file from EN to MI and write it into MI_DIR.
 *
 * String leaves whose path already exists in the MI file are kept as they
 * are; only the remaining leaves are sent to DeepL, in batches of BATCH_SIZE
 * with a DELAY_MS pause between batches. The output takes its structure (and
 * all non-string values) from the EN file.
 *
 * @param {string} filename - File name relative to EN_DIR / MI_DIR.
 * @returns {Promise<boolean>} true when the MI file is complete (newly written
 *   or already fully translated); false when the EN file is missing or
 *   unreadable, or when a translation batch fails. Nothing is written on failure.
 */
async function translateFile(filename) {
  const enPath = path.join(EN_DIR, filename);
  const miPath = path.join(MI_DIR, filename);
  if (!fs.existsSync(enPath)) {
    console.error(` ✗ EN file not found: ${filename}`);
    return false;
  }
  // A malformed EN file fails this file only; letting the exception escape
  // would abort every remaining file of an --all run.
  let enData;
  try {
    enData = JSON.parse(fs.readFileSync(enPath, 'utf8'));
  } catch (e) {
    console.error(` ✗ Could not parse EN file ${filename}: ${e.message}`);
    return false;
  }
  // Load existing MI file if it exists (to preserve existing translations)
  let existingMi = {};
  if (fs.existsSync(miPath)) {
    try {
      existingMi = JSON.parse(fs.readFileSync(miPath, 'utf8'));
    } catch (e) {
      console.warn(` ⚠ Could not parse existing MI file, will overwrite`);
    }
  }
  const leaves = extractLeafStrings(enData);
  const existingLeaves = extractLeafStrings(existingMi);
  const existingMap = new Map(existingLeaves.map(l => [l.path, l.value]));
  // Identify which leaves need translation (not already in MI)
  const needsTranslation = leaves.filter(l => !existingMap.has(l.path));
  const alreadyTranslated = leaves.filter(l => existingMap.has(l.path));
  console.log(` ${leaves.length} total keys, ${alreadyTranslated.length} already translated, ${needsTranslation.length} to translate`);
  if (needsTranslation.length === 0) {
    console.log(` ✓ Already fully translated`);
    return true;
  }
  // Build output object starting with EN structure, overlaying existing MI translations
  const output = JSON.parse(JSON.stringify(enData)); // Deep clone EN structure
  for (const leaf of alreadyTranslated) {
    setNestedValue(output, leaf.path, existingMap.get(leaf.path));
  }
  // Translate in batches
  const textsToTranslate = needsTranslation.map(l => l.value);
  const totalBatches = Math.ceil(textsToTranslate.length / BATCH_SIZE);
  const translatedTexts = [];
  for (let i = 0; i < textsToTranslate.length; i += BATCH_SIZE) {
    const batch = textsToTranslate.slice(i, i + BATCH_SIZE);
    const batchNum = Math.floor(i / BATCH_SIZE) + 1;
    process.stdout.write(` Translating batch ${batchNum}/${totalBatches} (${batch.length} texts)...`);
    try {
      const results = await translateBatch(batch);
      // Translations are matched to paths by index; a short or long result
      // would shift every later string or leave undefined values that
      // JSON.stringify silently drops from the written file.
      if (!Array.isArray(results) || results.length !== batch.length) {
        const got = Array.isArray(results) ? results.length : 'none';
        throw new Error(` ✗ expected ${batch.length} translations, got ${got}`);
      }
      translatedTexts.push(...results);
      console.log(' ✓');
    } catch (e) {
      console.log(`${e.message}`);
      return false;
    }
    // No pause needed after the final batch.
    if (i + BATCH_SIZE < textsToTranslate.length) {
      await sleep(DELAY_MS);
    }
  }
  // Apply translations to output
  for (let i = 0; i < needsTranslation.length; i++) {
    setNestedValue(output, needsTranslation[i].path, translatedTexts[i]);
  }
  // Write output
  fs.writeFileSync(miPath, JSON.stringify(output, null, 2) + '\n', 'utf8');
  console.log(` ✓ Written: ${miPath}`);
  return true;
}
/**
 * CLI entry point: pick the files to translate from the command line,
 * translate them one after another, and print a summary.
 *
 * With `--all`, every `.json` file in EN_DIR except those in SKIP_FILES is
 * processed in sorted order. Otherwise every non-flag argument is treated as
 * a file name. With no usable arguments the usage text is printed and the
 * process exits with status 1.
 *
 * The process exit code is set to 1 when at least one file failed, so shells
 * and CI jobs can detect an incomplete run; all files are still attempted.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  // Ensure MI directory exists
  if (!fs.existsSync(MI_DIR)) {
    fs.mkdirSync(MI_DIR, { recursive: true });
  }
  let files;
  if (args.includes('--all')) {
    files = fs.readdirSync(EN_DIR)
      .filter(f => f.endsWith('.json') && !SKIP_FILES.includes(f))
      .sort();
  } else if (args.length > 0 && !args[0].startsWith('--')) {
    files = args.filter(f => !f.startsWith('--'));
  } else {
    console.log('Usage: node scripts/translate-to-mi.js [filename.json ...] [--all]');
    console.log(' --all Translate all EN files');
    process.exit(1);
  }
  console.log(`\n═══════════════════════════════════════════════════════════`);
  console.log(` DEEPL TRANSLATION: EN → MI (Te Reo Māori)`);
  console.log(`═══════════════════════════════════════════════════════════\n`);
  console.log(`Files to translate: ${files.length}\n`);
  let success = 0;
  let failed = 0;
  // Files are processed sequentially, one translateFile call at a time.
  for (const file of files) {
    console.log(`\n📄 ${file}`);
    const result = await translateFile(file);
    if (result) success++;
    else failed++;
  }
  console.log(`\n═══════════════════════════════════════════════════════════`);
  console.log(` TRANSLATION COMPLETE`);
  console.log(`═══════════════════════════════════════════════════════════`);
  console.log(` ✓ Success: ${success}`);
  if (failed > 0) console.log(` ✗ Failed: ${failed}`);
  console.log(` Total: ${files.length}\n`);
  // Report partial failure through the exit status. exitCode (rather than
  // process.exit) lets pending stdout writes flush before the process ends.
  if (failed > 0) {
    process.exitCode = 1;
  }
}
// Run the CLI; any error that escapes main() is reported and turns into exit status 1.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});