#!/usr/bin/env node
/**
 * Batch translate EN locale JSON files to Te Reo Māori (MI) using the DeepL API.
 * Preserves the JSON structure and translates every leaf string value that
 * does not already have a translation in the corresponding MI file.
 *
 * Usage: node scripts/translate-to-mi.js [filename.json ...] [--all]
 *
 * Requires the DEEPL_API_KEY environment variable (DeepL Pro endpoint).
 * Requests are batched to stay within API limits.
 *
 * NOTE(review): an earlier revision of this header said the
 * `quality_optimized` model is required for MI, but the request has never sent
 * a `model_type` parameter. Confirm against the DeepL API documentation and
 * add it to the request if it is in fact required.
 */
'use strict';

const fs = require('fs');
const path = require('path');
const https = require('https');

// Read from the environment only: credentials must never be committed to source.
const DEEPL_API_KEY = process.env.DEEPL_API_KEY;
const DEEPL_API_URL = 'https://api.deepl.com/v2/translate';
const EN_DIR = path.join(__dirname, '..', 'public', 'locales', 'en');
const MI_DIR = path.join(__dirname, '..', 'public', 'locales', 'mi');
const BATCH_SIZE = 50; // DeepL allows up to 50 texts per request
const DELAY_MS = 200; // Rate limiting between batches
const REQUEST_TIMEOUT_MS = 60000; // Abort a stalled request instead of hanging forever

// Files to skip (already complete or special)
const SKIP_FILES = ['faq.json.backup-1761644843'];

/**
 * Extract all leaf string values from nested JSON, preserving their location.
 *
 * @param {*} obj - Parsed JSON value to walk.
 * @param {string} [prefix=''] - Human-readable dot/bracket path of `obj`.
 * @param {Array<string|number>} [segments=[]] - Exact key/index sequence of `obj`.
 * @returns {Array<{path: string, segments: Array<string|number>, value: string}>}
 *   One entry per string leaf. `path` is for display; `segments` is the
 *   unambiguous location (keys containing "." or "[" survive intact).
 */
function extractLeafStrings(obj, prefix = '', segments = []) {
  if (typeof obj === 'string') {
    return [{ path: prefix, segments, value: obj }];
  }
  if (Array.isArray(obj)) {
    return obj.flatMap((item, i) =>
      extractLeafStrings(item, `${prefix}[${i}]`, [...segments, i]));
  }
  if (obj && typeof obj === 'object') {
    return Object.entries(obj).flatMap(([key, val]) =>
      extractLeafStrings(val, prefix ? `${prefix}.${key}` : key, [...segments, key]));
  }
  // Numbers, booleans and null carry no translatable text.
  return [];
}

/**
 * Split a dot/bracket path string (e.g. "a.b[2].c") into keys and indices.
 * Keys that themselves contain "." or "[" cannot be represented in this
 * format; pass a segments array to setNestedValue for those.
 *
 * @param {string} pathStr - Path in dot/bracket notation.
 * @returns {Array<string|number>} Object keys (strings) and array indices (numbers).
 */
function parsePath(pathStr) {
  return pathStr.split(/\.(?![^\[]*\])/).flatMap(part => {
    const matches = [];
    let remaining = part;
    while (remaining) {
      const bracketMatch = remaining.match(/^([^\[]*)\[(\d+)\](.*)/);
      if (bracketMatch) {
        if (bracketMatch[1]) matches.push(bracketMatch[1]);
        matches.push(Number.parseInt(bracketMatch[2], 10));
        remaining = bracketMatch[3];
        if (remaining.startsWith('.')) remaining = remaining.slice(1);
      } else {
        matches.push(remaining);
        remaining = '';
      }
    }
    return matches;
  });
}

/**
 * Set a value at a path in a nested object, creating missing containers.
 *
 * @param {Object|Array} obj - Root container, mutated in place.
 * @param {string|Array<string|number>} pathStr - Dot/bracket path string, or an
 *   exact segments array as produced by extractLeafStrings.
 * @param {*} value - Value to store at the path.
 */
function setNestedValue(obj, pathStr, value) {
  const parts = Array.isArray(pathStr) ? pathStr : parsePath(pathStr);
  // An empty path designates the root itself; there is no slot to assign into.
  if (parts.length === 0) return;

  let current = obj;
  for (let i = 0; i < parts.length - 1; i++) {
    const part = parts[i];
    if (current[part] === undefined) {
      // A numeric next segment means the missing container is an array.
      current[part] = typeof parts[i + 1] === 'number' ? [] : {};
    }
    current = current[part];
  }
  current[parts[parts.length - 1]] = value;
}

/**
 * Translate a batch of texts from EN to MI using the DeepL API.
 *
 * @param {string[]} texts - Texts to translate (at most BATCH_SIZE per call).
 * @returns {Promise<string[]>} Translations, in the same order as `texts`.
 *   Rejects on a network error, timeout, non-200 status, unparseable body, or
 *   a response whose translation count does not match the request.
 */
function translateBatch(texts) {
  return new Promise((resolve, reject) => {
    const params = new URLSearchParams();
    texts.forEach(t => params.append('text', t));
    params.append('source_lang', 'EN');
    params.append('target_lang', 'MI');

    const postData = params.toString();
    const url = new URL(DEEPL_API_URL);

    const options = {
      hostname: url.hostname,
      port: 443,
      path: url.pathname,
      method: 'POST',
      headers: {
        'Authorization': `DeepL-Auth-Key ${DEEPL_API_KEY}`,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': Buffer.byteLength(postData)
      }
    };

    const req = https.request(options, (res) => {
      // Decode as a stream so multi-byte characters (e.g. macrons) that are
      // split across chunk boundaries are not corrupted.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', chunk => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode !== 200) {
          reject(new Error(`DeepL API error ${res.statusCode}: ${data}`));
          return;
        }
        let result;
        try {
          result = JSON.parse(data);
        } catch (e) {
          reject(new Error(`Parse error: ${e.message}`));
          return;
        }
        const translations = result && result.translations;
        // A short or missing list would silently misalign texts and paths.
        if (!Array.isArray(translations) || translations.length !== texts.length) {
          const got = Array.isArray(translations) ? translations.length : 'none';
          reject(new Error(`Unexpected DeepL response: expected ${texts.length} translations, got ${got}`));
          return;
        }
        resolve(translations.map(t => t.text));
      });
    });

    req.setTimeout(REQUEST_TIMEOUT_MS, () => {
      req.destroy(new Error(`DeepL request timed out after ${REQUEST_TIMEOUT_MS} ms`));
    });
    req.on('error', reject);
    req.write(postData);
    req.end();
  });
}

/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Build a collision-free lookup key for a leaf from its exact segments.
 *
 * @param {{segments: Array<string|number>}} leaf - Leaf from extractLeafStrings.
 * @returns {string} Stable key identifying the leaf's location.
 */
function leafKey(leaf) {
  return JSON.stringify(leaf.segments);
}

/**
 * Translate an entire JSON file from EN to MI.
 * Existing MI translations are preserved; only missing leaves are sent to
 * DeepL. Nothing is written unless every batch succeeds, so a partially
 * translated file never masquerades as complete on the next run.
 *
 * @param {string} filename - File name relative to the EN locale directory.
 * @returns {Promise<boolean>} true on success (or nothing to do), false on failure.
 */
async function translateFile(filename) {
  const enPath = path.join(EN_DIR, filename);
  const miPath = path.join(MI_DIR, filename);

  if (!fs.existsSync(enPath)) {
    console.error(` ✗ EN file not found: ${filename}`);
    return false;
  }

  // Load existing MI file if it exists (to preserve existing translations)
  let existingMi = {};
  if (fs.existsSync(miPath)) {
    try {
      existingMi = JSON.parse(fs.readFileSync(miPath, 'utf8'));
    } catch (e) {
      console.warn(` ⚠ Could not parse existing MI file, will overwrite`);
    }
  }

  let enData;
  try {
    enData = JSON.parse(fs.readFileSync(enPath, 'utf8'));
  } catch (e) {
    // One malformed source file must not abort the whole batch run.
    console.error(` ✗ Could not parse EN file: ${e.message}`);
    return false;
  }

  const leaves = extractLeafStrings(enData);
  const existingMap = new Map(
    extractLeafStrings(existingMi).map(l => [leafKey(l), l.value])
  );

  // Identify which leaves need translation (not already in MI)
  const needsTranslation = leaves.filter(l => !existingMap.has(leafKey(l)));
  const alreadyTranslated = leaves.filter(l => existingMap.has(leafKey(l)));

  console.log(` ${leaves.length} total keys, ${alreadyTranslated.length} already translated, ${needsTranslation.length} to translate`);

  if (needsTranslation.length === 0) {
    console.log(` ✓ Already fully translated`);
    return true;
  }

  // Build output object starting with EN structure, overlaying existing MI translations
  const output = JSON.parse(JSON.stringify(enData)); // Deep clone EN structure

  // Apply existing MI translations
  for (const leaf of alreadyTranslated) {
    setNestedValue(output, leaf.segments, existingMap.get(leafKey(leaf)));
  }

  // Translate in batches
  const textsToTranslate = needsTranslation.map(l => l.value);
  const translatedTexts = [];
  const totalBatches = Math.ceil(textsToTranslate.length / BATCH_SIZE);

  for (let i = 0; i < textsToTranslate.length; i += BATCH_SIZE) {
    const batch = textsToTranslate.slice(i, i + BATCH_SIZE);
    const batchNum = Math.floor(i / BATCH_SIZE) + 1;

    process.stdout.write(` Translating batch ${batchNum}/${totalBatches} (${batch.length} texts)...`);

    try {
      const results = await translateBatch(batch);
      translatedTexts.push(...results);
      console.log(' ✓');
    } catch (e) {
      console.log(` ✗ ${e.message}`);
      return false;
    }

    if (i + BATCH_SIZE < textsToTranslate.length) {
      await sleep(DELAY_MS);
    }
  }

  // Apply translations to output
  for (let i = 0; i < needsTranslation.length; i++) {
    setNestedValue(output, needsTranslation[i].segments, translatedTexts[i]);
  }

  // Write output
  fs.writeFileSync(miPath, JSON.stringify(output, null, 2) + '\n', 'utf8');
  console.log(` ✓ Written: ${miPath}`);
  return true;
}

/**
 * CLI entry point: resolve the file list from the arguments, translate each
 * file in turn and print a summary. Sets a non-zero exit code when any file
 * fails so callers (CI, shell scripts) can detect the failure.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const fileArgs = args.filter(a => !a.startsWith('--'));

  let files;
  if (args.includes('--all')) {
    files = fs.readdirSync(EN_DIR)
      .filter(f => f.endsWith('.json') && !SKIP_FILES.includes(f))
      .sort();
  } else if (fileArgs.length > 0) {
    files = fileArgs;
  } else {
    console.log('Usage: node scripts/translate-to-mi.js [filename.json ...] [--all]');
    console.log(' --all Translate all EN files');
    process.exit(1);
  }

  if (!DEEPL_API_KEY) {
    console.error('DEEPL_API_KEY environment variable is not set.');
    process.exit(1);
  }

  // Ensure MI directory exists
  if (!fs.existsSync(MI_DIR)) {
    fs.mkdirSync(MI_DIR, { recursive: true });
  }

  console.log(`\n═══════════════════════════════════════════════════════════`);
  console.log(` DEEPL TRANSLATION: EN → MI (Te Reo Māori)`);
  console.log(`═══════════════════════════════════════════════════════════\n`);
  console.log(`Files to translate: ${files.length}\n`);

  let success = 0;
  let failed = 0;

  for (const file of files) {
    console.log(`\n📄 ${file}`);
    const result = await translateFile(file);
    if (result) success++;
    else failed++;
  }

  console.log(`\n═══════════════════════════════════════════════════════════`);
  console.log(` TRANSLATION COMPLETE`);
  console.log(`═══════════════════════════════════════════════════════════`);
  console.log(` ✓ Success: ${success}`);
  if (failed > 0) console.log(` ✗ Failed: ${failed}`);
  console.log(` Total: ${files.length}\n`);

  // Report partial failure through the exit status without cutting off output.
  if (failed > 0) process.exitCode = 1;
}

main().catch(e => {
  console.error('Fatal error:', e);
  process.exit(1);
});