tractatus/scripts/translate-all-documents.js
TheFlow 5e969bd4da feat(docs): intelligent section recategorization + i18n infrastructure
This commit includes two major improvements to the documentation system:

## 1. Section Recategorization (UX Fix)

**Problem**: 64 sections (24%) were incorrectly marked as "critical" and
displayed at the bottom of documents, burying important foundational content.

**Solution**:
- Created intelligent recategorization script analyzing titles, excerpts,
  and document context
- Reduced "critical" from 64 → 2 sections (97% reduction)
- Properly categorized content by purpose:
  - Conceptual: 63 → 138 (+119%) - foundations, "why this matters"
  - Practical: 3 → 46 (+1433%) - how-to guides, examples
  - Technical: 111 → 50 (-55%) - true implementation details

**UI Improvements**:
- Reordered category display: Critical → Conceptual → Practical → Technical → Reference
- Changed Critical color from amber to red for better visual distinction
- All 22 documents recategorized (173 sections updated)

## 2. i18n Infrastructure (Phase 2)

**Backend**:
- DeepL API integration service with quota management and error handling
- Translation API routes (GET /api/documents/:slug?lang=de, POST /api/documents/:id/translate)
- Document model already supports translations field (no schema changes)

**Frontend**:
- docs-app.js enhanced with language detection and URL parameter support
- Automatic fallback to English when translation unavailable
- Integration with existing i18n-simple.js system

**Scripts**:
- translate-all-documents.js: Batch translation workflow (dry-run support)
- audit-section-categories.js: Category distribution analysis

**URL Strategy**: Query parameter approach (?lang=de, ?lang=fr)

**Status**: Backend complete, ready for DeepL API key configuration

**Files Modified**:
- Frontend: document-cards.js, docs-app.js
- Backend: documents.controller.js, documents.routes.js, DeepL.service.js
- Scripts: 3 new governance/i18n scripts

**Database**: 173 sections recategorized via script (already applied)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-26 00:48:27 +13:00

239 lines
8 KiB
JavaScript
Executable file

#!/usr/bin/env node
/**
* Batch Translation Script
*
* Translates all public documents to German (DE) and French (FR) using DeepL API
*
* Usage:
* node scripts/translate-all-documents.js [options]
*
* Options:
* --lang=de,fr Target languages (comma-separated, default: de,fr)
* --force Overwrite existing translations
* --dry-run Preview what would be translated without executing
* --limit=N Limit to N documents (for testing)
* --slug=document-slug Translate only specific document
*
* Examples:
* node scripts/translate-all-documents.js --dry-run
* node scripts/translate-all-documents.js --lang=de --limit=5
* node scripts/translate-all-documents.js --slug=getting-started --force
*
* Requirements:
* - DEEPL_API_KEY environment variable must be set
* - MongoDB running on localhost:27017 (the script connects to the tractatus_dev database)
*/
require('dotenv').config();
const mongoose = require('mongoose');
const Document = require('../src/models/Document.model');
const deeplService = require('../src/services/DeepL.service');
// Parse command line arguments
const args = process.argv.slice(2);

/**
 * Turn raw CLI arguments into the options object used by the script.
 *
 * Recognised flags:
 *   --lang=a,b   target languages (comma-separated)
 *   --force      overwrite existing translations
 *   --dry-run    preview only
 *   --limit=N    maximum number of documents
 *   --slug=S     translate a single document
 * Unrecognised arguments are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{targetLangs: string[], force: boolean, dryRun: boolean,
 *            limit: (number|null), slug: (string|null)}} Parsed options.
 */
function parseCliArgs(argv) {
  const parsed = {
    targetLangs: ['de', 'fr'],
    force: false,
    dryRun: false,
    limit: null,
    slug: null,
  };

  for (const arg of argv) {
    if (arg.startsWith('--lang=')) {
      // Trim whitespace and drop empty entries so "de, fr" or a trailing
      // comma never produces a blank or padded language code (the code is
      // later used as a `translations.<lang>` key).
      const langs = arg
        .slice('--lang='.length)
        .split(',')
        .map((code) => code.trim())
        .filter((code) => code.length > 0);
      // An empty list keeps the defaults instead of silently doing nothing.
      if (langs.length > 0) {
        parsed.targetLangs = langs;
      }
    } else if (arg === '--force') {
      parsed.force = true;
    } else if (arg === '--dry-run') {
      parsed.dryRun = true;
    } else if (arg.startsWith('--limit=')) {
      // Always parse as base 10; a non-numeric or non-positive value means
      // "no explicit limit" rather than leaking NaN or a negative number.
      const limit = Number.parseInt(arg.slice('--limit='.length), 10);
      parsed.limit = Number.isNaN(limit) || limit <= 0 ? null : limit;
    } else if (arg.startsWith('--slug=')) {
      // Take everything after the prefix so a value containing '=' survives.
      parsed.slug = arg.slice('--slug='.length);
    }
  }

  return parsed;
}

const options = parseCliArgs(args);
// Run-wide counters, mutated by main() and reported in the final summary.
const stats = {
  // Number of documents selected for this run.
  total: 0,
  // Document/language pairs translated and saved successfully.
  translated: 0,
  // Document/language pairs left alone because a translation already exists.
  skipped: 0,
  // Document/language pairs whose translation or save threw.
  failed: 0,
  // One { document, language, error } record per failure.
  errors: [],
};
/**
 * Print a section banner: a rule, the title, and a rule followed by a blank line.
 *
 * @param {string} title - Text shown between the two rules.
 */
function printBanner(title) {
  const rule = '═══════════════════════════════════════════════════════════';
  console.log(rule);
  console.log(title);
  console.log(`${rule}\n`);
}

/**
 * Entry point of the batch translation workflow.
 *
 * Steps:
 *   1. Exit with code 1 if the DeepL service reports it is not available.
 *   2. Print current DeepL usage (best effort; failure only warns).
 *   3. Connect to the local tractatus_dev MongoDB database.
 *   4. Load either the single document named by --slug or the public
 *      documents (up to --limit, default 1000).
 *   5. Print the translation plan.
 *   6. For every document and target language: skip existing translations
 *      unless --force, only report in --dry-run mode, otherwise translate and
 *      store the result under `translations.<lang>`. A quota error stops the
 *      whole run, not just the current document.
 *   7. Print a summary, disconnect, and exit with code 1 if anything failed.
 *
 * @returns {Promise<void>} Never resolves normally; the process exits.
 */
async function main() {
  printBanner(' BATCH DOCUMENT TRANSLATION');

  // 1. Check DeepL service availability
  if (!deeplService.isAvailable()) {
    console.error('❌ ERROR: DeepL API key not configured');
    console.error(' Set DEEPL_API_KEY environment variable\n');
    process.exit(1);
  }
  console.log('✓ DeepL service available');

  // 2. Show usage statistics
  try {
    const usage = await deeplService.getUsage();
    console.log(`✓ DeepL quota: ${usage.character_count.toLocaleString()} / ${usage.character_limit.toLocaleString()} chars (${usage.percentage_used}% used)\n`);
  } catch (error) {
    console.warn(`⚠ Could not fetch DeepL usage: ${error.message}\n`);
  }

  // 3. Connect to MongoDB
  console.log('📡 Connecting to MongoDB...');
  await mongoose.connect('mongodb://localhost:27017/tractatus_dev', {
    serverSelectionTimeoutMS: 5000
  });
  console.log('✓ Connected to tractatus_dev\n');

  // 4. Fetch documents to translate
  console.log('📚 Fetching documents...');
  let documents;
  if (options.slug) {
    const doc = await Document.findBySlug(options.slug);
    documents = doc ? [doc] : [];
    console.log(`✓ Found document: ${doc?.title || 'Not found'}\n`);
  } else {
    const filter = { visibility: 'public' };
    documents = await Document.list({
      filter,
      limit: options.limit || 1000,
      sort: { order: 1, 'metadata.date_created': -1 }
    });
    console.log(`✓ Found ${documents.length} public documents\n`);
  }

  if (documents.length === 0) {
    console.log('No documents to translate.\n');
    await mongoose.disconnect();
    process.exit(0);
  }
  stats.total = documents.length;

  // 5. Show translation plan
  console.log('Translation Plan:');
  console.log(` Languages: ${options.targetLangs.join(', ')}`);
  console.log(` Documents: ${documents.length}`);
  console.log(` Force overwrite: ${options.force ? 'Yes' : 'No'}`);
  console.log(` Dry run: ${options.dryRun ? 'Yes' : 'No'}\n`);
  if (options.dryRun) {
    printBanner(' DRY RUN - Preview Only');
  }

  // 6. Translate each document
  const lastLang = options.targetLangs[options.targetLangs.length - 1];
  // Set when DeepL reports an exhausted quota so that BOTH loops stop;
  // a plain `break` would only leave the inner language loop and the script
  // would keep hitting the API for every remaining document.
  let quotaExceeded = false;

  for (let i = 0; i < documents.length; i++) {
    const doc = documents[i];
    const progress = `[${i + 1}/${documents.length}]`;
    console.log(`${progress} ${doc.title}`);
    console.log(` Slug: ${doc.slug}`);

    for (const lang of options.targetLangs) {
      const langUpper = lang.toUpperCase();

      // Check if translation exists
      const hasTranslation = doc.translations && doc.translations[lang];
      if (hasTranslation && !options.force) {
        console.log(` ${langUpper}: ⏭ Skipped (exists, use --force to overwrite)`);
        stats.skipped++;
        continue;
      }

      if (options.dryRun) {
        console.log(` ${langUpper}: 🔍 Would translate (${hasTranslation ? 'overwrite' : 'new'})`);
        continue;
      }

      // Perform translation
      try {
        console.log(` ${langUpper}: 🔄 Translating...`);
        const translation = await deeplService.translateDocument(doc, lang);

        // Update document
        await Document.update(doc._id.toString(), {
          [`translations.${lang}`]: translation
        });
        console.log(` ${langUpper}: ✓ Complete`);
        stats.translated++;
      } catch (error) {
        // Tolerate thrown values that are not Error instances.
        const message = error?.message ?? String(error);
        console.error(` ${langUpper}: ❌ Failed - ${message}`);
        stats.failed++;
        stats.errors.push({
          document: doc.slug,
          language: lang,
          error: message
        });

        // If quota exceeded, stop (case-insensitive match on the message)
        if (/quota/i.test(message)) {
          console.error('\n❌ DeepL quota exceeded. Stopping.\n');
          quotaExceeded = true;
          break;
        }
      }

      // Rate limiting: Wait 1 second between translations (not after the very last one)
      if (i < documents.length - 1 || lang !== lastLang) {
        await new Promise(resolve => setTimeout(resolve, 1000));
      }
    }

    console.log('');
    if (quotaExceeded) {
      break;
    }
  }

  // 7. Summary
  printBanner(' TRANSLATION SUMMARY');
  if (options.dryRun) {
    console.log(' Dry run complete - no translations were performed\n');
  } else {
    console.log(` Documents processed: ${stats.total}`);
    console.log(` Translations created: ${stats.translated}`);
    console.log(` Skipped (existing): ${stats.skipped}`);
    console.log(` Failed: ${stats.failed}\n`);
    if (stats.errors.length > 0) {
      console.log(' Errors:');
      stats.errors.forEach(err => {
        console.log(` - ${err.document} (${err.language}): ${err.error}`);
      });
      console.log('');
    }

    // Show final usage (best effort: a failure here must not affect the exit code)
    try {
      const usage = await deeplService.getUsage();
      console.log(` DeepL usage: ${usage.character_count.toLocaleString()} / ${usage.character_limit.toLocaleString()} chars (${usage.percentage_used}% used)\n`);
    } catch (error) {
      console.warn(`⚠ Could not fetch DeepL usage: ${error.message}\n`);
    }
  }

  await mongoose.disconnect();
  console.log('✓ Database disconnected\n');
  process.exit(stats.failed > 0 ? 1 : 0);
}
/**
 * Last-resort handler: report an error that escaped main() and exit non-zero.
 *
 * @param {Error} fatal - The rejection reason from main().
 */
function reportFatalAndExit(fatal) {
  console.error('\n❌ Fatal error:', fatal.message);
  console.error(fatal.stack);
  process.exit(1);
}

// Kick off the script
main().catch(reportFatalAndExit);