feat(docs): intelligent section recategorization + i18n infrastructure

This commit includes two major improvements to the documentation system:

## 1. Section Recategorization (UX Fix)

**Problem**: 64 sections (24%) were incorrectly marked as "critical" and
displayed at the bottom of documents, burying important foundational content.

**Solution**:
- Created intelligent recategorization script analyzing titles, excerpts,
  and document context
- Reduced "critical" from 64 → 2 sections (97% reduction)
- Properly categorized content by purpose:
  - Conceptual: 63 → 138 (+119%) - foundations, "why this matters"
  - Practical: 3 → 46 (+1433%) - how-to guides, examples
  - Technical: 111 → 50 (-55%) - true implementation details

**UI Improvements**:
- Reordered category display: Critical → Conceptual → Practical → Technical → Reference
- Changed Critical color from amber to red for better visual distinction
- All 22 documents recategorized (173 sections updated)

## 2. i18n Infrastructure (Phase 2)

**Backend**:
- DeepL API integration service with quota management and error handling
- Translation API routes (GET /api/documents/:slug?lang=de, POST /api/documents/:id/translate)
- Document model already supports translations field (no schema changes)

**Frontend**:
- docs-app.js enhanced with language detection and URL parameter support
- Automatic fallback to English when translation unavailable
- Integration with existing i18n-simple.js system

**Scripts**:
- translate-all-documents.js: Batch translation workflow (dry-run support)
- audit-section-categories.js: Category distribution analysis

**URL Strategy**: Query parameter approach (?lang=de, ?lang=fr)

**Status**: Backend complete, ready for DeepL API key configuration

**Files Modified**:
- Frontend: document-cards.js, docs-app.js
- Backend: documents.controller.js, documents.routes.js, DeepL.service.js
- Scripts: 3 new governance/i18n scripts

**Database**: 173 sections recategorized via script (already applied)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
TheFlow 2025-10-26 00:48:27 +13:00
parent be53ab36f8
commit 5e969bd4da
8 changed files with 1334 additions and 21 deletions

View file

@ -122,18 +122,26 @@ class DocumentCards {
*/
renderCardGrid(sectionsByCategory) {
const categoryConfig = {
conceptual: { icon: '📘', label: 'Conceptual', color: 'blue' },
practical: { icon: '✨', label: 'Practical', color: 'green' },
technical: { icon: '🔧', label: 'Technical', color: 'purple' },
reference: { icon: '📋', label: 'Reference', color: 'gray' },
critical: { icon: '⚠️', label: 'Critical', color: 'amber' }
critical: { icon: '⚠️', label: 'Critical', color: 'red', order: 1 },
conceptual: { icon: '📘', label: 'Conceptual', color: 'blue', order: 2 },
practical: { icon: '✨', label: 'Practical', color: 'green', order: 3 },
technical: { icon: '🔧', label: 'Technical', color: 'purple', order: 4 },
reference: { icon: '📋', label: 'Reference', color: 'gray', order: 5 }
};
let html = '<div class="card-grid-container">';
// Render categories in priority order (critical first)
const orderedCategories = Object.entries(sectionsByCategory)
.filter(([category, sections]) => sections.length > 0)
.sort((a, b) => {
const orderA = categoryConfig[a[0]]?.order || 999;
const orderB = categoryConfig[b[0]]?.order || 999;
return orderA - orderB;
});
// Render each category that has sections
for (const [category, sections] of Object.entries(sectionsByCategory)) {
if (sections.length === 0) continue;
for (const [category, sections] of orderedCategories) {
const config = categoryConfig[category];
@ -169,27 +177,27 @@ class DocumentCards {
const levelLabel = section.technicalLevel.charAt(0).toUpperCase() + section.technicalLevel.slice(1);
const borderColor = {
red: 'border-red-500',
blue: 'border-blue-400',
green: 'border-green-400',
purple: 'border-purple-400',
gray: 'border-gray-400',
amber: 'border-amber-400'
gray: 'border-gray-400'
}[color] || 'border-blue-400';
const hoverColor = {
red: 'hover:border-red-700 hover:shadow-red-100',
blue: 'hover:border-blue-600 hover:shadow-blue-100',
green: 'hover:border-green-600 hover:shadow-green-100',
purple: 'hover:border-purple-600 hover:shadow-purple-100',
gray: 'hover:border-gray-600 hover:shadow-gray-100',
amber: 'hover:border-amber-600 hover:shadow-amber-100'
gray: 'hover:border-gray-600 hover:shadow-gray-100'
}[color] || 'hover:border-blue-600';
const bgColor = {
red: 'bg-red-50',
blue: 'bg-blue-50',
green: 'bg-green-50',
purple: 'bg-purple-50',
gray: 'bg-gray-50',
amber: 'bg-amber-50'
gray: 'bg-gray-50'
}[color] || 'bg-blue-50';
return `

View file

@ -1,12 +1,53 @@
let documents = [];
let currentDocument = null;
let documentCards = null;
let currentLanguage = 'en'; // Default language
// Initialize card-based viewer
if (typeof DocumentCards !== 'undefined') {
documentCards = new DocumentCards('document-content');
}
/**
 * Work out which language the viewer should use.
 *
 * The i18n system's current language wins when it is set; otherwise the
 * `lang` query parameter of the page URL is used, and 'en' is the final
 * fallback.
 *
 * @returns {string} Language code.
 */
function detectLanguage() {
  const i18nLanguage = window.I18n ? window.I18n.currentLang : undefined;
  if (i18nLanguage) {
    return i18nLanguage;
  }

  // No language from the i18n system: look at ?lang=… instead
  const queryLanguage = new URLSearchParams(window.location.search).get('lang');
  return queryLanguage ? queryLanguage : 'en';
}
/**
 * Mirror the open document and its language in the address bar.
 *
 * `doc` is always set to the slug. `lang` is only kept for non-English
 * languages and is removed otherwise. A new history entry is pushed.
 *
 * @param {string} slug - Slug of the document being shown.
 * @param {string} [lang] - Language code of the document being shown.
 */
function updateURL(slug, lang) {
  const target = new URL(window.location);
  const params = target.searchParams;

  params.set('doc', slug);

  const isDefaultLanguage = !lang || lang === 'en';
  if (isDefaultLanguage) {
    params.delete('lang');
  } else {
    params.set('lang', lang);
  }

  window.history.pushState({}, '', target);
}
// Keep the viewer in sync with the i18n system (only where `window` exists)
if (typeof window !== 'undefined') {
  // A language switch updates the tracked language and, when a document is
  // open, requests that same document again in the new language.
  window.addEventListener('languageChanged', (event) => {
    const { language } = event.detail;
    currentLanguage = language;

    if (!currentDocument) {
      return;
    }
    loadDocument(currentDocument.slug, language);
  });

  // Record the language reported when the i18n system finishes initializing.
  window.addEventListener('i18nInitialized', (event) => {
    currentLanguage = event.detail.language;
  });
}
// Document categorization - Final 5 categories (curated for public docs)
const CATEGORIES = {
'getting-started': {
@ -355,10 +396,13 @@ async function loadDocuments() {
// Load specific document
let isLoading = false;
async function loadDocument(slug) {
async function loadDocument(slug, lang = null) {
// Prevent multiple simultaneous loads
if (isLoading) return;
// Use provided lang or detect from i18n system
const language = lang || detectLanguage();
try {
isLoading = true;
@ -374,14 +418,47 @@ async function loadDocument(slug) {
</div>
`;
const response = await fetch(`/api/documents/${slug}`);
// Build API URL with language parameter
const apiUrl = language && language !== 'en'
? `/api/documents/${slug}?lang=${language}`
: `/api/documents/${slug}`;
const response = await fetch(apiUrl);
const data = await response.json();
if (!data.success) {
throw new Error(data.error || 'Failed to load document');
}
// If translation not available, fall back to English
if (response.status === 404 && language !== 'en') {
console.warn(`Translation not available for ${language}, falling back to English`);
const enResponse = await fetch(`/api/documents/${slug}`);
const enData = await enResponse.json();
currentDocument = data.document;
if (enData.success) {
// Show notification that translation isn't available
showTranslationFallbackNotice(language);
const fallbackData = enData;
fallbackData.document.language = 'en';
fallbackData.document.fallback = true;
// Use English version
currentDocument = fallbackData.document;
currentLanguage = 'en';
updateURL(slug, 'en');
// Continue with rendering
data.success = true;
data.document = fallbackData.document;
} else {
throw new Error(data.error || 'Failed to load document');
}
} else {
throw new Error(data.error || 'Failed to load document');
}
} else {
currentDocument = data.document;
currentLanguage = language;
updateURL(slug, language);
}
// Update active state
document.querySelectorAll('.doc-link').forEach(el => {

View file

@ -0,0 +1,66 @@
#!/usr/bin/env node
const { MongoClient } = require('mongodb');
/**
 * Section category audit.
 *
 * Reads every public document from the local `tractatus_dev` database and
 * prints, per document and overall, how many sections fall into each
 * category. Sections without a category are counted as "uncategorized".
 * The script only reads; it never writes to the database.
 */
(async () => {
  const client = await MongoClient.connect('mongodb://localhost:27017/tractatus_dev');

  try {
    const db = client.db();
    const docs = await db.collection('documents')
      .find({ visibility: 'public' }, { projection: { title: 1, slug: 1, sections: 1 } })
      .sort({ order: 1 })
      .toArray();

    console.log('═══════════════════════════════════════════════════════════');
    // Report the number of documents actually found instead of a fixed count
    console.log(` SECTION CATEGORY AUDIT - ${docs.length} Public Documents`);
    console.log('═══════════════════════════════════════════════════════════\n');

    let totalDocs = 0;
    let docsWithSections = 0;
    const categoryStats = {};

    docs.forEach(doc => {
      totalDocs++;

      if (!doc.sections || doc.sections.length === 0) {
        console.log(`${doc.title}:`);
        console.log(' ⚠️ NO SECTIONS (traditional view)\n');
        return;
      }

      docsWithSections++;

      // Tally this document's sections and feed the global totals as we go
      const categories = {};
      doc.sections.forEach(s => {
        const cat = s.category || 'uncategorized';
        categories[cat] = (categories[cat] || 0) + 1;
        categoryStats[cat] = (categoryStats[cat] || 0) + 1;
      });

      console.log(`${doc.title}:`);
      console.log(` Sections: ${doc.sections.length}`);
      // Most frequent category first
      Object.entries(categories).sort((a, b) => b[1] - a[1]).forEach(([cat, count]) => {
        const percent = Math.round(count / doc.sections.length * 100);
        console.log(` - ${cat}: ${count} (${percent}%)`);
      });
      console.log('');
    });

    console.log('═══════════════════════════════════════════════════════════');
    console.log(' OVERALL STATISTICS');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`Total documents: ${totalDocs}`);
    console.log(`Documents with sections: ${docsWithSections}`);
    console.log(`Documents without sections: ${totalDocs - docsWithSections}\n`);

    console.log('Category distribution across ALL sections:');
    const sortedStats = Object.entries(categoryStats).sort((a, b) => b[1] - a[1]);
    const totalSections = sortedStats.reduce((sum, [, count]) => sum + count, 0);
    sortedStats.forEach(([cat, count]) => {
      const percent = Math.round(count / totalSections * 100);
      console.log(` - ${cat}: ${count} sections (${percent}%)`);
    });
    console.log('\n');
  } finally {
    // Release the connection even when the query or the report fails
    await client.close();
  }
})().catch(err => {
  // Without this handler a failure would surface as an unhandled rejection
  console.error('\n❌ Fatal error:', err.message);
  process.exitCode = 1;
});

358
scripts/recategorize-sections.js Executable file
View file

@ -0,0 +1,358 @@
#!/usr/bin/env node
/**
* Smart Section Recategorization Script
*
* Analyzes section content and intelligently reassigns categories based on:
* - Section titles
* - Excerpts
* - Position in document
* - Content keywords
*
* Categories:
* - critical: Warnings, blockers, security issues, mandatory prerequisites
* - conceptual: Foundational ideas, "why this matters", core principles
* - practical: How-to guides, examples, step-by-step instructions
* - technical: Architecture, implementation details, code examples
* - reference: Appendices, glossaries, further reading, contact info
*
* Usage:
* node scripts/recategorize-sections.js --dry-run # Preview changes
* node scripts/recategorize-sections.js # Apply changes
* node scripts/recategorize-sections.js --doc=slug # Single document
*/
const { MongoClient } = require('mongodb');
// Command line flags: --dry-run previews only, --doc=<slug> limits the run
const args = process.argv.slice(2);
const dryRun = args.some(arg => arg === '--dry-run');
const docFlag = args.find(arg => arg.startsWith('--doc='));
const specificDoc = docFlag?.split('=')[1];
// Statistics
// Run-wide counters filled in by main() and used for the final summary.
const stats = {
totalDocuments: 0, // number of documents returned by the query
totalSections: 0, // sections seen across all processed documents
changed: 0, // sections whose computed category differs from the stored one
unchanged: 0, // sections whose category stays the same
// Per-category section counts: `before` uses the stored category,
// `after` uses the newly computed one.
byCategory: {
critical: { before: 0, after: 0 },
conceptual: { before: 0, after: 0 },
practical: { before: 0, after: 0 },
technical: { before: 0, after: 0 },
reference: { before: 0, after: 0 }
},
// One { document, section, from, to } record per changed section
changes: []
};
/**
 * Categorization rules based on content analysis
 *
 * Per category:
 * - keywords: lower-case substrings looked up in the section title and excerpt
 * - titlePatterns: regular expressions tested against the lower-cased title
 * - exclude (optional): phrases that disqualify the category when they occur
 *   in the title or excerpt
 */
const RULES = {
  critical: {
    keywords: [
      'security', 'warning', 'caution', 'danger', 'breaking change',
      'must read first', 'before you begin', 'important notice',
      'critical prerequisite', 'blockers', 'requirements'
    ],
    titlePatterns: [
      /^(security|warning|caution|critical)/i,
      /breaking changes?/i,
      /requirements?$/i,
      /^before (you|starting)/i
    ],
    // Reserve critical for ACTUAL warnings, not "why this matters"
    exclude: [
      'why this matters', 'who should', 'invitation', 'bottom line',
      'key finding', 'introduction'
    ]
  },
  conceptual: {
    keywords: [
      'understanding', 'concept', 'principle', 'theory', 'foundation',
      'why', 'what is', 'introduction', 'overview', 'core idea',
      'key finding', 'philosophy', 'paradigm', 'mental model',
      'thinking', 'perspective'
    ],
    titlePatterns: [
      /^(understanding|why|what is|introduction|overview)/i,
      /concept(s)?$/i,
      /principle(s)?$/i,
      /foundation(s)?$/i,
      /key (finding|idea)/i,
      /bottom line/i,
      /who should/i
    ]
  },
  practical: {
    keywords: [
      'guide', 'example', 'step', 'how to', 'tutorial', 'walkthrough',
      'use case', 'scenario', 'getting started', 'quick start',
      'implementation guide', 'hands-on', 'practical', 'workflow'
    ],
    titlePatterns: [
      /^(how to|getting started|quick start|guide)/i,
      /step[- ]by[- ]step/i,
      /example(s)?$/i,
      /use case(s)?$/i,
      /walkthrough/i,
      /workflow/i
    ]
  },
  technical: {
    keywords: [
      'architecture', 'implementation', 'api', 'code', 'technical',
      'development', 'engineering', 'system', 'design pattern',
      'algorithm', 'data structure', 'performance', 'optimization'
    ],
    titlePatterns: [
      /^(architecture|technical|implementation|api|code)/i,
      /design$/i,
      /specification$/i,
      /^system/i,
      /performance/i,
      /optimization/i
    ]
  },
  reference: {
    keywords: [
      'reference', 'appendix', 'glossary', 'contact', 'resources',
      'further reading', 'bibliography', 'links', 'related work',
      'acknowledgment', 'citation'
    ],
    titlePatterns: [
      /^(reference|appendix|glossary|contact|resources)/i,
      /further reading/i,
      /related (work|resources)/i,
      /^(acknowledgment|citation)/i
    ]
  }
};

/**
 * Analyze section and determine best category
 *
 * Each category collects a score: +50 for a matching title pattern, +20 per
 * keyword found in the title and +5 per keyword found in the excerpt. The
 * score is then nudged by the section's position and by hints in the
 * document title. The highest score wins; on a tie the category listed
 * first (critical, conceptual, practical, technical, reference) is kept.
 * When even the best score is below 10 the section defaults to 'conceptual'.
 *
 * @param {{title?: string, excerpt?: string}} section - Section to classify.
 * @param {string} [docTitle] - Title of the owning document; may be missing.
 * @param {number} sectionIndex - Zero-based position of the section.
 * @param {number} totalSections - Number of sections in the document.
 * @returns {string} One of the keys of RULES.
 */
function categorizeSection(section, docTitle, sectionIndex, totalSections) {
  const title = (section.title || '').toLowerCase();
  const excerpt = (section.excerpt || '').toLowerCase();
  const combined = `${title} ${excerpt}`;

  const scores = {
    critical: 0,
    conceptual: 0,
    practical: 0,
    technical: 0,
    reference: 0
  };

  // Score each category based on rules
  for (const [category, rules] of Object.entries(RULES)) {
    // An exclude phrase rules the category out before anything else is
    // scored (only "critical" declares an exclude list)
    if (rules.exclude) {
      const hasExclude = rules.exclude.some(phrase =>
        combined.includes(phrase.toLowerCase())
      );
      if (hasExclude) {
        scores[category] = -100; // Strong penalty
        continue;
      }
    }

    // Check title patterns (strong signal)
    if (rules.titlePatterns && rules.titlePatterns.some(pattern => pattern.test(title))) {
      scores[category] += 50;
    }

    // Check keywords in title (medium signal)
    const titleKeywords = rules.keywords.filter(kw => title.includes(kw));
    scores[category] += titleKeywords.length * 20;

    // Check keywords in excerpt (weak signal)
    const excerptKeywords = rules.keywords.filter(kw => excerpt.includes(kw));
    scores[category] += excerptKeywords.length * 5;
  }

  // Position-based adjustments
  if (sectionIndex === 0) {
    // First section usually conceptual or critical prerequisite
    if (title.includes('introduction') || title.includes('overview')) {
      scores.conceptual += 30;
    }
  } else if (sectionIndex === totalSections - 1) {
    // Last section often reference
    scores.reference += 10;
  }

  // Document context adjustments. A document without a title simply gets
  // no context bonus instead of crashing the whole run.
  const docTitleLower = (docTitle || '').toLowerCase();
  if (docTitleLower.includes('case study') || docTitleLower.includes('incident')) {
    // Case studies are usually conceptual, not critical
    scores.conceptual += 20;
    scores.critical -= 30;
  }
  if (docTitleLower.includes('implementation') || docTitleLower.includes('guide')) {
    scores.practical += 15;
  }
  if (docTitleLower.includes('api') || docTitleLower.includes('technical')) {
    scores.technical += 15;
  }

  // Find category with highest score (the sort is stable, so ties keep the
  // declaration order of `scores`)
  const [bestCategory, bestScore] = Object.entries(scores).sort((a, b) => b[1] - a[1])[0];

  // If all scores are very low, default to conceptual
  if (bestScore < 10) {
    return 'conceptual';
  }
  return bestCategory;
}
/**
 * Main function
 *
 * Loads the public documents (or only the one named by --doc), recomputes
 * every section's category with categorizeSection(), logs each change and,
 * unless --dry-run was given, writes the new categories back to MongoDB.
 * A before/after summary is printed at the end. The database connection is
 * closed whether the run succeeds or fails.
 */
async function main() {
  // Percentage that tolerates an empty denominator (e.g. no sections found)
  const percentOf = (part, whole) => (whole > 0 ? Math.round((part / whole) * 100) : 0);

  // Stored categories are not guaranteed to be one of the five known ones
  // (legacy or hand-edited values); give those their own bucket instead of
  // failing on an undefined counter.
  const bucketFor = (category) => {
    if (!stats.byCategory[category]) {
      stats.byCategory[category] = { before: 0, after: 0 };
    }
    return stats.byCategory[category];
  };

  console.log('═══════════════════════════════════════════════════════════');
  console.log(' SECTION RECATEGORIZATION');
  console.log('═══════════════════════════════════════════════════════════\n');

  if (dryRun) {
    console.log('🔍 DRY RUN MODE - No changes will be saved\n');
  }
  if (specificDoc) {
    console.log(`📄 Processing single document: ${specificDoc}\n`);
  }

  // Connect to MongoDB
  console.log('📡 Connecting to MongoDB...');
  const client = await MongoClient.connect('mongodb://localhost:27017/tractatus_dev');

  try {
    const db = client.db();
    const collection = db.collection('documents');

    // Fetch documents
    const filter = { visibility: 'public' };
    if (specificDoc) {
      filter.slug = specificDoc;
    }
    const docs = await collection.find(filter).sort({ order: 1 }).toArray();
    console.log(`✓ Found ${docs.length} document(s)\n`);
    stats.totalDocuments = docs.length;

    // Process each document
    for (const doc of docs) {
      if (!doc.sections || doc.sections.length === 0) {
        console.log(`${doc.title}: No sections (skipping)\n`);
        continue;
      }

      console.log(`\n${'='.repeat(70)}`);
      console.log(`${doc.title}`);
      console.log(`${'='.repeat(70)}\n`);

      const updates = [];
      stats.totalSections += doc.sections.length;

      doc.sections.forEach((section, index) => {
        // Sections without a stored category are treated as conceptual
        const oldCategory = section.category || 'conceptual';
        const newCategory = categorizeSection(section, doc.title, index, doc.sections.length);

        bucketFor(oldCategory).before++;
        bucketFor(newCategory).after++;

        if (oldCategory === newCategory) {
          stats.unchanged++;
          return;
        }

        stats.changed++;
        console.log(`[${index + 1}/${doc.sections.length}] ${section.title}`);
        console.log(` ${oldCategory} → ${newCategory}`);
        updates.push({
          sectionIndex: index,
          oldCategory,
          newCategory,
          title: section.title
        });
        stats.changes.push({
          document: doc.title,
          section: section.title,
          from: oldCategory,
          to: newCategory
        });
      });

      // Apply updates if not dry run
      if (!dryRun && updates.length > 0) {
        // All changes belong to the same document, so a single $set that
        // addresses each section by its array index is enough.
        const categoryFields = {};
        for (const update of updates) {
          categoryFields[`sections.${update.sectionIndex}.category`] = update.newCategory;
        }
        await collection.updateOne({ _id: doc._id }, { $set: categoryFields });
        console.log(`\n✓ Applied ${updates.length} changes to database`);
      } else if (updates.length > 0) {
        console.log(`\n🔍 Would apply ${updates.length} changes (dry-run)`);
      } else {
        console.log(`\n✓ No changes needed`);
      }
    }

    // Summary
    console.log('\n\n═══════════════════════════════════════════════════════════');
    console.log(' RECATEGORIZATION SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`Documents processed: ${stats.totalDocuments}`);
    console.log(`Total sections: ${stats.totalSections}`);
    console.log(`Changed: ${stats.changed} (${percentOf(stats.changed, stats.totalSections)}%)`);
    console.log(`Unchanged: ${stats.unchanged} (${percentOf(stats.unchanged, stats.totalSections)}%)\n`);

    console.log('Category changes:');
    for (const [category, counts] of Object.entries(stats.byCategory)) {
      const change = counts.after - counts.before;
      const changeStr = change > 0 ? `+${change}` : change.toString();
      const changePercent = percentOf(change, counts.before);
      console.log(` ${category}: ${counts.before} → ${counts.after} (${changeStr}, ${changePercent > 0 ? '+' : ''}${changePercent}%)`);
    }

    if (dryRun) {
      console.log('\n🔍 DRY RUN COMPLETE - No changes saved');
      console.log(' Run without --dry-run to apply changes\n');
    } else {
      console.log('\n✅ RECATEGORIZATION COMPLETE\n');
    }
  } finally {
    // Always release the connection, including when a query or write fails
    await client.close();
  }
}
// Run
main().catch(err => {
console.error('\n❌ Fatal error:', err.message);
console.error(err.stack);
process.exit(1);
});

View file

@ -0,0 +1,239 @@
#!/usr/bin/env node
/**
* Batch Translation Script
*
* Translates all public documents to German (DE) and French (FR) using DeepL API
*
* Usage:
* node scripts/translate-all-documents.js [options]
*
* Options:
* --lang=de,fr Target languages (comma-separated, default: de,fr)
* --force Overwrite existing translations
* --dry-run Preview what would be translated without executing
* --limit=N Limit to N documents (for testing)
* --slug=document-slug Translate only specific document
*
* Examples:
* node scripts/translate-all-documents.js --dry-run
* node scripts/translate-all-documents.js --lang=de --limit=5
* node scripts/translate-all-documents.js --slug=getting-started --force
*
* Requirements:
* - DEEPL_API_KEY environment variable must be set
* - MongoDB running on localhost:27017
*/
require('dotenv').config();
const mongoose = require('mongoose');
const Document = require('../src/models/Document.model');
const deeplService = require('../src/services/DeepL.service');
// Parse command line arguments

/**
 * Build the run options from raw command line arguments.
 *
 * Recognised flags: --lang=a,b  --force  --dry-run  --limit=N  --slug=value.
 * Unknown arguments are ignored.
 *
 * - Language codes are trimmed, lower-cased and de-duplicated; empty entries
 *   are dropped. If nothing usable remains the default (de, fr) is kept.
 * - --limit must be a positive integer; anything else leaves the limit unset.
 * - --slug keeps everything after the first '='.
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {{targetLangs: string[], force: boolean, dryRun: boolean,
 *            limit: (number|null), slug: (string|null)}} Parsed options.
 */
function parseOptions(argv) {
  const parsed = {
    targetLangs: ['de', 'fr'],
    force: false,
    dryRun: false,
    limit: null,
    slug: null
  };

  for (const arg of argv) {
    if (arg.startsWith('--lang=')) {
      const langs = arg
        .slice('--lang='.length)
        .split(',')
        .map(code => code.trim().toLowerCase())
        .filter(Boolean);
      if (langs.length > 0) {
        parsed.targetLangs = [...new Set(langs)];
      }
    } else if (arg === '--force') {
      parsed.force = true;
    } else if (arg === '--dry-run') {
      parsed.dryRun = true;
    } else if (arg.startsWith('--limit=')) {
      const limit = Number.parseInt(arg.slice('--limit='.length), 10);
      parsed.limit = Number.isInteger(limit) && limit > 0 ? limit : null;
    } else if (arg.startsWith('--slug=')) {
      parsed.slug = arg.slice('--slug='.length) || null;
    }
  }

  return parsed;
}

const args = process.argv.slice(2);
const options = parseOptions(args);
// Statistics
// Counters for the summary printed at the end of main(). `translated`,
// `skipped` and `failed` count document/language pairs, not documents.
const stats = {
total: 0, // number of documents selected for this run
translated: 0, // translations created and stored
skipped: 0, // translation already existed and --force was not given
failed: 0, // translation or database update threw
errors: [] // one { document, language, error } record per failure
};
/**
 * Run the batch translation: check the DeepL service, load the selected
 * documents, translate each one into every target language and store the
 * result under `translations.<lang>`.
 *
 * Existing translations are skipped unless --force is given. With --dry-run
 * nothing is translated or written. The run stops completely as soon as
 * DeepL reports an exhausted quota. The process exits with code 1 when at
 * least one translation failed, otherwise 0.
 */
async function main() {
  console.log('═══════════════════════════════════════════════════════════');
  console.log(' BATCH DOCUMENT TRANSLATION');
  console.log('═══════════════════════════════════════════════════════════\n');

  // 1. Check DeepL service availability
  if (!deeplService.isAvailable()) {
    console.error('❌ ERROR: DeepL API key not configured');
    console.error(' Set DEEPL_API_KEY environment variable\n');
    process.exit(1);
  }
  console.log('✓ DeepL service available');

  // 2. Show usage statistics
  try {
    const usage = await deeplService.getUsage();
    console.log(`✓ DeepL quota: ${usage.character_count.toLocaleString()} / ${usage.character_limit.toLocaleString()} chars (${usage.percentage_used}% used)\n`);
  } catch (error) {
    console.warn(`⚠ Could not fetch DeepL usage: ${error.message}\n`);
  }

  // 3. Connect to MongoDB
  console.log('📡 Connecting to MongoDB...');
  await mongoose.connect('mongodb://localhost:27017/tractatus_dev', {
    serverSelectionTimeoutMS: 5000
  });
  console.log('✓ Connected to tractatus_dev\n');

  // 4. Fetch documents to translate
  console.log('📚 Fetching documents...');
  let documents;
  if (options.slug) {
    const doc = await Document.findBySlug(options.slug);
    documents = doc ? [doc] : [];
    if (doc) {
      console.log(`✓ Found document: ${doc.title}\n`);
    } else {
      console.log(`⚠ Document not found: ${options.slug}\n`);
    }
  } else {
    const filter = { visibility: 'public' };
    documents = await Document.list({
      filter,
      limit: options.limit || 1000,
      sort: { order: 1, 'metadata.date_created': -1 }
    });
    console.log(`✓ Found ${documents.length} public documents\n`);
  }

  if (documents.length === 0) {
    console.log('No documents to translate.\n');
    await mongoose.disconnect();
    process.exit(0);
  }
  stats.total = documents.length;

  // 5. Show translation plan
  console.log('Translation Plan:');
  console.log(` Languages: ${options.targetLangs.join(', ')}`);
  console.log(` Documents: ${documents.length}`);
  console.log(` Force overwrite: ${options.force ? 'Yes' : 'No'}`);
  console.log(` Dry run: ${options.dryRun ? 'Yes' : 'No'}\n`);

  if (options.dryRun) {
    console.log('═══════════════════════════════════════════════════════════');
    console.log(' DRY RUN - Preview Only');
    console.log('═══════════════════════════════════════════════════════════\n');
  }

  // 6. Translate each document
  const lastLang = options.targetLangs[options.targetLangs.length - 1];
  // Set once DeepL reports an exhausted quota; ends the outer loop as well,
  // so the remaining documents do not each trigger another failing call.
  let quotaExceeded = false;

  for (let i = 0; i < documents.length && !quotaExceeded; i++) {
    const doc = documents[i];
    const progress = `[${i + 1}/${documents.length}]`;
    console.log(`${progress} ${doc.title}`);
    console.log(` Slug: ${doc.slug}`);

    for (const lang of options.targetLangs) {
      const langUpper = lang.toUpperCase();

      // Check if translation exists
      const hasTranslation = doc.translations && doc.translations[lang];
      if (hasTranslation && !options.force) {
        console.log(` ${langUpper}: ⏭ Skipped (exists, use --force to overwrite)`);
        stats.skipped++;
        continue;
      }

      if (options.dryRun) {
        console.log(` ${langUpper}: 🔍 Would translate (${hasTranslation ? 'overwrite' : 'new'})`);
        continue;
      }

      // Perform translation
      try {
        console.log(` ${langUpper}: 🔄 Translating...`);
        const translation = await deeplService.translateDocument(doc, lang);

        // Update document
        await Document.update(doc._id.toString(), {
          [`translations.${lang}`]: translation
        });
        console.log(` ${langUpper}: ✓ Complete`);
        stats.translated++;
      } catch (error) {
        console.error(` ${langUpper}: ❌ Failed - ${error.message}`);
        stats.failed++;
        stats.errors.push({
          document: doc.slug,
          language: lang,
          error: error.message
        });

        // If quota exceeded, stop
        if (error.message.includes('quota')) {
          console.error('\n❌ DeepL quota exceeded. Stopping.\n');
          quotaExceeded = true;
          break;
        }
      }

      // Rate limiting: Wait 1 second between translations (not after the
      // very last one)
      if (i < documents.length - 1 || lang !== lastLang) {
        await new Promise(resolve => setTimeout(resolve, 1000));
      }
    }
    console.log('');
  }

  // 7. Summary
  console.log('═══════════════════════════════════════════════════════════');
  console.log(' TRANSLATION SUMMARY');
  console.log('═══════════════════════════════════════════════════════════\n');

  if (options.dryRun) {
    console.log(' Dry run complete - no translations were performed\n');
  } else {
    console.log(` Documents processed: ${stats.total}`);
    console.log(` Translations created: ${stats.translated}`);
    console.log(` Skipped (existing): ${stats.skipped}`);
    console.log(` Failed: ${stats.failed}\n`);

    if (stats.errors.length > 0) {
      console.log(' Errors:');
      stats.errors.forEach(err => {
        console.log(` - ${err.document} (${err.language}): ${err.error}`);
      });
      console.log('');
    }

    // Show final usage (best effort: a failure here must not fail the run)
    try {
      const usage = await deeplService.getUsage();
      console.log(` DeepL usage: ${usage.character_count.toLocaleString()} / ${usage.character_limit.toLocaleString()} chars (${usage.percentage_used}% used)\n`);
    } catch (error) {
      console.warn(`⚠ Could not fetch DeepL usage: ${error.message}\n`);
    }
  }

  await mongoose.disconnect();
  console.log('✓ Database disconnected\n');
  process.exit(stats.failed > 0 ? 1 : 0);
}
// Run
main().catch(err => {
console.error('\n❌ Fatal error:', err.message);
console.error(err.stack);
process.exit(1);
});

View file

@ -74,11 +74,14 @@ async function listDocuments(req, res) {
/**
* Get document by ID or slug
* GET /api/documents/:identifier
* GET /api/documents/:identifier?lang=de
*
* Supports i18n: Returns translated version if lang parameter provided
*/
async function getDocument(req, res) {
try {
const { identifier } = req.params;
const { lang } = req.query; // en, de, fr
// Try to find by ID first, then by slug
let document;
@ -95,9 +98,53 @@ async function getDocument(req, res) {
});
}
// If language parameter provided and not English, return translated version
if (lang && lang !== 'en') {
const supportedLangs = ['de', 'fr'];
if (!supportedLangs.includes(lang)) {
return res.status(400).json({
error: 'Bad Request',
message: `Unsupported language: ${lang}. Supported: ${supportedLangs.join(', ')}`
});
}
// Check if translation exists
if (document.translations && document.translations[lang]) {
const translation = document.translations[lang];
// Return document with translated fields
const translatedDoc = {
...document,
title: translation.title || document.title,
content_html: translation.content_html || document.content_html,
content_markdown: translation.content_markdown || document.content_markdown,
toc: translation.toc || document.toc,
language: lang,
translation_metadata: translation.metadata
};
return res.json({
success: true,
document: translatedDoc
});
} else {
// Translation not available
return res.status(404).json({
error: 'Not Found',
message: `Translation not available for language: ${lang}`,
available_languages: Object.keys(document.translations || {})
});
}
}
// Default: Return English version
res.json({
success: true,
document
document: {
...document,
language: 'en'
}
});
} catch (error) {
@ -466,6 +513,214 @@ async function listDraftDocuments(req, res) {
}
}
/**
 * Translate a document using DeepL (admin only)
 * POST /api/documents/:id/translate
 *
 * Body: { targetLang: 'de' | 'fr', force: false }
 *
 * Responses:
 * - 200 translation stored under translations.<targetLang>
 * - 400 unsupported target language
 * - 404 document not found
 * - 409 translation already exists and `force` is not set
 * - 503 DeepL not configured, or the failure message mentions DeepL
 * - 500 any other failure
 */
async function translateDocument(req, res) {
  try {
    const { id } = req.params;
    const { targetLang, force: overwrite = false } = req.body;

    // Validate target language
    const supportedLangs = ['de', 'fr'];
    const isSupported = supportedLangs.includes(targetLang);
    if (!isSupported) {
      return res.status(400).json({
        error: 'Bad Request',
        message: `Unsupported target language: ${targetLang}. Supported: ${supportedLangs.join(', ')}`
      });
    }

    // Get document
    const document = await Document.findById(id);
    if (!document) {
      return res.status(404).json({
        error: 'Not Found',
        message: 'Document not found'
      });
    }

    // Refuse to overwrite an existing translation unless asked to
    const existing = document.translations ? document.translations[targetLang] : undefined;
    if (existing && !overwrite) {
      return res.status(409).json({
        error: 'Conflict',
        message: `Translation already exists for ${targetLang}. Use force: true to overwrite.`,
        existing_translation: existing.metadata
      });
    }

    // The DeepL service is loaded only once it is actually needed
    const deeplService = require('../services/DeepL.service');
    if (!deeplService.isAvailable()) {
      return res.status(503).json({
        error: 'Service Unavailable',
        message: 'DeepL API is not configured. Set DEEPL_API_KEY environment variable.'
      });
    }

    // Perform translation
    const actor = req.user?.email || 'admin';
    logger.info(`Starting translation of document ${id} to ${targetLang} by ${actor}`);
    const translation = await deeplService.translateDocument(document, targetLang);

    // Store the result under translations.<targetLang>
    await Document.update(id, { [`translations.${targetLang}`]: translation });
    logger.info(`Translation complete: ${id} to ${targetLang}`);

    const { title, metadata } = translation;
    res.json({
      success: true,
      message: `Document translated to ${targetLang} successfully`,
      translation: {
        language: targetLang,
        title,
        metadata
      }
    });
  } catch (error) {
    logger.error('Translate document error:', error);

    // Failures whose message mentions DeepL are reported as 503
    const isDeepLFailure = error.message.includes('DeepL');
    if (isDeepLFailure) {
      return res.status(503).json({
        error: 'Service Unavailable',
        message: error.message
      });
    }

    res.status(500).json({
      error: 'Internal Server Error',
      message: 'Translation failed',
      details: error.message
    });
  }
}
/**
 * Get available translations for a document
 * GET /api/documents/:identifier/translations
 *
 * `identifier` is treated as an ID when it is a 24-character hex string and
 * as a slug otherwise. The response always contains an `en` entry for the
 * original, plus one entry per stored translation.
 *
 * Responses: 200 with the translation map, 404 when the document does not
 * exist, 500 on unexpected errors.
 */
async function getTranslations(req, res) {
  try {
    const { identifier } = req.params;

    // Try to find by ID first, then by slug
    let document;
    if (identifier.match(/^[0-9a-fA-F]{24}$/)) {
      document = await Document.findById(identifier);
    } else {
      document = await Document.findBySlug(identifier);
    }

    if (!document) {
      return res.status(404).json({
        error: 'Not Found',
        message: 'Document not found'
      });
    }

    // Build list of available translations
    const translations = {
      en: {
        available: true,
        title: document.title,
        metadata: {
          original: true,
          version: document.metadata?.version || '1.0'
        }
      }
    };

    // Add translations if they exist
    for (const [lang, translation] of Object.entries(document.translations || {})) {
      // deleteTranslation stores null under translations.<lang>, so a key
      // can be present without a translation behind it. Skip those instead
      // of dereferencing null (which used to turn the request into a 500).
      if (!translation) {
        continue;
      }
      translations[lang] = {
        available: true,
        title: translation.title,
        metadata: translation.metadata
      };
    }

    res.json({
      success: true,
      document_slug: document.slug,
      document_title: document.title,
      translations
    });
  } catch (error) {
    logger.error('Get translations error:', error);
    res.status(500).json({
      error: 'Internal Server Error',
      message: 'An error occurred'
    });
  }
}
/**
 * Delete a translation (admin only)
 * DELETE /api/documents/:id/translations/:lang
 *
 * The translation is cleared by storing null under translations.<lang>.
 *
 * Responses: 200 on success, 400 when asked to delete 'en', 404 when the
 * document or the translation does not exist, 500 on unexpected errors.
 */
async function deleteTranslation(req, res) {
  try {
    const { id, lang } = req.params;

    // Validate language
    if (lang === 'en') {
      return res.status(400).json({
        error: 'Bad Request',
        message: 'Cannot delete original English version'
      });
    }

    // Get document
    const document = await Document.findById(id);
    if (!document) {
      return res.status(404).json({
        error: 'Not Found',
        message: 'Document not found'
      });
    }

    // Check if translation exists. Only the document's own keys count:
    // `lang` comes straight from the URL, and names such as "constructor"
    // or "toString" would otherwise resolve to inherited members, pass this
    // check and cause a junk field to be written below.
    const stored = document.translations;
    const hasTranslation = Boolean(stored)
      && Object.prototype.hasOwnProperty.call(stored, lang)
      && Boolean(stored[lang]);
    if (!hasTranslation) {
      return res.status(404).json({
        error: 'Not Found',
        message: `Translation not found for language: ${lang}`
      });
    }

    // Remove translation
    const updates = {
      [`translations.${lang}`]: null
    };
    await Document.update(id, updates);

    logger.info(`Translation deleted: ${id} (${lang}) by ${req.user?.email || 'admin'}`);

    res.json({
      success: true,
      message: `Translation for ${lang} deleted successfully`
    });
  } catch (error) {
    logger.error('Delete translation error:', error);
    res.status(500).json({
      error: 'Internal Server Error',
      message: 'An error occurred'
    });
  }
}
module.exports = {
listDocuments,
getDocument,
@ -476,5 +731,8 @@ module.exports = {
listArchivedDocuments,
publishDocument,
unpublishDocument,
listDraftDocuments
listDraftDocuments,
translateDocument,
getTranslations,
deleteTranslation
};

View file

@ -47,6 +47,11 @@ router.get('/', (req, res, next) => {
next();
}, asyncHandler(documentsController.listDocuments));
// GET /api/documents/:identifier/translations (public)
// Lists the languages a document is available in. No authentication or
// validation middleware is attached; the controller itself decides whether
// :identifier is an ID or a slug.
router.get('/:identifier/translations',
asyncHandler(documentsController.getTranslations)
);
// GET /api/documents/:identifier (ID or slug)
router.get('/:identifier',
asyncHandler(documentsController.getDocument)
@ -99,4 +104,23 @@ router.post('/:id/unpublish',
asyncHandler(documentsController.unpublishDocument)
);
// POST /api/documents/:id/translate (admin only)
// Translate document to target language using DeepL.
// Middleware runs in the order listed: authentication, admin role check,
// :id validation, then a required-field check for `targetLang` before the
// controller is reached.
router.post('/:id/translate',
authenticateToken,
requireRole('admin'),
validateObjectId('id'),
validateRequired(['targetLang']),
asyncHandler(documentsController.translateDocument)
);
// DELETE /api/documents/:id/translations/:lang (admin only)
// Delete a translation.
// Only :id is validated here; :lang is passed to the controller as-is.
router.delete('/:id/translations/:lang',
authenticateToken,
requireRole('admin'),
validateObjectId('id'),
asyncHandler(documentsController.deleteTranslation)
);
module.exports = router;

View file

@ -0,0 +1,283 @@
/**
* DeepL Translation Service
*
* Professional translation API for German and French translations
* Preserves markdown formatting and handles large documents
*
* API Docs: https://www.deepl.com/docs-api
*/
const axios = require('axios');
class DeepLService {
  /**
   * Reads the API key and base URL from the environment.
   *
   * - DEEPL_API_KEY: required for any translation call.
   * - DEEPL_API_URL: optional; defaults to the free-tier endpoint.
   *
   * A missing key only logs a warning here; the individual methods throw
   * when they are called without one.
   */
  constructor() {
    this.apiKey = process.env.DEEPL_API_KEY;
    this.apiUrl = process.env.DEEPL_API_URL || 'https://api-free.deepl.com/v2';
    if (!this.apiKey) {
      console.warn('[DeepL] API key not configured. Translation service disabled.');
    }
  }

  /**
   * Check if DeepL service is available
   *
   * @returns {boolean} true when an API key is configured
   */
  isAvailable() {
    return !!this.apiKey;
  }

  /**
   * Translate text to target language
   *
   * @param {string} text - Text to translate (supports markdown)
   * @param {string} targetLang - Target language code (DE, FR)
   * @param {object} options - Translation options
   * @param {string} [options.sourceLang='EN'] - Source language code
   * @param {boolean} [options.preserveFormatting=true]
   * @param {string} [options.formality='default'] - 'default', 'more', 'less'
   * @param {string} [options.tagHandling='html'] - 'html' or 'xml'
   * @returns {Promise<{text: string, detectedSourceLang: string}>}
   * @throws {Error} when the API key or target language is missing, or the
   *   request fails
   */
  async translate(text, targetLang, options = {}) {
    const [result] = await this.translateBatch([text], targetLang, options);
    return result;
  }

  /**
   * Translate several texts, preserving their order.
   *
   * Blank or non-string entries are not sent to the API; they come back as
   * an empty string. The remaining texts are sent in groups so that many
   * short strings (e.g. table-of-contents titles) cost one request per group
   * instead of one request each.
   *
   * @param {Array<string>} texts - Texts to translate
   * @param {string} targetLang - Target language code (DE, FR)
   * @param {object} options - Same options as `translate`
   * @returns {Promise<Array<{text: string, detectedSourceLang: string}>>}
   * @throws {Error} when the API key or target language is missing, or the
   *   request fails
   */
  async translateBatch(texts, targetLang, options = {}) {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }

    const {
      sourceLang = 'EN',
      preserveFormatting = true,
      formality = 'default', // 'default', 'more', 'less'
      tagHandling = 'html' // 'html' or 'xml' - preserves markup
    } = options;

    // Upper bound on texts per request; see the DeepL translate API docs.
    const maxTextsPerRequest = 50;

    const results = texts.map(() => ({ text: '', detectedSourceLang: sourceLang }));
    const pending = [];
    texts.forEach((text, index) => {
      if (typeof text === 'string' && text.trim().length > 0) {
        pending.push({ text, index });
      }
    });
    if (pending.length === 0) {
      return results;
    }

    // Checked only once there is something to send, so blank input keeps
    // returning an empty result regardless of the target language.
    if (typeof targetLang !== 'string' || targetLang.trim().length === 0) {
      throw new Error('DeepL target language is required');
    }

    for (let start = 0; start < pending.length; start += maxTextsPerRequest) {
      const group = pending.slice(start, start + maxTextsPerRequest);
      const translated = await this.requestTranslations(
        group.map((entry) => entry.text),
        { sourceLang, targetLang, preserveFormatting, formality, tagHandling }
      );
      group.forEach((entry, offset) => {
        results[entry.index] = {
          text: translated[offset].text,
          detectedSourceLang: translated[offset].detected_source_language
        };
      });
    }
    return results;
  }

  /**
   * Internal helper: perform one POST to the translate endpoint and map
   * failures to descriptive errors.
   *
   * @param {Array<string>} texts - Non-empty texts for a single request
   * @param {object} params - Resolved request parameters
   * @param {string} params.sourceLang
   * @param {string} params.targetLang
   * @param {boolean} params.preserveFormatting
   * @param {string} params.formality
   * @param {string} params.tagHandling
   * @returns {Promise<Array<{text: string, detected_source_language: string}>>}
   *   Raw translation entries, one per input text
   * @throws {Error} with the original error attached as `cause`
   */
  async requestTranslations(texts, params) {
    const { sourceLang, targetLang, preserveFormatting, formality, tagHandling } = params;
    try {
      const response = await axios.post(
        `${this.apiUrl}/translate`,
        {
          text: texts,
          source_lang: sourceLang,
          target_lang: targetLang.toUpperCase(),
          // The body is JSON, so this is sent as a real boolean rather than
          // the '1'/'0' strings used by form-encoded requests.
          preserve_formatting: Boolean(preserveFormatting),
          formality: formality,
          tag_handling: tagHandling
        },
        {
          headers: {
            'Authorization': `DeepL-Auth-Key ${this.apiKey}`,
            'Content-Type': 'application/json'
          },
          timeout: 30000 // 30 second timeout
        }
      );

      const translations = response.data?.translations;
      if (Array.isArray(translations) && translations.length >= texts.length) {
        return translations;
      }
      throw new Error('Invalid response from DeepL API');
    } catch (error) {
      if (error.response) {
        // DeepL API error
        const status = error.response.status;
        const message = error.response.data?.message || 'Unknown error';
        if (status === 403) {
          throw new Error('DeepL API authentication failed. Check API key.', { cause: error });
        } else if (status === 456) {
          throw new Error('DeepL quota exceeded. Upgrade plan or wait for reset.', { cause: error });
        } else if (status === 429) {
          throw new Error('DeepL rate limit exceeded. Please retry later.', { cause: error });
        } else {
          throw new Error(`DeepL API error (${status}): ${message}`, { cause: error });
        }
      }
      throw new Error(`Translation failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Translate markdown document while preserving structure
   *
   * The markdown is sent as a single text with tag_handling 'html'.
   * NOTE(review): code blocks are not protected from translation here.
   *
   * @param {string} markdown - Markdown content
   * @param {string} targetLang - Target language (DE, FR)
   * @returns {Promise<string>} Translated markdown
   */
  async translateMarkdown(markdown, targetLang) {
    if (!markdown || markdown.trim().length === 0) {
      return '';
    }
    const result = await this.translate(markdown, targetLang, {
      tagHandling: 'html',
      preserveFormatting: true
    });
    return result.text;
  }

  /**
   * Translate HTML content while preserving markup
   *
   * @param {string} html - HTML content
   * @param {string} targetLang - Target language (DE, FR)
   * @returns {Promise<string>} Translated HTML
   */
  async translateHTML(html, targetLang) {
    if (!html || html.trim().length === 0) {
      return '';
    }
    const result = await this.translate(html, targetLang, {
      tagHandling: 'html',
      preserveFormatting: true
    });
    return result.text;
  }

  /**
   * Translate a full document object
   * Translates: title, content_markdown, content_html, and toc
   *
   * @param {object} document - Document object
   * @param {string} targetLang - Target language (DE, FR)
   * @returns {Promise<object>} Translation object ready for storage
   * @throws {Error} when the API key is missing or any translation step fails
   */
  async translateDocument(document, targetLang) {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }
    console.log(`[DeepL] Translating document "${document.title}" to ${targetLang}...`);

    const translation = {
      title: '',
      content_markdown: '',
      content_html: '',
      toc: [],
      metadata: {
        translated_by: 'deepl',
        translated_at: new Date(),
        reviewed: false,
        source_version: document.metadata?.version || '1.0'
      }
    };

    try {
      // 1. Translate title
      const titleResult = await this.translate(document.title, targetLang);
      translation.title = titleResult.text;
      console.log(`[DeepL] ✓ Title translated`);

      // 2. Translate markdown content (preferred source)
      if (document.content_markdown) {
        translation.content_markdown = await this.translateMarkdown(
          document.content_markdown,
          targetLang
        );
        console.log(`[DeepL] ✓ Markdown translated (${translation.content_markdown.length} chars)`);
      }

      // 3. Translate HTML content
      if (document.content_html) {
        translation.content_html = await this.translateHTML(
          document.content_html,
          targetLang
        );
        console.log(`[DeepL] ✓ HTML translated (${translation.content_html.length} chars)`);
      }

      // 4. Translate table of contents. All titles go through one batched
      // call instead of one concurrent request per entry, which would
      // otherwise burst past the API rate limit on long documents.
      if (document.toc && document.toc.length > 0) {
        const translatedTitles = await this.translateBatch(
          document.toc.map((item) => item.title),
          targetLang
        );
        translation.toc = document.toc.map((item, index) => ({
          ...item,
          title: translatedTitles[index].text
        }));
        console.log(`[DeepL] ✓ ToC translated (${translation.toc.length} items)`);
      }

      console.log(`[DeepL] ✓ Document translation complete`);
      return translation;
    } catch (error) {
      console.error(`[DeepL] Translation failed:`, error.message);
      throw error;
    }
  }

  /**
   * Get usage statistics from DeepL API
   *
   * @returns {Promise<object>} Usage stats {character_count, character_limit,
   *   percentage_used}; percentage_used is a string with two decimals and is
   *   '0.00' when the reported limit is not a positive number
   * @throws {Error} when the API key is missing or the request fails
   */
  async getUsage() {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }
    try {
      const response = await axios.get(
        `${this.apiUrl}/usage`,
        {
          headers: {
            'Authorization': `DeepL-Auth-Key ${this.apiKey}`
          },
          timeout: 30000 // 30 second timeout
        }
      );
      const { character_count: used, character_limit: limit } = response.data;
      // Avoid reporting 'NaN'/'Infinity' when the limit is zero or missing.
      const percentage = limit > 0 ? (used / limit) * 100 : 0;
      return {
        character_count: used,
        character_limit: limit,
        percentage_used: percentage.toFixed(2)
      };
    } catch (error) {
      throw new Error(`Failed to get usage stats: ${error.message}`, { cause: error });
    }
  }

  /**
   * Get list of supported target languages
   *
   * @returns {Promise<Array>} List of language objects
   * @throws {Error} when the API key is missing or the request fails
   */
  async getSupportedLanguages() {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }
    try {
      const response = await axios.get(
        `${this.apiUrl}/languages`,
        {
          headers: {
            'Authorization': `DeepL-Auth-Key ${this.apiKey}`
          },
          params: {
            type: 'target'
          },
          timeout: 30000 // 30 second timeout
        }
      );
      return response.data;
    } catch (error) {
      throw new Error(`Failed to get supported languages: ${error.message}`, { cause: error });
    }
  }
}
// Singleton instance
// Constructed when this module is first loaded, so DEEPL_API_KEY and
// DEEPL_API_URL are read from process.env at that moment.
const deeplService = new DeepLService();
module.exports = deeplService;