feat(docs): intelligent section recategorization + i18n infrastructure
This commit includes two major improvements to the documentation system:

## 1. Section Recategorization (UX Fix)

**Problem**: 64 sections (24%) were incorrectly marked as "critical" and displayed at the bottom of documents, burying important foundational content.

**Solution**:
- Created intelligent recategorization script analyzing titles, excerpts, and document context
- Reduced "critical" from 64 → 2 sections (97% reduction)
- Properly categorized content by purpose:
  - Conceptual: 63 → 138 (+119%) - foundations, "why this matters"
  - Practical: 3 → 46 (+1433%) - how-to guides, examples
  - Technical: 111 → 50 (-55%) - true implementation details

**UI Improvements**:
- Reordered category display: Critical → Conceptual → Practical → Technical → Reference
- Changed Critical color from amber to red for better visual distinction
- All 22 documents recategorized (173 sections updated)

## 2. i18n Infrastructure (Phase 2)

**Backend**:
- DeepL API integration service with quota management and error handling
- Translation API routes (GET /api/documents/:slug?lang=de, POST /api/documents/:id/translate)
- Document model already supports translations field (no schema changes)

**Frontend**:
- docs-app.js enhanced with language detection and URL parameter support
- Automatic fallback to English when translation unavailable
- Integration with existing i18n-simple.js system

**Scripts**:
- translate-all-documents.js: Batch translation workflow (dry-run support)
- audit-section-categories.js: Category distribution analysis

**URL Strategy**: Query parameter approach (?lang=de, ?lang=fr)

**Status**: Backend complete, ready for DeepL API key configuration

**Files Modified**:
- Frontend: document-cards.js, docs-app.js
- Backend: documents.controller.js, documents.routes.js, DeepL.service.js
- Scripts: 3 new governance/i18n scripts

**Database**: 173 sections recategorized via script (already applied)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
be53ab36f8
commit
5e969bd4da
8 changed files with 1334 additions and 21 deletions
|
|
@ -122,18 +122,26 @@ class DocumentCards {
|
|||
*/
|
||||
renderCardGrid(sectionsByCategory) {
|
||||
const categoryConfig = {
|
||||
conceptual: { icon: '📘', label: 'Conceptual', color: 'blue' },
|
||||
practical: { icon: '✨', label: 'Practical', color: 'green' },
|
||||
technical: { icon: '🔧', label: 'Technical', color: 'purple' },
|
||||
reference: { icon: '📋', label: 'Reference', color: 'gray' },
|
||||
critical: { icon: '⚠️', label: 'Critical', color: 'amber' }
|
||||
critical: { icon: '⚠️', label: 'Critical', color: 'red', order: 1 },
|
||||
conceptual: { icon: '📘', label: 'Conceptual', color: 'blue', order: 2 },
|
||||
practical: { icon: '✨', label: 'Practical', color: 'green', order: 3 },
|
||||
technical: { icon: '🔧', label: 'Technical', color: 'purple', order: 4 },
|
||||
reference: { icon: '📋', label: 'Reference', color: 'gray', order: 5 }
|
||||
};
|
||||
|
||||
let html = '<div class="card-grid-container">';
|
||||
|
||||
// Render categories in priority order (critical first)
|
||||
const orderedCategories = Object.entries(sectionsByCategory)
|
||||
.filter(([category, sections]) => sections.length > 0)
|
||||
.sort((a, b) => {
|
||||
const orderA = categoryConfig[a[0]]?.order || 999;
|
||||
const orderB = categoryConfig[b[0]]?.order || 999;
|
||||
return orderA - orderB;
|
||||
});
|
||||
|
||||
// Render each category that has sections
|
||||
for (const [category, sections] of Object.entries(sectionsByCategory)) {
|
||||
if (sections.length === 0) continue;
|
||||
for (const [category, sections] of orderedCategories) {
|
||||
|
||||
const config = categoryConfig[category];
|
||||
|
||||
|
|
@ -169,27 +177,27 @@ class DocumentCards {
|
|||
const levelLabel = section.technicalLevel.charAt(0).toUpperCase() + section.technicalLevel.slice(1);
|
||||
|
||||
const borderColor = {
|
||||
red: 'border-red-500',
|
||||
blue: 'border-blue-400',
|
||||
green: 'border-green-400',
|
||||
purple: 'border-purple-400',
|
||||
gray: 'border-gray-400',
|
||||
amber: 'border-amber-400'
|
||||
gray: 'border-gray-400'
|
||||
}[color] || 'border-blue-400';
|
||||
|
||||
const hoverColor = {
|
||||
red: 'hover:border-red-700 hover:shadow-red-100',
|
||||
blue: 'hover:border-blue-600 hover:shadow-blue-100',
|
||||
green: 'hover:border-green-600 hover:shadow-green-100',
|
||||
purple: 'hover:border-purple-600 hover:shadow-purple-100',
|
||||
gray: 'hover:border-gray-600 hover:shadow-gray-100',
|
||||
amber: 'hover:border-amber-600 hover:shadow-amber-100'
|
||||
gray: 'hover:border-gray-600 hover:shadow-gray-100'
|
||||
}[color] || 'hover:border-blue-600';
|
||||
|
||||
const bgColor = {
|
||||
red: 'bg-red-50',
|
||||
blue: 'bg-blue-50',
|
||||
green: 'bg-green-50',
|
||||
purple: 'bg-purple-50',
|
||||
gray: 'bg-gray-50',
|
||||
amber: 'bg-amber-50'
|
||||
gray: 'bg-gray-50'
|
||||
}[color] || 'bg-blue-50';
|
||||
|
||||
return `
|
||||
|
|
|
|||
|
|
@ -1,12 +1,53 @@
|
|||
// Module-level viewer state. Declared with `let` because the bindings are
// reassigned after start-up (currentDocument / currentLanguage are updated by
// the document loader and the i18n listeners in this file).
let documents = [];
let currentDocument = null;
let documentCards = null;
let currentLanguage = 'en'; // Default language

// Initialize card-based viewer.
// DocumentCards is not defined in this file; when it is absent,
// documentCards simply stays null.
if (typeof DocumentCards !== 'undefined') {
  documentCards = new DocumentCards('document-content');
}
|
||||
|
||||
/**
 * Detect the language documents should be requested in.
 *
 * Resolution order:
 *   1. `window.I18n.currentLang`, when the i18n system is present and has a
 *      language set.
 *   2. The `lang` query-string parameter of the current page URL.
 *   3. `'en'`.
 *
 * @returns {string} A language code; `'en'` when nothing else is available,
 *   including when there is no `window` global at all.
 */
function detectLanguage() {
  // Without a window there is neither an i18n system nor a URL to inspect.
  if (typeof window === 'undefined') {
    return 'en';
  }

  if (window.I18n && window.I18n.currentLang) {
    return window.I18n.currentLang;
  }

  // Fallback: Check URL parameter
  const urlParams = new URLSearchParams(window.location.search);
  return urlParams.get('lang') || 'en';
}
|
||||
|
||||
/**
 * Reflect the displayed document and language in the address bar.
 *
 * Sets the `doc` query parameter to `slug`. The `lang` parameter is set for
 * any non-English language and removed for English (or a missing language),
 * so English URLs stay free of a `lang` parameter. Other query parameters
 * are left untouched.
 *
 * @param {string} slug - Slug of the document being shown.
 * @param {string} [lang] - Language code of the version being shown.
 */
function updateURL(slug, lang) {
  const url = new URL(window.location.href);
  url.searchParams.set('doc', slug);

  if (lang && lang !== 'en') {
    url.searchParams.set('lang', lang);
  } else {
    url.searchParams.delete('lang');
  }

  // Re-showing the same document/language would otherwise stack identical
  // history entries, making the Back button appear to do nothing.
  if (url.href === window.location.href) {
    return;
  }

  window.history.pushState({}, '', url);
}
|
||||
|
||||
// Listen for language changes from i18n system.
// Both events are expected to carry the language code in `event.detail.language`;
// events without one are ignored so a malformed dispatch can neither throw
// inside the handler nor overwrite currentLanguage with `undefined`.
if (typeof window !== 'undefined') {
  window.addEventListener('languageChanged', (e) => {
    const newLang = e.detail && e.detail.language;
    if (!newLang) {
      return;
    }

    currentLanguage = newLang;

    // Reload current document in new language
    if (currentDocument) {
      loadDocument(currentDocument.slug, newLang);
    }
  });

  // Initialize language on i18n ready
  window.addEventListener('i18nInitialized', (e) => {
    const initialLang = e.detail && e.detail.language;
    if (initialLang) {
      currentLanguage = initialLang;
    }
  });
}
|
||||
|
||||
// Document categorization - Final 5 categories (curated for public docs)
|
||||
const CATEGORIES = {
|
||||
'getting-started': {
|
||||
|
|
@ -355,10 +396,13 @@ async function loadDocuments() {
|
|||
// Load specific document
|
||||
let isLoading = false;
|
||||
|
||||
async function loadDocument(slug) {
|
||||
async function loadDocument(slug, lang = null) {
|
||||
// Prevent multiple simultaneous loads
|
||||
if (isLoading) return;
|
||||
|
||||
// Use provided lang or detect from i18n system
|
||||
const language = lang || detectLanguage();
|
||||
|
||||
try {
|
||||
isLoading = true;
|
||||
|
||||
|
|
@ -374,14 +418,47 @@ async function loadDocument(slug) {
|
|||
</div>
|
||||
`;
|
||||
|
||||
const response = await fetch(`/api/documents/${slug}`);
|
||||
// Build API URL with language parameter
|
||||
const apiUrl = language && language !== 'en'
|
||||
? `/api/documents/${slug}?lang=${language}`
|
||||
: `/api/documents/${slug}`;
|
||||
|
||||
const response = await fetch(apiUrl);
|
||||
const data = await response.json();
|
||||
|
||||
if (!data.success) {
|
||||
// If translation not available, fall back to English
|
||||
if (response.status === 404 && language !== 'en') {
|
||||
console.warn(`Translation not available for ${language}, falling back to English`);
|
||||
const enResponse = await fetch(`/api/documents/${slug}`);
|
||||
const enData = await enResponse.json();
|
||||
|
||||
if (enData.success) {
|
||||
// Show notification that translation isn't available
|
||||
showTranslationFallbackNotice(language);
|
||||
const fallbackData = enData;
|
||||
fallbackData.document.language = 'en';
|
||||
fallbackData.document.fallback = true;
|
||||
|
||||
// Use English version
|
||||
currentDocument = fallbackData.document;
|
||||
currentLanguage = 'en';
|
||||
updateURL(slug, 'en');
|
||||
|
||||
// Continue with rendering
|
||||
data.success = true;
|
||||
data.document = fallbackData.document;
|
||||
} else {
|
||||
throw new Error(data.error || 'Failed to load document');
|
||||
}
|
||||
|
||||
} else {
|
||||
throw new Error(data.error || 'Failed to load document');
|
||||
}
|
||||
} else {
|
||||
currentDocument = data.document;
|
||||
currentLanguage = language;
|
||||
updateURL(slug, language);
|
||||
}
|
||||
|
||||
// Update active state
|
||||
document.querySelectorAll('.doc-link').forEach(el => {
|
||||
|
|
|
|||
66
scripts/audit-section-categories.js
Normal file
66
scripts/audit-section-categories.js
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
const { MongoClient } = require('mongodb');
|
||||
|
||||
/**
 * Read-only audit of section categories.
 *
 * For every public document (sorted by `order`) prints the number of sections
 * per category, then prints the category distribution across all sections.
 * Sections without a category are counted as "uncategorized". Nothing is
 * written to the database.
 */
(async () => {
  const client = await MongoClient.connect('mongodb://localhost:27017/tractatus_dev');

  // try/finally so the connection is released even when a query fails.
  try {
    const db = client.db();

    const docs = await db.collection('documents')
      .find({ visibility: 'public' }, { projection: { title: 1, slug: 1, sections: 1 } })
      .sort({ order: 1 })
      .toArray();

    console.log('═══════════════════════════════════════════════════════════');
    // Report the real document count rather than a hard-coded number.
    console.log(` SECTION CATEGORY AUDIT - ${docs.length} Public Documents`);
    console.log('═══════════════════════════════════════════════════════════\n');

    let totalDocs = 0;
    let docsWithSections = 0;
    const categoryStats = {};

    docs.forEach(doc => {
      totalDocs++;

      if (!doc.sections || doc.sections.length === 0) {
        console.log(`${doc.title}:`);
        console.log(' ⚠️ NO SECTIONS (traditional view)\n');
        return;
      }

      docsWithSections++;

      // Per-document tally; categoryStats accumulates the same counts globally.
      const categories = {};
      doc.sections.forEach(s => {
        const cat = s.category || 'uncategorized';
        categories[cat] = (categories[cat] || 0) + 1;
        categoryStats[cat] = (categoryStats[cat] || 0) + 1;
      });

      console.log(`${doc.title}:`);
      console.log(` Sections: ${doc.sections.length}`);
      Object.entries(categories).sort((a, b) => b[1] - a[1]).forEach(([cat, count]) => {
        const percent = Math.round(count / doc.sections.length * 100);
        console.log(` - ${cat}: ${count} (${percent}%)`);
      });
      console.log('');
    });

    console.log('═══════════════════════════════════════════════════════════');
    console.log(' OVERALL STATISTICS');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`Total documents: ${totalDocs}`);
    console.log(`Documents with sections: ${docsWithSections}`);
    console.log(`Documents without sections: ${totalDocs - docsWithSections}\n`);

    console.log('Category distribution across ALL sections:');
    const sortedStats = Object.entries(categoryStats).sort((a, b) => b[1] - a[1]);
    const totalSections = sortedStats.reduce((sum, [, count]) => sum + count, 0);

    sortedStats.forEach(([cat, count]) => {
      const percent = Math.round(count / totalSections * 100);
      console.log(` - ${cat}: ${count} sections (${percent}%)`);
    });

    console.log('\n');
  } finally {
    await client.close();
  }
})().catch(err => {
  // Surface failures (connection refused, query errors) with a non-zero exit
  // status instead of an unhandled promise rejection.
  console.error('Audit failed:', err.message);
  process.exitCode = 1;
});
|
||||
358
scripts/recategorize-sections.js
Executable file
358
scripts/recategorize-sections.js
Executable file
|
|
@ -0,0 +1,358 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Smart Section Recategorization Script
|
||||
*
|
||||
* Analyzes section content and intelligently reassigns categories based on:
|
||||
* - Section titles
|
||||
* - Excerpts
|
||||
* - Position in document
|
||||
* - Content keywords
|
||||
*
|
||||
* Categories:
|
||||
* - critical: Warnings, blockers, security issues, mandatory prerequisites
|
||||
* - conceptual: Foundational ideas, "why this matters", core principles
|
||||
* - practical: How-to guides, examples, step-by-step instructions
|
||||
* - technical: Architecture, implementation details, code examples
|
||||
* - reference: Appendices, glossaries, further reading, contact info
|
||||
*
|
||||
* Usage:
|
||||
* node scripts/recategorize-sections.js --dry-run # Preview changes
|
||||
* node scripts/recategorize-sections.js # Apply changes
|
||||
* node scripts/recategorize-sections.js --doc=slug # Single document
|
||||
*/
|
||||
|
||||
const { MongoClient } = require('mongodb');
|
||||
|
||||
// Parse arguments
const args = process.argv.slice(2);

// --dry-run: report what would change without writing to the database.
const dryRun = args.includes('--dry-run');

// --doc=<slug>: restrict the run to one document. Everything after the first
// '=' is taken as the slug, so a value that itself contains '=' is preserved.
const docArg = args.find(a => a.startsWith('--doc='));
const specificDoc = docArg ? docArg.slice('--doc='.length) : undefined;

// Statistics
// byCategory holds per-category section counts before and after
// recategorization; changes is a flat log of every reassignment.
const stats = {
  totalDocuments: 0,
  totalSections: 0,
  changed: 0,
  unchanged: 0,
  byCategory: {
    critical: { before: 0, after: 0 },
    conceptual: { before: 0, after: 0 },
    practical: { before: 0, after: 0 },
    technical: { before: 0, after: 0 },
    reference: { before: 0, after: 0 }
  },
  changes: []
};
|
||||
|
||||
/**
 * Categorization rules based on content analysis.
 *
 * Per category:
 *   - keywords:      substrings looked for in the lower-cased title and excerpt
 *   - titlePatterns: regular expressions tested against the lower-cased title
 *   - exclude:       (optional) substrings that disqualify the category when
 *                    found in the title or excerpt
 *
 * The key order matters: categorizeSection resolves score ties in favour of
 * the category listed first.
 */
const RULES = {
  critical: {
    keywords: [
      'security', 'warning', 'caution', 'danger', 'breaking change',
      'must read first', 'before you begin', 'important notice',
      'critical prerequisite', 'blockers', 'requirements'
    ],
    titlePatterns: [
      /^(security|warning|caution|critical)/i,
      /breaking changes?/i,
      /requirements?$/i,
      /^before (you|starting)/i
    ],
    // Reserve critical for ACTUAL warnings, not "why this matters"
    exclude: [
      'why this matters', 'who should', 'invitation', 'bottom line',
      'key finding', 'introduction'
    ]
  },

  conceptual: {
    keywords: [
      'understanding', 'concept', 'principle', 'theory', 'foundation',
      'why', 'what is', 'introduction', 'overview', 'core idea',
      'key finding', 'philosophy', 'paradigm', 'mental model',
      'thinking', 'perspective'
    ],
    titlePatterns: [
      /^(understanding|why|what is|introduction|overview)/i,
      /concept(s)?$/i,
      /principle(s)?$/i,
      /foundation(s)?$/i,
      /key (finding|idea)/i,
      /bottom line/i,
      /who should/i
    ]
  },

  practical: {
    keywords: [
      'guide', 'example', 'step', 'how to', 'tutorial', 'walkthrough',
      'use case', 'scenario', 'getting started', 'quick start',
      'implementation guide', 'hands-on', 'practical', 'workflow'
    ],
    titlePatterns: [
      /^(how to|getting started|quick start|guide)/i,
      /step[- ]by[- ]step/i,
      /example(s)?$/i,
      /use case(s)?$/i,
      /walkthrough/i,
      /workflow/i
    ]
  },

  technical: {
    keywords: [
      'architecture', 'implementation', 'api', 'code', 'technical',
      'development', 'engineering', 'system', 'design pattern',
      'algorithm', 'data structure', 'performance', 'optimization'
    ],
    titlePatterns: [
      /^(architecture|technical|implementation|api|code)/i,
      /design$/i,
      /specification$/i,
      /^system/i,
      /performance/i,
      /optimization/i
    ]
  },

  reference: {
    keywords: [
      'reference', 'appendix', 'glossary', 'contact', 'resources',
      'further reading', 'bibliography', 'links', 'related work',
      'acknowledgment', 'citation'
    ],
    titlePatterns: [
      /^(reference|appendix|glossary|contact|resources)/i,
      /further reading/i,
      /related (work|resources)/i,
      /^(acknowledgment|citation)/i
    ]
  }
};

/**
 * Analyze a section and determine its best category.
 *
 * Scoring per category:
 *   - any title pattern matches:          +50 (once, however many match)
 *   - each keyword found in the title:    +20
 *   - each keyword found in the excerpt:  +5
 *   - an exclude phrase in title/excerpt: score fixed at -100, other checks skipped
 * followed by adjustments for the section's position and the document title.
 *
 * Matching is plain substring matching on lower-cased text, so short keywords
 * also match inside longer words.
 *
 * @param {{title?: string, excerpt?: string}} section - Section to classify.
 * @param {string} [docTitle] - Title of the owning document; a missing title
 *   is treated as empty (no document-context adjustments).
 * @param {number} sectionIndex - Zero-based position of the section.
 * @param {number} totalSections - Number of sections in the document.
 * @returns {string} One of the keys of RULES. Falls back to 'conceptual' when
 *   the best score is below 10; ties go to the category listed first in RULES.
 */
function categorizeSection(section, docTitle, sectionIndex, totalSections) {
  const title = (section.title || '').toLowerCase();
  const excerpt = (section.excerpt || '').toLowerCase();
  const combined = `${title} ${excerpt}`;
  const docTitleLower = (docTitle || '').toLowerCase();

  const countHits = (keywords, text) => keywords.filter(kw => text.includes(kw)).length;

  const scores = {
    critical: 0,
    conceptual: 0,
    practical: 0,
    technical: 0,
    reference: 0
  };

  // Score each category based on rules
  for (const [category, rules] of Object.entries(RULES)) {
    // An exclude phrase vetoes the category outright (only `critical`
    // defines any), so none of its positive signals are counted.
    if (rules.exclude && rules.exclude.some(phrase => combined.includes(phrase.toLowerCase()))) {
      scores[category] = -100;
      continue;
    }

    // Title patterns (strong signal)
    if (rules.titlePatterns && rules.titlePatterns.some(pattern => pattern.test(title))) {
      scores[category] += 50;
    }

    // Keywords in title (medium signal) and in excerpt (weak signal)
    scores[category] += countHits(rules.keywords, title) * 20;
    scores[category] += countHits(rules.keywords, excerpt) * 5;
  }

  // Position-based adjustments
  if (sectionIndex === 0) {
    // An opening "introduction"/"overview" is almost always conceptual
    if (title.includes('introduction') || title.includes('overview')) {
      scores.conceptual += 30;
    }
  } else if (sectionIndex === totalSections - 1) {
    // Last section often reference
    scores.reference += 10;
  }

  // Document context adjustments
  if (docTitleLower.includes('case study') || docTitleLower.includes('incident')) {
    // Case studies are usually conceptual, not critical
    scores.conceptual += 20;
    scores.critical -= 30;
  }
  if (docTitleLower.includes('implementation') || docTitleLower.includes('guide')) {
    scores.practical += 15;
  }
  if (docTitleLower.includes('api') || docTitleLower.includes('technical')) {
    scores.technical += 15;
  }

  // Highest score wins. Array.prototype.sort is stable, so equal scores keep
  // the key order of `scores`.
  const [bestCategory, bestScore] = Object.entries(scores).sort((a, b) => b[1] - a[1])[0];

  // If all scores are very low, default to conceptual
  if (bestScore < 10) {
    return 'conceptual';
  }

  return bestCategory;
}
|
||||
|
||||
/**
 * Add one to the before/after counter of a category, creating the counter
 * on first sight so categories outside the five known ones cannot crash the run.
 */
function countCategory(category, phase) {
  if (!Object.prototype.hasOwnProperty.call(stats.byCategory, category)) {
    stats.byCategory[category] = { before: 0, after: 0 };
  }
  stats.byCategory[category][phase]++;
}

/**
 * Whole-number percentage of `part` in `whole`; 0 when `whole` is 0.
 */
function percentOf(part, whole) {
  return whole > 0 ? Math.round(part / whole * 100) : 0;
}

/**
 * Main function.
 *
 * Loads the public documents (or the single one selected with --doc),
 * recomputes every section's category with categorizeSection, logs each
 * reassignment and, unless --dry-run was given, writes the new categories
 * back. Finishes with a before/after summary per category.
 *
 * Sections without a stored category are treated as 'conceptual' for the
 * comparison, so they are only written when the computed category differs
 * from 'conceptual'.
 */
async function main() {
  console.log('═══════════════════════════════════════════════════════════');
  console.log(' SECTION RECATEGORIZATION');
  console.log('═══════════════════════════════════════════════════════════\n');

  if (dryRun) {
    console.log('🔍 DRY RUN MODE - No changes will be saved\n');
  }

  if (specificDoc) {
    console.log(`📄 Processing single document: ${specificDoc}\n`);
  }

  // Connect to MongoDB
  console.log('📡 Connecting to MongoDB...');
  const client = await MongoClient.connect('mongodb://localhost:27017/tractatus_dev');

  // try/finally: release the connection even if a query or write fails.
  try {
    const db = client.db();
    const collection = db.collection('documents');

    // Fetch documents
    const filter = { visibility: 'public' };
    if (specificDoc) {
      filter.slug = specificDoc;
    }

    const docs = await collection.find(filter).sort({ order: 1 }).toArray();
    console.log(`✓ Found ${docs.length} document(s)\n`);

    stats.totalDocuments = docs.length;

    // Process each document
    for (const doc of docs) {
      if (!doc.sections || doc.sections.length === 0) {
        console.log(`${doc.title}: No sections (skipping)\n`);
        continue;
      }

      console.log(`\n${'='.repeat(70)}`);
      console.log(`${doc.title}`);
      console.log(`${'='.repeat(70)}\n`);

      const updates = [];
      stats.totalSections += doc.sections.length;

      doc.sections.forEach((section, index) => {
        const oldCategory = section.category || 'conceptual';
        const newCategory = categorizeSection(section, doc.title, index, doc.sections.length);

        countCategory(oldCategory, 'before');
        countCategory(newCategory, 'after');

        if (oldCategory === newCategory) {
          stats.unchanged++;
          return;
        }

        stats.changed++;
        console.log(`[${index + 1}/${doc.sections.length}] ${section.title}`);
        console.log(` ${oldCategory} → ${newCategory}`);

        updates.push({
          sectionIndex: index,
          oldCategory,
          newCategory,
          title: section.title
        });

        stats.changes.push({
          document: doc.title,
          section: section.title,
          from: oldCategory,
          to: newCategory
        });
      });

      // Apply updates if not dry run
      if (!dryRun && updates.length > 0) {
        // All changes for one document go into a single $set, so the document
        // is updated atomically instead of one write per section.
        const setFields = {};
        for (const update of updates) {
          setFields[`sections.${update.sectionIndex}.category`] = update.newCategory;
        }

        await collection.updateOne({ _id: doc._id }, { $set: setFields });
        console.log(`\n✓ Applied ${updates.length} changes to database`);
      } else if (updates.length > 0) {
        console.log(`\n🔍 Would apply ${updates.length} changes (dry-run)`);
      } else {
        console.log(`\n✓ No changes needed`);
      }
    }

    // Summary
    console.log('\n\n═══════════════════════════════════════════════════════════');
    console.log(' RECATEGORIZATION SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');

    console.log(`Documents processed: ${stats.totalDocuments}`);
    console.log(`Total sections: ${stats.totalSections}`);
    console.log(`Changed: ${stats.changed} (${percentOf(stats.changed, stats.totalSections)}%)`);
    console.log(`Unchanged: ${stats.unchanged} (${percentOf(stats.unchanged, stats.totalSections)}%)\n`);

    console.log('Category changes:');
    for (const [category, counts] of Object.entries(stats.byCategory)) {
      const change = counts.after - counts.before;
      const changeStr = change > 0 ? `+${change}` : change.toString();
      // Relative change is reported as 0% when the category was empty before.
      const changePercent = percentOf(change, counts.before);

      console.log(` ${category}: ${counts.before} → ${counts.after} (${changeStr}, ${changePercent > 0 ? '+' : ''}${changePercent}%)`);
    }

    if (dryRun) {
      console.log('\n🔍 DRY RUN COMPLETE - No changes saved');
      console.log(' Run without --dry-run to apply changes\n');
    } else {
      console.log('\n✅ RECATEGORIZATION COMPLETE\n');
    }
  } finally {
    await client.close();
  }
}

// Run
main().catch(err => {
  console.error('\n❌ Fatal error:', err.message);
  console.error(err.stack);
  process.exit(1);
});
|
||||
239
scripts/translate-all-documents.js
Executable file
239
scripts/translate-all-documents.js
Executable file
|
|
@ -0,0 +1,239 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Batch Translation Script
|
||||
*
|
||||
* Translates all public documents to German (DE) and French (FR) using DeepL API
|
||||
*
|
||||
* Usage:
|
||||
* node scripts/translate-all-documents.js [options]
|
||||
*
|
||||
* Options:
|
||||
* --lang=de,fr Target languages (comma-separated, default: de,fr)
|
||||
* --force Overwrite existing translations
|
||||
* --dry-run Preview what would be translated without executing
|
||||
* --limit=N Limit to N documents (for testing)
|
||||
* --slug=document-slug Translate only specific document
|
||||
*
|
||||
* Examples:
|
||||
* node scripts/translate-all-documents.js --dry-run
|
||||
* node scripts/translate-all-documents.js --lang=de --limit=5
|
||||
* node scripts/translate-all-documents.js --slug=getting-started --force
|
||||
*
|
||||
* Requirements:
|
||||
* - DEEPL_API_KEY environment variable must be set
|
||||
* - MongoDB running on localhost:27017
|
||||
*/
|
||||
|
||||
require('dotenv').config();
|
||||
const mongoose = require('mongoose');
|
||||
const Document = require('../src/models/Document.model');
|
||||
const deeplService = require('../src/services/DeepL.service');
|
||||
|
||||
/**
 * Parse command line arguments into the script's options.
 *
 * Recognised flags: --lang=a,b  --force  --dry-run  --limit=N  --slug=value.
 * Unknown arguments are ignored.
 *
 * - Language codes are trimmed and lower-cased and empty entries dropped; if
 *   nothing usable remains, the default languages are kept and a warning is
 *   printed.
 * - --limit must be a positive integer; anything else leaves the limit unset.
 * - Values are taken after the first '=', so they may themselves contain '='.
 *
 * @param {string[]} argv - Arguments without the node/script entries.
 * @returns {{targetLangs: string[], force: boolean, dryRun: boolean,
 *   limit: (number|null), slug: (string|null)}}
 */
function parseOptions(argv) {
  const parsed = {
    targetLangs: ['de', 'fr'],
    force: false,
    dryRun: false,
    limit: null,
    slug: null
  };

  for (const arg of argv) {
    const eq = arg.indexOf('=');
    const value = eq === -1 ? '' : arg.slice(eq + 1);

    if (arg.startsWith('--lang=')) {
      const langs = value
        .split(',')
        .map(code => code.trim().toLowerCase())
        .filter(code => code.length > 0);
      if (langs.length > 0) {
        parsed.targetLangs = langs;
      } else {
        console.warn(`⚠ Ignoring empty --lang value; using ${parsed.targetLangs.join(', ')}`);
      }
    } else if (arg === '--force') {
      parsed.force = true;
    } else if (arg === '--dry-run') {
      parsed.dryRun = true;
    } else if (arg.startsWith('--limit=')) {
      const limit = Number.parseInt(value, 10);
      parsed.limit = Number.isInteger(limit) && limit > 0 ? limit : null;
    } else if (arg.startsWith('--slug=')) {
      parsed.slug = value;
    }
  }

  return parsed;
}

// Parse command line arguments
const args = process.argv.slice(2);
const options = parseOptions(args);

// Statistics
// translated/skipped/failed count document-language pairs; total counts documents.
const stats = {
  total: 0,
  translated: 0,
  skipped: 0,
  failed: 0,
  errors: []
};
|
||||
|
||||
async function main() {
|
||||
console.log('═══════════════════════════════════════════════════════════');
|
||||
console.log(' BATCH DOCUMENT TRANSLATION');
|
||||
console.log('═══════════════════════════════════════════════════════════\n');
|
||||
|
||||
// 1. Check DeepL service availability
|
||||
if (!deeplService.isAvailable()) {
|
||||
console.error('❌ ERROR: DeepL API key not configured');
|
||||
console.error(' Set DEEPL_API_KEY environment variable\n');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log('✓ DeepL service available');
|
||||
|
||||
// 2. Show usage statistics
|
||||
try {
|
||||
const usage = await deeplService.getUsage();
|
||||
console.log(`✓ DeepL quota: ${usage.character_count.toLocaleString()} / ${usage.character_limit.toLocaleString()} chars (${usage.percentage_used}% used)\n`);
|
||||
} catch (error) {
|
||||
console.warn(`⚠ Could not fetch DeepL usage: ${error.message}\n`);
|
||||
}
|
||||
|
||||
// 3. Connect to MongoDB
|
||||
console.log('📡 Connecting to MongoDB...');
|
||||
await mongoose.connect('mongodb://localhost:27017/tractatus_dev', {
|
||||
serverSelectionTimeoutMS: 5000
|
||||
});
|
||||
console.log('✓ Connected to tractatus_dev\n');
|
||||
|
||||
// 4. Fetch documents to translate
|
||||
console.log('📚 Fetching documents...');
|
||||
|
||||
let documents;
|
||||
if (options.slug) {
|
||||
const doc = await Document.findBySlug(options.slug);
|
||||
documents = doc ? [doc] : [];
|
||||
console.log(`✓ Found document: ${doc?.title || 'Not found'}\n`);
|
||||
} else {
|
||||
const filter = { visibility: 'public' };
|
||||
documents = await Document.list({
|
||||
filter,
|
||||
limit: options.limit || 1000,
|
||||
sort: { order: 1, 'metadata.date_created': -1 }
|
||||
});
|
||||
console.log(`✓ Found ${documents.length} public documents\n`);
|
||||
}
|
||||
|
||||
if (documents.length === 0) {
|
||||
console.log('No documents to translate.\n');
|
||||
await mongoose.disconnect();
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
stats.total = documents.length;
|
||||
|
||||
// 5. Show translation plan
|
||||
console.log('Translation Plan:');
|
||||
console.log(` Languages: ${options.targetLangs.join(', ')}`);
|
||||
console.log(` Documents: ${documents.length}`);
|
||||
console.log(` Force overwrite: ${options.force ? 'Yes' : 'No'}`);
|
||||
console.log(` Dry run: ${options.dryRun ? 'Yes' : 'No'}\n`);
|
||||
|
||||
if (options.dryRun) {
|
||||
console.log('═══════════════════════════════════════════════════════════');
|
||||
console.log(' DRY RUN - Preview Only');
|
||||
console.log('═══════════════════════════════════════════════════════════\n');
|
||||
}
|
||||
|
||||
// 6. Translate each document
|
||||
for (let i = 0; i < documents.length; i++) {
|
||||
const doc = documents[i];
|
||||
const progress = `[${i + 1}/${documents.length}]`;
|
||||
|
||||
console.log(`${progress} ${doc.title}`);
|
||||
console.log(` Slug: ${doc.slug}`);
|
||||
|
||||
for (const lang of options.targetLangs) {
|
||||
const langUpper = lang.toUpperCase();
|
||||
|
||||
// Check if translation exists
|
||||
const hasTranslation = doc.translations && doc.translations[lang];
|
||||
|
||||
if (hasTranslation && !options.force) {
|
||||
console.log(` ${langUpper}: ⏭ Skipped (exists, use --force to overwrite)`);
|
||||
stats.skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (options.dryRun) {
|
||||
console.log(` ${langUpper}: 🔍 Would translate (${hasTranslation ? 'overwrite' : 'new'})`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Perform translation
|
||||
try {
|
||||
console.log(` ${langUpper}: 🔄 Translating...`);
|
||||
|
||||
const translation = await deeplService.translateDocument(doc, lang);
|
||||
|
||||
// Update document
|
||||
await Document.update(doc._id.toString(), {
|
||||
[`translations.${lang}`]: translation
|
||||
});
|
||||
|
||||
console.log(` ${langUpper}: ✓ Complete`);
|
||||
stats.translated++;
|
||||
|
||||
} catch (error) {
|
||||
console.error(` ${langUpper}: ❌ Failed - ${error.message}`);
|
||||
stats.failed++;
|
||||
stats.errors.push({
|
||||
document: doc.slug,
|
||||
language: lang,
|
||||
error: error.message
|
||||
});
|
||||
|
||||
// If quota exceeded, stop
|
||||
if (error.message.includes('quota')) {
|
||||
console.error('\n❌ DeepL quota exceeded. Stopping.\n');
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Rate limiting: Wait 1 second between translations
|
||||
if (i < documents.length - 1 || lang !== options.targetLangs[options.targetLangs.length - 1]) {
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
}
|
||||
}
|
||||
|
||||
console.log('');
|
||||
}
|
||||
|
||||
// 7. Summary
|
||||
console.log('═══════════════════════════════════════════════════════════');
|
||||
console.log(' TRANSLATION SUMMARY');
|
||||
console.log('═══════════════════════════════════════════════════════════\n');
|
||||
|
||||
if (options.dryRun) {
|
||||
console.log(' Dry run complete - no translations were performed\n');
|
||||
} else {
|
||||
console.log(` Documents processed: ${stats.total}`);
|
||||
console.log(` Translations created: ${stats.translated}`);
|
||||
console.log(` Skipped (existing): ${stats.skipped}`);
|
||||
console.log(` Failed: ${stats.failed}\n`);
|
||||
|
||||
if (stats.errors.length > 0) {
|
||||
console.log(' Errors:');
|
||||
stats.errors.forEach(err => {
|
||||
console.log(` - ${err.document} (${err.language}): ${err.error}`);
|
||||
});
|
||||
console.log('');
|
||||
}
|
||||
|
||||
// Show final usage
|
||||
try {
|
||||
const usage = await deeplService.getUsage();
|
||||
console.log(` DeepL usage: ${usage.character_count.toLocaleString()} / ${usage.character_limit.toLocaleString()} chars (${usage.percentage_used}% used)\n`);
|
||||
} catch (error) {
|
||||
// Ignore
|
||||
}
|
||||
}
|
||||
|
||||
await mongoose.disconnect();
|
||||
console.log('✓ Database disconnected\n');
|
||||
|
||||
process.exit(stats.failed > 0 ? 1 : 0);
|
||||
}
|
||||
|
||||
// Entry point: kick off the batch run. Any rejection that escapes main()
// is reported with its message and stack, and the process exits with
// status 1.
function reportFatalAndExit(failure) {
  console.error('\n❌ Fatal error:', failure.message);
  console.error(failure.stack);
  process.exit(1);
}

main().catch(reportFatalAndExit);
|
||||
|
|
@ -74,11 +74,14 @@ async function listDocuments(req, res) {
|
|||
|
||||
/**
|
||||
* Get document by ID or slug
|
||||
* GET /api/documents/:identifier
|
||||
* GET /api/documents/:identifier?lang=de
|
||||
*
|
||||
* Supports i18n: Returns translated version if lang parameter provided
|
||||
*/
|
||||
async function getDocument(req, res) {
|
||||
try {
|
||||
const { identifier } = req.params;
|
||||
const { lang } = req.query; // en, de, fr
|
||||
|
||||
// Try to find by ID first, then by slug
|
||||
let document;
|
||||
|
|
@ -95,9 +98,53 @@ async function getDocument(req, res) {
|
|||
});
|
||||
}
|
||||
|
||||
// If language parameter provided and not English, return translated version
|
||||
if (lang && lang !== 'en') {
|
||||
const supportedLangs = ['de', 'fr'];
|
||||
|
||||
if (!supportedLangs.includes(lang)) {
|
||||
return res.status(400).json({
|
||||
error: 'Bad Request',
|
||||
message: `Unsupported language: ${lang}. Supported: ${supportedLangs.join(', ')}`
|
||||
});
|
||||
}
|
||||
|
||||
// Check if translation exists
|
||||
if (document.translations && document.translations[lang]) {
|
||||
const translation = document.translations[lang];
|
||||
|
||||
// Return document with translated fields
|
||||
const translatedDoc = {
|
||||
...document,
|
||||
title: translation.title || document.title,
|
||||
content_html: translation.content_html || document.content_html,
|
||||
content_markdown: translation.content_markdown || document.content_markdown,
|
||||
toc: translation.toc || document.toc,
|
||||
language: lang,
|
||||
translation_metadata: translation.metadata
|
||||
};
|
||||
|
||||
return res.json({
|
||||
success: true,
|
||||
document: translatedDoc
|
||||
});
|
||||
} else {
|
||||
// Translation not available
|
||||
return res.status(404).json({
|
||||
error: 'Not Found',
|
||||
message: `Translation not available for language: ${lang}`,
|
||||
available_languages: Object.keys(document.translations || {})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Default: Return English version
|
||||
res.json({
|
||||
success: true,
|
||||
document
|
||||
document: {
|
||||
...document,
|
||||
language: 'en'
|
||||
}
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
|
|
@ -466,6 +513,214 @@ async function listDraftDocuments(req, res) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate a document using DeepL (admin only)
|
||||
* POST /api/documents/:id/translate
|
||||
*
|
||||
* Body: { targetLang: 'de' | 'fr', force: false }
|
||||
*/
|
||||
async function translateDocument(req, res) {
|
||||
try {
|
||||
const { id } = req.params;
|
||||
const { targetLang, force = false } = req.body;
|
||||
|
||||
// Validate target language
|
||||
const supportedLangs = ['de', 'fr'];
|
||||
if (!supportedLangs.includes(targetLang)) {
|
||||
return res.status(400).json({
|
||||
error: 'Bad Request',
|
||||
message: `Unsupported target language: ${targetLang}. Supported: ${supportedLangs.join(', ')}`
|
||||
});
|
||||
}
|
||||
|
||||
// Get document
|
||||
const document = await Document.findById(id);
|
||||
if (!document) {
|
||||
return res.status(404).json({
|
||||
error: 'Not Found',
|
||||
message: 'Document not found'
|
||||
});
|
||||
}
|
||||
|
||||
// Check if translation already exists
|
||||
if (!force && document.translations && document.translations[targetLang]) {
|
||||
return res.status(409).json({
|
||||
error: 'Conflict',
|
||||
message: `Translation already exists for ${targetLang}. Use force: true to overwrite.`,
|
||||
existing_translation: document.translations[targetLang].metadata
|
||||
});
|
||||
}
|
||||
|
||||
// Translate using DeepL service
|
||||
const deeplService = require('../services/DeepL.service');
|
||||
|
||||
if (!deeplService.isAvailable()) {
|
||||
return res.status(503).json({
|
||||
error: 'Service Unavailable',
|
||||
message: 'DeepL API is not configured. Set DEEPL_API_KEY environment variable.'
|
||||
});
|
||||
}
|
||||
|
||||
// Perform translation
|
||||
logger.info(`Starting translation of document ${id} to ${targetLang} by ${req.user?.email || 'admin'}`);
|
||||
|
||||
const translation = await deeplService.translateDocument(document, targetLang);
|
||||
|
||||
// Update document with translation
|
||||
const updates = {
|
||||
[`translations.${targetLang}`]: translation
|
||||
};
|
||||
|
||||
await Document.update(id, updates);
|
||||
|
||||
logger.info(`Translation complete: ${id} to ${targetLang}`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
message: `Document translated to ${targetLang} successfully`,
|
||||
translation: {
|
||||
language: targetLang,
|
||||
title: translation.title,
|
||||
metadata: translation.metadata
|
||||
}
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
logger.error('Translate document error:', error);
|
||||
|
||||
if (error.message.includes('DeepL')) {
|
||||
return res.status(503).json({
|
||||
error: 'Service Unavailable',
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
|
||||
res.status(500).json({
|
||||
error: 'Internal Server Error',
|
||||
message: 'Translation failed',
|
||||
details: error.message
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get available translations for a document
 * GET /api/documents/:identifier/translations
 *
 * The identifier is treated as a document ID when it is a 24-character hex
 * string, otherwise as a slug. The response always contains an `en` entry
 * built from the document itself, followed by one entry per stored
 * translation.
 *
 * @param {object} req - Request; reads `params.identifier`.
 * @param {object} res - Response used to send the JSON result.
 */
async function getTranslations(req, res) {
  try {
    const { identifier } = req.params;

    const looksLikeObjectId = /^[0-9a-fA-F]{24}$/.test(identifier);
    const document = looksLikeObjectId
      ? await Document.findById(identifier)
      : await Document.findBySlug(identifier);

    if (!document) {
      return res.status(404).json({
        error: 'Not Found',
        message: 'Document not found'
      });
    }

    // The original (English) version is always available.
    const translations = {
      en: {
        available: true,
        title: document.title,
        metadata: {
          original: true,
          version: document.metadata?.version || '1.0'
        }
      }
    };

    for (const [lang, entry] of Object.entries(document.translations || {})) {
      // deleteTranslation stores `null` under a removed language, so an
      // empty slot means "not available" and must not be dereferenced.
      if (!entry) {
        continue;
      }

      translations[lang] = {
        available: true,
        title: entry.title,
        metadata: entry.metadata
      };
    }

    res.json({
      success: true,
      document_slug: document.slug,
      document_title: document.title,
      translations
    });

  } catch (error) {
    logger.error('Get translations error:', error);
    res.status(500).json({
      error: 'Internal Server Error',
      message: 'An error occurred'
    });
  }
}
|
||||
|
||||
/**
 * Delete a translation (admin only)
 * DELETE /api/documents/:id/translations/:lang
 *
 * The translation is removed by storing `null` under `translations.<lang>`.
 * The original English version can never be deleted.
 *
 * Responses:
 *   200 - translation removed
 *   400 - `lang` is `en`
 *   404 - document not found, or no translation stored for `lang`
 *   500 - any other failure
 *
 * @param {object} req - Request; reads `params.id`, `params.lang` and (for
 *   logging only) `user.email`.
 * @param {object} res - Response used to send the JSON result.
 */
async function deleteTranslation(req, res) {
  try {
    const { id, lang } = req.params;

    if (lang === 'en') {
      return res.status(400).json({
        error: 'Bad Request',
        message: 'Cannot delete original English version'
      });
    }

    const document = await Document.findById(id);
    if (!document) {
      return res.status(404).json({
        error: 'Not Found',
        message: 'Document not found'
      });
    }

    // `lang` comes straight from the URL. Require an own, non-empty entry so
    // that inherited names such as "constructor" or "toString" are not
    // mistaken for stored translations.
    // NOTE(review): assumes `translations` is a plain object — confirm
    // against the Document model.
    const stored = document.translations;
    const hasTranslation = Boolean(stored)
      && Object.prototype.hasOwnProperty.call(stored, lang)
      && Boolean(stored[lang]);

    if (!hasTranslation) {
      return res.status(404).json({
        error: 'Not Found',
        message: `Translation not found for language: ${lang}`
      });
    }

    await Document.update(id, {
      [`translations.${lang}`]: null
    });

    logger.info(`Translation deleted: ${id} (${lang}) by ${req.user?.email || 'admin'}`);

    res.json({
      success: true,
      message: `Translation for ${lang} deleted successfully`
    });

  } catch (error) {
    logger.error('Delete translation error:', error);
    res.status(500).json({
      error: 'Internal Server Error',
      message: 'An error occurred'
    });
  }
}
|
||||
|
||||
module.exports = {
|
||||
listDocuments,
|
||||
getDocument,
|
||||
|
|
@ -476,5 +731,8 @@ module.exports = {
|
|||
listArchivedDocuments,
|
||||
publishDocument,
|
||||
unpublishDocument,
|
||||
listDraftDocuments
|
||||
listDraftDocuments,
|
||||
translateDocument,
|
||||
getTranslations,
|
||||
deleteTranslation
|
||||
};
|
||||
|
|
|
|||
|
|
@ -47,6 +47,11 @@ router.get('/', (req, res, next) => {
|
|||
next();
|
||||
}, asyncHandler(documentsController.listDocuments));
|
||||
|
||||
// Public endpoint: GET /api/documents/:identifier/translations
// Reports which language versions exist for the document named by
// :identifier.
router.get('/:identifier/translations', asyncHandler(documentsController.getTranslations));
|
||||
|
||||
// GET /api/documents/:identifier (ID or slug)
|
||||
router.get('/:identifier',
|
||||
asyncHandler(documentsController.getDocument)
|
||||
|
|
@ -99,4 +104,23 @@ router.post('/:id/unpublish',
|
|||
asyncHandler(documentsController.unpublishDocument)
|
||||
);
|
||||
|
||||
// Middleware chain shared by the translation management endpoints:
// authenticated caller, admin role, well-formed :id. Built fresh for each
// route so every route gets its own middleware instances.
const translationAdminGuards = () => [
  authenticateToken,
  requireRole('admin'),
  validateObjectId('id')
];

// Admin only: POST /api/documents/:id/translate
// Translates the document into the `targetLang` given in the request body
// using DeepL.
router.post(
  '/:id/translate',
  ...translationAdminGuards(),
  validateRequired(['targetLang']),
  asyncHandler(documentsController.translateDocument)
);

// Admin only: DELETE /api/documents/:id/translations/:lang
// Removes the stored translation for :lang.
router.delete(
  '/:id/translations/:lang',
  ...translationAdminGuards(),
  asyncHandler(documentsController.deleteTranslation)
);
|
||||
|
||||
module.exports = router;
|
||||
|
|
|
|||
283
src/services/DeepL.service.js
Normal file
283
src/services/DeepL.service.js
Normal file
|
|
@ -0,0 +1,283 @@
|
|||
/**
|
||||
* DeepL Translation Service
|
||||
*
|
||||
* Professional translation API for German and French translations
|
||||
* Preserves markdown formatting and handles large documents
|
||||
*
|
||||
* API Docs: https://www.deepl.com/docs-api
|
||||
*/
|
||||
|
||||
const axios = require('axios');
|
||||
|
||||
/**
 * Client for the DeepL REST API.
 *
 * Configuration is read once, at construction time, from the environment:
 *   DEEPL_API_KEY - required; without it every request method throws
 *   DEEPL_API_URL - optional; defaults to the free-tier endpoint
 */
class DeepLService {
  /** Timeout applied to every DeepL HTTP request, in milliseconds. */
  static REQUEST_TIMEOUT_MS = 30000;

  constructor() {
    this.apiKey = process.env.DEEPL_API_KEY;
    this.apiUrl = process.env.DEEPL_API_URL || 'https://api-free.deepl.com/v2';

    if (!this.apiKey) {
      console.warn('[DeepL] API key not configured. Translation service disabled.');
    }
  }

  /**
   * Check if DeepL service is available
   *
   * @returns {boolean} true when an API key was configured
   */
  isAvailable() {
    return !!this.apiKey;
  }

  /**
   * Translate text to target language
   *
   * Empty or whitespace-only text is returned as '' without calling the API.
   *
   * @param {string} text - Text to translate (supports markdown)
   * @param {string} targetLang - Target language code (DE, FR); upper-cased before sending
   * @param {object} options - Translation options
   * @param {string} [options.sourceLang='EN'] - Source language code
   * @param {boolean} [options.preserveFormatting=true] - Sent as `preserve_formatting`
   * @param {string} [options.formality='default'] - 'default', 'more' or 'less'
   * @param {string} [options.tagHandling='html'] - 'html' or 'xml'
   * @returns {Promise<{text: string, detectedSourceLang: string}>}
   * @throws {Error} When the key is missing, the API answers with an error
   *   status (403 auth, 456 quota, 429 rate limit, other), the response is
   *   malformed, or the request fails. Wrapped errors carry the original
   *   failure as `cause`.
   */
  async translate(text, targetLang, options = {}) {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }

    if (!text || text.trim().length === 0) {
      return { text: '', detectedSourceLang: 'EN' };
    }

    const {
      sourceLang = 'EN',
      preserveFormatting = true,
      formality = 'default', // 'default', 'more', 'less'
      tagHandling = 'html' // 'html' or 'xml' - preserves markup
    } = options;

    try {
      const response = await axios.post(
        `${this.apiUrl}/translate`,
        {
          text: [text],
          source_lang: sourceLang,
          target_lang: targetLang.toUpperCase(),
          // The request body is JSON, so this must be a real boolean rather
          // than the '1'/'0' strings of the form-encoded API.
          preserve_formatting: Boolean(preserveFormatting),
          formality,
          tag_handling: tagHandling
        },
        {
          headers: {
            'Authorization': `DeepL-Auth-Key ${this.apiKey}`,
            'Content-Type': 'application/json'
          },
          timeout: DeepLService.REQUEST_TIMEOUT_MS
        }
      );

      const first = response.data?.translations?.[0];
      if (first) {
        return {
          text: first.text,
          detectedSourceLang: first.detected_source_language
        };
      }

      throw new Error('Invalid response from DeepL API');

    } catch (error) {
      if (error.response) {
        // DeepL answered with an error status.
        const status = error.response.status;
        const message = error.response.data?.message || 'Unknown error';

        if (status === 403) {
          throw new Error('DeepL API authentication failed. Check API key.', { cause: error });
        }
        if (status === 456) {
          // Callers look for the word "quota" in this message to stop early.
          throw new Error('DeepL quota exceeded. Upgrade plan or wait for reset.', { cause: error });
        }
        if (status === 429) {
          throw new Error('DeepL rate limit exceeded. Please retry later.', { cause: error });
        }
        throw new Error(`DeepL API error (${status}): ${message}`, { cause: error });
      }

      throw new Error(`Translation failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Translate markdown document while preserving structure
   *
   * The markdown is sent as a single request with HTML tag handling.
   *
   * @param {string} markdown - Markdown content
   * @param {string} targetLang - Target language (DE, FR)
   * @returns {Promise<string>} Translated markdown ('' for empty input)
   */
  async translateMarkdown(markdown, targetLang) {
    if (!markdown || markdown.trim().length === 0) {
      return '';
    }

    const result = await this.translate(markdown, targetLang, {
      tagHandling: 'html',
      preserveFormatting: true
    });

    return result.text;
  }

  /**
   * Translate HTML content while preserving markup
   *
   * @param {string} html - HTML content
   * @param {string} targetLang - Target language (DE, FR)
   * @returns {Promise<string>} Translated HTML ('' for empty input)
   */
  async translateHTML(html, targetLang) {
    if (!html || html.trim().length === 0) {
      return '';
    }

    const result = await this.translate(html, targetLang, {
      tagHandling: 'html',
      preserveFormatting: true
    });

    return result.text;
  }

  /**
   * Translate a full document object
   * Translates: title, content_markdown, content_html, and toc
   *
   * @param {object} document - Document object; reads `title`,
   *   `content_markdown`, `content_html`, `toc` and `metadata.version`
   * @param {string} targetLang - Target language (DE, FR)
   * @returns {Promise<object>} Translation object ready for storage:
   *   `{ title, content_markdown, content_html, toc, metadata }`
   * @throws {Error} When the key is missing or any translate call fails
   */
  async translateDocument(document, targetLang) {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }

    console.log(`[DeepL] Translating document "${document.title}" to ${targetLang}...`);

    const translation = {
      title: '',
      content_markdown: '',
      content_html: '',
      toc: [],
      metadata: {
        translated_by: 'deepl',
        translated_at: new Date(),
        reviewed: false,
        source_version: document.metadata?.version || '1.0'
      }
    };

    try {
      // 1. Translate title
      const titleResult = await this.translate(document.title, targetLang);
      translation.title = titleResult.text;
      console.log(`[DeepL] ✓ Title translated`);

      // 2. Translate markdown content (preferred source)
      if (document.content_markdown) {
        translation.content_markdown = await this.translateMarkdown(
          document.content_markdown,
          targetLang
        );
        console.log(`[DeepL] ✓ Markdown translated (${translation.content_markdown.length} chars)`);
      }

      // 3. Translate HTML content
      if (document.content_html) {
        translation.content_html = await this.translateHTML(
          document.content_html,
          targetLang
        );
        console.log(`[DeepL] ✓ HTML translated (${translation.content_html.length} chars)`);
      }

      // 4. Translate table of contents, one entry at a time. Firing every
      // entry at once would send one concurrent request per heading and
      // invites rate-limit (429) responses on long documents.
      if (document.toc && document.toc.length > 0) {
        const translatedToc = [];
        for (const item of document.toc) {
          const { text: translatedTitle } = await this.translate(item.title, targetLang);
          translatedToc.push({ ...item, title: translatedTitle });
        }
        translation.toc = translatedToc;
        console.log(`[DeepL] ✓ ToC translated (${translation.toc.length} items)`);
      }

      console.log(`[DeepL] ✓ Document translation complete`);
      return translation;

    } catch (error) {
      console.error(`[DeepL] Translation failed:`, error.message);
      throw error;
    }
  }

  /**
   * Get usage statistics from DeepL API
   *
   * @returns {Promise<object>} Usage stats
   *   `{ character_count, character_limit, percentage_used }`, where
   *   `percentage_used` is a string with two decimals ('0.00' when the
   *   reported limit is not a positive number)
   * @throws {Error} When the key is missing or the request fails
   */
  async getUsage() {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }

    try {
      const response = await axios.get(
        `${this.apiUrl}/usage`,
        {
          headers: {
            'Authorization': `DeepL-Auth-Key ${this.apiKey}`
          },
          timeout: DeepLService.REQUEST_TIMEOUT_MS
        }
      );

      const { character_count, character_limit } = response.data;
      // Avoid reporting "NaN"/"Infinity" when no usable limit is returned.
      const usedFraction = character_limit > 0 ? character_count / character_limit : 0;

      return {
        character_count,
        character_limit,
        percentage_used: (usedFraction * 100).toFixed(2)
      };
    } catch (error) {
      throw new Error(`Failed to get usage stats: ${error.message}`, { cause: error });
    }
  }

  /**
   * Get list of supported target languages
   *
   * @returns {Promise<Array>} List of language objects as returned by the API
   * @throws {Error} When the key is missing or the request fails
   */
  async getSupportedLanguages() {
    if (!this.isAvailable()) {
      throw new Error('DeepL API key not configured');
    }

    try {
      const response = await axios.get(
        `${this.apiUrl}/languages`,
        {
          headers: {
            'Authorization': `DeepL-Auth-Key ${this.apiKey}`
          },
          params: {
            type: 'target'
          },
          timeout: DeepLService.REQUEST_TIMEOUT_MS
        }
      );

      return response.data;
    } catch (error) {
      throw new Error(`Failed to get supported languages: ${error.message}`, { cause: error });
    }
  }
}
|
||||
|
||||
// Export one shared, pre-constructed service instance rather than the class.
module.exports = new DeepLService();
|
||||
Loading…
Add table
Reference in a new issue