This commit includes two major improvements to the documentation system:

## 1. Section Recategorization (UX Fix)

**Problem**: 64 sections (24%) were incorrectly marked as "critical" and displayed at the bottom of documents, burying important foundational content.

**Solution**:
- Created intelligent recategorization script analyzing titles, excerpts, and document context
- Reduced "critical" from 64 → 2 sections (97% reduction)
- Properly categorized content by purpose:
  - Conceptual: 63 → 138 (+119%) - foundations, "why this matters"
  - Practical: 3 → 46 (+1433%) - how-to guides, examples
  - Technical: 111 → 50 (-55%) - true implementation details

**UI Improvements**:
- Reordered category display: Critical → Conceptual → Practical → Technical → Reference
- Changed Critical color from amber to red for better visual distinction
- All 22 documents recategorized (173 sections updated)

## 2. i18n Infrastructure (Phase 2)

**Backend**:
- DeepL API integration service with quota management and error handling
- Translation API routes (GET /api/documents/:slug?lang=de, POST /api/documents/:id/translate)
- Document model already supports translations field (no schema changes)

**Frontend**:
- docs-app.js enhanced with language detection and URL parameter support
- Automatic fallback to English when translation unavailable
- Integration with existing i18n-simple.js system

**Scripts**:
- translate-all-documents.js: Batch translation workflow (dry-run support)
- audit-section-categories.js: Category distribution analysis

**URL Strategy**: Query parameter approach (?lang=de, ?lang=fr)

**Status**: Backend complete, ready for DeepL API key configuration

**Files Modified**:
- Frontend: document-cards.js, docs-app.js
- Backend: documents.controller.js, documents.routes.js, DeepL.service.js
- Scripts: 3 new governance/i18n scripts

**Database**: 173 sections recategorized via script (already applied)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
358 lines
11 KiB
JavaScript
Executable file
358 lines
11 KiB
JavaScript
Executable file
#!/usr/bin/env node
|
|
|
|
/**
|
|
* Smart Section Recategorization Script
|
|
*
|
|
* Analyzes section content and intelligently reassigns categories based on:
|
|
* - Section titles
|
|
* - Excerpts
|
|
* - Position in document
|
|
* - Content keywords
|
|
*
|
|
* Categories:
|
|
* - critical: Warnings, blockers, security issues, mandatory prerequisites
|
|
* - conceptual: Foundational ideas, "why this matters", core principles
|
|
* - practical: How-to guides, examples, step-by-step instructions
|
|
* - technical: Architecture, implementation details, code examples
|
|
* - reference: Appendices, glossaries, further reading, contact info
|
|
*
|
|
* Usage:
|
|
* node scripts/recategorize-sections.js --dry-run # Preview changes
|
|
* node scripts/recategorize-sections.js # Apply changes
|
|
* node scripts/recategorize-sections.js --doc=slug # Single document
|
|
*/
|
|
|
|
const { MongoClient } = require('mongodb');
|
|
|
|
// Command-line flags (everything after `node <script>`).
const args = process.argv.slice(2);

// --dry-run: report the proposed changes without writing them.
const dryRun = args.includes('--dry-run');

// --doc=<slug>: restrict processing to one document; undefined when the flag is absent.
const specificDoc = args
  .find((arg) => arg.startsWith('--doc='))
  ?.split('=')[1];
|
|
|
|
// Run-wide tallies, filled in while documents are processed and printed in the summary.
const stats = {
  totalDocuments: 0,
  totalSections: 0,
  changed: 0,
  unchanged: 0,
  // One independent { before, after } counter pair per category.
  byCategory: Object.fromEntries(
    ['critical', 'conceptual', 'practical', 'technical', 'reference'].map(
      (category) => [category, { before: 0, after: 0 }],
    ),
  ),
  // One record per section whose category changed.
  changes: [],
};
|
|
|
|
/**
 * Per-category matching rules used when scoring a section.
 *
 * Each entry provides:
 *   - keywords:      lowercase substrings looked up in the section title and excerpt
 *   - titlePatterns: regular expressions tested against the section title
 *   - exclude:       (critical only) phrases that rule the category out
 */
const RULES = {
  critical: {
    keywords: [
      'security',
      'warning',
      'caution',
      'danger',
      'breaking change',
      'must read first',
      'before you begin',
      'important notice',
      'critical prerequisite',
      'blockers',
      'requirements',
    ],
    titlePatterns: [
      /^(security|warning|caution|critical)/i,
      /breaking changes?/i,
      /requirements?$/i,
      /^before (you|starting)/i,
    ],
    // Keep "critical" for real warnings; these phrases mark framing/intro content instead.
    exclude: [
      'why this matters',
      'who should',
      'invitation',
      'bottom line',
      'key finding',
      'introduction',
    ],
  },

  conceptual: {
    keywords: [
      'understanding',
      'concept',
      'principle',
      'theory',
      'foundation',
      'why',
      'what is',
      'introduction',
      'overview',
      'core idea',
      'key finding',
      'philosophy',
      'paradigm',
      'mental model',
      'thinking',
      'perspective',
    ],
    titlePatterns: [
      /^(understanding|why|what is|introduction|overview)/i,
      /concept(s)?$/i,
      /principle(s)?$/i,
      /foundation(s)?$/i,
      /key (finding|idea)/i,
      /bottom line/i,
      /who should/i,
    ],
  },

  practical: {
    keywords: [
      'guide',
      'example',
      'step',
      'how to',
      'tutorial',
      'walkthrough',
      'use case',
      'scenario',
      'getting started',
      'quick start',
      'implementation guide',
      'hands-on',
      'practical',
      'workflow',
    ],
    titlePatterns: [
      /^(how to|getting started|quick start|guide)/i,
      /step[- ]by[- ]step/i,
      /example(s)?$/i,
      /use case(s)?$/i,
      /walkthrough/i,
      /workflow/i,
    ],
  },

  technical: {
    keywords: [
      'architecture',
      'implementation',
      'api',
      'code',
      'technical',
      'development',
      'engineering',
      'system',
      'design pattern',
      'algorithm',
      'data structure',
      'performance',
      'optimization',
    ],
    titlePatterns: [
      /^(architecture|technical|implementation|api|code)/i,
      /design$/i,
      /specification$/i,
      /^system/i,
      /performance/i,
      /optimization/i,
    ],
  },

  reference: {
    keywords: [
      'reference',
      'appendix',
      'glossary',
      'contact',
      'resources',
      'further reading',
      'bibliography',
      'links',
      'related work',
      'acknowledgment',
      'citation',
    ],
    titlePatterns: [
      /^(reference|appendix|glossary|contact|resources)/i,
      /further reading/i,
      /related (work|resources)/i,
      /^(acknowledgment|citation)/i,
    ],
  },
};
|
|
|
|
/**
 * Analyze section and determine best category
 *
 * Every category in RULES is scored from the section's title and excerpt:
 *   - a title pattern match adds 50 (strong signal)
 *   - each keyword found in the title adds 20 (medium signal)
 *   - each keyword found in the excerpt adds 5 (weak signal)
 *   - an `exclude` phrase in the title or excerpt pins "critical" at -100
 * Position and document-title adjustments are then applied, and the
 * highest-scoring category wins. Ties go to the category listed first in
 * the score table; a best score below 10 falls back to "conceptual".
 *
 * @param {Object} section - Section record; only `title` and `excerpt` are
 *   read, and missing values are treated as empty strings.
 * @param {string} [docTitle] - Title of the parent document; a missing
 *   title is treated as empty instead of throwing.
 * @param {number} sectionIndex - Zero-based position of the section.
 * @param {number} totalSections - Number of sections in the document.
 * @returns {string} One of "critical", "conceptual", "practical",
 *   "technical" or "reference".
 */
function categorizeSection(section, docTitle, sectionIndex, totalSections) {
  const title = (section.title || '').toLowerCase();
  const excerpt = (section.excerpt || '').toLowerCase();
  const combined = `${title} ${excerpt}`;

  const scores = {
    critical: 0,
    conceptual: 0,
    practical: 0,
    technical: 0,
    reference: 0
  };

  // Score each category based on rules
  for (const [category, rules] of Object.entries(RULES)) {
    // Exclusion phrases veto "critical" outright and skip the rest of its scoring.
    if (category === 'critical' && rules.exclude) {
      const hasExclude = rules.exclude.some(phrase =>
        combined.includes(phrase.toLowerCase())
      );
      if (hasExclude) {
        scores[category] = -100; // Strong penalty
        continue;
      }
    }

    // Title patterns (strong signal); several matching patterns still count once.
    if (rules.titlePatterns && rules.titlePatterns.some(pattern => pattern.test(title))) {
      scores[category] += 50;
    }

    // Keywords in title (medium signal)
    const titleKeywords = rules.keywords.filter(kw => title.includes(kw));
    scores[category] += titleKeywords.length * 20;

    // Keywords in excerpt (weak signal)
    const excerptKeywords = rules.keywords.filter(kw => excerpt.includes(kw));
    scores[category] += excerptKeywords.length * 5;
  }

  // Position-based adjustments
  if (sectionIndex === 0) {
    // An opening introduction/overview is boosted towards conceptual.
    if (title.includes('introduction') || title.includes('overview')) {
      scores.conceptual += 30;
    }
  } else if (sectionIndex === totalSections - 1) {
    // Last section often reference
    scores.reference += 10;
  }

  // Document context adjustments. Documents without a title simply get none.
  const docTitleLower = (docTitle || '').toLowerCase();
  if (docTitleLower.includes('case study') || docTitleLower.includes('incident')) {
    // Case studies are usually conceptual, not critical
    scores.conceptual += 20;
    scores.critical -= 30;
  }
  if (docTitleLower.includes('implementation') || docTitleLower.includes('guide')) {
    scores.practical += 15;
  }
  if (docTitleLower.includes('api') || docTitleLower.includes('technical')) {
    scores.technical += 15;
  }

  // Find category with highest score (stable sort keeps declaration order on ties)
  const [bestCategory, bestScore] = Object.entries(scores)
    .sort((a, b) => b[1] - a[1])[0];

  // If all scores are very low, default to conceptual
  if (bestScore < 10) {
    return 'conceptual';
  }

  return bestCategory;
}
|
|
|
|
/**
 * Main function
 *
 * Connects to MongoDB, loads the public documents (optionally just the one
 * selected with --doc=<slug>), recomputes the category of every section with
 * categorizeSection(), writes the changed categories back unless --dry-run
 * was given, and prints a summary of the before/after distribution.
 *
 * The connection is closed even when processing fails part-way through.
 *
 * @returns {Promise<void>} Resolves once the summary is printed and the
 *   connection is closed; rejects on connection or database errors.
 */
async function main() {
  console.log('═══════════════════════════════════════════════════════════');
  console.log(' SECTION RECATEGORIZATION');
  console.log('═══════════════════════════════════════════════════════════\n');

  if (dryRun) {
    console.log('🔍 DRY RUN MODE - No changes will be saved\n');
  }

  if (specificDoc) {
    console.log(`📄 Processing single document: ${specificDoc}\n`);
  }

  // Connect to MongoDB
  console.log('📡 Connecting to MongoDB...');
  const client = await MongoClient.connect('mongodb://localhost:27017/tractatus_dev');

  try {
    const collection = client.db().collection('documents');

    // Fetch documents
    const filter = { visibility: 'public' };
    if (specificDoc) {
      filter.slug = specificDoc;
    }

    const docs = await collection.find(filter).sort({ order: 1 }).toArray();
    console.log(`✓ Found ${docs.length} document(s)\n`);

    stats.totalDocuments = docs.length;

    // Counts a section under `category`, creating the counter pair first when the
    // stored category is not one of the predefined keys (avoids a TypeError).
    const bump = (category, field) => {
      if (!Object.prototype.hasOwnProperty.call(stats.byCategory, category)) {
        stats.byCategory[category] = { before: 0, after: 0 };
      }
      stats.byCategory[category][field]++;
    };

    // Process each document
    for (const doc of docs) {
      if (!doc.sections || doc.sections.length === 0) {
        console.log(`${doc.title}: No sections (skipping)\n`);
        continue;
      }

      console.log(`\n${'='.repeat(70)}`);
      console.log(`${doc.title}`);
      console.log(`${'='.repeat(70)}\n`);

      const updates = [];
      stats.totalSections += doc.sections.length;

      for (const [index, section] of doc.sections.entries()) {
        // Sections without a stored category are counted as conceptual.
        const oldCategory = section.category || 'conceptual';
        const newCategory = categorizeSection(section, doc.title, index, doc.sections.length);

        bump(oldCategory, 'before');
        bump(newCategory, 'after');

        if (oldCategory === newCategory) {
          stats.unchanged++;
          continue;
        }

        stats.changed++;
        console.log(`[${index + 1}/${doc.sections.length}] ${section.title}`);
        console.log(` ${oldCategory} → ${newCategory}`);

        updates.push({
          sectionIndex: index,
          oldCategory,
          newCategory,
          title: section.title
        });

        stats.changes.push({
          document: doc.title,
          section: section.title,
          from: oldCategory,
          to: newCategory
        });
      }

      // Apply updates if not dry run
      if (!dryRun && updates.length > 0) {
        // One updateOne per changed section, addressed by its array index.
        const updateOperations = updates.map(update => ({
          updateOne: {
            filter: { _id: doc._id },
            update: {
              $set: {
                [`sections.${update.sectionIndex}.category`]: update.newCategory
              }
            }
          }
        }));

        await collection.bulkWrite(updateOperations);
        console.log(`\n✓ Applied ${updates.length} changes to database`);
      } else if (updates.length > 0) {
        console.log(`\n🔍 Would apply ${updates.length} changes (dry-run)`);
      } else {
        console.log(`\n✓ No changes needed`);
      }
    }

    // Summary
    console.log('\n\n═══════════════════════════════════════════════════════════');
    console.log(' RECATEGORIZATION SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');

    // Share of all processed sections; 0 when nothing was processed (avoids NaN%).
    const percentOfSections = count =>
      (stats.totalSections > 0 ? Math.round((count / stats.totalSections) * 100) : 0);

    console.log(`Documents processed: ${stats.totalDocuments}`);
    console.log(`Total sections: ${stats.totalSections}`);
    console.log(`Changed: ${stats.changed} (${percentOfSections(stats.changed)}%)`);
    console.log(`Unchanged: ${stats.unchanged} (${percentOfSections(stats.unchanged)}%)\n`);

    console.log('Category changes:');
    for (const [category, counts] of Object.entries(stats.byCategory)) {
      const change = counts.after - counts.before;
      const changeStr = change > 0 ? `+${change}` : change.toString();
      // Relative change is reported as 0% when the category started empty.
      const changePercent = counts.before > 0
        ? Math.round((change / counts.before) * 100)
        : 0;

      console.log(` ${category}: ${counts.before} → ${counts.after} (${changeStr}, ${changePercent > 0 ? '+' : ''}${changePercent}%)`);
    }

    if (dryRun) {
      console.log('\n🔍 DRY RUN COMPLETE - No changes saved');
      console.log(' Run without --dry-run to apply changes\n');
    } else {
      console.log('\n✅ RECATEGORIZATION COMPLETE\n');
    }
  } finally {
    // Release the connection on success and on failure alike.
    await client.close();
  }
}
|
|
|
|
// Entry point: run the script; on any rejection print the error and exit non-zero.
main().catch(({ message, stack }) => {
  console.error('\n❌ Fatal error:', message);
  console.error(stack);
  process.exit(1);
});
|