feat: fix glossary structure with embedded translations and sections
- Created fix-glossary-structure.js to properly structure glossary
- Parses h2 headings into sections for card view (16 sections)
- Embeds German and French translations (not separate documents)
- Deletes duplicate/separate glossary documents
- Normalizes slug to lowercase 'glossary'
Translation structure matches existing documents (introduction, etc.):
- translations.de.{title, content_markdown, content_html, toc, metadata}
- translations.fr.{title, content_markdown, content_html, toc, metadata}
Fixes:
- Card view now works (sections present)
- Language toggle works (embedded translations)
- One glossary entry in docs list (not three)
- All three PDFs accessible
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
4a7df8f63c
commit
9691dcabc6
1 changed file with 208 additions and 0 deletions
208
scripts/fix-glossary-structure.js
Normal file
208
scripts/fix-glossary-structure.js
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
/**
|
||||
* Fix Glossary Structure
|
||||
* - Add embedded translations (not separate documents)
|
||||
* - Add sections for card view
|
||||
* - Clean up separate glossary-de/glossary-fr documents
|
||||
*/
|
||||
|
||||
require('dotenv').config();
|
||||
const { MongoClient } = require('mongodb');
|
||||
const fs = require('fs').promises;
|
||||
const path = require('path');
|
||||
const { markdownToHtml, extractTOC } = require('../src/utils/markdown.util');
|
||||
|
||||
// Parse frontmatter
|
||||
/**
 * Split a markdown document into its leading front matter and its body.
 *
 * The front matter is the block delimited by `---` lines at the very start
 * of the document. Each `key: value` line inside it becomes one string entry
 * in the returned metadata object; this is a flat line-based parser, not a
 * full YAML parser.
 *
 * @param {string} content - Raw file content.
 * @returns {{metadata: Object<string, string>, content: string}} The parsed
 *   metadata and the text following the closing `---` line. When no front
 *   matter block is found, metadata is empty and content is returned as-is.
 */
function extractFrontmatter(content) {
  // Accept an optional UTF-8 BOM and both LF and CRLF line endings; without
  // this, files saved by Windows editors silently lose all their metadata.
  const frontMatterRegex = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/;
  const match = content.match(frontMatterRegex);

  if (!match) return { metadata: {}, content };

  const [, frontMatterText, remainingContent] = match;

  const metadata = {};
  for (const line of frontMatterText.split(/\r?\n/)) {
    // Split on the FIRST colon only so values such as URLs keep their colons.
    const separatorIndex = line.indexOf(':');
    if (separatorIndex === -1) continue;

    const key = line.slice(0, separatorIndex).trim();
    if (!key) continue;

    const value = line.slice(separatorIndex + 1).trim();
    // Strip quotes only when the whole value is wrapped in a matching pair,
    // so a value that merely ends (or starts) with a quote is left intact.
    metadata[key] = value.replace(/^(["'])(.*)\1$/, '$2');
  }

  return { metadata, content: remainingContent };
}
|
||||
|
||||
// Build sections from h2 headings
|
||||
/**
 * Split markdown into sections, one per level-2 (`## `) heading.
 *
 * Text before the first `## ` heading is not part of any section and is
 * dropped. Deeper headings (`###` and below) stay inside the body of the
 * enclosing section.
 *
 * @param {string} content - Markdown body (front matter already removed).
 * @param {string} [htmlContent] - Unused; kept so existing callers that pass
 *   the rendered HTML keep working.
 * @returns {Array<{title: string, slug: string, order: number,
 *   content_markdown: string, content_html: *}>} Sections in document order,
 *   with `order` starting at 1. `content_html` is whatever `markdownToHtml`
 *   returns for the section body.
 */
function buildSections(content, htmlContent) {
  const sections = [];
  let currentSection = null;
  let currentContent = [];

  // Attach the collected body to the section in progress and store it.
  const flushSection = () => {
    if (!currentSection) return;
    const sectionMarkdown = currentContent.join('\n');
    currentSection.content_markdown = sectionMarkdown;
    currentSection.content_html = markdownToHtml(sectionMarkdown);
    sections.push(currentSection);
  };

  // Split on LF or CRLF: a trailing '\r' would otherwise stop the heading
  // pattern from matching ('.' never matches '\r'), yielding zero sections.
  for (const line of content.split(/\r?\n/)) {
    // Capture the heading text without surrounding whitespace so it cannot
    // leak into the title or create leading/trailing hyphens in the slug.
    const h2Match = line.match(/^##\s+(.*\S)\s*$/);

    if (h2Match) {
      flushSection();

      const title = h2Match[1];
      currentSection = {
        title,
        slug: title.toLowerCase()
          .replace(/[^\w\s-]/g, '')
          .replace(/\s+/g, '-'),
        // The previous section has already been pushed, so this is 1-based.
        order: sections.length + 1
      };
      currentContent = [];
    } else if (currentSection) {
      currentContent.push(line);
    }
  }

  // The last section has no following heading to trigger its flush.
  flushSection();

  return sections;
}
|
||||
|
||||
/**
 * Rebuild the glossary document in the `documents` collection.
 *
 * Steps:
 *  1. Read GLOSSARY.md, GLOSSARY-DE.md and GLOSSARY-FR.md from docs/markdown.
 *  2. Build sections from the English text and embedded `de`/`fr`
 *     translation objects from the other two files.
 *  3. Update the main glossary document (slug normalised to 'glossary').
 *  4. Delete the separate translation documents and older duplicates.
 *
 * Connection settings come from MONGODB_URI / MONGODB_DB, with local
 * development defaults. On failure the process exit code is set to 1 and
 * the database client is still closed before the process ends.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const mongoUri = process.env.MONGODB_URI || 'mongodb://localhost:27017';
  const dbName = process.env.MONGODB_DB || 'tractatus_dev';
  const client = new MongoClient(mongoUri);

  // Build the embedded translation payload for one parsed language file.
  const buildTranslation = (parsed) => ({
    title: parsed.metadata.title,
    content_markdown: parsed.content,
    content_html: markdownToHtml(parsed.content),
    toc: extractTOC(parsed.content),
    metadata: {
      translated_by: 'deepl',
      translated_at: new Date(),
      reviewed: false,
      source_version: '1.1'
    }
  });

  try {
    await client.connect();
    const db = client.db(dbName);
    const collection = db.collection('documents');

    console.log('═══════════════════════════════════════════════════════════');
    console.log(' FIXING GLOSSARY STRUCTURE');
    console.log('═══════════════════════════════════════════════════════════\n');

    // Read markdown files (independent reads, so run them concurrently)
    const baseDir = path.join(__dirname, '../docs/markdown');
    const [glossaryEN, glossaryDE, glossaryFR] = await Promise.all(
      ['GLOSSARY.md', 'GLOSSARY-DE.md', 'GLOSSARY-FR.md'].map(
        (fileName) => fs.readFile(path.join(baseDir, fileName), 'utf8')
      )
    );

    // Parse each file
    const en = extractFrontmatter(glossaryEN);
    const de = extractFrontmatter(glossaryDE);
    const fr = extractFrontmatter(glossaryFR);

    console.log('✓ Parsed markdown files\n');

    // Build English sections; the rendered HTML is reused for the update below
    const englishHtml = markdownToHtml(en.content);
    const sections = buildSections(en.content, englishHtml);
    console.log(`✓ Built ${sections.length} sections for English\n`);

    // Build translation objects
    const translations = {
      de: buildTranslation(de),
      fr: buildTranslation(fr)
    };

    console.log('✓ Built translation objects\n');

    // Find main glossary document (could be 'glossary', 'GLOSSARY', or long slug).
    // Try the candidates in priority order: a single $or query may return a
    // duplicate while a canonical 'glossary' document also exists, which would
    // leave two documents with slug 'glossary' after the update.
    const candidateFilters = [
      { slug: 'glossary' },
      { slug: 'GLOSSARY' },
      { slug: /^tractatus-agentic-governance-system-glossary-of-terms$/ }
    ];
    let existingDoc = null;
    for (const filter of candidateFilters) {
      existingDoc = await collection.findOne(filter);
      if (existingDoc) break;
    }

    if (!existingDoc) {
      console.error('✗ Could not find main glossary document');
      // Set the exit code instead of calling process.exit() so the finally
      // block still closes the client and pending output is flushed.
      process.exitCode = 1;
      return;
    }

    console.log(`✓ Found glossary document: ${existingDoc.slug}\n`);

    // Update main glossary document
    const result = await collection.updateOne(
      { _id: existingDoc._id },
      {
        $set: {
          slug: 'glossary', // Normalize to lowercase
          sections: sections,
          translations: translations,
          content_html: englishHtml,
          content_markdown: en.content,
          toc: extractTOC(en.content),
          updated_at: new Date()
        }
      }
    );

    console.log(`✓ Updated glossary document (${result.modifiedCount} modified)\n`);

    // Delete separate translation documents and old duplicates
    const deleteResult = await collection.deleteMany({
      $and: [
        { _id: { $ne: existingDoc._id } }, // Don't delete the main one
        {
          $or: [
            { slug: 'glossary-de' },
            { slug: 'glossary-fr' },
            { slug: 'GLOSSARY' },
            // No '$' anchor: also matches slugs that merely start with this prefix
            { slug: /^tractatus-agentic-governance-system-glossary-of-terms/ }
          ]
        }
      ]
    });

    console.log(`✓ Deleted ${deleteResult.deletedCount} duplicate/separate translation documents\n`);

    console.log('═══════════════════════════════════════════════════════════');
    console.log(' SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`Sections created: ${sections.length}`);
    console.log(`Translations embedded: 2 (de, fr)`);
    console.log(`Separate docs deleted: ${deleteResult.deletedCount}`);
    console.log('\n✅ Glossary structure fixed!\n');

  } catch (error) {
    console.error('Error:', error);
    // Report failure through the exit code; the finally block still runs.
    process.exitCode = 1;
  } finally {
    await client.close();
  }
}
|
||||
|
||||
// Script entry point: invoke run() as soon as this module is loaded.
// The returned promise is not awaited here; run() catches its own errors.
run();
|
||||
Loading…
Add table
Reference in a new issue