/**
 * Fix Glossary Structure
 * - Add embedded translations (not separate documents)
 * - Add sections for card view
 * - Clean up separate glossary-de/glossary-fr documents
 *
 * Provenance: scripts/fix-glossary-structure.js, added by commit
 * 2b1820632824b20d5c28a5a3e060b912812b076e
 * (TheFlow, Sat, 1 Nov 2025 11:37:02 +1300):
 * "feat: fix glossary structure with embedded translations and sections".
 *
 * Original commit notes:
 * - Created fix-glossary-structure.js to properly structure glossary
 * - Parses h2 headings into sections for card view (16 sections)
 * - Embeds German and French translations (not separate documents)
 * - Deletes duplicate/separate glossary documents
 * - Normalizes slug to lowercase 'glossary'
 *
 * Translation structure matches existing documents (introduction, etc):
 * - translations.de.{title, content_markdown, content_html, toc, metadata}
 * - translations.fr.{title, content_markdown, content_html, toc, metadata}
 *
 * Fixes claimed by the commit:
 * - Card view now works (sections present)
 * - Language toggle works (embedded translations)
 * - One glossary entry in docs list (not three)
 * - All three PDFs accessible
 *
 * Generated with Claude Code (https://claude.com/claude-code).
 * Co-Authored-By: Claude
 */

require('dotenv').config();
const { MongoClient } = require('mongodb');
const fs = require('fs').promises;
const path = require('path');
const { markdownToHtml, extractTOC } = require('../src/utils/markdown.util');

// Horizontal rule used to frame the console banners.
const RULE = '═══════════════════════════════════════════════════════════';

/**
 * Split a markdown document into its leading `---` frontmatter and its body.
 *
 * Only flat `key: value` lines are understood; one leading and one trailing
 * quote character are stripped from each value. Lines without a colon are
 * ignored.
 *
 * @param {string} content - Raw file contents.
 * @returns {{ metadata: Object<string, string>, content: string }} Parsed
 *   keys plus the text after the closing `---`. When no frontmatter is
 *   present, `metadata` is empty and `content` is the input text.
 */
function extractFrontmatter(content) {
  // A UTF-8 BOM would stop `^---` from matching; fs.readFile does not strip it.
  const text = content.replace(/^\uFEFF/, '');

  // Accept both LF and CRLF so Windows checkouts parse the same way.
  const frontMatterRegex = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/;
  const match = text.match(frontMatterRegex);

  if (!match) return { metadata: {}, content: text };

  const [, frontMatterText, remainingContent] = match;

  const metadata = {};
  for (const line of frontMatterText.split(/\r?\n/)) {
    // Split on the first colon only: values (e.g. URLs) may contain colons.
    const [key, ...valueParts] = line.split(':');
    if (key && valueParts.length > 0) {
      const value = valueParts.join(':').trim();
      metadata[key.trim()] = value.replace(/^["']|["']$/g, '');
    }
  }

  return { metadata, content: remainingContent };
}

/**
 * Build one section per `## ` (h2) heading.
 *
 * Text before the first h2 is not part of any section. Each section's body
 * is every line up to the next h2, rendered with `markdownToHtml`.
 *
 * @param {string} content - Markdown body (frontmatter already removed).
 * @param {string} [htmlContent] - Unused; kept so existing callers that pass
 *   the rendered document keep working.
 * @returns {Array<{title: string, slug: string, order: number,
 *   content_markdown: string, content_html: string}>} Sections in document
 *   order; `order` is 1-based.
 */
function buildSections(content, htmlContent) {
  const sections = [];
  let currentSection = null;
  let currentContent = [];

  // Finish the section being accumulated, if any, and append it.
  const flush = () => {
    if (!currentSection) return;
    const sectionMarkdown = currentContent.join('\n');
    sections.push({
      ...currentSection,
      content_markdown: sectionMarkdown,
      content_html: markdownToHtml(sectionMarkdown),
    });
  };

  // Split on CRLF as well as LF: `.` never matches `\r`, so a trailing `\r`
  // would make the heading regex below fail on every line of a CRLF file.
  for (const line of content.split(/\r?\n/)) {
    const h2Match = line.match(/^## (.+)$/);

    if (h2Match) {
      flush();

      const title = h2Match[1].trim();
      currentSection = {
        title,
        slug: title.toLowerCase()
          .replace(/[^\w\s-]/g, '')
          .replace(/\s+/g, '-'),
        order: sections.length + 1,
      };
      currentContent = [];
    } else if (currentSection) {
      currentContent.push(line);
    }
  }

  flush();

  return sections;
}

/**
 * Build the embedded translation object for one language.
 *
 * @param {string} lang - Language code, used only in the warning message.
 * @param {{ metadata: Object<string, string>, content: string }} parsed -
 *   Result of `extractFrontmatter` for the translated file.
 * @returns {Object} `{ title, content_markdown, content_html, toc, metadata }`.
 */
function buildTranslation(lang, parsed) {
  if (!parsed.metadata.title) {
    // Not fatal, but the stored translation will have no title.
    console.warn(`! No frontmatter title found for '${lang}' translation`);
  }

  return {
    title: parsed.metadata.title,
    content_markdown: parsed.content,
    content_html: markdownToHtml(parsed.content),
    toc: extractTOC(parsed.content),
    metadata: {
      translated_by: 'deepl',
      translated_at: new Date(),
      reviewed: false,
      source_version: '1.1',
    },
  };
}

/**
 * Locate the document to keep as the main glossary.
 *
 * Candidates are tried in priority order so that, when several exist, the
 * already-canonical `glossary` document is kept and the others are treated
 * as duplicates.
 *
 * @param {object} collection - The `documents` collection.
 * @returns {Promise<object|null>} The matching document, or null.
 */
async function findMainGlossary(collection) {
  const candidates = [
    { slug: 'glossary' },
    { slug: 'GLOSSARY' },
    { slug: /^tractatus-agentic-governance-system-glossary-of-terms$/ },
  ];

  // Sequential on purpose: the first hit wins.
  for (const filter of candidates) {
    const doc = await collection.findOne(filter);
    if (doc) return doc;
  }

  return null;
}

/**
 * Entry point: rebuild the glossary document from the markdown sources,
 * embed the de/fr translations and remove the separate/duplicate documents.
 *
 * Reads MONGODB_URI and MONGODB_DB from the environment (with local
 * defaults). On failure the process exit code is set to 1; `process.exit()`
 * is deliberately not called so the `finally` block can close the client.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const mongoUri = process.env.MONGODB_URI || 'mongodb://localhost:27017';
  const dbName = process.env.MONGODB_DB || 'tractatus_dev';
  const client = new MongoClient(mongoUri);

  try {
    await client.connect();
    const collection = client.db(dbName).collection('documents');

    console.log(RULE);
    console.log(' FIXING GLOSSARY STRUCTURE');
    console.log(`${RULE}\n`);

    // Read markdown files (independent reads, so do them in parallel)
    const baseDir = path.join(__dirname, '../docs/markdown');
    const [glossaryEN, glossaryDE, glossaryFR] = await Promise.all(
      ['GLOSSARY.md', 'GLOSSARY-DE.md', 'GLOSSARY-FR.md'].map((fileName) =>
        fs.readFile(path.join(baseDir, fileName), 'utf8')),
    );

    // Parse each file
    const en = extractFrontmatter(glossaryEN);
    const de = extractFrontmatter(glossaryDE);
    const fr = extractFrontmatter(glossaryFR);

    console.log('✓ Parsed markdown files\n');

    // Render the English document once and reuse it below.
    const enHtml = markdownToHtml(en.content);

    // Build English sections
    const sections = buildSections(en.content, enHtml);
    console.log(`✓ Built ${sections.length} sections for English\n`);

    // Build translation objects
    const translations = {
      de: buildTranslation('de', de),
      fr: buildTranslation('fr', fr),
    };

    console.log('✓ Built translation objects\n');

    // Find main glossary document (could be 'glossary', 'GLOSSARY', or long slug)
    const existingDoc = await findMainGlossary(collection);

    if (!existingDoc) {
      console.error('✗ Could not find main glossary document');
      process.exitCode = 1;
      return;
    }

    console.log(`✓ Found glossary document: ${existingDoc.slug}\n`);

    // Update main glossary document
    const result = await collection.updateOne(
      { _id: existingDoc._id },
      {
        $set: {
          slug: 'glossary', // Normalize to lowercase
          sections,
          translations,
          content_html: enHtml,
          content_markdown: en.content,
          toc: extractTOC(en.content),
          updated_at: new Date(),
        },
      },
    );

    console.log(`✓ Updated glossary document (${result.modifiedCount} modified)\n`);

    // Delete separate translation documents and old duplicates
    const deleteResult = await collection.deleteMany({
      $and: [
        { _id: { $ne: existingDoc._id } }, // Don't delete the main one
        {
          $or: [
            { slug: 'glossary-de' },
            { slug: 'glossary-fr' },
            { slug: 'GLOSSARY' },
            { slug: /^tractatus-agentic-governance-system-glossary-of-terms/ },
          ],
        },
      ],
    });

    console.log(`✓ Deleted ${deleteResult.deletedCount} duplicate/separate translation documents\n`);

    console.log(RULE);
    console.log(' SUMMARY');
    console.log(`${RULE}\n`);
    console.log(`Sections created: ${sections.length}`);
    console.log(`Translations embedded: 2 (de, fr)`);
    console.log(`Separate docs deleted: ${deleteResult.deletedCount}`);
    console.log('\n✅ Glossary structure fixed!\n');
  } catch (error) {
    console.error('Error:', error);
    // Set the exit code instead of calling process.exit(1), which would end
    // the process before `finally` has closed the client.
    process.exitCode = 1;
  } finally {
    await client.close();
  }
}

// A failure inside `finally` (client.close) would otherwise be unhandled.
run().catch((error) => {
  console.error('Error:', error);
  process.exitCode = 1;
});