feat: fix glossary structure with embedded translations and sections

- Created fix-glossary-structure.js to properly structure glossary
- Parses h2 headings into sections for card view (16 sections)
- Embeds German and French translations (not separate documents)
- Deletes duplicate/separate glossary documents
- Normalizes slug to lowercase 'glossary'

Translation structure matches existing documents (introduction, etc):
- translations.de.{title, content_markdown, content_html, toc, metadata}
- translations.fr.{title, content_markdown, content_html, toc, metadata}

Fixes:
- Card view now works (sections present)
- Language toggle works (embedded translations)
- One glossary entry in docs list (not three)
- All three PDFs accessible

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-01 11:37:02 +13:00 · 2025-11-01 11:37:02 +13:00 · 9691dcabc6
commit 9691dcabc6
parent 4a7df8f63c
1 changed files with 208 additions and 0 deletions
--- a/scripts/fix-glossary-structure.js
+++ b/scripts/fix-glossary-structure.js
@@ -0,0 +1,208 @@
+/**
+ * Fix Glossary Structure
+ * - Add embedded translations (not separate documents)
+ * - Add sections for card view
+ * - Clean up separate glossary-de/glossary-fr documents
+ */
+
+require('dotenv').config();
+const { MongoClient } = require('mongodb');
+const fs = require('fs').promises;
+const path = require('path');
+const { markdownToHtml, extractTOC } = require('../src/utils/markdown.util');
+
+/**
+ * Split a markdown document into its front matter metadata and its body.
+ *
+ * Front matter is a block delimited by `---` lines at the very start of the
+ * document. Each `key: value` line inside it becomes a string entry of the
+ * returned metadata object. This is a flat line-by-line parser, not a YAML
+ * parser: nested structures and multi-line values are not interpreted.
+ *
+ * @param {string} content - Raw markdown text.
+ * @returns {{metadata: Object<string, string>, content: string}} Parsed
+ *   metadata and the text after the closing `---`. When no front matter is
+ *   found, metadata is empty and content is the input unchanged.
+ */
+function extractFrontmatter(content) {
+  // Accept an optional UTF-8 BOM and both LF and CRLF line endings, so files
+  // saved on Windows are not silently treated as having no front matter.
+  // The body is optional so a file that ends right after the closing `---`
+  // still parses.
+  const frontMatterRegex = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/;
+  const match = content.match(frontMatterRegex);
+
+  if (!match) return { metadata: {}, content };
+
+  const frontMatterText = match[1];
+  const remainingContent = match[2] || '';
+
+  const metadata = {};
+  frontMatterText.split(/\r?\n/).forEach(line => {
+    // Split on the first colon only, so values such as URLs keep theirs.
+    const separatorIndex = line.indexOf(':');
+    if (separatorIndex === -1) return;
+
+    const key = line.slice(0, separatorIndex).trim();
+    if (!key) return;
+
+    const value = line.slice(separatorIndex + 1).trim();
+    // Strip quotes only when the whole value is wrapped in a matching pair;
+    // a value that merely ends with a quoted word must stay intact.
+    const quoted = value.match(/^(["'])([\s\S]*)\1$/);
+    metadata[key] = quoted ? quoted[2] : value;
+  });
+
+  return { metadata, content: remainingContent };
+}
+
+/**
+ * Split markdown into sections, one per level-2 (`## `) heading.
+ *
+ * Text before the first level-2 heading is not part of any section and is
+ * dropped. Each section's body is every line up to the next level-2 heading
+ * and is rendered to HTML on its own via markdownToHtml.
+ *
+ * @param {string} content - Markdown body (front matter already removed).
+ * @param {string} [htmlContent] - Unused; kept so existing callers that pass
+ *   the rendered document keep working.
+ * @returns {Array<{title: string, slug: string, order: number,
+ *   content_markdown: string, content_html: string}>} Sections in document
+ *   order; `order` is 1-based.
+ */
+function buildSections(content, htmlContent) {
+  const sections = [];
+  let currentSection = null;
+  let currentContent = [];
+  // Marker of the fenced code block we are currently inside, or null.
+  let openFence = null;
+
+  // Finalise the section being collected (if any) and append it.
+  const saveCurrentSection = () => {
+    if (!currentSection) return;
+    const sectionMarkdown = currentContent.join('\n');
+    currentSection.content_markdown = sectionMarkdown;
+    currentSection.content_html = markdownToHtml(sectionMarkdown);
+    sections.push(currentSection);
+  };
+
+  // Split on CRLF as well as LF: a trailing '\r' would otherwise stop the
+  // heading pattern from matching and no sections would be found.
+  for (const line of content.split(/\r?\n/)) {
+    // Track fenced code blocks so a '## ' line inside sample code is not
+    // mistaken for a section heading.
+    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
+    if (fenceMatch) {
+      const marker = fenceMatch[1];
+      if (openFence === null) {
+        openFence = marker;
+      } else if (
+        marker[0] === openFence[0] &&
+        marker.length >= openFence.length &&
+        fenceMatch[2].trim() === ''
+      ) {
+        openFence = null;
+      }
+    }
+
+    // Capture the heading text without surrounding whitespace so the slug
+    // does not pick up leading/trailing hyphens.
+    const h2Match = openFence === null
+      ? line.match(/^##[ \t]+(.*\S)[ \t]*$/)
+      : null;
+
+    if (h2Match) {
+      saveCurrentSection();
+
+      const title = h2Match[1];
+      currentSection = {
+        title,
+        slug: title.toLowerCase()
+          .replace(/[^\w\s-]/g, '')
+          .replace(/\s+/g, '-'),
+        order: sections.length + 1
+      };
+      currentContent = [];
+    } else if (currentSection) {
+      currentContent.push(line);
+    }
+  }
+
+  saveCurrentSection();
+
+  return sections;
+}
+
+/**
+ * Build the embedded translation sub-document for one language.
+ *
+ * @param {{metadata: Object, content: string}} parsed - Result of
+ *   extractFrontmatter for the translated markdown file.
+ * @param {Date} translatedAt - Timestamp stored as `metadata.translated_at`.
+ * @returns {Object} Object with title, content_markdown, content_html, toc
+ *   and metadata fields.
+ */
+function buildTranslation(parsed, translatedAt) {
+  return {
+    title: parsed.metadata.title,
+    content_markdown: parsed.content,
+    content_html: markdownToHtml(parsed.content),
+    toc: extractTOC(parsed.content),
+    metadata: {
+      translated_by: 'deepl',
+      translated_at: translatedAt,
+      reviewed: false,
+      source_version: '1.1'
+    }
+  };
+}
+
+/**
+ * Locate the main glossary document, trying the known slugs in priority
+ * order. The canonical 'glossary' slug is tried first so that, when several
+ * variants exist, the document that already has the target slug is kept and
+ * the others are left for the cleanup step.
+ *
+ * @param {Object} collection - MongoDB collection to search.
+ * @returns {Promise<Object|null>} The first matching document, or null.
+ */
+async function findMainGlossary(collection) {
+  const candidates = [
+    { slug: 'glossary' },
+    { slug: 'GLOSSARY' },
+    { slug: /^tractatus-agentic-governance-system-glossary-of-terms$/ }
+  ];
+
+  for (const filter of candidates) {
+    const doc = await collection.findOne(filter);
+    if (doc) return doc;
+  }
+
+  return null;
+}
+
+/**
+ * Rebuild the glossary document: read the three glossary markdown files,
+ * store English sections and embedded German/French translations on the main
+ * document, normalise its slug to 'glossary', then delete the separate
+ * translation and duplicate documents.
+ *
+ * Failures are logged and reported through a non-zero process exit code.
+ *
+ * @returns {Promise<void>}
+ */
+async function run() {
+  const mongoUri = process.env.MONGODB_URI || 'mongodb://localhost:27017';
+  const dbName = process.env.MONGODB_DB || 'tractatus_dev';
+  const client = new MongoClient(mongoUri);
+
+  try {
+    await client.connect();
+    const db = client.db(dbName);
+    const collection = db.collection('documents');
+
+    console.log('═══════════════════════════════════════════════════════════');
+    console.log('  FIXING GLOSSARY STRUCTURE');
+    console.log('═══════════════════════════════════════════════════════════\n');
+
+    // Read markdown files (independent reads, so done in parallel)
+    const baseDir = path.join(__dirname, '../docs/markdown');
+    const [glossaryEN, glossaryDE, glossaryFR] = await Promise.all(
+      ['GLOSSARY.md', 'GLOSSARY-DE.md', 'GLOSSARY-FR.md'].map(fileName =>
+        fs.readFile(path.join(baseDir, fileName), 'utf8')
+      )
+    );
+
+    // Parse each file
+    const en = extractFrontmatter(glossaryEN);
+    const de = extractFrontmatter(glossaryDE);
+    const fr = extractFrontmatter(glossaryFR);
+
+    console.log('✓ Parsed markdown files\n');
+
+    // Render the English HTML once; it is reused for the document update.
+    const enHtml = markdownToHtml(en.content);
+
+    // Build English sections
+    const sections = buildSections(en.content, enHtml);
+    console.log(`✓ Built ${sections.length} sections for English\n`);
+
+    // Build translation objects
+    const now = new Date();
+    const translations = {
+      de: buildTranslation(de, now),
+      fr: buildTranslation(fr, now)
+    };
+
+    console.log('✓ Built translation objects\n');
+
+    // Find main glossary document (could be 'glossary', 'GLOSSARY', or long slug)
+    const existingDoc = await findMainGlossary(collection);
+
+    if (!existingDoc) {
+      console.error('✗ Could not find main glossary document');
+      // Set the exit code instead of calling process.exit(): exiting here
+      // would skip the finally block and leave the client unclosed.
+      process.exitCode = 1;
+      return;
+    }
+
+    console.log(`✓ Found glossary document: ${existingDoc.slug}\n`);
+
+    // Update main glossary document
+    const result = await collection.updateOne(
+      { _id: existingDoc._id },
+      {
+        $set: {
+          slug: 'glossary', // Normalize to lowercase
+          sections: sections,
+          translations: translations,
+          content_html: enHtml,
+          content_markdown: en.content,
+          toc: extractTOC(en.content),
+          updated_at: now
+        }
+      }
+    );
+
+    console.log(`✓ Updated glossary document (${result.modifiedCount} modified)\n`);
+
+    // Delete separate translation documents and old duplicates
+    const deleteResult = await collection.deleteMany({
+      $and: [
+        { _id: { $ne: existingDoc._id } }, // Don't delete the main one
+        {
+          $or: [
+            { slug: 'glossary-de' },
+            { slug: 'glossary-fr' },
+            { slug: 'GLOSSARY' },
+            { slug: /^tractatus-agentic-governance-system-glossary-of-terms/ }
+          ]
+        }
+      ]
+    });
+
+    console.log(`✓ Deleted ${deleteResult.deletedCount} duplicate/separate translation documents\n`);
+
+    console.log('═══════════════════════════════════════════════════════════');
+    console.log('  SUMMARY');
+    console.log('═══════════════════════════════════════════════════════════\n');
+    console.log(`Sections created: ${sections.length}`);
+    console.log(`Translations embedded: 2 (de, fr)`);
+    console.log(`Separate docs deleted: ${deleteResult.deletedCount}`);
+    console.log('\n✅ Glossary structure fixed!\n');
+
+  } catch (error) {
+    console.error('Error:', error);
+    // As above: let the process end naturally so the finally block runs.
+    process.exitCode = 1;
+  } finally {
+    await client.close();
+  }
+}
+
+run();