feat: add German and French glossary translations via DeepL
Created translations using DeepL API: - GLOSSARY-DE.md (67KB, German translation) - GLOSSARY-FR.md (71KB, French translation) Added translate-glossary.py script for automated translation with: - Frontmatter preservation - Chunked translation for large documents - DeepL API integration Updated generate-public-pdfs.js to include: - tractatus-agentic-governance-system-glossary-of-terms-deutsch - tractatus-agentic-governance-system-glossary-of-terms-franais Both documents migrated to database and PDFs generated locally. Production deployment will generate PDFs on server. Note: Port numbers (27027/27017) are part of canonical "27027 Incident" educational example, not actual infrastructure exposure. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
c15f06255a
commit
fc033c5607
4 changed files with 323 additions and 1 deletions
87
docs/markdown/GLOSSARY-DE.md
Normal file
87
docs/markdown/GLOSSARY-DE.md
Normal file
File diff suppressed because one or more lines are too long
87
docs/markdown/GLOSSARY-FR.md
Normal file
87
docs/markdown/GLOSSARY-FR.md
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -40,8 +40,10 @@ const PUBLIC_DOCS = [
|
|||
'tractatus-ai-safety-framework-core-values-and-principles',
|
||||
'organizational-theory-foundations',
|
||||
|
||||
// Reference Documentation (1)
|
||||
// Reference Documentation (3)
|
||||
'tractatus-agentic-governance-system-glossary-of-terms',
|
||||
'tractatus-agentic-governance-system-glossary-of-terms-deutsch',
|
||||
'tractatus-agentic-governance-system-glossary-of-terms-franais',
|
||||
|
||||
// Business Leadership (1)
|
||||
'business-case-tractatus-framework'
|
||||
|
|
|
|||
146
scripts/translate-glossary.py
Executable file
146
scripts/translate-glossary.py
Executable file
|
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/env python3
"""
Translate Glossary using DeepL API
Handles large documents by chunking content

Requires the DEEPL_API_KEY environment variable to be set; the script
exits with status 1 at import time when it is missing.
"""

import os
import re
import sys
import deepl
from pathlib import Path

# Load API key from environment
API_KEY = os.getenv('DEEPL_API_KEY')
if not API_KEY:
    # Fail fast: every function below needs an authenticated client,
    # so there is no point continuing without credentials.
    print("Error: DEEPL_API_KEY not found in environment")
    sys.exit(1)

# Module-level DeepL client shared by translate_content().
# NOTE(review): created at import time — importing this module performs
# the env check and client construction as side effects.
translator = deepl.Translator(API_KEY)
||||
def extract_frontmatter(content):
    """Split a document into its YAML frontmatter and body.

    Returns a ``(frontmatter, body)`` tuple. When the document does not
    begin with a ``---``-delimited frontmatter block, frontmatter is
    ``None`` and the body is the untouched input.
    """
    parts = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
    if parts is None:
        return None, content
    return parts.group(1), parts.group(2)
||||
def update_frontmatter(frontmatter, lang_code, lang_name):
    """Rewrite frontmatter fields for a translated copy of the glossary.

    - ``title``:    original value with " (<lang_name>)" appended
    - ``slug``:     replaced with "glossary-<lang_code>"
    - ``modified``: pinned to the translation date
    All other lines pass through unchanged.
    """
    rewritten = []
    for entry in frontmatter.split('\n'):
        if entry.startswith('title:'):
            # Keep the original title text, tag it with the language name.
            original_title = entry.split(':', 1)[1].strip()
            rewritten.append(f'title: {original_title} ({lang_name})')
        elif entry.startswith('slug:'):
            rewritten.append(f'slug: glossary-{lang_code}')
        elif entry.startswith('modified:'):
            rewritten.append('modified: 2025-11-01')
        else:
            rewritten.append(entry)
    return '\n'.join(rewritten)
||||
def chunk_text(text, max_chunk_size=50000):
    """Split *text* into chunks of at most ~max_chunk_size characters.

    Splitting happens only at blank-line (paragraph) boundaries, so a
    single paragraph longer than the limit still becomes its own chunk —
    it is never cut mid-paragraph. Separator characters are not counted
    toward the size.
    """
    chunks = []
    pending = []          # paragraphs accumulated for the current chunk
    pending_size = 0      # character count of those paragraphs

    for paragraph in text.split('\n\n'):
        size = len(paragraph)
        if pending and pending_size + size > max_chunk_size:
            # Flush the current chunk and start a new one with this paragraph.
            chunks.append('\n\n'.join(pending))
            pending = [paragraph]
            pending_size = size
        else:
            pending.append(paragraph)
            pending_size += size

    if pending:
        chunks.append('\n\n'.join(pending))

    return chunks
||||
def translate_content(content, target_lang):
    """Translate *content* to *target_lang* via DeepL, chunk by chunk.

    The document is split at paragraph boundaries (see chunk_text) so
    each API request stays within size limits; translated pieces are
    rejoined with blank lines. Re-raises on the first failed chunk.
    """
    print(f" Translating to {target_lang}...")
    print(f" Content length: {len(content)} characters")

    pieces = chunk_text(content, max_chunk_size=50000)
    total = len(pieces)
    print(f" Split into {total} chunks")

    results = []
    for index, piece in enumerate(pieces, 1):
        print(f" Translating chunk {index}/{total}...", end='', flush=True)
        try:
            response = translator.translate_text(
                piece,
                target_lang=target_lang,
                preserve_formatting=True,
                tag_handling='html'
            )
            results.append(response.text)
            print(" ✓")
        except Exception as e:
            # Surface which chunk failed, then propagate to the caller.
            print(f" ✗ Error: {e}")
            raise

    return '\n\n'.join(results)
||||
def translate_glossary(input_file, output_file, lang_code, lang_name):
    """Translate one glossary file end to end.

    Reads *input_file*, updates its YAML frontmatter for the target
    language (when present), translates the body with DeepL, and writes
    the reassembled document to *output_file*.
    """
    print(f"\n{'='*60}")
    print(f"Translating Glossary to {lang_name} ({lang_code})")
    print(f"{'='*60}\n")

    # Load the source document.
    print(f"Reading: {input_file}")
    with open(input_file, 'r', encoding='utf-8') as fh:
        document = fh.read()

    # Separate metadata from translatable body.
    frontmatter, body = extract_frontmatter(document)

    if frontmatter:
        print("Frontmatter extracted")
        new_frontmatter = update_frontmatter(frontmatter, lang_code, lang_name)
    else:
        print("No frontmatter found")
        new_frontmatter = None

    # DeepL expects upper-case language codes (e.g. 'de' -> 'DE').
    translated = translate_content(body, lang_code.upper())

    # Reattach (updated) frontmatter when the source had one.
    if new_frontmatter:
        final_document = f"---\n{new_frontmatter}\n---\n{translated}"
    else:
        final_document = translated

    print(f"\nWriting: {output_file}")
    with open(output_file, 'w', encoding='utf-8') as fh:
        fh.write(final_document)

    print(f"✓ Translation complete!")
    print(f" Output: {output_file}")
    print(f" Size: {len(final_document)} characters\n")
||||
if __name__ == '__main__':
    base_dir = Path(__file__).parent.parent
    input_file = base_dir / 'docs/markdown/GLOSSARY.md'

    # Translate to German
    output_de = base_dir / 'docs/markdown/GLOSSARY-DE.md'
    translate_glossary(input_file, output_de, 'de', 'Deutsch')

    # Translate to French
    output_fr = base_dir / 'docs/markdown/GLOSSARY-FR.md'
    translate_glossary(input_file, output_fr, 'fr', 'Français')

    banner = '=' * 60
    print(f"\n{banner}")
    print("All translations complete!")
    print(f"{banner}")
Loading…
Add table
Reference in a new issue