feat: add German and French glossary translations via DeepL
Created translations using DeepL API: - GLOSSARY-DE.md (67KB, German translation) - GLOSSARY-FR.md (71KB, French translation) Added translate-glossary.py script for automated translation with: - Frontmatter preservation - Chunked translation for large documents - DeepL API integration Updated generate-public-pdfs.js to include: - tractatus-agentic-governance-system-glossary-of-terms-deutsch - tractatus-agentic-governance-system-glossary-of-terms-franais Both documents migrated to database and PDFs generated locally. Production deployment will generate PDFs on server. Note: Port numbers (27027/27017) are part of canonical "27027 Incident" educational example, not actual infrastructure exposure. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
b65ca7eca0
commit
f0db6052ad
4 changed files with 323 additions and 1 deletions
87
docs/markdown/GLOSSARY-DE.md
Normal file
87
docs/markdown/GLOSSARY-DE.md
Normal file
File diff suppressed because one or more lines are too long
87
docs/markdown/GLOSSARY-FR.md
Normal file
87
docs/markdown/GLOSSARY-FR.md
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -40,8 +40,10 @@ const PUBLIC_DOCS = [
|
||||||
'tractatus-ai-safety-framework-core-values-and-principles',
|
'tractatus-ai-safety-framework-core-values-and-principles',
|
||||||
'organizational-theory-foundations',
|
'organizational-theory-foundations',
|
||||||
|
|
||||||
// Reference Documentation (1)
|
// Reference Documentation (3)
|
||||||
'tractatus-agentic-governance-system-glossary-of-terms',
|
'tractatus-agentic-governance-system-glossary-of-terms',
|
||||||
|
'tractatus-agentic-governance-system-glossary-of-terms-deutsch',
|
||||||
|
'tractatus-agentic-governance-system-glossary-of-terms-franais',
|
||||||
|
|
||||||
// Business Leadership (1)
|
// Business Leadership (1)
|
||||||
'business-case-tractatus-framework'
|
'business-case-tractatus-framework'
|
||||||
|
|
|
||||||
146
scripts/translate-glossary.py
Executable file
146
scripts/translate-glossary.py
Executable file
|
|
@ -0,0 +1,146 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Translate Glossary using DeepL API
|
||||||
|
Handles large documents by chunking content
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Load API key from environment; the script cannot run without it, so fail
# fast with a clear message rather than erroring deep inside the API client.
API_KEY = os.getenv('DEEPL_API_KEY')
if not API_KEY:
    print("Error: DEEPL_API_KEY not found in environment")
    sys.exit(1)

# Single shared DeepL client reused by all translation calls below.
translator = deepl.Translator(API_KEY)
|
||||||
|
|
||||||
|
def extract_frontmatter(content):
    """Split a markdown document into YAML frontmatter and body.

    Args:
        content: full document text.

    Returns:
        A ``(frontmatter, body)`` tuple. When the document does not start
        with a ``---`` delimited frontmatter block, ``frontmatter`` is
        ``None`` and ``body`` is the unmodified input.
    """
    parsed = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
    if parsed is None:
        # No leading frontmatter block — hand back the document untouched.
        return None, content
    return parsed.group(1), parsed.group(2)
|
||||||
|
|
||||||
|
def update_frontmatter(frontmatter, lang_code, lang_name, modified_date=None):
    """Rewrite YAML frontmatter fields for a translated document.

    Appends the language name to ``title:``, rewrites ``slug:`` to
    ``glossary-<lang_code>``, and refreshes ``modified:``. All other lines
    pass through unchanged.

    Args:
        frontmatter: raw frontmatter text (without the ``---`` fences).
        lang_code: lowercase language code used in the new slug (e.g. 'de').
        lang_name: human-readable language name appended to the title.
        modified_date: ISO date string for the ``modified:`` field; defaults
            to today's date. (Previously hard-coded to '2025-11-01', which
            went stale on every later run.)

    Returns:
        The updated frontmatter as a single newline-joined string.
    """
    if modified_date is None:
        modified_date = date.today().isoformat()

    updated = []
    for line in frontmatter.split('\n'):
        if line.startswith('title:'):
            # Keep original title, add language suffix
            title = line.split(':', 1)[1].strip()
            updated.append(f'title: {title} ({lang_name})')
        elif line.startswith('slug:'):
            updated.append(f'slug: glossary-{lang_code}')
        elif line.startswith('modified:'):
            updated.append(f'modified: {modified_date}')
        else:
            updated.append(line)
    return '\n'.join(updated)
|
||||||
|
|
||||||
|
def chunk_text(text, max_chunk_size=50000):
    """Split text into chunks of at most ``max_chunk_size`` characters,
    breaking only at paragraph (blank-line) boundaries.

    Fix: the previous version counted only raw paragraph lengths, ignoring
    the two-character '\\n\\n' separators re-inserted by ``join``, so emitted
    chunks could exceed ``max_chunk_size``. Separator cost is now included.

    A single paragraph longer than ``max_chunk_size`` still becomes its own
    (oversized) chunk — there is no safe place to split inside it.

    Args:
        text: the text to split.
        max_chunk_size: target upper bound on chunk length in characters.

    Returns:
        List of chunks; joining them with '\\n\\n' reconstructs ``text``.
    """
    paragraphs = text.split('\n\n')
    chunks = []
    current_chunk = []
    current_size = 0

    for para in paragraphs:
        # The '\n\n' separator is only paid when the chunk already has content.
        sep = 2 if current_chunk else 0
        if current_chunk and current_size + sep + len(para) > max_chunk_size:
            chunks.append('\n\n'.join(current_chunk))
            current_chunk = [para]
            current_size = len(para)
        else:
            current_chunk.append(para)
            current_size += sep + len(para)

    if current_chunk:
        chunks.append('\n\n'.join(current_chunk))

    return chunks
|
||||||
|
|
||||||
|
def translate_content(content, target_lang):
    """Translate markdown body text via the module-level DeepL translator,
    splitting it into <=50k-character chunks first.

    Args:
        content: markdown body text (frontmatter already stripped).
        target_lang: DeepL target language code, e.g. 'DE' or 'FR'.

    Returns:
        The translated text, with translated chunks re-joined by blank lines
        (mirroring the paragraph boundaries chunk_text split on).

    Raises:
        Re-raises any exception from the DeepL API after logging which
        chunk failed.
    """
    print(f" Translating to {target_lang}...")
    print(f" Content length: {len(content)} characters")

    # Keep each API request under the DeepL payload limit.
    chunks = chunk_text(content, max_chunk_size=50000)
    print(f" Split into {len(chunks)} chunks")

    translated_chunks = []
    for i, chunk in enumerate(chunks, 1):
        print(f" Translating chunk {i}/{len(chunks)}...", end='', flush=True)
        try:
            result = translator.translate_text(
                chunk,
                target_lang=target_lang,
                preserve_formatting=True,
                # NOTE(review): tag_handling='html' on markdown input —
                # presumably to protect inline HTML/links from translation;
                # confirm it does not mangle markdown syntax.
                tag_handling='html'
            )
            translated_chunks.append(result.text)
            print(" ✓")
        except Exception as e:
            # Report the failing chunk and propagate: writing a partially
            # translated document would silently corrupt the output.
            print(f" ✗ Error: {e}")
            raise

    return '\n\n'.join(translated_chunks)
|
||||||
|
|
||||||
|
def translate_glossary(input_file, output_file, lang_code, lang_name):
    """Translate one glossary markdown file end to end.

    Reads ``input_file``, separates YAML frontmatter from the body, rewrites
    the frontmatter for the target language, translates the body via DeepL,
    reassembles the document, and writes it to ``output_file``.

    Args:
        input_file: path to the source markdown file (read as UTF-8).
        output_file: path the translated markdown is written to (UTF-8).
        lang_code: lowercase language code ('de', 'fr'); uppercased below to
            form the DeepL target code.
        lang_name: human-readable language name for logging and the title
            suffix.
    """
    print(f"\n{'='*60}")
    print(f"Translating Glossary to {lang_name} ({lang_code})")
    print(f"{'='*60}\n")

    # Read input file
    print(f"Reading: {input_file}")
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract frontmatter and content
    frontmatter, main_content = extract_frontmatter(content)

    if frontmatter:
        print("Frontmatter extracted")
        updated_frontmatter = update_frontmatter(frontmatter, lang_code, lang_name)
    else:
        print("No frontmatter found")
        updated_frontmatter = None

    # Translate content — DeepL expects uppercase target codes ('DE', 'FR').
    translated_content = translate_content(main_content, lang_code.upper())

    # Reassemble, restoring the '---' fences around the updated frontmatter.
    if updated_frontmatter:
        final_content = f"---\n{updated_frontmatter}\n---\n{translated_content}"
    else:
        final_content = translated_content

    # Write output
    print(f"\nWriting: {output_file}")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(final_content)

    print(f"✓ Translation complete!")
    print(f" Output: {output_file}")
    print(f" Size: {len(final_content)} characters\n")
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # This script lives in scripts/, so the repo root is two levels up.
    base_dir = Path(__file__).parent.parent
    input_file = base_dir / 'docs/markdown/GLOSSARY.md'

    # Translate to German
    output_de = base_dir / 'docs/markdown/GLOSSARY-DE.md'
    translate_glossary(input_file, output_de, 'de', 'Deutsch')

    # Translate to French
    output_fr = base_dir / 'docs/markdown/GLOSSARY-FR.md'
    translate_glossary(input_file, output_fr, 'fr', 'Français')

    print(f"\n{'='*60}")
    print("All translations complete!")
    print(f"{'='*60}")
|
||||||
Loading…
Add table
Reference in a new issue