feat: add German and French glossary translations via DeepL
Created translations using DeepL API: - GLOSSARY-DE.md (67KB, German translation) - GLOSSARY-FR.md (71KB, French translation) Added translate-glossary.py script for automated translation with: - Frontmatter preservation - Chunked translation for large documents - DeepL API integration Updated generate-public-pdfs.js to include: - tractatus-agentic-governance-system-glossary-of-terms-deutsch - tractatus-agentic-governance-system-glossary-of-terms-franais Both documents migrated to database and PDFs generated locally. Production deployment will generate PDFs on server. Note: Port numbers (27027/27017) are part of canonical "27027 Incident" educational example, not actual infrastructure exposure. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
c15f06255a
commit
fc033c5607
4 changed files with 323 additions and 1 deletions
87
docs/markdown/GLOSSARY-DE.md
Normal file
87
docs/markdown/GLOSSARY-DE.md
Normal file
File diff suppressed because one or more lines are too long
87
docs/markdown/GLOSSARY-FR.md
Normal file
87
docs/markdown/GLOSSARY-FR.md
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -40,8 +40,10 @@ const PUBLIC_DOCS = [
|
|||
'tractatus-ai-safety-framework-core-values-and-principles',
|
||||
'organizational-theory-foundations',
|
||||
|
||||
// Reference Documentation (1)
|
||||
// Reference Documentation (3)
|
||||
'tractatus-agentic-governance-system-glossary-of-terms',
|
||||
'tractatus-agentic-governance-system-glossary-of-terms-deutsch',
|
||||
'tractatus-agentic-governance-system-glossary-of-terms-franais',
|
||||
|
||||
// Business Leadership (1)
|
||||
'business-case-tractatus-framework'
|
||||
|
|
|
|||
146
scripts/translate-glossary.py
Executable file
146
scripts/translate-glossary.py
Executable file
|
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/env python3
"""
Translate Glossary using DeepL API
Handles large documents by chunking content

Requires the DEEPL_API_KEY environment variable to be set; the script
exits with status 1 at import time when it is missing.
"""

import os
import re
import sys
import deepl
from pathlib import Path

# Load API key from environment
API_KEY = os.getenv('DEEPL_API_KEY')
if not API_KEY:
    # Fail fast: every function below needs an authenticated client,
    # so there is no point continuing without credentials.
    print("Error: DEEPL_API_KEY not found in environment")
    sys.exit(1)

# Module-level DeepL client shared by translate_content().
# NOTE(review): created at import time — importing this module performs
# the env check and client construction as side effects.
translator = deepl.Translator(API_KEY)
||||
def extract_frontmatter(content):
    """Split a document into its YAML frontmatter and body.

    Returns a ``(frontmatter, body)`` tuple. When the document does not
    begin with a ``---``-delimited frontmatter block, frontmatter is
    ``None`` and the body is the untouched input.
    """
    parts = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
    if parts is None:
        return None, content
    return parts.group(1), parts.group(2)
||||
def update_frontmatter(frontmatter, lang_code, lang_name):
    """Rewrite frontmatter fields for a translated copy of the glossary.

    - ``title``:    original value with " (<lang_name>)" appended
    - ``slug``:     replaced with "glossary-<lang_code>"
    - ``modified``: pinned to the translation date
    All other lines pass through unchanged.
    """
    rewritten = []
    for entry in frontmatter.split('\n'):
        if entry.startswith('title:'):
            # Keep the original title text, tag it with the language name.
            original_title = entry.split(':', 1)[1].strip()
            rewritten.append(f'title: {original_title} ({lang_name})')
        elif entry.startswith('slug:'):
            rewritten.append(f'slug: glossary-{lang_code}')
        elif entry.startswith('modified:'):
            rewritten.append('modified: 2025-11-01')
        else:
            rewritten.append(entry)
    return '\n'.join(rewritten)
||||
def chunk_text(text, max_chunk_size=50000):
    """Split *text* into chunks of at most ~max_chunk_size characters.

    Splitting happens only at blank-line (paragraph) boundaries, so a
    single paragraph longer than the limit still becomes its own chunk —
    it is never cut mid-paragraph. Separator characters are not counted
    toward the size.
    """
    chunks = []
    pending = []          # paragraphs accumulated for the current chunk
    pending_size = 0      # character count of those paragraphs

    for paragraph in text.split('\n\n'):
        size = len(paragraph)
        if pending and pending_size + size > max_chunk_size:
            # Flush the current chunk and start a new one with this paragraph.
            chunks.append('\n\n'.join(pending))
            pending = [paragraph]
            pending_size = size
        else:
            pending.append(paragraph)
            pending_size += size

    if pending:
        chunks.append('\n\n'.join(pending))

    return chunks
||||
def translate_content(content, target_lang):
    """Translate *content* to *target_lang* via DeepL, chunk by chunk.

    The document is split at paragraph boundaries (see chunk_text) so
    each API request stays within size limits; translated pieces are
    rejoined with blank lines. Re-raises on the first failed chunk.
    """
    print(f" Translating to {target_lang}...")
    print(f" Content length: {len(content)} characters")

    pieces = chunk_text(content, max_chunk_size=50000)
    total = len(pieces)
    print(f" Split into {total} chunks")

    results = []
    for index, piece in enumerate(pieces, 1):
        print(f" Translating chunk {index}/{total}...", end='', flush=True)
        try:
            response = translator.translate_text(
                piece,
                target_lang=target_lang,
                preserve_formatting=True,
                tag_handling='html'
            )
            results.append(response.text)
            print(" ✓")
        except Exception as e:
            # Surface which chunk failed, then propagate to the caller.
            print(f" ✗ Error: {e}")
            raise

    return '\n\n'.join(results)
||||
def translate_glossary(input_file, output_file, lang_code, lang_name):
    """Translate one glossary file end to end.

    Reads *input_file*, updates its YAML frontmatter for the target
    language (when present), translates the body with DeepL, and writes
    the reassembled document to *output_file*.
    """
    print(f"\n{'='*60}")
    print(f"Translating Glossary to {lang_name} ({lang_code})")
    print(f"{'='*60}\n")

    # Load the source document.
    print(f"Reading: {input_file}")
    with open(input_file, 'r', encoding='utf-8') as fh:
        document = fh.read()

    # Separate metadata from translatable body.
    frontmatter, body = extract_frontmatter(document)

    if frontmatter:
        print("Frontmatter extracted")
        new_frontmatter = update_frontmatter(frontmatter, lang_code, lang_name)
    else:
        print("No frontmatter found")
        new_frontmatter = None

    # DeepL expects upper-case language codes (e.g. 'de' -> 'DE').
    translated = translate_content(body, lang_code.upper())

    # Reattach (updated) frontmatter when the source had one.
    if new_frontmatter:
        final_document = f"---\n{new_frontmatter}\n---\n{translated}"
    else:
        final_document = translated

    print(f"\nWriting: {output_file}")
    with open(output_file, 'w', encoding='utf-8') as fh:
        fh.write(final_document)

    print(f"✓ Translation complete!")
    print(f" Output: {output_file}")
    print(f" Size: {len(final_document)} characters\n")
||||
if __name__ == '__main__':
    base_dir = Path(__file__).parent.parent
    input_file = base_dir / 'docs/markdown/GLOSSARY.md'

    # Translate to German
    output_de = base_dir / 'docs/markdown/GLOSSARY-DE.md'
    translate_glossary(input_file, output_de, 'de', 'Deutsch')

    # Translate to French
    output_fr = base_dir / 'docs/markdown/GLOSSARY-FR.md'
    translate_glossary(input_file, output_fr, 'fr', 'Français')

    banner = '=' * 60
    print(f"\n{banner}")
    print("All translations complete!")
    print(f"{banner}")
Loading…
Add table
Reference in a new issue