tractatus/scripts/generate-public-pdfs.js
TheFlow 1ee1499f1b fix: use environment MongoDB URI and update glossary slugs in PDF generator
- Changed hardcoded mongodb://localhost:27017 to use MONGODB_URI env var
- Added automatic database name extraction from URI or MONGODB_DB env
- Updated glossary slugs from long auto-generated to explicit frontmatter slugs:
  - tractatus-agentic-governance-system-glossary-of-terms → glossary
  - tractatus-agentic-governance-system-glossary-of-terms-deutsch → glossary-de
  - tractatus-agentic-governance-system-glossary-of-terms-franais → glossary-fr
- Enables PDF generation to work in production environment

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-01 11:04:40 +13:00

369 lines
9.1 KiB
JavaScript

/**
* Generate PDFs for All Public Documents
* Creates downloadable PDFs for documents that will be visible on /docs.html
* Uses Puppeteer (headless Chrome) for PDF generation
*/
const { MongoClient } = require('mongodb');
const puppeteer = require('puppeteer');
const fs = require('fs').promises;
const path = require('path');
// Slugs of every document that gets a downloadable PDF.
// Written as one group per documentation category, then flattened into a
// single ordered list of slug strings.
const PUBLIC_DOCS = [
  // Getting Started (6)
  [
    'introduction',
    'core-concepts',
    'executive-summary-tractatus-inflection-point',
    'implementation-guide-v1.1',
    'implementation-guide',
    'implementation-guide-python-examples'
  ],
  // Research & Theory (7)
  [
    'tractatus-framework-research', // Working Paper v0.1
    'pluralistic-values-research-foundations',
    'the-27027-incident-a-case-study-in-pattern-recognition-bias',
    'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
    'llm-integration-feasibility-research-scope',
    'research-topic-concurrent-session-architecture',
    'research-topic-rule-proliferation-transactional-overhead'
  ],
  // Technical Reference (5)
  [
    'technical-architecture',
    'api-reference-complete',
    'api-javascript-examples',
    'api-python-examples',
    'openapi-specification'
  ],
  // Advanced Topics (3)
  [
    'value-pluralism-faq',
    'tractatus-ai-safety-framework-core-values-and-principles',
    'organizational-theory-foundations'
  ],
  // Reference Documentation (3)
  [
    'glossary',
    'glossary-de',
    'glossary-fr'
  ],
  // Business Leadership (1)
  [
    'business-case-tractatus-framework'
  ]
].flat();
/**
 * Escape a plain-text value so it can be placed inside HTML markup.
 * Null/undefined become an empty string instead of the text "undefined".
 *
 * @param {*} value - Plain text (or anything coercible to a string).
 * @returns {string} Text with &, <, >, " and ' replaced by HTML entities.
 */
function escapeHtml(value) {
  return String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Build the standalone HTML page (cover page + content) that is printed to PDF.
 *
 * The body is assembled from `doc.sections` when that array is non-empty
 * (each section contributes an <h2> title followed by its `content_html`);
 * otherwise `doc.content_html` is used as-is. `content_html` values are
 * inserted verbatim because they are already HTML. Titles are escaped
 * because they are inserted as text.
 * NOTE(review): this assumes `title` fields hold plain text, not markup - confirm.
 *
 * @param {Object} doc - Document record.
 * @param {string} doc.title - Document title (cover page and <title>).
 * @param {Array<{title?: string, content_html?: string}>} [doc.sections] - Optional sections.
 * @param {string} [doc.content_html] - Whole-document HTML used when there are no sections.
 * @returns {string} Complete HTML document source.
 */
function generatePdfHtml(doc) {
  let contentHtml = '';
  if (doc.sections && doc.sections.length > 0) {
    // Build from sections
    doc.sections.forEach(section => {
      // Skip the heading entirely rather than printing "undefined" for untitled sections
      if (section.title) {
        contentHtml += `<h2>${escapeHtml(section.title)}</h2>\n`;
      }
      if (section.content_html) {
        contentHtml += section.content_html + '\n';
      }
    });
  } else if (doc.content_html) {
    contentHtml = doc.content_html;
  }
  const safeTitle = escapeHtml(doc.title);
  return `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${safeTitle}</title>
<style>
@page {
margin: 2cm;
size: A4;
}
* {
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
font-size: 11pt;
line-height: 1.6;
color: #1f2937;
max-width: 100%;
margin: 0;
padding: 0;
}
.cover {
page-break-after: always;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
min-height: 80vh;
text-align: center;
border-bottom: 3px solid #2563eb;
padding-bottom: 2cm;
}
.cover h1 {
font-size: 2.5rem;
font-weight: 700;
color: #111827;
margin-bottom: 1rem;
line-height: 1.2;
}
.cover .metadata {
font-size: 1rem;
color: #6b7280;
margin-top: 2rem;
}
.content {
color: #374151;
}
h1 {
font-size: 1.875rem;
font-weight: 700;
color: #111827;
margin-top: 2rem;
margin-bottom: 1rem;
page-break-after: avoid;
}
h2 {
font-size: 1.5rem;
font-weight: 600;
color: #1f2937;
margin-top: 1.5rem;
margin-bottom: 0.75rem;
page-break-after: avoid;
}
h3 {
font-size: 1.25rem;
font-weight: 600;
color: #374151;
margin-top: 1.25rem;
margin-bottom: 0.5rem;
page-break-after: avoid;
}
p {
margin: 0.75rem 0;
orphans: 3;
widows: 3;
}
ul, ol {
margin: 0.75rem 0;
padding-left: 2rem;
}
li {
margin: 0.25rem 0;
}
code {
background-color: #f3f4f6;
padding: 0.125rem 0.25rem;
border-radius: 0.25rem;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre {
background-color: #f9fafb;
border: 1px solid #e5e7eb;
border-radius: 0.375rem;
padding: 1rem;
overflow-x: auto;
margin: 1rem 0;
page-break-inside: avoid;
}
pre code {
background-color: transparent;
padding: 0;
}
a {
color: #2563eb;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
table {
width: 100%;
border-collapse: collapse;
margin: 1rem 0;
page-break-inside: avoid;
}
th, td {
border: 1px solid #e5e7eb;
padding: 0.5rem;
text-align: left;
}
th {
background-color: #f9fafb;
font-weight: 600;
}
blockquote {
border-left: 4px solid #2563eb;
padding-left: 1rem;
margin: 1rem 0;
color: #4b5563;
font-style: italic;
}
.page-break {
page-break-before: always;
}
</style>
</head>
<body>
<div class="cover">
<h1>${safeTitle}</h1>
<div class="metadata">
<p><strong>Tractatus AI Safety Framework</strong></p>
<p>${new Date().toISOString().split('T')[0]}</p>
</div>
</div>
<div class="content">
${contentHtml}
</div>
</body>
</html>`;
}
/**
 * Render one document to `../public/downloads/<slug>.pdf` (relative to this
 * script) using a fresh page in the supplied browser.
 *
 * Never throws: every failure is logged and reported through the returned
 * result object so the caller can keep processing the remaining documents.
 *
 * @param {Object} doc - Document record; `doc.slug` names the output file.
 * @param {Object} browser - Launched Puppeteer browser (only `newPage()` is used).
 * @returns {Promise<{success: boolean, slug: string, error?: string}>}
 *   `success: true` when the PDF was written, otherwise `success: false`
 *   with the error message.
 */
async function generatePDF(doc, browser) {
  try {
    const outputPdf = path.join(__dirname, `../public/downloads/${doc.slug}.pdf`);
    // Generate HTML
    const html = generatePdfHtml(doc);
    // Create new page
    const page = await browser.newPage();
    try {
      // Set content
      await page.setContent(html, {
        waitUntil: 'networkidle0'
      });
      // Generate PDF
      await page.pdf({
        path: outputPdf,
        format: 'A4',
        printBackground: true,
        margin: {
          top: '2cm',
          right: '2cm',
          bottom: '2cm',
          left: '2cm'
        }
      });
    } finally {
      // Always release the tab: a failed render must not leave pages open
      // in the shared browser for the rest of the run.
      await page.close();
    }
    console.log(` ✓ Generated: ${doc.slug}.pdf`);
    return { success: true, slug: doc.slug };
  } catch (error) {
    console.error(` ✗ Failed: ${doc.slug} - ${error.message}`);
    return { success: false, slug: doc.slug, error: error.message };
  }
}
/**
 * Decide which database name to use.
 *
 * Priority: an explicit name, then the path component of the connection
 * string (the part after the host list and before any `?options`), then
 * `tractatus_dev`. Only the path is inspected, so a URI without a database
 * (e.g. `mongodb://localhost:27017`) falls back to the default instead of
 * yielding the host:port segment.
 *
 * @param {string} mongoUri - MongoDB connection string.
 * @param {string} [explicitName] - Overriding database name (e.g. from MONGODB_DB).
 * @returns {string} Database name to open.
 */
function resolveDbName(mongoUri, explicitName) {
  if (explicitName) {
    return explicitName;
  }
  const afterScheme = mongoUri.replace(/^[a-z][a-z0-9+.-]*:\/\//i, '');
  const withoutQuery = afterScheme.split('?')[0];
  const slashIndex = withoutQuery.indexOf('/');
  const fromUri = slashIndex === -1 ? '' : withoutQuery.slice(slashIndex + 1);
  return fromUri || 'tractatus_dev';
}

/**
 * Script entry point: connect to MongoDB, render a PDF for every slug in
 * PUBLIC_DOCS, record the download path on each successfully rendered
 * document, print a summary, and exit the process.
 *
 * Exit code is 0 when the run completes (even if individual documents
 * failed or were not found) and 1 when an unexpected error aborts the run.
 *
 * @returns {Promise<void>} Does not resolve in practice; the process exits.
 */
async function run() {
  // Use MONGODB_URI from environment, fallback to local dev
  const mongoUri = process.env.MONGODB_URI || 'mongodb://localhost:27017';
  let client;
  let browser;
  let exitCode = 0;
  try {
    // Constructed inside the try so an invalid URI is reported like any other fatal error
    client = new MongoClient(mongoUri);
    await client.connect();
    // Use DB name from environment or extract from URI, fallback to tractatus_dev
    const dbName = resolveDbName(mongoUri, process.env.MONGODB_DB);
    const db = client.db(dbName);
    const collection = db.collection('documents');
    console.log('═══════════════════════════════════════════════════════════');
    console.log(' GENERATING PDFs FOR PUBLIC DOCUMENTS');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`Total documents: ${PUBLIC_DOCS.length}\n`);
    // Ensure downloads directory exists
    const downloadsDir = path.join(__dirname, '../public/downloads');
    await fs.mkdir(downloadsDir, { recursive: true });
    // Launch browser
    console.log('Launching browser...\n');
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });
    const results = {
      success: [],
      failed: [],
      notFound: []
    };
    for (const slug of PUBLIC_DOCS) {
      const doc = await collection.findOne({ slug });
      if (!doc) {
        console.log(` ⚠️ Not found: ${slug}`);
        results.notFound.push(slug);
        continue;
      }
      const result = await generatePDF(doc, browser);
      if (result.success) {
        results.success.push(slug);
        // Update database with PDF path
        await collection.updateOne(
          { slug },
          {
            $set: {
              'download_formats.pdf': `/downloads/${slug}.pdf`,
              updated_at: new Date()
            }
          }
        );
      } else {
        results.failed.push({ slug, error: result.error });
      }
    }
    console.log('\n═══════════════════════════════════════════════════════════');
    console.log(' SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`✅ Successfully generated: ${results.success.length}`);
    console.log(`✗ Failed: ${results.failed.length}`);
    console.log(`⚠️ Not found: ${results.notFound.length}\n`);
    if (results.failed.length > 0) {
      console.log('Failed PDFs:');
      results.failed.forEach(f => console.log(` - ${f.slug}: ${f.error}`));
    }
  } catch (error) {
    console.error('Error:', error);
    exitCode = 1;
  } finally {
    // Release both resources on every path; a failure while closing one
    // must not prevent closing the other or change the exit code.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Error closing browser:', closeError);
      }
    }
    if (client) {
      try {
        await client.close();
      } catch (closeError) {
        console.error('Error closing MongoDB client:', closeError);
      }
    }
  }
  process.exit(exitCode);
}
// Start the generation run when this script is executed; run() ends the
// process itself via process.exit().
run();