tractatus/scripts/generate-public-pdfs.js
TheFlow 36c8c30108 feat(docs): documentation curation infrastructure (scripts + sidebar)
INFRASTRUCTURE COMPLETE (22 public documents from 129 total):

CATEGORY CONSOLIDATION (12 → 5):
- Eliminated chaotic category proliferation
- Defined 5 canonical categories with icons, descriptions
- Updated frontend sidebar (public/js/docs-app.js)
- Categories: getting-started, research-theory, technical-reference,
  advanced-topics, business-leadership

SCRIPTS CREATED:
- comprehensive-document-audit.js: Systematic audit of all 129 docs
- generate-public-pdfs.js: Puppeteer-based PDF generation (22 PDFs)
- migrate-documents-final.js: DB migration (22 updated, 104 archived)
- export-for-production.js: Export 22 docs for production
- import-from-export.js: Import documents to production DB
- analyze-categories.js: Category analysis tool
- prepare-public-docs.js: Document preparation validator

AUDIT RESULTS:
- docs/DOCUMENT_AUDIT_REPORT.json: Full analysis with recommendations
- 22 documents recommended for public visibility
- 104 documents to archive (internal/obsolete/poor quality)

REMAINING WORK:
- Fix inst_016/017/018 violations in 22 public documents (85 violations)
  • inst_016: Statistics need citations or [NEEDS VERIFICATION]
  • inst_017: Replace absolute assurance terms with evidence-based language
  • inst_018: Remove maturity claims or add documented evidence
- Regenerate PDFs after content fixes
- Regenerate production export file (compliant version)
- Deploy to production

Database migration already executed in dev (22 updated, 104 archived).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-25 21:36:54 +13:00

360 lines
8.7 KiB
JavaScript

/**
* Generate PDFs for All Public Documents
* Creates downloadable PDFs for documents that will be visible on /docs.html
* Uses Puppeteer (headless Chrome) for PDF generation
*/
const { MongoClient } = require('mongodb');
const puppeteer = require('puppeteer');
const fs = require('fs').promises;
const path = require('path');
// Slugs of the documents that get a public PDF, grouped by category.
// The slugs themselves were verified against the database.
const PUBLIC_DOCS_BY_CATEGORY = {
  // Getting Started (6)
  'getting-started': [
    'introduction',
    'core-concepts',
    'executive-summary-tractatus-inflection-point',
    'implementation-guide-v1.1',
    'implementation-guide',
    'implementation-guide-python-examples'
  ],
  // Research & Theory (7)
  'research-theory': [
    'tractatus-framework-research', // Working Paper v0.1
    'pluralistic-values-research-foundations',
    'the-27027-incident-a-case-study-in-pattern-recognition-bias',
    'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
    'llm-integration-feasibility-research-scope',
    'research-topic-concurrent-session-architecture',
    'research-topic-rule-proliferation-transactional-overhead'
  ],
  // Technical Reference (5)
  'technical-reference': [
    'technical-architecture',
    'api-reference-complete',
    'api-javascript-examples',
    'api-python-examples',
    'openapi-specification'
  ],
  // Advanced Topics (3)
  'advanced-topics': [
    'value-pluralism-faq',
    'tractatus-ai-safety-framework-core-values-and-principles',
    'organizational-theory-foundations'
  ],
  // Business Leadership (1)
  'business-leadership': [
    'business-case-tractatus-framework'
  ]
};
// Flat list of every public slug, in category order.
const PUBLIC_DOCS = Object.values(PUBLIC_DOCS_BY_CATEGORY).flat();
/**
 * Escape a plain-text value so it can be placed inside HTML element content.
 *
 * @param {*} value - Text to escape; null/undefined become an empty string.
 * @returns {string} The value with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  const text = value == null ? '' : String(value);
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Build the standalone HTML page (cover page + body) that is printed to PDF.
 *
 * The body is assembled from `doc.sections` when the document has any
 * (one <h2> per titled section followed by its `content_html`); otherwise
 * `doc.content_html` is used as-is.
 *
 * @param {Object} doc - Document record.
 * @param {string} doc.title - Plain-text title; HTML-escaped before insertion.
 * @param {Array<{title?: string, content_html?: string}>} [doc.sections]
 * @param {string} [doc.content_html] - Fallback body when there are no sections.
 * @returns {string} Complete HTML document as a string.
 */
function generatePdfHtml(doc) {
  // Titles are treated as plain text, so they must be escaped: a raw "&" or "<"
  // would otherwise corrupt the <title>, cover heading and section headings.
  const safeTitle = escapeHtml(doc.title);

  let contentHtml = '';
  if (doc.sections && doc.sections.length > 0) {
    // Build from sections
    for (const section of doc.sections) {
      // Skip the heading for untitled sections instead of printing "undefined".
      if (section.title) {
        contentHtml += `<h2>${escapeHtml(section.title)}</h2>\n`;
      }
      // NOTE(review): content_html is inserted raw; presumably it is already
      // rendered markup from the document pipeline - confirm it is trusted.
      if (section.content_html) {
        contentHtml += section.content_html + '\n';
      }
    }
  } else if (doc.content_html) {
    contentHtml = doc.content_html;
  }

  // Cover date is the UTC calendar date at generation time (YYYY-MM-DD).
  const generatedOn = new Date().toISOString().split('T')[0];

  return `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${safeTitle}</title>
<style>
@page {
margin: 2cm;
size: A4;
}
* {
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
font-size: 11pt;
line-height: 1.6;
color: #1f2937;
max-width: 100%;
margin: 0;
padding: 0;
}
.cover {
page-break-after: always;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
min-height: 80vh;
text-align: center;
border-bottom: 3px solid #2563eb;
padding-bottom: 2cm;
}
.cover h1 {
font-size: 2.5rem;
font-weight: 700;
color: #111827;
margin-bottom: 1rem;
line-height: 1.2;
}
.cover .metadata {
font-size: 1rem;
color: #6b7280;
margin-top: 2rem;
}
.content {
color: #374151;
}
h1 {
font-size: 1.875rem;
font-weight: 700;
color: #111827;
margin-top: 2rem;
margin-bottom: 1rem;
page-break-after: avoid;
}
h2 {
font-size: 1.5rem;
font-weight: 600;
color: #1f2937;
margin-top: 1.5rem;
margin-bottom: 0.75rem;
page-break-after: avoid;
}
h3 {
font-size: 1.25rem;
font-weight: 600;
color: #374151;
margin-top: 1.25rem;
margin-bottom: 0.5rem;
page-break-after: avoid;
}
p {
margin: 0.75rem 0;
orphans: 3;
widows: 3;
}
ul, ol {
margin: 0.75rem 0;
padding-left: 2rem;
}
li {
margin: 0.25rem 0;
}
code {
background-color: #f3f4f6;
padding: 0.125rem 0.25rem;
border-radius: 0.25rem;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre {
background-color: #f9fafb;
border: 1px solid #e5e7eb;
border-radius: 0.375rem;
padding: 1rem;
overflow-x: auto;
margin: 1rem 0;
page-break-inside: avoid;
}
pre code {
background-color: transparent;
padding: 0;
}
a {
color: #2563eb;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
table {
width: 100%;
border-collapse: collapse;
margin: 1rem 0;
page-break-inside: avoid;
}
th, td {
border: 1px solid #e5e7eb;
padding: 0.5rem;
text-align: left;
}
th {
background-color: #f9fafb;
font-weight: 600;
}
blockquote {
border-left: 4px solid #2563eb;
padding-left: 1rem;
margin: 1rem 0;
color: #4b5563;
font-style: italic;
}
.page-break {
page-break-before: always;
}
</style>
</head>
<body>
<div class="cover">
<h1>${safeTitle}</h1>
<div class="metadata">
<p><strong>Tractatus AI Safety Framework</strong></p>
<p>${generatedOn}</p>
</div>
</div>
<div class="content">
${contentHtml}
</div>
</body>
</html>`;
}
/**
 * Render one document to `public/downloads/<slug>.pdf` using a shared browser.
 *
 * Never throws: every failure is logged and reported through the result object
 * so the caller can keep processing the remaining documents.
 *
 * @param {Object} doc - Document record; `doc.slug` names the output file.
 * @param {Object} browser - Launched Puppeteer browser used to open a page.
 * @returns {Promise<{success: boolean, slug: string, error?: string}>}
 */
async function generatePDF(doc, browser) {
  try {
    const outputPdf = path.join(__dirname, `../public/downloads/${doc.slug}.pdf`);
    // Generate HTML
    const html = generatePdfHtml(doc);
    // Create new page
    const page = await browser.newPage();
    try {
      // Set content; wait until the page has no in-flight network requests
      await page.setContent(html, {
        waitUntil: 'networkidle0'
      });
      // Generate PDF
      await page.pdf({
        path: outputPdf,
        format: 'A4',
        printBackground: true,
        margin: {
          top: '2cm',
          right: '2cm',
          bottom: '2cm',
          left: '2cm'
        }
      });
    } finally {
      // Always release the page: a failed render must not leave an open tab
      // behind in the browser that is shared across all documents.
      await page.close();
    }
    console.log(` ✓ Generated: ${doc.slug}.pdf`);
    return { success: true, slug: doc.slug };
  } catch (error) {
    console.error(` ✗ Failed: ${doc.slug} - ${error.message}`);
    return { success: false, slug: doc.slug, error: error.message };
  }
}
/**
 * Script entry point: generate a PDF for every slug in PUBLIC_DOCS.
 *
 * Connects to the local `tractatus_dev` database, launches one headless
 * browser, renders each document that exists, records the PDF path on the
 * document (`download_formats.pdf`), prints a summary and exits the process.
 *
 * Exit code: 0 when the run completes (individual PDF failures are reported in
 * the summary but do not change the code), 1 on an unexpected error or when
 * the browser/database connection cannot be closed cleanly.
 *
 * @returns {Promise<void>} Does not resolve in practice; the process exits.
 */
async function run() {
  const client = new MongoClient('mongodb://localhost:27017');
  let browser;
  let exitCode = 0;
  try {
    await client.connect();
    const db = client.db('tractatus_dev');
    const collection = db.collection('documents');
    console.log('═══════════════════════════════════════════════════════════');
    console.log(' GENERATING PDFs FOR PUBLIC DOCUMENTS');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`Total documents: ${PUBLIC_DOCS.length}\n`);
    // Ensure downloads directory exists
    const downloadsDir = path.join(__dirname, '../public/downloads');
    await fs.mkdir(downloadsDir, { recursive: true });
    // Launch browser
    console.log('Launching browser...\n');
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });
    const results = {
      success: [],
      failed: [],
      notFound: []
    };
    // Documents are processed one at a time on the single shared browser.
    for (const slug of PUBLIC_DOCS) {
      const doc = await collection.findOne({ slug });
      if (!doc) {
        console.log(` ⚠️ Not found: ${slug}`);
        results.notFound.push(slug);
        continue;
      }
      const result = await generatePDF(doc, browser);
      if (result.success) {
        results.success.push(slug);
        // Update database with PDF path
        await collection.updateOne(
          { slug },
          {
            $set: {
              'download_formats.pdf': `/downloads/${slug}.pdf`,
              updated_at: new Date()
            }
          }
        );
      } else {
        results.failed.push({ slug, error: result.error });
      }
    }
    console.log('\n═══════════════════════════════════════════════════════════');
    console.log(' SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`✅ Successfully generated: ${results.success.length}`);
    console.log(`✗ Failed: ${results.failed.length}`);
    console.log(`⚠️ Not found: ${results.notFound.length}\n`);
    if (results.failed.length > 0) {
      console.log('Failed PDFs:');
      results.failed.forEach(f => console.log(` - ${f.slug}: ${f.error}`));
    }
  } catch (error) {
    console.error('Error:', error);
    exitCode = 1;
  } finally {
    // Single cleanup path for success and failure. Each close is guarded on
    // its own so a browser shutdown error cannot skip closing the DB client.
    try {
      if (browser) await browser.close();
    } catch (closeError) {
      console.error('Error:', closeError);
      exitCode = 1;
    }
    try {
      await client.close();
    } catch (closeError) {
      console.error('Error:', closeError);
      exitCode = 1;
    }
  }
  // Exit explicitly, after cleanup, so lingering handles cannot keep the
  // process alive.
  process.exit(exitCode);
}
run();