tractatus/scripts/upload-document.js
TheFlow f8ef2128fc refactor(data): migrate legacy public field to modern visibility field
SUMMARY:
Completed migration from deprecated 'public: true/false' field to modern
'visibility' field across entire codebase. Ensures single source of truth
for document visibility state.

MIGRATION EXECUTION:
✓ Created migration script with dry-run support
✓ Migrated 120 documents in database (removed deprecated field)
✓ Post-migration: 0 documents with 'public' field, 127 with 'visibility'
✓ Zero data loss - all documents already had visibility set correctly

CODE CHANGES:

1. Database Migration (scripts/migrate-public-to-visibility.js):
   - Created safe migration with dry-run mode
   - Handles documents with both fields (cleanup)
   - Post-migration verification built-in
   - Execution: node scripts/migrate-public-to-visibility.js --execute

2. Document Model (src/models/Document.model.js):
   - Removed 'public' field from create() method
   - Updated findByQuadrant() to use visibility: 'public'
   - Updated findByAudience() to use visibility: 'public'
   - Updated search() to use visibility: 'public'

3. API Controller (src/controllers/documents.controller.js):
   - Removed legacy filter: { public: true, visibility: { $exists: false } }
   - listDocuments() now uses clean filter: visibility: 'public'
   - searchDocuments() now uses clean filter: visibility: 'public'

4. Scripts Updated:
   - upload-document.js: Removed public: true
   - seed-architectural-safeguards-document.js: Removed public: true
   - import-5-archives.js: Removed public: true
   - verify-34-documents.js: Updated query filter to use visibility
   - query-all-documents.js: Updated query filter to use visibility

VERIFICATION:
✓ 0 remaining 'public: true/false' usages in src/ and scripts/
✓ All documents use visibility field exclusively
✓ API queries now filter on visibility only
✓ Backward compatibility code removed

DATA MODEL:
Before: { public: true, visibility: 'public' } (redundant)
After:  { visibility: 'public' } (single source of truth)

BENEFITS:
- Cleaner data model
- Single source of truth for visibility
- Simplified API logic
- Removed backward compatibility overhead
- Consistent with document security model

FRAMEWORK COMPLIANCE:
Addresses SCHEDULED_TASKS.md item "Legacy public Field Migration"
Completes Sprint 2 Medium Priority task

NEXT STEPS (Optional):
- Deploy migration to production
- Monitor for any edge cases
- Consider adding visibility to database indexes

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-19 13:49:21 +13:00

582 lines
15 KiB
JavaScript

#!/usr/bin/env node
/**
* Upload Document to Tractatus Docs System
*
* One-command script to:
* 1. Upload markdown file to database
* 2. Generate PDF automatically
* 3. Configure for docs.html sidebar
* 4. Create card rendering metadata
* 5. Set up download links
*
* Usage:
* node scripts/upload-document.js <markdown-file> [options]
*
* Options:
* --category <cat> Category (getting-started, technical-reference, research-theory, etc.)
* --audience <aud> Target audience (general, researcher, implementer, leader, etc.)
* --title <title> Override document title
* --author <author> Document author (default: Agentic Governance Research Team)
* --tags <tags> Comma-separated tags
* --no-pdf Skip PDF generation
* --pdf-dir <dir> Custom PDF output directory (default: docs/research/)
* --order <num> Display order (lower = higher priority, default: 999)
* --force Overwrite existing document
* --contact <email> Contact email (default: research@agenticgovernance.digital)
*/
require('dotenv').config();
const fs = require('fs').promises;
const path = require('path');
const { spawn } = require('child_process');
const { connect, close } = require('../src/utils/db.util');
const Document = require('../src/models/Document.model');
const { markdownToHtml, extractTOC, generateSlug } = require('../src/utils/markdown.util');
// Command line handling: everything after `node <script>` is treated as input.
const args = process.argv.slice(2);
// Show usage when invoked without arguments or with an explicit help flag.
const helpRequested = args.length === 0 || ['--help', '-h'].includes(args[0]);
if (helpRequested) {
  console.log(`
Usage: node scripts/upload-document.js <markdown-file> [options]
Options:
--category <cat> Category: getting-started, technical-reference, research-theory,
advanced-topics, case-studies, business-leadership
--audience <aud> Audience: general, researcher, implementer, leader, advocate, developer
--title <title> Override document title (extracted from H1 if not provided)
--author <author> Document author (default: Agentic Governance Research Team)
--tags <tags> Comma-separated tags
--no-pdf Skip PDF generation
--pdf-dir <dir> Custom PDF output directory (default: docs/research/)
--order <num> Display order (lower = higher priority, default: 999)
--force Overwrite existing document
--contact <email> Contact email (default: research@agenticgovernance.digital)
Categories:
- getting-started 🚀 Getting Started
- technical-reference 🔌 Technical Reference
- research-theory 🔬 Theory & Research
- advanced-topics 🎓 Advanced Topics
- case-studies 📊 Case Studies
- business-leadership 💼 Business & Leadership
Examples:
# Upload research paper
node scripts/upload-document.js docs/research/my-paper.md \\
--category research-theory \\
--audience researcher \\
--tags "ai-safety,governance,research"
# Upload technical guide (no PDF)
node scripts/upload-document.js docs/guides/setup.md \\
--category getting-started \\
--audience developer \\
--no-pdf
# Upload with custom order
node scripts/upload-document.js docs/important.md \\
--category getting-started \\
--order 1 \\
--force
`);
  process.exit(0);
}
// The first positional argument is the markdown file to upload.
const [mdFilePath] = args;
if (!mdFilePath) {
  console.error('❌ Error: No markdown file specified');
  process.exit(1);
}
// Parse options
// Defaults for every supported flag; the loop below overrides them from argv.
const options = {
  category: null,
  audience: 'general',
  title: null,
  author: 'Agentic Governance Research Team',
  tags: [],
  generatePDF: true,
  pdfDir: 'docs/research',
  order: 999,
  force: false,
  contact: 'research@agenticgovernance.digital'
};

/**
 * Validate the value that follows a value-taking flag.
 *
 * Exits with status 1 (the same error style used for the other argument
 * errors in this script) when the value is missing or is itself another
 * `--flag`, instead of storing `undefined` or silently swallowing the
 * following flag.
 *
 * @param {string} flag - The flag being parsed, e.g. `--title`.
 * @param {string|undefined} value - The argv entry following the flag.
 * @returns {string} The validated value.
 */
function readOptionValue(flag, value) {
  if (value === undefined || value.startsWith('--')) {
    console.error(`❌ Error: ${flag} requires a value`);
    process.exit(1);
  }
  return value;
}

// args[0] is the markdown file, so option parsing starts at index 1.
for (let i = 1; i < args.length; i++) {
  const flag = args[i];
  switch (flag) {
    case '--category':
      options.category = readOptionValue(flag, args[++i]);
      break;
    case '--audience':
      options.audience = readOptionValue(flag, args[++i]);
      break;
    case '--title':
      options.title = readOptionValue(flag, args[++i]);
      break;
    case '--author':
      options.author = readOptionValue(flag, args[++i]);
      break;
    case '--tags':
      // Drop empty entries produced by input such as "a,,b" or a trailing comma.
      options.tags = readOptionValue(flag, args[++i])
        .split(',')
        .map((tag) => tag.trim())
        .filter(Boolean);
      break;
    case '--no-pdf':
      options.generatePDF = false;
      break;
    case '--pdf-dir':
      options.pdfDir = readOptionValue(flag, args[++i]);
      break;
    case '--order': {
      const rawOrder = readOptionValue(flag, args[++i]);
      // Explicit radix; reject non-numeric input rather than storing NaN.
      const order = Number.parseInt(rawOrder, 10);
      if (Number.isNaN(order)) {
        console.error(`❌ Error: --order expects an integer, got "${rawOrder}"`);
        process.exit(1);
      }
      options.order = order;
      break;
    }
    case '--force':
      options.force = true;
      break;
    case '--contact':
      options.contact = readOptionValue(flag, args[++i]);
      break;
    default:
      // Still ignored (as before), but no longer silently, so typos are visible.
      console.warn(`⚠️ Warning: Ignoring unrecognized argument "${flag}"`);
      break;
  }
}
// Category is mandatory and must be one of the known identifiers below.
const VALID_CATEGORIES = [
  'getting-started',
  'technical-reference',
  'research-theory',
  'advanced-topics',
  'case-studies',
  'business-leadership'
];
// Work out which (if any) category problem applies, then report it once.
let categoryError = null;
if (!options.category) {
  categoryError = '❌ Error: --category is required';
} else if (!VALID_CATEGORIES.includes(options.category)) {
  categoryError = `❌ Error: Invalid category "${options.category}"`;
}
if (categoryError !== null) {
  console.error(categoryError);
  console.log('Valid categories:', VALID_CATEGORIES.join(', '));
  process.exit(1);
}
/**
 * Generate a PDF from a markdown file by running an inline Python script
 * through `python3 -c`. The script imports `markdown` and `weasyprint`;
 * PyPDF2 is optional and is only used to stamp title/author metadata.
 *
 * @param {string} mdPath - Path of the markdown file to convert.
 * @param {string} outputDir - Directory that receives `<basename>.pdf`.
 * @param {{title?: string, author?: string}} [metadata] - Optional PDF
 *   title/author. Each value falls back to the CLI `options`
 *   (`options.title || 'Tractatus Document'` and `options.author`).
 * @returns {Promise<string>} Resolves with the PDF path when python3 exits
 *   with code 0.
 * @throws {Error} Rejects when python3 cannot be started or exits non-zero.
 */
async function generatePDF(mdPath, outputDir, metadata = {}) {
  const mdFileName = path.basename(mdPath, '.md');
  const pdfFileName = `${mdFileName}.pdf`;
  const pdfPath = path.join(outputDir, pdfFileName);
  // `||` short-circuits, so the CLI defaults are only read when needed.
  const pdfTitle = metadata.title || options.title || 'Tractatus Document';
  const pdfAuthor = metadata.author || options.author;
  console.log(`📄 Generating PDF: ${pdfPath}`);
  // Python source passed to `python3 -c`. Top-level statements must stay at
  // column 0 and nested bodies must stay indented: Python syntax depends on it.
  const pythonScript = `
import sys
import markdown
from weasyprint import HTML, CSS
from pathlib import Path
try:
    from PyPDF2 import PdfReader, PdfWriter
    has_pypdf2 = True
except ImportError:
    has_pypdf2 = False
md_path = sys.argv[1]
pdf_path = sys.argv[2]
title = sys.argv[3]
author = sys.argv[4]
# Read markdown
with open(md_path, 'r', encoding='utf-8') as f:
    md_content = f.read()
# Convert to HTML
html_content = markdown.markdown(
    md_content,
    extensions=[
        'markdown.extensions.tables',
        'markdown.extensions.fenced_code',
        'markdown.extensions.toc',
        'markdown.extensions.sane_lists'
    ]
)
# Wrap in HTML
full_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{title}</title>
</head>
<body>
{html_content}
</body>
</html>
"""
# CSS styling
css = CSS(string="""
@page {
size: Letter;
margin: 1in;
@bottom-center {
content: counter(page);
font-size: 10pt;
color: #666;
}
}
body {
font-family: "Georgia", "Times New Roman", serif;
font-size: 11pt;
line-height: 1.6;
color: #333;
}
h1 {
font-size: 24pt;
font-weight: bold;
color: #1976d2;
margin-top: 24pt;
margin-bottom: 12pt;
page-break-after: avoid;
border-bottom: 2px solid #1976d2;
padding-bottom: 4pt;
}
h2 {
font-size: 18pt;
font-weight: bold;
color: #1976d2;
margin-top: 20pt;
margin-bottom: 10pt;
page-break-after: avoid;
border-bottom: 2px solid #1976d2;
padding-bottom: 4pt;
}
h3 {
font-size: 14pt;
font-weight: bold;
color: #424242;
margin-top: 16pt;
margin-bottom: 8pt;
page-break-after: avoid;
}
p {
margin-top: 0;
margin-bottom: 10pt;
text-align: justify;
}
table {
width: 100%;
border-collapse: collapse;
margin: 12pt 0;
page-break-inside: avoid;
}
th {
background-color: #1976d2;
color: white;
font-weight: bold;
padding: 8pt;
text-align: left;
border: 1px solid #1976d2;
}
td {
padding: 6pt;
border: 1px solid #ddd;
}
code {
font-family: "Courier New", monospace;
font-size: 10pt;
background-color: #f5f5f5;
padding: 2pt 4pt;
border-radius: 2pt;
}
""")
# Generate PDF
HTML(string=full_html).write_pdf(pdf_path, stylesheets=[css])
# Add metadata if PyPDF2 is available
if has_pypdf2:
    reader = PdfReader(pdf_path)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    writer.add_metadata({
        '/Title': title,
        '/Author': author,
        '/Creator': 'Tractatus Framework',
        '/Producer': 'WeasyPrint'
    })
    with open(pdf_path, 'wb') as f:
        writer.write(f)
print(f"✓ PDF generated: {pdf_path}")
`;
  return new Promise((resolve, reject) => {
    // After `-c <script>` the remaining entries become sys.argv[1..4].
    const python = spawn('python3', [
      '-c',
      pythonScript,
      mdPath,
      pdfPath,
      pdfTitle,
      pdfAuthor
    ]);
    // Without an 'error' listener a failed spawn (e.g. python3 not installed)
    // is emitted as an unhandled 'error' event and crashes the process
    // instead of rejecting this promise.
    python.on('error', (err) => {
      reject(new Error(`PDF generation failed: could not run python3 (${err.message})`));
    });
    // Relay the script's output so progress and Python tracebacks stay visible.
    python.stdout.on('data', (data) => {
      console.log(data.toString().trim());
    });
    python.stderr.on('data', (data) => {
      console.error(data.toString().trim());
    });
    python.on('close', (code) => {
      if (code === 0) {
        resolve(pdfPath);
      } else {
        reject(new Error(`PDF generation failed with code ${code}`));
      }
    });
  });
}
/**
 * Append the contact, Apache 2.0 license and document-metadata footer to a
 * markdown file, rewriting the file in place.
 *
 * The file is left untouched when it already contains '## License' or
 * 'Apache License'.
 *
 * @param {string} mdPath - Path of the markdown file to update.
 * @returns {Promise<void>}
 */
async function addLicenseAndMetadata(mdPath) {
  const content = await fs.readFile(mdPath, 'utf-8');
  // Check if already has license
  if (content.includes('## License') || content.includes('Apache License')) {
    console.log('⚠️ Document already has license section');
    return;
  }
  // Compute the date once so "Created" and "Last Modified" always agree.
  const today = new Date().toISOString().split('T')[0];
  // Derive the ID from the same title precedence the upload uses for its
  // slug (explicit --title, then first H1, then file name).
  const h1Match = content.match(/^#\s+(.+)$/m);
  const documentId = generateSlug(
    options.title || (h1Match ? h1Match[1] : path.basename(mdPath, '.md'))
  );
  // Blank lines around every block matter in markdown: a '---' directly
  // under paragraph text is a setext heading underline rather than a rule,
  // and a list only renders inside the <div> when blank lines separate it
  // from the tags.
  const license = `
---

## Contact

**Research Inquiries:** ${options.contact}

**Website:** https://agenticgovernance.digital

**Repository:** https://github.com/AgenticGovernance/tractatus

---

## License

Copyright 2025 Agentic Governance Initiative

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at:

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

**Summary:**

- ✅ Commercial use allowed
- ✅ Modification allowed
- ✅ Distribution allowed
- ✅ Patent grant included
- ✅ Private use allowed
- ⚠️ Must include license and copyright notice
- ⚠️ Must state significant changes
- ❌ No trademark rights granted
- ❌ No liability or warranty

---

## Document Metadata

<div class="document-metadata">

- **Version:** 1.0
- **Created:** ${today}
- **Last Modified:** ${today}
- **Author:** ${options.author}
- **Document ID:** ${documentId}
- **Status:** Active

</div>
`;
  // Guarantee a line break between the existing text and the footer so the
  // leading '---' can never attach to the document's last line.
  const separator = content.endsWith('\n') ? '' : '\n';
  await fs.writeFile(mdPath, content + separator + license, 'utf-8');
  console.log('✓ Added license and metadata to markdown file');
}
/**
 * Main upload function.
 *
 * Steps, in order: verify the markdown file exists, append the license
 * footer, resolve the title (--title, else first H1, else file name),
 * optionally generate a PDF, convert the markdown to HTML, then create the
 * document record (or update the existing one when --force is given).
 *
 * Any failure terminates the process with exit code 1. A database
 * connection opened by this function is closed before it returns or exits.
 *
 * @returns {Promise<void>}
 */
async function uploadDocument() {
  let connected = false;
  let exitCode = 0;
  try {
    console.log('\n=== Tractatus Document Upload ===\n');
    // Verify markdown file exists
    const mdPath = path.resolve(mdFilePath);
    try {
      await fs.access(mdPath);
    } catch (err) {
      console.error(`❌ Error: File not found: ${mdPath}`);
      exitCode = 1;
      return;
    }
    console.log(`📄 Processing: ${mdPath}`);
    // Add license and metadata
    await addLicenseAndMetadata(mdPath);
    // Read markdown content (after the footer was appended, so it is stored too)
    const rawContent = await fs.readFile(mdPath, 'utf-8');
    // Extract title from first H1 or use provided title
    let title = options.title;
    if (!title) {
      const h1Match = rawContent.match(/^#\s+(.+)$/m);
      title = h1Match ? h1Match[1] : path.basename(mdPath, '.md');
    }
    console.log(`📌 Title: ${title}`);
    console.log(`📂 Category: ${options.category}`);
    console.log(`👥 Audience: ${options.audience}`);
    // Generate PDF if requested; a PDF failure is reported but not fatal.
    let pdfWebPath = null;
    if (options.generatePDF) {
      try {
        const outputDir = path.resolve(options.pdfDir);
        await fs.mkdir(outputDir, { recursive: true });
        // Hand over the resolved title so PDF metadata can match the record;
        // a two-argument generatePDF simply ignores the extra argument.
        const pdfPath = await generatePDF(mdPath, outputDir, {
          title,
          author: options.author
        });
        // Convert to web path, always using '/' regardless of the OS separator.
        // NOTE(review): with the default --pdf-dir (docs/research) the PDF is
        // outside ./public, so this yields '/../docs/research/<name>.pdf' —
        // confirm that is the URL the site is expected to serve.
        const relativePdfPath = path.relative(path.resolve('public'), pdfPath);
        pdfWebPath = '/' + relativePdfPath.split(path.sep).join('/');
        console.log(`✓ PDF available at: ${pdfWebPath}`);
      } catch (err) {
        console.error(`⚠️ PDF generation failed: ${err.message}`);
        console.log(' Continuing without PDF...');
      }
    }
    // Convert markdown to HTML
    const htmlContent = markdownToHtml(rawContent);
    // Extract table of contents
    const tableOfContents = extractTOC(rawContent);
    // Generate slug
    const slug = generateSlug(title);
    console.log(`🔗 Slug: ${slug}`);
    // Connect to database
    await connect();
    connected = true;
    // Check if document already exists
    const existing = await Document.findBySlug(slug);
    if (existing && !options.force) {
      console.error(`\n❌ Error: Document already exists with slug: ${slug}`);
      console.log(' Use --force to overwrite');
      exitCode = 1;
      return;
    }
    // Create document object
    const doc = {
      title: title,
      slug: slug,
      quadrant: null,
      persistence: 'HIGH',
      audience: options.audience,
      visibility: 'public',
      category: options.category,
      order: options.order,
      content_html: htmlContent,
      content_markdown: rawContent,
      toc: tableOfContents,
      security_classification: {
        contains_credentials: false,
        contains_financial_info: false,
        contains_vulnerability_info: false,
        contains_infrastructure_details: false,
        requires_authentication: false
      },
      metadata: {
        author: options.author,
        version: '1.0',
        document_code: null,
        related_documents: [],
        tags: options.tags
      },
      translations: {},
      search_index: rawContent.toLowerCase(),
      download_formats: {}
    };
    // Add PDF download if available
    if (pdfWebPath) {
      doc.download_formats.pdf = pdfWebPath;
    }
    // Create or update document
    if (existing && options.force) {
      await Document.update(existing._id, doc);
      console.log(`\n✅ Document updated successfully!`);
    } else {
      await Document.create(doc);
      console.log(`\n✅ Document created successfully!`);
    }
    console.log(`\n📊 Document Details:`);
    console.log(` Title: ${doc.title}`);
    console.log(` Slug: ${doc.slug}`);
    console.log(` Category: ${doc.category}`);
    console.log(` Audience: ${doc.audience}`);
    console.log(` Order: ${doc.order}`);
    console.log(` Tags: ${doc.metadata.tags.join(', ') || 'none'}`);
    if (pdfWebPath) {
      console.log(` PDF: ${pdfWebPath}`);
    }
    console.log(`\n✅ Document is now available at:`);
    console.log(` https://agenticgovernance.digital/docs.html?doc=${slug}`);
    console.log(` https://agenticgovernance.digital/docs.html?category=${doc.category}`);
    console.log(`\n💡 Next Steps:`);
    console.log(` 1. Clear browser cache (Ctrl+Shift+R or Cmd+Shift+R)`);
    console.log(` 2. Visit docs.html to see your document in the sidebar`);
    console.log(` 3. Document will appear under "${options.category}" category`);
  } catch (error) {
    console.error('\n❌ Upload failed:', error.message);
    console.error(error.stack);
    exitCode = 1;
  } finally {
    // Release the connection on every path. process.exit() does not run
    // pending cleanup, so it is deferred until after close().
    if (connected) {
      try {
        await close();
      } catch (closeError) {
        console.error(`⚠️ Failed to close database connection: ${closeError.message}`);
        exitCode = exitCode || 1;
      }
    }
    if (exitCode !== 0) {
      process.exit(exitCode);
    }
  }
}
// Start the upload only when this file is the entry script (node <file>),
// not when it is loaded through require().
// uploadDocument() handles its own errors, so the returned promise is
// intentionally not awaited here.
// NOTE(review): the CLI argument parsing above runs at module load time, so
// requiring this file still parses process.argv and may exit the process.
if (module === require.main) {
  void uploadDocument();
}
// Expose the upload routine to other scripts.
module.exports = uploadDocument;