tractatus/scripts/comprehensive-document-audit.js
TheFlow 36c8c30108 feat(docs): documentation curation infrastructure (scripts + sidebar)
INFRASTRUCTURE COMPLETE (22 public documents from 129 total):

CATEGORY CONSOLIDATION (12 → 5):
- Eliminated chaotic category proliferation
- Defined 5 canonical categories with icons, descriptions
- Updated frontend sidebar (public/js/docs-app.js)
- Categories: getting-started, research-theory, technical-reference,
  advanced-topics, business-leadership

SCRIPTS CREATED:
- comprehensive-document-audit.js: Systematic audit of all 129 docs
- generate-public-pdfs.js: Puppeteer-based PDF generation (22 PDFs)
- migrate-documents-final.js: DB migration (22 updated, 104 archived)
- export-for-production.js: Export 22 docs for production
- import-from-export.js: Import documents to production DB
- analyze-categories.js: Category analysis tool
- prepare-public-docs.js: Document preparation validator

AUDIT RESULTS:
- docs/DOCUMENT_AUDIT_REPORT.json: Full analysis with recommendations
- 22 documents recommended for public visibility
- 104 documents to archive (internal/obsolete/poor quality)

REMAINING WORK:
- Fix inst_016/017/018 violations in 22 public documents (85 violations)
  • inst_016: Statistics need citations or [NEEDS VERIFICATION]
  • inst_017: Replace absolute assurance terms with evidence-based language
  • inst_018: Remove maturity claims or add documented evidence
- Regenerate PDFs after content fixes
- Regenerate production export file (compliant version)
- Deploy to production

Database migration already executed in dev (22 updated, 104 archived).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-25 21:36:54 +13:00

243 lines
10 KiB
JavaScript

/**
* Comprehensive Document Audit
* Analyzes all documents in development database for:
* 1. Public suitability
* 2. Category assignment
* 3. Quality assessment
* 4. Archive/removal recommendations
*/
const { MongoClient } = require('mongodb');
// Audit criteria
//
// Every entry below is a title keyword. Groups are kept in declaration order,
// which is also the order in which they are iterated via Object.entries().

// Keyword groups for documents that should be public-facing.
const PUBLIC_TITLE_KEYWORDS = {
  REQUIRED: [
    'Introduction',
    'Getting Started',
    'Core Concepts',
    'Executive Brief',
    'Framework Overview',
    'Implementation Guide'
  ],
  RESEARCH: ['Research', 'Working Paper', 'Study', 'Analysis'],
  TECHNICAL: ['API', 'Technical', 'Architecture', 'Integration'],
  BUSINESS: ['Business Case', 'ROI', 'Leadership'],
  EDUCATIONAL: ['Case Study', 'Values', 'Pluralism', 'Theory']
};

// Keyword groups for documents that are internal working material.
const INTERNAL_TITLE_KEYWORDS = {
  SESSIONS: [
    'Session Handoff',
    'Session Init',
    'Session Summary',
    'Part 1',
    'Part 2',
    'Part 3',
    'Part 4'
  ],
  PROCESS: ['Workflow', 'Process', 'Checklist', 'Progress Report'],
  PLANNING: ['Plan', 'Roadmap', 'Assessment', 'Advisory'],
  DEVELOPMENT: ['PoC', 'Proof of Concept', 'Week 1', 'Week 2', 'Week 3'],
  INTERNAL_TECH: ['Benchmark', 'Audit Report', 'Deployment Guide', 'Setup Guide']
};

// Quality marker phrases grouped by level.
const QUALITY_KEYWORDS = {
  HIGH: ['has sections', 'complete', 'published', 'reviewed'],
  MEDIUM: ['draft', 'in progress'],
  LOW: ['incomplete', 'obsolete', 'deprecated', 'old']
};

const AUDIT_CRITERIA = {
  PUBLIC_SUITABLE: PUBLIC_TITLE_KEYWORDS,
  INTERNAL_ONLY: INTERNAL_TITLE_KEYWORDS,
  QUALITY_MARKERS: QUALITY_KEYWORDS
};
/**
 * Audit one document record and recommend how it should be handled.
 *
 * Decision order:
 *  1. If the title contains any AUDIT_CRITERIA.INTERNAL_ONLY keyword, the
 *     document is recommended for ARCHIVE immediately (quality 'N/A') and no
 *     further checks run.
 *  2. Otherwise the first AUDIT_CRITERIA.PUBLIC_SUITABLE group with a matching
 *     keyword marks the document as public-suitable and picks a suggested
 *     category.
 *  3. Quality is derived from the section count: 5 or more is HIGH, 1-4 is
 *     MEDIUM, none is LOW.
 *  4. Public-suitable documents become PUBLIC (or REVISE when quality is LOW);
 *     everything else becomes ARCHIVE.
 *
 * Keyword matching is a case-insensitive substring test on the title, so a
 * short keyword such as 'Plan' also matches inside a longer word such as
 * 'Explanation'.
 *
 * A missing or non-string title is treated as an empty title (it matches no
 * keyword) and a missing slug is tolerated, so one malformed record does not
 * abort the audit.
 *
 * @param {Object} doc - Document record. The fields read are title, slug,
 *   category, order, visibility and sections.
 * @returns {Promise<Object>} Audit summary with the fields title, slug,
 *   category, order, visibility, hasSections (always a boolean), sectionCount,
 *   recommendation, suggestedCategory, reasoning and quality.
 */
async function auditDocument(doc) {
  // Anything that is not an array counts as "no sections".
  const sectionCount = Array.isArray(doc.sections) ? doc.sections.length : 0;

  const audit = {
    title: doc.title,
    slug: doc.slug,
    category: doc.category,
    order: doc.order,
    visibility: doc.visibility,
    hasSections: sectionCount > 0,
    sectionCount,
    recommendation: 'ANALYZE', // PUBLIC, ARCHIVE, REMOVE, REVISE
    suggestedCategory: null,
    reasoning: [],
    quality: 'UNKNOWN' // HIGH, MEDIUM, LOW
  };

  const titleLower = typeof doc.title === 'string' ? doc.title.toLowerCase() : '';
  const titleMentions = keywords =>
    keywords.some(keyword => titleLower.includes(keyword.toLowerCase()));

  // Internal-only documents are archived without any quality assessment.
  for (const [type, keywords] of Object.entries(AUDIT_CRITERIA.INTERNAL_ONLY)) {
    if (titleMentions(keywords)) {
      audit.recommendation = 'ARCHIVE';
      audit.reasoning.push(`Internal document (${type})`);
      audit.quality = 'N/A';
      return audit;
    }
  }

  // Groups that map directly to one suggested category.
  const categoryByType = {
    REQUIRED: 'getting-started',
    RESEARCH: 'research-theory',
    TECHNICAL: 'technical-reference',
    BUSINESS: 'business-leadership'
  };

  // The first matching group wins; later groups are not consulted.
  let isPublicSuitable = false;
  for (const [type, keywords] of Object.entries(AUDIT_CRITERIA.PUBLIC_SUITABLE)) {
    if (!titleMentions(keywords)) continue;

    isPublicSuitable = true;
    if (Object.prototype.hasOwnProperty.call(categoryByType, type)) {
      audit.suggestedCategory = categoryByType[type];
    } else if (type === 'EDUCATIONAL') {
      // NOTE(review): with the criteria as currently defined, a title
      // containing "case study" also contains the RESEARCH keyword 'Study',
      // which is checked earlier, so the 'case-studies' result below is not
      // reached. Confirm which category case studies are meant to get.
      audit.suggestedCategory = titleLower.includes('case study')
        ? 'case-studies'
        : 'advanced-topics';
    }
    audit.reasoning.push(`Public-suitable (${type})`);
    break;
  }

  // Quality assessment based purely on how many sections the document has.
  if (sectionCount >= 5) {
    audit.quality = 'HIGH';
    audit.reasoning.push(`Well-structured (${sectionCount} sections)`);
  } else if (sectionCount > 0) {
    audit.quality = 'MEDIUM';
    audit.reasoning.push(`Structured (${sectionCount} sections)`);
  } else {
    audit.quality = 'LOW';
    audit.reasoning.push('No sections (may need revision)');
  }

  // Flag records that still carry placeholder metadata.
  if (doc.category === 'none' || !doc.category) {
    audit.reasoning.push('⚠️ Uncategorized');
  }
  if (doc.order === 999) {
    audit.reasoning.push('⚠️ Default order (not prioritized)');
  }

  // Final recommendation.
  if (!isPublicSuitable) {
    audit.recommendation = 'ARCHIVE';
  } else if (audit.quality === 'LOW') {
    audit.recommendation = 'REVISE';
    audit.reasoning.push('Needs content improvement before public');
  } else {
    audit.recommendation = 'PUBLIC';
  }

  return audit;
}
/**
 * Run the full audit.
 *
 * Loads every record from the `documents` collection of the `tractatus_dev`
 * database on the local MongoDB instance, audits each one with
 * auditDocument(), prints a grouped summary to the console and writes the
 * detailed result to docs/DOCUMENT_AUDIT_REPORT.json (resolved against the
 * current working directory).
 *
 * The database connection is always closed in `finally`. Failures are logged
 * and reported through process.exitCode rather than process.exit(), so
 * pending console output is flushed before the process ends.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const fs = require('fs');
  const rule = '═══════════════════════════════════════════════════════════';

  // Prints a three-line banner; `leading` carries the blank lines some
  // sections place before the top rule.
  const printBanner = (heading, leading = '') => {
    console.log(`${leading}${rule}`);
    console.log(heading);
    console.log(`${rule}\n`);
  };

  const client = new MongoClient('mongodb://localhost:27017');
  try {
    await client.connect();
    const collection = client.db('tractatus_dev').collection('documents');
    const allDocs = await collection.find({}).sort({ category: 1, order: 1 }).toArray();

    printBanner(' COMPREHENSIVE DOCUMENT AUDIT');
    console.log(`Total documents: ${allDocs.length}\n`);

    const audits = [];
    for (const doc of allDocs) {
      audits.push(await auditDocument(doc));
    }

    // Group by recommendation. Key order is kept because it is also the
    // order of the sections in the JSON report.
    const byRecommendation = {
      PUBLIC: audits.filter(a => a.recommendation === 'PUBLIC'),
      REVISE: audits.filter(a => a.recommendation === 'REVISE'),
      ARCHIVE: audits.filter(a => a.recommendation === 'ARCHIVE'),
      REMOVE: audits.filter(a => a.recommendation === 'REMOVE')
    };

    // Summary
    printBanner(' SUMMARY');
    console.log(`PUBLIC (should be on /docs.html): ${byRecommendation.PUBLIC.length}`);
    console.log(`REVISE (needs work before public): ${byRecommendation.REVISE.length}`);
    console.log(`ARCHIVE (internal, keep but hide): ${byRecommendation.ARCHIVE.length}`);
    console.log(`REMOVE (obsolete, delete): ${byRecommendation.REMOVE.length}\n`);

    // 1. Public documents, grouped by the category they should end up in.
    printBanner(' 1. RECOMMENDED FOR PUBLIC (/docs.html)');
    const byCategory = {};
    for (const audit of byRecommendation.PUBLIC) {
      const cat = audit.suggestedCategory || audit.category || 'uncategorized';
      if (!byCategory[cat]) byCategory[cat] = [];
      byCategory[cat].push(audit);
    }
    for (const cat of Object.keys(byCategory).sort()) {
      console.log(`\n${cat.toUpperCase()} (${byCategory[cat].length} documents):`);
      for (const a of byCategory[cat]) {
        console.log(`${a.title}`);
        console.log(` Quality: ${a.quality} | Sections: ${a.sectionCount}`);
        if (a.suggestedCategory && a.category !== a.suggestedCategory) {
          console.log(` ⚠️ Move from "${a.category}" to "${a.suggestedCategory}"`);
        }
      }
    }

    // 2. Public-suitable documents that still need content work.
    printBanner(' 2. NEEDS REVISION BEFORE PUBLIC', '\n\n');
    for (const a of byRecommendation.REVISE) {
      console.log(` ⚠️ ${a.title}`);
      console.log(` Current: ${a.category} | Suggested: ${a.suggestedCategory}`);
      console.log(` Reason: ${a.reasoning.join(', ')}\n`);
    }

    // 3. Archive candidates, grouped by their first recorded reason and
    //    truncated to five titles per group to keep the console readable.
    printBanner(' 3. RECOMMENDED FOR ARCHIVE', '\n');
    const archiveGroups = {};
    for (const a of byRecommendation.ARCHIVE) {
      const reason = a.reasoning[0] || 'Other';
      if (!archiveGroups[reason]) archiveGroups[reason] = [];
      archiveGroups[reason].push(a.title);
    }
    for (const reason of Object.keys(archiveGroups)) {
      const titles = archiveGroups[reason];
      console.log(`\n${reason} (${titles.length}):`);
      for (const title of titles.slice(0, 5)) {
        console.log(` - ${title}`);
      }
      if (titles.length > 5) {
        console.log(` ... and ${titles.length - 5} more`);
      }
    }

    // Save detailed audit to file
    const auditReport = {
      timestamp: new Date().toISOString(),
      totalDocuments: allDocs.length,
      recommendations: byRecommendation,
      categorySuggestions: byCategory
    };
    fs.writeFileSync(
      'docs/DOCUMENT_AUDIT_REPORT.json',
      JSON.stringify(auditReport, null, 2)
    );
    printBanner(' Full audit report saved to: docs/DOCUMENT_AUDIT_REPORT.json', '\n\n');
  } catch (error) {
    console.error('Error:', error);
    process.exitCode = 1;
  } finally {
    // Close exactly once on every path; a failing close must not turn into
    // an unhandled rejection.
    try {
      await client.close();
    } catch (closeError) {
      console.error('Error:', closeError);
      process.exitCode = 1;
    }
  }
}
// Script entry point: start the audit as soon as this file is executed.
// The promise returned by run() is intentionally not awaited here.
run();