#!/usr/bin/env node
/**
 * Validate Document Licences — All Delivery Channels
 *
 * Checks MongoDB, HTML downloads, and markdown source files to verify
 * correct licence assignment (CC BY 4.0 for research, Apache 2.0 for code).
 *
 * Usage:
 *   node scripts/validate-licences.js [--db <name>]
 *
 * Defaults to tractatus_dev. Use --db tractatus for production.
 *
 * Exit status: 0 when every channel passes; 1 on any licence error,
 * usage error, or fatal error.
 */

const { MongoClient } = require('mongodb');
const fs = require('fs').promises;
const path = require('path');

const DEFAULT_DB_NAME = 'tractatus_dev';

/**
 * Resolve the database name from the command line.
 *
 * @param {string[]} argv - Full argument vector (normally `process.argv`).
 * @returns {string} The value following `--db`, or the default when the flag is absent.
 *
 * A `--db` flag with no value used to leave the name `undefined`, which only
 * surfaced later as a TypeError on `.padEnd`; it is now reported as a usage
 * error and the process exits with status 1 (the same status the crash produced).
 */
function resolveDbName(argv) {
  const flagIndex = argv.indexOf('--db');
  if (flagIndex === -1) {
    return DEFAULT_DB_NAME;
  }
  const value = argv[flagIndex + 1];
  if (!value) {
    console.error('Error: --db requires a database name (e.g. --db tractatus)');
    process.exit(1);
  }
  return value;
}

const DB_NAME = resolveDbName(process.argv);

// --- Classification: slugs that MUST be CC BY 4.0 ---
// Any document whose slug is not listed here is expected to be Apache-2.0.
const CC_BY_SLUGS = new Set([
  'tractatus-framework-research',
  'pluralistic-values-research-foundations',
  'the-27027-incident-a-case-study-in-pattern-recognition-bias',
  'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
  'research-topic-concurrent-session-architecture',
  'research-topic-rule-proliferation-transactional-overhead',
  'executive-summary-tractatus-inflection-point',
  'value-pluralism-faq',
  'value-pluralism-in-tractatus-frequently-asked-questions',
  'tractatus-ai-safety-framework-core-values-and-principles',
  'organizational-theory-foundations',
  'glossary',
  'glossary-de',
  'glossary-fr',
  'business-case-tractatus-framework',
  'case-studies',
  'steering-vectors-mechanical-bias-sovereign-ai',
  'steering-vectors-and-mechanical-bias-inference-time-debiasing-for-sovereign-small-language-models',
  'taonga-centred-steering-governance-polycentric-ai',
  'taonga-centred-steering-governance-polycentric-authority-for-sovereign-small-language-models',
  'pattern-bias-from-code-to-conversation',
  'architectural-alignment-academic',
  'philosophical-foundations-village-project',
  'research-timeline',
  'architectural-safeguards-against-llm-hierarchical-dominance-prose',
  'case-studies-real-world-llm-failure-modes-appendix',
]);

// HTML download files that MUST be CC BY 4.0 (relative to public/downloads)
const CC_BY_HTML_FILES = [
  'steering-vectors-mechanical-bias-sovereign-ai.html',
  'steering-vectors-mechanical-bias-sovereign-ai-de.html',
  'steering-vectors-mechanical-bias-sovereign-ai-fr.html',
  'steering-vectors-mechanical-bias-sovereign-ai-mi.html',
  'taonga-centred-steering-governance-polycentric-ai.html',
  'taonga-centred-steering-governance-polycentric-ai-de.html',
  'taonga-centred-steering-governance-polycentric-ai-fr.html',
  'taonga-centred-steering-governance-polycentric-ai-mi.html',
  'architectural-alignment-academic-de.html',
  'architectural-alignment-academic-fr.html',
  'architectural-alignment-academic-mi.html',
  'philosophical-foundations-village-project-de.html',
  'philosophical-foundations-village-project-fr.html',
  'philosophical-foundations-village-project-mi.html',
];

// Markdown files that MUST be CC BY 4.0 (relative to the project root)
const CC_BY_MARKDOWN_FILES = [
  'docs/markdown/tractatus-framework-research.md',
  'docs/markdown/business-case-tractatus-framework.md',
  'docs/markdown/organizational-theory-foundations.md',
  'docs/markdown/tractatus-ai-safety-framework-core-values-and-principles.md',
  'docs/markdown/GLOSSARY.md',
  'docs/markdown/GLOSSARY-DE.md',
  'docs/markdown/GLOSSARY-FR.md',
  'docs/markdown/case-studies.md',
  'docs/research/pluralistic-values-research-foundations.md',
  'docs/research/executive-summary-tractatus-inflection-point.md',
  'docs/research/rule-proliferation-and-transactional-overhead.md',
  'docs/research/concurrent-session-architecture-limitations.md',
  'docs/research/ARCHITECTURAL-SAFEGUARDS-Against-LLM-Hierarchical-Dominance-Prose.md',
];

// --- Helpers ---

// Acceptable Apache references (dual-licence notes). Applied strictly in this
// order: earlier patterns consume text that later ones might otherwise match.
const ACCEPTABLE_APACHE_PATTERNS = [
  /separately licensed under the Apache License 2\.0/g,
  /separat unter der Apache License 2\.0 lizenziert/g,
  /séparément sous la Licence Apache 2\.0/g,
  /Apache License 2\.0\. This Creative Commons/g,
  /Apache License 2\.0\. Diese Creative-Commons/g,
  /Apache License 2\.0\. Cette licence Creative/g,
  /source code is separately licensed under the Apache/g,
  /Quellcode.*?Apache License 2\.0/g,
  /licencié séparément sous la Licence Apache/g,
  // Māori dual-licence note
  /kei raro anō i te Apache License 2\.0/g,
];

/**
 * Remove every permitted dual-licence mention of Apache from the text.
 *
 * @param {string} text - Document content.
 * @returns {string} The text with all acceptable Apache phrases deleted.
 */
function stripAcceptableApache(text) {
  return ACCEPTABLE_APACHE_PATTERNS.reduce(
    (remaining, pattern) => remaining.replace(pattern, ''),
    text
  );
}

/**
 * Report whether the text still mentions an Apache licence once the
 * permitted dual-licence notes have been stripped.
 *
 * @param {string|null|undefined} text - Document content; falsy input yields false.
 * @returns {boolean}
 */
function hasUnwantedApache(text) {
  if (!text) return false;
  const cleaned = stripAcceptableApache(text);
  return cleaned.includes('Apache License') || cleaned.includes('Apache-Lizenz');
}

/**
 * Report whether the text carries a Creative Commons / CC BY notice.
 *
 * @param {string|null|undefined} text - Document content; falsy input yields false.
 * @returns {boolean}
 */
function hasCcBy(text) {
  if (!text) return false;
  // 'CC BY' also covers the more specific 'CC BY 4.0'.
  return text.includes('Creative Commons') || text.includes('CC BY');
}

/**
 * Run all licence checks for one MongoDB document.
 *
 * @param {object} doc - A record from the `documents` collection.
 * @returns {{ isCcBy: boolean, errors: Array<{slug: *, channel: string, issue: string}> }}
 */
function checkDocument(doc) {
  const slug = doc.slug;
  const isCcBy = CC_BY_SLUGS.has(slug);
  const expectedLicence = isCcBy ? 'CC-BY-4.0' : 'Apache-2.0';
  const errors = [];

  // Check 1: licence field exists and is correct
  if (!doc.licence) {
    errors.push({ slug, channel: 'mongodb', issue: 'MISSING licence field' });
  } else if (doc.licence !== expectedLicence) {
    errors.push({
      slug,
      channel: 'mongodb',
      issue: `WRONG licence field: ${doc.licence} (expected ${expectedLicence})`,
    });
  }

  // Content checks only apply to documents that must be CC BY 4.0.
  if (!isCcBy) {
    return { isCcBy, errors };
  }

  // Check 2: content_html should NOT have stray Apache
  if (hasUnwantedApache(doc.content_html)) {
    errors.push({ slug, channel: 'mongodb:content_html', issue: 'Contains stray Apache licence text' });
  }

  // Check 3: content_html SHOULD have CC BY
  if (doc.content_html && !hasCcBy(doc.content_html)) {
    errors.push({ slug, channel: 'mongodb:content_html', issue: 'MISSING CC BY 4.0 text' });
  }

  // Check 4: content_markdown should NOT have stray Apache
  if (hasUnwantedApache(doc.content_markdown)) {
    errors.push({ slug, channel: 'mongodb:content_markdown', issue: 'Contains stray Apache licence text' });
  }

  // Check 5: translations
  if (doc.translations) {
    for (const [lang, translation] of Object.entries(doc.translations)) {
      // A null/empty translation entry has nothing to check; guard so it cannot throw.
      if (translation && hasUnwantedApache(translation.content_html)) {
        errors.push({
          slug,
          channel: `mongodb:translations.${lang}`,
          issue: 'Contains stray Apache licence text',
        });
      }
    }
  }

  return { isCcBy, errors };
}

// --- Channel 1: MongoDB ---

/**
 * Validate the licence of every document in the `documents` collection of DB_NAME.
 *
 * @param {MongoClient} client - An already-connected client.
 * @returns {Promise<Array<{slug: *, channel: string, issue: string}>>} All errors found.
 */
async function validateMongoDB(client) {
  console.log('\n══════════════════════════════════════════');
  console.log(' CHANNEL 1: MongoDB Documents');
  console.log('══════════════════════════════════════════\n');

  const db = client.db(DB_NAME);
  const collection = db.collection('documents');
  const documents = await collection.find({}).toArray();
  console.log(`Found ${documents.length} documents\n`);

  const errors = [];
  let checkedCcBy = 0;
  let checkedApache = 0;

  for (const doc of documents) {
    const result = checkDocument(doc);
    errors.push(...result.errors);
    if (result.isCcBy) {
      checkedCcBy++;
    } else {
      checkedApache++;
    }
  }

  console.log(` CC BY 4.0 documents checked: ${checkedCcBy}`);
  console.log(` Apache 2.0 documents checked: ${checkedApache}`);
  console.log(` Errors found: ${errors.length}`);
  for (const err of errors) {
    console.log(` ❌ [${err.slug}] ${err.channel}: ${err.issue}`);
  }
  if (errors.length === 0) {
    console.log(' ✓ All MongoDB documents have correct licences');
  }
  return errors;
}

/**
 * Shared implementation for the two file-based channels: read each file,
 * check it for stray Apache text and a CC BY notice, then print a report.
 *
 * @param {object} options
 * @param {string} options.title - Banner heading for the channel.
 * @param {string} options.baseDir - Directory the file names are relative to.
 * @param {string[]} options.files - File names/paths to check.
 * @param {string} options.countLabel - Text printed before the checked-file count.
 * @param {string} options.successMessage - Text printed when no errors were found.
 * @returns {Promise<Array<{file: string, issue: string}>>} All errors found.
 */
async function validateCcByFiles({ title, baseDir, files, countLabel, successMessage }) {
  console.log('\n══════════════════════════════════════════');
  console.log(title);
  console.log('══════════════════════════════════════════\n');

  const errors = [];
  let checked = 0;

  for (const file of files) {
    let content;
    try {
      content = await fs.readFile(path.join(baseDir, file), 'utf-8');
    } catch (err) {
      // Only a genuinely absent file is "not found"; permission or other I/O
      // failures are reported as such instead of being mislabelled.
      const issue = err.code === 'ENOENT'
        ? 'FILE NOT FOUND'
        : `UNREADABLE (${err.code || err.message})`;
      errors.push({ file, issue });
      continue;
    }
    checked++;

    // Should NOT have stray Apache (outside dual-licence note)
    if (hasUnwantedApache(content)) {
      errors.push({ file, issue: 'Contains stray Apache licence text' });
    }
    // SHOULD have CC BY
    if (!hasCcBy(content)) {
      errors.push({ file, issue: 'MISSING CC BY 4.0 text' });
    }
  }

  console.log(`${countLabel}${checked}`);
  console.log(` Errors found: ${errors.length}`);
  for (const err of errors) {
    console.log(` ❌ [${err.file}] ${err.issue}`);
  }
  if (errors.length === 0) {
    console.log(successMessage);
  }
  return errors;
}

// --- Channel 2: HTML Downloads ---

/**
 * Validate the CC BY HTML download files under public/downloads.
 *
 * @returns {Promise<Array<{file: string, issue: string}>>} All errors found.
 */
async function validateHtmlDownloads() {
  return validateCcByFiles({
    title: ' CHANNEL 2: HTML Download Files',
    baseDir: path.resolve(__dirname, '..', 'public', 'downloads'),
    files: CC_BY_HTML_FILES,
    countLabel: ' CC BY 4.0 HTML files checked: ',
    successMessage: ' ✓ All HTML download files have correct licences',
  });
}

// --- Channel 3: Markdown Source Files ---

/**
 * Validate the CC BY markdown source files under the project root.
 *
 * @returns {Promise<Array<{file: string, issue: string}>>} All errors found.
 */
async function validateMarkdownFiles() {
  return validateCcByFiles({
    title: ' CHANNEL 3: Markdown Source Files',
    baseDir: path.resolve(__dirname, '..'),
    files: CC_BY_MARKDOWN_FILES,
    countLabel: ' CC BY 4.0 markdown files checked: ',
    successMessage: ' ✓ All markdown source files have correct licences',
  });
}

// --- Main ---

/**
 * Run all three channels in order, print a combined summary, and exit with
 * status 0 when there are no errors or 1 otherwise.
 *
 * A MongoDB failure propagates to the caller (after the client is closed),
 * so the file channels are not run in that case.
 */
async function main() {
  console.log('╔══════════════════════════════════════════╗');
  console.log('║ Licence Validation — All Channels ║');
  console.log('╠══════════════════════════════════════════╣');
  console.log(`║ Database: ${DB_NAME.padEnd(29)}║`);
  console.log('╚══════════════════════════════════════════╝');

  const client = new MongoClient('mongodb://localhost:27017');
  const allErrors = [];

  try {
    await client.connect();
    const mongoErrors = await validateMongoDB(client);
    allErrors.push(...mongoErrors.map(e => ({ ...e, channel_type: 'mongodb' })));
  } finally {
    // Always release the connection, even when validation throws.
    await client.close();
  }

  const htmlErrors = await validateHtmlDownloads();
  allErrors.push(...htmlErrors.map(e => ({ ...e, channel_type: 'html' })));

  const mdErrors = await validateMarkdownFiles();
  allErrors.push(...mdErrors.map(e => ({ ...e, channel_type: 'markdown' })));

  // --- Final Summary ---
  console.log('\n╔══════════════════════════════════════════╗');
  console.log('║ FINAL SUMMARY ║');
  console.log('╚══════════════════════════════════════════╝\n');

  if (allErrors.length === 0) {
    console.log(' ✓ ALL CHANNELS PASS — zero licence mismatches\n');
    process.exit(0);
  } else {
    console.log(` ✗ ${allErrors.length} ERRORS FOUND:\n`);
    for (const err of allErrors) {
      // MongoDB errors carry slug/channel; file errors carry file/channel_type.
      const loc = err.slug || err.file;
      const chan = err.channel || err.channel_type;
      console.log(` ❌ [${loc}] (${chan}) ${err.issue}`);
    }
    console.log('');
    process.exit(1);
  }
}

main().catch(err => {
  console.error('Fatal error:', err);
  process.exit(1);
});