tractatus/scripts/validate-licences.js
TheFlow 2910eda301 docs: Add incident report and licence migration utility scripts
- INCIDENT_REPORT_20260222: Deliberate instruction refusal analysis
- fix-markdown-licences.js: Batch update licence sections in markdown
- migrate-licence-to-cc-by-4.js: Apache 2.0 → CC BY 4.0 migration tool
- publish-overtrust-blog-post.js: Blog post publishing utility
- validate-licences.js: Licence compliance checker

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 16:36:41 +13:00

336 lines
12 KiB
JavaScript

#!/usr/bin/env node
/**
* Validate Document Licences — All Delivery Channels
*
* Checks MongoDB, HTML downloads, and markdown source files to verify
* correct licence assignment (CC BY 4.0 for research, Apache 2.0 for code).
*
* Usage:
* node scripts/validate-licences.js [--db <name>]
*
* Defaults to tractatus_dev. Use --db tractatus for production.
*/
const { MongoClient } = require('mongodb');
const fs = require('fs').promises;
const path = require('path');
// Resolve the target database from an optional `--db <name>` pair on the command line;
// without the flag the development database is used.
const dbArg = process.argv.indexOf('--db');
const DB_NAME = dbArg === -1 ? 'tractatus_dev' : process.argv[dbArg + 1];
// `--db` given as the final argument leaves the name undefined (and `--db ""` leaves it
// empty). Stop here with a usage hint rather than letting an unusable database name
// reach the banner and the MongoDB calls.
if (typeof DB_NAME !== 'string' || DB_NAME === '') {
  console.error('Error: --db requires a database name.');
  console.error('Usage: node scripts/validate-licences.js [--db <name>]');
  process.exit(1);
}
// --- Classification: slugs that MUST be CC BY 4.0 ---
// validateMongoDB() looks each document's `slug` up in this set: a member is
// expected to carry licence 'CC-BY-4.0'; any slug that is not listed here is
// expected to carry 'Apache-2.0'.
// NOTE(review): several entries look like a short and a long slug for the same
// document (e.g. the two 'value-pluralism-…' entries) — presumably both forms
// exist in the database; confirm before pruning.
const CC_BY_SLUGS = new Set([
'tractatus-framework-research',
'pluralistic-values-research-foundations',
'the-27027-incident-a-case-study-in-pattern-recognition-bias',
'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
'research-topic-concurrent-session-architecture',
'research-topic-rule-proliferation-transactional-overhead',
'executive-summary-tractatus-inflection-point',
'value-pluralism-faq',
'value-pluralism-in-tractatus-frequently-asked-questions',
'tractatus-ai-safety-framework-core-values-and-principles',
'organizational-theory-foundations',
'glossary',
'glossary-de',
'glossary-fr',
'business-case-tractatus-framework',
'case-studies',
'steering-vectors-mechanical-bias-sovereign-ai',
'steering-vectors-and-mechanical-bias-inference-time-debiasing-for-sovereign-small-language-models',
'taonga-centred-steering-governance-polycentric-ai',
'taonga-centred-steering-governance-polycentric-authority-for-sovereign-small-language-models',
'pattern-bias-from-code-to-conversation',
'architectural-alignment-academic',
'philosophical-foundations-village-project',
'research-timeline',
'architectural-safeguards-against-llm-hierarchical-dominance-prose',
'case-studies-real-world-llm-failure-modes-appendix',
]);
// HTML download files that MUST be CC BY 4.0
// File names only: validateHtmlDownloads() joins each one onto the
// `public/downloads` directory that sits next to this script's directory,
// reads it, and checks it for stray Apache text and for CC BY text.
// NOTE(review): 'architectural-alignment-academic' and
// 'philosophical-foundations-village-project' are listed only with -de/-fr/-mi
// suffixes, unlike the other two documents which also have an unsuffixed
// entry — confirm whether the unsuffixed files are intentionally excluded.
const CC_BY_HTML_FILES = [
'steering-vectors-mechanical-bias-sovereign-ai.html',
'steering-vectors-mechanical-bias-sovereign-ai-de.html',
'steering-vectors-mechanical-bias-sovereign-ai-fr.html',
'steering-vectors-mechanical-bias-sovereign-ai-mi.html',
'taonga-centred-steering-governance-polycentric-ai.html',
'taonga-centred-steering-governance-polycentric-ai-de.html',
'taonga-centred-steering-governance-polycentric-ai-fr.html',
'taonga-centred-steering-governance-polycentric-ai-mi.html',
'architectural-alignment-academic-de.html',
'architectural-alignment-academic-fr.html',
'architectural-alignment-academic-mi.html',
'philosophical-foundations-village-project-de.html',
'philosophical-foundations-village-project-fr.html',
'philosophical-foundations-village-project-mi.html',
];
// Markdown files that MUST be CC BY 4.0
// Paths are relative to the project root, which validateMarkdownFiles() takes
// to be the parent of this script's directory. Each file is read and checked
// for stray Apache text and for CC BY text.
const CC_BY_MARKDOWN_FILES = [
'docs/markdown/tractatus-framework-research.md',
'docs/markdown/business-case-tractatus-framework.md',
'docs/markdown/organizational-theory-foundations.md',
'docs/markdown/tractatus-ai-safety-framework-core-values-and-principles.md',
'docs/markdown/GLOSSARY.md',
'docs/markdown/GLOSSARY-DE.md',
'docs/markdown/GLOSSARY-FR.md',
'docs/markdown/case-studies.md',
'docs/research/pluralistic-values-research-foundations.md',
'docs/research/executive-summary-tractatus-inflection-point.md',
'docs/research/rule-proliferation-and-transactional-overhead.md',
'docs/research/concurrent-session-architecture-limitations.md',
'docs/research/ARCHITECTURAL-SAFEGUARDS-Against-LLM-Hierarchical-Dominance-Prose.md',
];
// --- Helpers ---
/**
 * Delete every allow-listed dual-licence phrase that mentions Apache 2.0, so
 * that whatever Apache wording is left over can be treated as stray.
 *
 * @param {string} text - Content to clean.
 * @returns {string} The text with all allow-listed phrases removed.
 */
function stripAcceptableApache(text) {
  // Applied in this order, each pattern on the output of the previous one;
  // an earlier removal can prevent a later pattern from matching.
  const allowedPhrases = [
    /separately licensed under the Apache License 2\.0/g,
    /separat unter der Apache License 2\.0 lizenziert/g,
    /séparément sous la Licence Apache 2\.0/g,
    /Apache License 2\.0\. This Creative Commons/g,
    /Apache License 2\.0\. Diese Creative-Commons/g,
    /Apache License 2\.0\. Cette licence Creative/g,
    /source code is separately licensed under the Apache/g,
    /Quellcode.*?Apache License 2\.0/g,
    /licencié séparément sous la Licence Apache/g,
    // Māori dual-licence note
    /kei raro anō i te Apache License 2\.0/g,
  ];

  let remaining = text;
  for (const phrase of allowedPhrases) {
    remaining = remaining.replace(phrase, '');
  }
  return remaining;
}
/**
 * Tell whether `text` still mentions an Apache licence after the allow-listed
 * dual-licence phrases have been stripped by stripAcceptableApache().
 *
 * NOTE(review): only 'Apache License' and 'Apache-Lizenz' are searched for;
 * the French wording 'Licence Apache' is stripped but never looked for —
 * confirm whether stray French Apache text should be flagged as well.
 *
 * @param {string|null|undefined} text - Content to inspect; falsy means "nothing to check".
 * @returns {boolean} true when leftover Apache wording is found, otherwise false.
 */
function hasUnwantedApache(text) {
  if (text) {
    const remainder = stripAcceptableApache(text);
    return ['Apache License', 'Apache-Lizenz'].some((marker) => remainder.includes(marker));
  }
  return false;
}
/**
 * Tell whether `text` contains any of the Creative Commons markers this
 * validator accepts (case-sensitive substring match).
 *
 * @param {string|null|undefined} text - Content to inspect.
 * @returns {boolean} false for empty/missing text, otherwise whether a marker occurs.
 */
function hasCcBy(text) {
  const markers = ['Creative Commons', 'CC BY 4.0', 'CC BY'];
  return Boolean(text) && markers.some((marker) => text.includes(marker));
}
// --- Channel 1: MongoDB ---
/**
 * Channel 1: check licence metadata and embedded licence text for every
 * document in the `documents` collection of the database named by DB_NAME.
 *
 * For each document the `licence` field must be 'CC-BY-4.0' when its slug is
 * in CC_BY_SLUGS and 'Apache-2.0' otherwise. CC BY documents are additionally
 * scanned for stray Apache text (content_html, content_markdown and each
 * translation's content_html) and for the presence of CC BY text in
 * content_html. Progress and results are printed to stdout.
 *
 * @param {MongoClient} client - Client to query; main() connects it before calling.
 * @returns {Promise<Array<{slug: string, channel: string, issue: string}>>}
 *   One entry per problem found; empty when every document passes.
 */
async function validateMongoDB(client) {
  console.log('\n══════════════════════════════════════════');
  console.log(' CHANNEL 1: MongoDB Documents');
  console.log('══════════════════════════════════════════\n');

  const db = client.db(DB_NAME);
  const collection = db.collection('documents');
  const documents = await collection.find({}).toArray();
  console.log(`Found ${documents.length} documents\n`);

  const errors = [];
  let checkedCcBy = 0;
  let checkedApache = 0;

  for (const doc of documents) {
    const slug = doc.slug;
    const isCcBy = CC_BY_SLUGS.has(slug);
    const expectedLicence = isCcBy ? 'CC-BY-4.0' : 'Apache-2.0';

    // Check 1: licence field exists and is correct
    if (!doc.licence) {
      errors.push({ slug, channel: 'mongodb', issue: 'MISSING licence field' });
    } else if (doc.licence !== expectedLicence) {
      errors.push({ slug, channel: 'mongodb', issue: `WRONG licence field: ${doc.licence} (expected ${expectedLicence})` });
    }

    // Content checks only apply to CC BY documents; the rest are just counted.
    if (!isCcBy) {
      checkedApache++;
      continue;
    }
    checkedCcBy++;

    // Check 2: content_html should NOT have stray Apache
    if (hasUnwantedApache(doc.content_html)) {
      errors.push({ slug, channel: 'mongodb:content_html', issue: 'Contains stray Apache licence text' });
    }
    // Check 3: content_html SHOULD have CC BY (skipped when there is no HTML at all)
    if (doc.content_html && !hasCcBy(doc.content_html)) {
      errors.push({ slug, channel: 'mongodb:content_html', issue: 'MISSING CC BY 4.0 text' });
    }
    // Check 4: content_markdown should NOT have stray Apache
    if (hasUnwantedApache(doc.content_markdown)) {
      errors.push({ slug, channel: 'mongodb:content_markdown', issue: 'Contains stray Apache licence text' });
    }
    // Check 5: translations
    if (doc.translations) {
      for (const [lang, translation] of Object.entries(doc.translations)) {
        // Guard against a null/undefined translation entry so that one
        // malformed document cannot abort the whole validation run.
        if (translation && hasUnwantedApache(translation.content_html)) {
          errors.push({ slug, channel: `mongodb:translations.${lang}`, issue: 'Contains stray Apache licence text' });
        }
      }
    }
  }

  console.log(` CC BY 4.0 documents checked: ${checkedCcBy}`);
  console.log(` Apache 2.0 documents checked: ${checkedApache}`);
  console.log(` Errors found: ${errors.length}`);
  for (const err of errors) {
    console.log(` ❌ [${err.slug}] ${err.channel}: ${err.issue}`);
  }
  if (errors.length === 0) {
    console.log(' ✓ All MongoDB documents have correct licences');
  }
  return errors;
}
// --- Channel 2: HTML Downloads ---
/**
 * Channel 2: validate the HTML download files named in CC_BY_HTML_FILES.
 *
 * Each file is read from the `public/downloads` directory next to this
 * script's directory and must contain CC BY text and no stray Apache licence
 * text. Progress and results are printed to stdout.
 *
 * @returns {Promise<Array<{file: string, issue: string}>>} One entry per
 *   problem found; empty when every file passes.
 */
async function validateHtmlDownloads() {
  console.log('\n══════════════════════════════════════════');
  console.log(' CHANNEL 2: HTML Download Files');
  console.log('══════════════════════════════════════════\n');

  const downloadsDir = path.resolve(__dirname, '..', 'public', 'downloads');
  const errors = [];
  let checked = 0;

  for (const filename of CC_BY_HTML_FILES) {
    const fullPath = path.join(downloadsDir, filename);
    let content;
    try {
      content = await fs.readFile(fullPath, 'utf-8');
    } catch (err) {
      // Only ENOENT means the file is absent. Any other failure (permissions,
      // path is a directory, I/O error) is reported with its own reason so it
      // is not mistaken for a missing file.
      const reason = (err && (err.code || err.message)) || String(err);
      const issue = reason === 'ENOENT' ? 'FILE NOT FOUND' : `FILE UNREADABLE (${reason})`;
      errors.push({ file: filename, issue });
      continue;
    }
    checked++;

    // Should NOT have stray Apache
    if (hasUnwantedApache(content)) {
      errors.push({ file: filename, issue: 'Contains stray Apache licence text' });
    }
    // SHOULD have CC BY
    if (!hasCcBy(content)) {
      errors.push({ file: filename, issue: 'MISSING CC BY 4.0 text' });
    }
  }

  console.log(` CC BY 4.0 HTML files checked: ${checked}`);
  console.log(` Errors found: ${errors.length}`);
  for (const err of errors) {
    console.log(` ❌ [${err.file}] ${err.issue}`);
  }
  if (errors.length === 0) {
    console.log(' ✓ All HTML download files have correct licences');
  }
  return errors;
}
// --- Channel 3: Markdown Source Files ---
/**
 * Channel 3: validate the markdown source files named in CC_BY_MARKDOWN_FILES.
 *
 * Each path is resolved against the parent of this script's directory; the
 * file must contain CC BY text and no stray Apache licence text. Progress and
 * results are printed to stdout.
 *
 * @returns {Promise<Array<{file: string, issue: string}>>} One entry per
 *   problem found; empty when every file passes.
 */
async function validateMarkdownFiles() {
  console.log('\n══════════════════════════════════════════');
  console.log(' CHANNEL 3: Markdown Source Files');
  console.log('══════════════════════════════════════════\n');

  const projectRoot = path.resolve(__dirname, '..');
  const errors = [];
  let checked = 0;

  for (const relPath of CC_BY_MARKDOWN_FILES) {
    const fullPath = path.join(projectRoot, relPath);
    let content;
    try {
      content = await fs.readFile(fullPath, 'utf-8');
    } catch (err) {
      // Only ENOENT means the file is absent. Any other failure (permissions,
      // path is a directory, I/O error) is reported with its own reason so it
      // is not mistaken for a missing file.
      const reason = (err && (err.code || err.message)) || String(err);
      const issue = reason === 'ENOENT' ? 'FILE NOT FOUND' : `FILE UNREADABLE (${reason})`;
      errors.push({ file: relPath, issue });
      continue;
    }
    checked++;

    // Should NOT have stray Apache (outside dual-licence note)
    if (hasUnwantedApache(content)) {
      errors.push({ file: relPath, issue: 'Contains stray Apache licence text' });
    }
    // SHOULD have CC BY
    if (!hasCcBy(content)) {
      errors.push({ file: relPath, issue: 'MISSING CC BY 4.0 text' });
    }
  }

  console.log(` CC BY 4.0 markdown files checked: ${checked}`);
  console.log(` Errors found: ${errors.length}`);
  for (const err of errors) {
    console.log(` ❌ [${err.file}] ${err.issue}`);
  }
  if (errors.length === 0) {
    console.log(' ✓ All markdown source files have correct licences');
  }
  return errors;
}
// --- Main ---
/**
 * Run all three validation channels (MongoDB, HTML downloads, markdown
 * sources), print a combined summary, and record the process exit status:
 * 0 when no problems were found, 1 otherwise.
 *
 * The status is set through `process.exitCode` rather than `process.exit()`
 * so that output already queued on stdout is written out before the process
 * ends; the MongoDB client is closed beforehand, so nothing this function
 * opened remains pending.
 *
 * @returns {Promise<void>} Rejects if connecting to or querying MongoDB fails,
 *   or if a channel throws; the caller maps that to a fatal exit.
 */
async function main() {
  console.log('╔══════════════════════════════════════════╗');
  console.log('║ Licence Validation — All Channels ║');
  console.log('╠══════════════════════════════════════════╣');
  // NOTE(review): unlike the rows around it, this banner row has no closing
  // '║' — confirm whether that is intended before changing the output.
  console.log(`║ Database: ${DB_NAME.padEnd(29)}`);
  console.log('╚══════════════════════════════════════════╝');

  const client = new MongoClient('mongodb://localhost:27017');
  const allErrors = [];
  // Tag each error with the channel that produced it; the summary falls back
  // to this tag when an error carries no `channel` of its own.
  const collect = (errors, channelType) => {
    for (const e of errors) {
      allErrors.push({ ...e, channel_type: channelType });
    }
  };

  try {
    await client.connect();
    collect(await validateMongoDB(client), 'mongodb');
  } finally {
    // Always release the connection, including when connect/validation throws.
    await client.close();
  }
  collect(await validateHtmlDownloads(), 'html');
  collect(await validateMarkdownFiles(), 'markdown');

  // --- Final Summary ---
  console.log('\n╔══════════════════════════════════════════╗');
  console.log('║ FINAL SUMMARY ║');
  console.log('╚══════════════════════════════════════════╝\n');

  if (allErrors.length === 0) {
    console.log(' ✓ ALL CHANNELS PASS — zero licence mismatches\n');
    process.exitCode = 0;
    return;
  }

  console.log(`${allErrors.length} ERRORS FOUND:\n`);
  for (const err of allErrors) {
    const loc = err.slug || err.file;
    const chan = err.channel || err.channel_type;
    console.log(` ❌ [${loc}] (${chan}) ${err.issue}`);
  }
  console.log('');
  process.exitCode = 1;
}
// Entry point: run the validation; anything main() rejects with is printed
// and turned into exit status 1.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});