- INCIDENT_REPORT_20260222: Deliberate instruction refusal analysis - fix-markdown-licences.js: Batch update licence sections in markdown - migrate-licence-to-cc-by-4.js: Apache 2.0 → CC BY 4.0 migration tool - publish-overtrust-blog-post.js: Blog post publishing utility - validate-licences.js: Licence compliance checker Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
336 lines
12 KiB
JavaScript
336 lines
12 KiB
JavaScript
#!/usr/bin/env node

/**
 * Validate Document Licences — All Delivery Channels
 *
 * Checks MongoDB, HTML downloads, and markdown source files to verify
 * correct licence assignment (CC BY 4.0 for research, Apache 2.0 for code).
 *
 * Usage:
 *   node scripts/validate-licences.js [--db <name>]
 *
 * Defaults to tractatus_dev. Use --db tractatus for production.
 */
|
|
|
|
const { MongoClient } = require('mongodb');
|
|
const fs = require('fs').promises;
|
|
const path = require('path');
|
|
|
|
// Database used when no --db flag is supplied on the command line.
const DEFAULT_DB_NAME = 'tractatus_dev';

/**
 * Resolve the target database name from a process-style argument vector.
 *
 * @param {string[]} argv - Argument vector (normally `process.argv`).
 * @returns {string} The value following `--db`, or the default database name
 *   when the flag is absent.
 * @throws {Error} When `--db` is present but is not followed by a name
 *   (end of arguments, empty string, or another `--flag`). Previously this
 *   case produced an undefined DB_NAME, which later crashed on
 *   `DB_NAME.padEnd(...)` with an unhelpful TypeError.
 */
function resolveDbName(argv) {
  const flagIndex = argv.indexOf('--db');
  if (flagIndex === -1) {
    return DEFAULT_DB_NAME;
  }

  const value = argv[flagIndex + 1];
  if (!value || value.startsWith('--')) {
    throw new Error('--db requires a database name (e.g. --db tractatus)');
  }
  return value;
}

// Position of the --db flag in process.argv (-1 when absent); kept for backward compatibility.
const dbArg = process.argv.indexOf('--db');
const DB_NAME = resolveDbName(process.argv);
|
|
|
|
// --- Classification: slugs that MUST be CC BY 4.0 ---
// validateMongoDB expects `licence === 'CC-BY-4.0'` for every slug listed here
// and `licence === 'Apache-2.0'` for every slug that is not listed.
const CC_BY_SLUGS = new Set([
  'tractatus-framework-research',
  'pluralistic-values-research-foundations',
  'the-27027-incident-a-case-study-in-pattern-recognition-bias',
  'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
  'research-topic-concurrent-session-architecture',
  'research-topic-rule-proliferation-transactional-overhead',
  'executive-summary-tractatus-inflection-point',
  'value-pluralism-faq',
  'value-pluralism-in-tractatus-frequently-asked-questions',
  'tractatus-ai-safety-framework-core-values-and-principles',
  'organizational-theory-foundations',
  'glossary',
  'glossary-de',
  'glossary-fr',
  'business-case-tractatus-framework',
  'case-studies',
  'steering-vectors-mechanical-bias-sovereign-ai',
  'steering-vectors-and-mechanical-bias-inference-time-debiasing-for-sovereign-small-language-models',
  'taonga-centred-steering-governance-polycentric-ai',
  'taonga-centred-steering-governance-polycentric-authority-for-sovereign-small-language-models',
  'pattern-bias-from-code-to-conversation',
  'architectural-alignment-academic',
  'philosophical-foundations-village-project',
  'research-timeline',
  'architectural-safeguards-against-llm-hierarchical-dominance-prose',
  'case-studies-real-world-llm-failure-modes-appendix',
]);
|
|
|
|
// HTML download files that MUST be CC BY 4.0.
// Bare filenames; validateHtmlDownloads joins each one onto <project>/public/downloads.
const CC_BY_HTML_FILES = [
  // steering-vectors paper: base file plus -de / -fr / -mi variants
  'steering-vectors-mechanical-bias-sovereign-ai.html',
  'steering-vectors-mechanical-bias-sovereign-ai-de.html',
  'steering-vectors-mechanical-bias-sovereign-ai-fr.html',
  'steering-vectors-mechanical-bias-sovereign-ai-mi.html',

  // taonga-centred governance paper: base file plus -de / -fr / -mi variants
  'taonga-centred-steering-governance-polycentric-ai.html',
  'taonga-centred-steering-governance-polycentric-ai-de.html',
  'taonga-centred-steering-governance-polycentric-ai-fr.html',
  'taonga-centred-steering-governance-polycentric-ai-mi.html',

  // architectural-alignment paper: only the -de / -fr / -mi variants are listed
  'architectural-alignment-academic-de.html',
  'architectural-alignment-academic-fr.html',
  'architectural-alignment-academic-mi.html',

  // philosophical-foundations paper: only the -de / -fr / -mi variants are listed
  'philosophical-foundations-village-project-de.html',
  'philosophical-foundations-village-project-fr.html',
  'philosophical-foundations-village-project-mi.html',
];
|
|
|
|
// Markdown files that MUST be CC BY 4.0.
// Paths are relative to the project root (the parent of this script's directory),
// which is how validateMarkdownFiles resolves them.
const CC_BY_MARKDOWN_FILES = [
  // docs/markdown
  'docs/markdown/tractatus-framework-research.md',
  'docs/markdown/business-case-tractatus-framework.md',
  'docs/markdown/organizational-theory-foundations.md',
  'docs/markdown/tractatus-ai-safety-framework-core-values-and-principles.md',
  'docs/markdown/GLOSSARY.md',
  'docs/markdown/GLOSSARY-DE.md',
  'docs/markdown/GLOSSARY-FR.md',
  'docs/markdown/case-studies.md',

  // docs/research
  'docs/research/pluralistic-values-research-foundations.md',
  'docs/research/executive-summary-tractatus-inflection-point.md',
  'docs/research/rule-proliferation-and-transactional-overhead.md',
  'docs/research/concurrent-session-architecture-limitations.md',
  'docs/research/ARCHITECTURAL-SAFEGUARDS-Against-LLM-Hierarchical-Dominance-Prose.md',
];
|
|
|
|
// --- Helpers ---
|
|
|
|
/**
 * Remove the Apache mentions that are acceptable inside a CC BY document
 * (the dual-licence notes), so that any Apache text still present afterwards
 * can be treated as stray.
 *
 * @param {string} text - Document content to clean.
 * @returns {string} `text` with every acceptable Apache phrase deleted.
 */
function stripAcceptableApache(text) {
  // Applied one after another, in this order, each against the output of the previous one.
  const acceptablePhrases = [
    /separately licensed under the Apache License 2\.0/g,
    /separat unter der Apache License 2\.0 lizenziert/g,
    /séparément sous la Licence Apache 2\.0/g,
    /Apache License 2\.0\. This Creative Commons/g,
    /Apache License 2\.0\. Diese Creative-Commons/g,
    /Apache License 2\.0\. Cette licence Creative/g,
    /source code is separately licensed under the Apache/g,
    /Quellcode.*?Apache License 2\.0/g,
    /licencié séparément sous la Licence Apache/g,
    // Māori dual-licence note
    /kei raro anō i te Apache License 2\.0/g,
  ];

  let remaining = text;
  for (const phrase of acceptablePhrases) {
    remaining = remaining.replace(phrase, '');
  }
  return remaining;
}
|
|
|
|
/**
 * Report whether `text` still mentions the Apache licence once the acceptable
 * dual-licence phrases have been stripped out.
 *
 * @param {string|null|undefined} text - Content to inspect; falsy input counts as clean.
 * @returns {boolean} True when 'Apache License' or 'Apache-Lizenz' remains after stripping.
 */
function hasUnwantedApache(text) {
  if (!text) {
    return false;
  }

  const remainder = stripAcceptableApache(text);
  const strayMarkers = ['Apache License', 'Apache-Lizenz'];
  return strayMarkers.some((marker) => remainder.includes(marker));
}
|
|
|
|
/**
 * Report whether `text` carries any Creative Commons / CC BY marker.
 *
 * The match is a plain substring test, so any text containing 'CC BY'
 * (for example 'CC BY-SA') or 'Creative Commons' is accepted.
 *
 * @param {string|null|undefined} text - Content to inspect; falsy input yields false.
 * @returns {boolean} True when at least one marker occurs in `text`.
 */
function hasCcBy(text) {
  if (!text) {
    return false;
  }

  const markers = ['Creative Commons', 'CC BY 4.0', 'CC BY'];
  return markers.some((marker) => text.includes(marker));
}
|
|
|
|
// --- Channel 1: MongoDB ---
|
|
|
|
/**
 * Collect content-level licence problems for one document that must be CC BY 4.0.
 *
 * @param {object} doc - Document from the `documents` collection. Reads `slug`,
 *   `content_html`, `content_markdown` and `translations`.
 * @returns {Array<{slug: *, channel: string, issue: string}>} Problems found, in check order.
 */
function collectCcByContentErrors(doc) {
  const { slug } = doc;
  const strayApache = 'Contains stray Apache licence text';
  const found = [];

  // Check 2: content_html should NOT have stray Apache
  if (hasUnwantedApache(doc.content_html)) {
    found.push({ slug, channel: 'mongodb:content_html', issue: strayApache });
  }

  // Check 3: content_html SHOULD have CC BY (skipped when there is no HTML at all)
  if (doc.content_html && !hasCcBy(doc.content_html)) {
    found.push({ slug, channel: 'mongodb:content_html', issue: 'MISSING CC BY 4.0 text' });
  }

  // Check 4: content_markdown should NOT have stray Apache
  if (hasUnwantedApache(doc.content_markdown)) {
    found.push({ slug, channel: 'mongodb:content_markdown', issue: strayApache });
  }

  // Check 5: translations
  if (doc.translations) {
    for (const [lang, translation] of Object.entries(doc.translations)) {
      // A translation slot may be null/undefined; treat it as "no content"
      // instead of crashing the whole validation run with a TypeError.
      const translatedHtml = translation ? translation.content_html : undefined;
      if (hasUnwantedApache(translatedHtml)) {
        found.push({ slug, channel: `mongodb:translations.${lang}`, issue: strayApache });
      }
    }
  }

  return found;
}

/**
 * Channel 1: validate the licence of every document in the `documents`
 * collection of database DB_NAME, printing a report to stdout.
 *
 * Every document must carry a `licence` field equal to 'CC-BY-4.0' when its
 * slug is in CC_BY_SLUGS and 'Apache-2.0' otherwise. CC BY documents also get
 * their HTML, markdown and translated HTML inspected; for all other documents
 * only the `licence` field is checked.
 *
 * @param {object} client - Connected MongoDB client; used as
 *   `client.db(name).collection(name).find({}).toArray()`.
 * @returns {Promise<Array<{slug: *, channel: string, issue: string}>>} All problems found.
 */
async function validateMongoDB(client) {
  console.log('\n══════════════════════════════════════════');
  console.log(' CHANNEL 1: MongoDB Documents');
  console.log('══════════════════════════════════════════\n');

  const db = client.db(DB_NAME);
  const collection = db.collection('documents');
  const documents = await collection.find({}).toArray();

  console.log(`Found ${documents.length} documents\n`);

  const errors = [];
  let checkedCcBy = 0;
  let checkedApache = 0;

  for (const doc of documents) {
    const { slug } = doc;
    const isCcBy = CC_BY_SLUGS.has(slug);
    const expectedLicence = isCcBy ? 'CC-BY-4.0' : 'Apache-2.0';

    // Check 1: licence field exists and is correct
    if (!doc.licence) {
      errors.push({ slug, channel: 'mongodb', issue: 'MISSING licence field' });
    } else if (doc.licence !== expectedLicence) {
      errors.push({ slug, channel: 'mongodb', issue: `WRONG licence field: ${doc.licence} (expected ${expectedLicence})` });
    }

    if (isCcBy) {
      checkedCcBy++;
      errors.push(...collectCcByContentErrors(doc));
    } else {
      checkedApache++;
    }
  }

  console.log(` CC BY 4.0 documents checked: ${checkedCcBy}`);
  console.log(` Apache 2.0 documents checked: ${checkedApache}`);
  console.log(` Errors found: ${errors.length}`);

  for (const err of errors) {
    console.log(` ❌ [${err.slug}] ${err.channel}: ${err.issue}`);
  }

  if (errors.length === 0) {
    console.log(' ✓ All MongoDB documents have correct licences');
  }

  return errors;
}
|
|
|
|
// --- Channel 2: HTML Downloads ---
|
|
|
|
/**
 * Channel 2: validate the HTML download files listed in CC_BY_HTML_FILES,
 * printing a report to stdout.
 *
 * Each file is read from `<script dir>/../public/downloads`. A file fails when
 * it cannot be read, still contains stray Apache licence text, or lacks any
 * CC BY marker.
 *
 * @returns {Promise<Array<{file: string, issue: string}>>} All problems found.
 */
async function validateHtmlDownloads() {
  console.log('\n══════════════════════════════════════════');
  console.log(' CHANNEL 2: HTML Download Files');
  console.log('══════════════════════════════════════════\n');

  const downloadsDir = path.resolve(__dirname, '..', 'public', 'downloads');
  const errors = [];
  let checked = 0;

  for (const filename of CC_BY_HTML_FILES) {
    const fullPath = path.join(downloadsDir, filename);

    let content;
    try {
      content = await fs.readFile(fullPath, 'utf-8');
    } catch (err) {
      // Only ENOENT really means "not found"; permission or other I/O failures
      // are reported as such so the operator is not sent looking for a missing file.
      const issue = err && err.code === 'ENOENT'
        ? 'FILE NOT FOUND'
        : `FILE UNREADABLE (${(err && (err.code || err.message)) || 'unknown error'})`;
      errors.push({ file: filename, issue });
      continue;
    }

    checked++;

    // Should NOT have stray Apache
    if (hasUnwantedApache(content)) {
      errors.push({ file: filename, issue: 'Contains stray Apache licence text' });
    }

    // SHOULD have CC BY
    if (!hasCcBy(content)) {
      errors.push({ file: filename, issue: 'MISSING CC BY 4.0 text' });
    }
  }

  console.log(` CC BY 4.0 HTML files checked: ${checked}`);
  console.log(` Errors found: ${errors.length}`);

  for (const err of errors) {
    console.log(` ❌ [${err.file}] ${err.issue}`);
  }

  if (errors.length === 0) {
    console.log(' ✓ All HTML download files have correct licences');
  }

  return errors;
}
|
|
|
|
// --- Channel 3: Markdown Source Files ---
|
|
|
|
/**
 * Channel 3: validate the markdown source files listed in CC_BY_MARKDOWN_FILES,
 * printing a report to stdout.
 *
 * Each path is resolved against the project root (`<script dir>/..`). A file
 * fails when it cannot be read, still contains stray Apache licence text
 * (outside the dual-licence note), or lacks any CC BY marker.
 *
 * @returns {Promise<Array<{file: string, issue: string}>>} All problems found.
 */
async function validateMarkdownFiles() {
  console.log('\n══════════════════════════════════════════');
  console.log(' CHANNEL 3: Markdown Source Files');
  console.log('══════════════════════════════════════════\n');

  const projectRoot = path.resolve(__dirname, '..');
  const errors = [];
  let checked = 0;

  for (const relPath of CC_BY_MARKDOWN_FILES) {
    const fullPath = path.join(projectRoot, relPath);

    let content;
    try {
      content = await fs.readFile(fullPath, 'utf-8');
    } catch (err) {
      // Only ENOENT really means "not found"; permission or other I/O failures
      // are reported as such so the operator is not sent looking for a missing file.
      const issue = err && err.code === 'ENOENT'
        ? 'FILE NOT FOUND'
        : `FILE UNREADABLE (${(err && (err.code || err.message)) || 'unknown error'})`;
      errors.push({ file: relPath, issue });
      continue;
    }

    checked++;

    // Should NOT have stray Apache (outside dual-licence note)
    if (hasUnwantedApache(content)) {
      errors.push({ file: relPath, issue: 'Contains stray Apache licence text' });
    }

    // SHOULD have CC BY
    if (!hasCcBy(content)) {
      errors.push({ file: relPath, issue: 'MISSING CC BY 4.0 text' });
    }
  }

  console.log(` CC BY 4.0 markdown files checked: ${checked}`);
  console.log(` Errors found: ${errors.length}`);

  for (const err of errors) {
    console.log(` ❌ [${err.file}] ${err.issue}`);
  }

  if (errors.length === 0) {
    console.log(' ✓ All markdown source files have correct licences');
  }

  return errors;
}
|
|
|
|
// --- Main ---
|
|
|
|
/**
 * Run all three validation channels (MongoDB, HTML downloads, markdown
 * sources) and print a combined summary.
 *
 * The process exit status is 0 when no problems were found and 1 otherwise.
 * The status is communicated through `process.exitCode` rather than
 * `process.exit()`: exiting immediately after `console.log` can cut off
 * output that has not been flushed yet (e.g. when stdout is a pipe). The
 * MongoDB client is always closed before the summary, so nothing is expected
 * to keep the event loop alive afterwards.
 *
 * @returns {Promise<void>} Resolves after the summary has been printed.
 *   Rejects if connecting to or querying MongoDB fails; in that case the
 *   HTML and markdown channels are not run.
 */
async function main() {
  console.log('╔══════════════════════════════════════════╗');
  console.log('║ Licence Validation — All Channels ║');
  console.log('╠══════════════════════════════════════════╣');
  console.log(`║ Database: ${DB_NAME.padEnd(29)}║`);
  console.log('╚══════════════════════════════════════════╝');

  const client = new MongoClient('mongodb://localhost:27017');
  const allErrors = [];

  try {
    await client.connect();

    const mongoErrors = await validateMongoDB(client);
    allErrors.push(...mongoErrors.map((e) => ({ ...e, channel_type: 'mongodb' })));
  } finally {
    // Release the connection whether or not validation succeeded.
    await client.close();
  }

  const htmlErrors = await validateHtmlDownloads();
  allErrors.push(...htmlErrors.map((e) => ({ ...e, channel_type: 'html' })));

  const mdErrors = await validateMarkdownFiles();
  allErrors.push(...mdErrors.map((e) => ({ ...e, channel_type: 'markdown' })));

  // --- Final Summary ---
  console.log('\n╔══════════════════════════════════════════╗');
  console.log('║ FINAL SUMMARY ║');
  console.log('╚══════════════════════════════════════════╝\n');

  if (allErrors.length === 0) {
    console.log(' ✓ ALL CHANNELS PASS — zero licence mismatches\n');
    process.exitCode = 0;
    return;
  }

  console.log(` ✗ ${allErrors.length} ERRORS FOUND:\n`);
  for (const err of allErrors) {
    // MongoDB errors carry `slug` and a specific `channel`; file errors carry
    // `file` and fall back to the coarse `channel_type` tag added above.
    const loc = err.slug || err.file;
    const chan = err.channel || err.channel_type;
    console.log(` ❌ [${loc}] (${chan}) ${err.issue}`);
  }
  console.log('');
  process.exitCode = 1;
}
|
|
|
|
// Script entry point: any rejection escaping main() is logged to stderr and
// the process is terminated with exit status 1.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|