feat(docs): enhance violation fix script to handle all document fields

Updated fix-document-violations.js to fix violations in:
- content_markdown
- content_html
- search_index (new)
- excerpt (new)

This ensures complete compliance across all document fields.

Note: The export file is handled separately because pattern matching there
produces contextual false positives in headings and examples (e.g.,
"Architectural Safety Guarantees" is a topic description, not a claim).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
TheFlow 2025-10-25 21:57:41 +13:00
parent 295647532e
commit a78809a91f
2 changed files with 354 additions and 0 deletions

View file

@ -0,0 +1,118 @@
/**
* Analyze which of the 22 public documents have inst_016/017/018 violations
*/
const { MongoClient } = require('mongodb');
// Slugs of the public documents to analyze, grouped by documentation section.
// The number after each section name is how many slugs that section contributes.
const PUBLIC_SLUGS = [
  // Getting Started (6)
  'introduction',
  'core-concepts',
  'executive-summary-tractatus-inflection-point',
  'implementation-guide-v1.1',
  'implementation-guide',
  'implementation-guide-python-examples',

  // Research & Theory (7)
  'tractatus-framework-research',
  'pluralistic-values-research-foundations',
  'the-27027-incident-a-case-study-in-pattern-recognition-bias',
  'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
  'llm-integration-feasibility-research-scope',
  'research-topic-concurrent-session-architecture',
  'research-topic-rule-proliferation-transactional-overhead',

  // Technical Reference (5)
  'technical-architecture',
  'api-reference-complete',
  'api-javascript-examples',
  'api-python-examples',
  'openapi-specification',

  // Advanced Topics (3)
  'value-pluralism-faq',
  'tractatus-ai-safety-framework-core-values-and-principles',
  'organizational-theory-foundations',

  // Business Leadership (1)
  'business-case-tractatus-framework',
];
// Prohibited patterns, keyed by the rule they violate:
//   inst_016 - statistics, inst_017 - assurance wording, inst_018 - maturity claims.
// Every pattern carries the `g` flag so String.prototype.match returns all
// occurrences, which lets callers count violations with `matches.length`.
const PATTERNS = {
  inst_016: [
    // Percentages not immediately followed by the verification marker.
    // There is deliberately no `\b` after `%`: `%` is a non-word character, so
    // a trailing `\b` only matches when a word character follows and would
    // miss ordinary text such as "50% of" or "50%.".
    // The lookahead checks only the text directly after the statistic, so a
    // marker later on the same line does not hide earlier unmarked numbers.
    /\b\d+%(?!\s*\[NEEDS VERIFICATION\])/g,
    // Large round numbers ("3 million") not immediately followed by the marker.
    /\b\d+\s*(million|billion|thousand)\b(?!\s*\[NEEDS VERIFICATION\])/gi,
  ],
  inst_017: [
    /\b(guarantee|guarantees|guaranteed|ensuring|ensures)\b/gi,
    /\b(completely safe|totally secure|absolutely)\b/gi,
    /\b(eliminate all|prevent all|never fail)\b/gi,
  ],
  inst_018: [
    /\b(production-ready|production ready|battle-tested|proven solution|mature)\b/gi,
    // Not flagged when the word "evidence" appears later on the same line.
    /\b(enterprise-grade|industry-leading|world-class)(?!.*evidence)\b/gi,
  ],
};
/**
 * Count prohibited-pattern matches in a piece of text.
 *
 * @param {string} content - Text to scan.
 * @returns {Object<string, number>} Match count per rule key of PATTERNS.
 */
function countViolations(content) {
  const counts = {};
  for (const [rule, patterns] of Object.entries(PATTERNS)) {
    counts[rule] = 0;
    for (const pattern of patterns) {
      // match() with a global regex returns every occurrence, or null for none.
      const matches = content.match(pattern);
      if (matches) {
        counts[rule] += matches.length;
      }
    }
  }
  return counts;
}

/**
 * Scan every document listed in PUBLIC_SLUGS and print a per-document and
 * overall summary of inst_016/017/018 violations.
 *
 * Only `content_markdown` and `content_html` are scanned; they are joined into
 * one string, so a violation present in both fields is counted twice.
 * Documents without violations are left out of the report, and slugs that are
 * missing from the collection are logged and skipped.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const client = new MongoClient('mongodb://localhost:27017');
  try {
    await client.connect();
    const collection = client.db('tractatus_dev').collection('documents');
    console.log(`Analyzing ${PUBLIC_SLUGS.length} public documents for inst_016/017/018 violations...\n`);

    const results = [];
    for (const slug of PUBLIC_SLUGS) {
      const doc = await collection.findOne({ slug });
      if (!doc) {
        console.log(`⚠️ NOT FOUND: ${slug}`);
        continue;
      }
      const content = `${doc.content_markdown || ''} ${doc.content_html || ''}`;
      const violations = countViolations(content);
      const totalViolations = Object.values(violations).reduce((sum, n) => sum + n, 0);
      if (totalViolations > 0) {
        results.push({
          slug,
          title: doc.title,
          category: doc.category,
          violations,
          total: totalViolations,
        });
      }
    }

    // Worst offenders first.
    results.sort((a, b) => b.total - a.total);

    console.log('═══════════════════════════════════════════════════════════');
    console.log(' VIOLATION SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');
    let grandTotal = 0;
    for (const r of results) {
      console.log(`${r.title}`);
      console.log(` Slug: ${r.slug}`);
      console.log(` Category: ${r.category}`);
      console.log(` inst_016 (stats): ${r.violations.inst_016}`);
      console.log(` inst_017 (assurance): ${r.violations.inst_017}`);
      console.log(` inst_018 (maturity): ${r.violations.inst_018}`);
      console.log(` TOTAL: ${r.total}\n`);
      grandTotal += r.total;
    }
    console.log('═══════════════════════════════════════════════════════════');
    console.log(`Documents with violations: ${results.length}/${PUBLIC_SLUGS.length}`);
    console.log(`Total violations: ${grandTotal}\n`);
  } finally {
    // Always release the connection, even when a query above throws;
    // otherwise the open socket keeps the process alive after the error.
    await client.close();
  }
}
run().catch(console.error);

View file

@ -0,0 +1,236 @@
/**
* Fix inst_016/017/018 violations in documents
*
* Approach:
* - inst_016: Add [NEEDS VERIFICATION] to statistics without citations
* - inst_017: Replace absolute assurance terms with evidence-based language
* - inst_018: Remove maturity claims or replace with evidence-based terms
*/
const { MongoClient } = require('mongodb');
// inst_017 substitutions: each key is an absolute-assurance phrase and each
// value is the evidence-based wording that replaces it.
const ASSURANCE_REPLACEMENTS = {
  // "guarantee" family
  guarantees: 'provides strong safeguards for',
  guaranteed: 'designed to support',
  guarantee: 'provide strong safeguards for',

  // "ensure" family
  ensuring: 'supporting',
  ensures: 'supports',

  // Absolute safety / security wording
  'completely safe': 'designed with multiple safety layers',
  'totally secure': 'implements defense-in-depth security',
  absolutely: 'with high confidence',

  // Absolute outcome wording
  'eliminate all': 'significantly reduce',
  'prevent all': 'mitigate',
  'never fail': 'are resilient and designed to recover from failures',
};
// inst_018 substitutions: each key is a maturity claim and each value is the
// evidence-based wording that replaces it.
const MATURITY_REPLACEMENTS = {
  // Both spellings map to the same wording.
  'production-ready': 'under active development',
  'production ready': 'under active development',

  'battle-tested': 'tested in real-world scenarios',
  'proven solution': 'approach validated through',
  mature: 'established',

  // Marketing superlatives
  'enterprise-grade': 'designed for organizational use',
  'industry-leading': 'implementing modern approaches to',
  'world-class': 'high-quality',
};
/**
 * Escape regular-expression metacharacters so `text` is matched literally.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * inst_016: append " [NEEDS VERIFICATION]" to every percentage and every
 * "<n> million|billion|thousand" figure that is not already directly followed
 * by that marker. Running it on its own output changes nothing.
 *
 * @param {string} text
 * @returns {{ text: string, count: number }} Rewritten text and markers added.
 */
function markUnverifiedStatistics(text) {
  // Fresh literals per call, so no `lastIndex` state is shared between calls.
  const statPatterns = [
    // No `\b` after `%`: `%` is a non-word character, so a trailing `\b` would
    // only match when a word character follows and would miss "50% of".
    /\b\d+%(?!\s*\[NEEDS VERIFICATION\])/g,
    /\b\d+\s*(?:million|billion|thousand)\b(?!\s*\[NEEDS VERIFICATION\])/gi,
  ];
  let count = 0;
  let result = text;
  for (const pattern of statPatterns) {
    result = result.replace(pattern, (match) => {
      count++;
      return `${match} [NEEDS VERIFICATION]`;
    });
  }
  return { text: result, count };
}

/**
 * Replace every whole-word, case-insensitive occurrence of each key of
 * `replacements` with its value. When the matched text starts with an
 * upper-case character, the replacement's first character is upper-cased too.
 *
 * @param {string} text
 * @param {Object<string, string>} replacements - Prohibited phrase -> substitute.
 * @returns {{ text: string, count: number }} Rewritten text and replacements made.
 */
function replaceProhibitedTerms(text, replacements) {
  let count = 0;
  let result = text;
  for (const [bad, good] of Object.entries(replacements)) {
    const pattern = new RegExp(`\\b${escapeRegExp(bad)}\\b`, 'gi');
    result = result.replace(pattern, (match) => {
      count++;
      if (match[0] === match[0].toUpperCase()) {
        return good.charAt(0).toUpperCase() + good.slice(1);
      }
      return good;
    });
  }
  return { text: result, count };
}

/**
 * Apply `transform` to the parts of `html` that lie outside `<...>` tags,
 * leaving the tags themselves (names, attributes, inline styles) untouched.
 *
 * This is a plain regex split, not an HTML parser: the contents of elements
 * such as <script>, <style>, <code> or <pre> are still treated as text.
 *
 * @param {string} html
 * @param {(segment: string) => string} transform
 * @returns {string}
 */
function mapHtmlTextSegments(html, transform) {
  // split() with a capturing group keeps the tags at the odd indices.
  return html
    .split(/(<[^>]*>)/)
    .map((segment, index) => (index % 2 === 1 ? segment : transform(segment)))
    .join('');
}

/**
 * Fix inst_016/017/018 violations in one document and persist the result.
 *
 * Per field:
 * - content_markdown: statistics markers + assurance + maturity replacements
 * - content_html:     the same, applied only to text outside tags, so values
 *                     such as style="width:100%" are not rewritten
 * - search_index, excerpt: assurance + maturity replacements only
 *
 * Each field is fixed independently, so a violation that appears only in the
 * HTML (or only in the excerpt) is still repaired. Fields that are missing,
 * empty, or not strings are left alone. The document is written back only when
 * at least one field changed, and only the changed fields plus `updated_at`
 * are set.
 *
 * @param {object} collection - Object exposing async `findOne(filter)` and
 *   `updateOne(filter, update)`; here the MongoDB `documents` collection.
 * @param {string} slug - Slug of the document to fix.
 * @returns {Promise<{ fixed: number, errors: number }>} `fixed` is the number
 *   of fixes made in `content_markdown`; the other fields mirror that content
 *   and are not counted again. `errors` is 1 when the document does not exist,
 *   otherwise 0.
 */
async function fixDocument(collection, slug) {
  const doc = await collection.findOne({ slug });
  if (!doc) {
    console.log(` ⚠️ NOT FOUND: ${slug}`);
    return { fixed: 0, errors: 1 };
  }

  // Assurance replacements run before maturity replacements.
  const fixTerms = (text) => {
    const assurance = replaceProhibitedTerms(text, ASSURANCE_REPLACEMENTS);
    const maturity = replaceProhibitedTerms(assurance.text, MATURITY_REPLACEMENTS);
    return { text: maturity.text, count: assurance.count + maturity.count };
  };
  // Statistics are marked first, then the wording is replaced.
  const fixAll = (text) => {
    const stats = markUnverifiedStatistics(text);
    const terms = fixTerms(stats.text);
    return { text: terms.text, count: stats.count + terms.count };
  };

  const fieldFixers = {
    content_markdown: (text) => fixAll(text).text,
    content_html: (text) => mapHtmlTextSegments(text, (segment) => fixAll(segment).text),
    search_index: (text) => fixTerms(text).text,
    excerpt: (text) => fixTerms(text).text,
  };

  const changes = {};
  for (const [field, fix] of Object.entries(fieldFixers)) {
    const original = doc[field];
    if (typeof original !== 'string' || original === '') {
      continue;
    }
    const updated = fix(original);
    if (updated !== original) {
      changes[field] = updated;
    }
  }

  // The reported count is based on the markdown source only.
  const fixCount =
    typeof doc.content_markdown === 'string' ? fixAll(doc.content_markdown).count : 0;

  if (Object.keys(changes).length > 0) {
    changes.updated_at = new Date();
    await collection.updateOne({ slug }, { $set: changes });
  }
  return { fixed: fixCount, errors: 0 };
}
/**
 * Command-line entry point: apply fixDocument to a fixed list of documents and
 * print a summary.
 *
 * Pass `--dry-run` to only list the documents that would be processed; in that
 * mode fixDocument is never called and nothing is written.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const dryRun = process.argv.slice(2).includes('--dry-run');
  const DOCUMENTS_TO_FIX = [
    'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
    'executive-summary-tractatus-inflection-point',
    'tractatus-framework-research',
    'research-topic-concurrent-session-architecture',
    'introduction',
    'pluralistic-values-research-foundations',
    'core-concepts',
    'research-topic-rule-proliferation-transactional-overhead',
    'business-case-tractatus-framework',
    'implementation-guide-v1.1',
    'the-27027-incident-a-case-study-in-pattern-recognition-bias',
    'llm-integration-feasibility-research-scope',
    'tractatus-ai-safety-framework-core-values-and-principles',
  ];

  const client = new MongoClient('mongodb://localhost:27017');
  try {
    await client.connect();
    const collection = client.db('tractatus_dev').collection('documents');

    console.log('═══════════════════════════════════════════════════════════');
    console.log(' FIXING DOCUMENT VIOLATIONS');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(`Mode: ${dryRun ? 'DRY RUN (no changes)' : 'LIVE FIX'}`);
    console.log(`Documents to process: ${DOCUMENTS_TO_FIX.length}\n`);

    const stats = { fixed: 0, errors: 0 };
    for (const slug of DOCUMENTS_TO_FIX) {
      // Looked up here only to report the title and to flag missing slugs;
      // fixDocument fetches the document again by slug.
      const doc = await collection.findOne({ slug });
      if (!doc) {
        console.log(`⚠️ NOT FOUND: ${slug}\n`);
        stats.errors++;
        continue;
      }
      console.log(`Processing: ${doc.title}`);
      if (dryRun) {
        console.log(` [DRY RUN] Would fix violations in this document\n`);
        continue;
      }
      const result = await fixDocument(collection, slug);
      stats.fixed += result.fixed;
      stats.errors += result.errors;
      console.log(` ✓ Fixed ${result.fixed} violations\n`);
    }

    console.log('═══════════════════════════════════════════════════════════');
    console.log(' SUMMARY');
    console.log('═══════════════════════════════════════════════════════════\n');
    console.log(` Total fixes applied: ${stats.fixed}`);
    console.log(` Errors: ${stats.errors}\n`);
    if (!dryRun) {
      console.log(' Next steps:');
      console.log(' 1. Review changes: node scripts/analyze-doc-violations.js');
      console.log(' 2. Regenerate PDFs: node scripts/generate-public-pdfs.js');
      console.log(' 3. Export for production: node scripts/export-for-production.js\n');
    }
  } finally {
    // Always release the connection, even when a lookup or update throws;
    // otherwise the open socket keeps the process alive after the error.
    await client.close();
  }
}
run().catch(console.error);