tractatus/scripts/fix-document-violations.js
TheFlow a78809a91f feat(docs): enhance violation fix script to handle all document fields
Updated fix-document-violations.js to fix violations in:
- content_markdown
- content_html
- search_index (new)
- excerpt (new)

This ensures complete compliance across all document fields.

Note: Export file handled separately due to contextual false positives
in headings and examples (e.g., "Architectural Safety Guarantees" as
topic description, not claim).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-25 21:57:41 +13:00

236 lines
8.2 KiB
JavaScript

/**
* Fix inst_016/017/018 violations in documents
*
* Approach:
* - inst_016: Add [NEEDS VERIFICATION] to statistics without citations
* - inst_017: Replace absolute assurance terms with evidence-based language
* - inst_018: Remove maturity claims or replace with evidence-based terms
*/
const { MongoClient } = require('mongodb');
// inst_017 substitution table: each key is an absolute-assurance phrase and
// its value is the evidence-based wording that takes its place.
// Insertion order is the order in which the phrases are applied.
const ASSURANCE_REPLACEMENTS = {
  guarantees: 'provides strong safeguards for',
  guaranteed: 'designed to support',
  guarantee: 'provide strong safeguards for',
  ensuring: 'supporting',
  ensures: 'supports',
  'completely safe': 'designed with multiple safety layers',
  'totally secure': 'implements defense-in-depth security',
  absolutely: 'with high confidence',
  'eliminate all': 'significantly reduce',
  'prevent all': 'mitigate',
  'never fail': 'are resilient and designed to recover from failures',
};
// inst_018 substitution table: each key is a maturity claim and its value is
// the evidence-based wording that takes its place.
// Insertion order is the order in which the phrases are applied.
const MATURITY_REPLACEMENTS = {
  'production-ready': 'under active development',
  'production ready': 'under active development',
  'battle-tested': 'tested in real-world scenarios',
  'proven solution': 'approach validated through',
  mature: 'established',
  'enterprise-grade': 'designed for organizational use',
  'industry-leading': 'implementing modern approaches to',
  'world-class': 'high-quality',
};
// Marker appended to statistics that carry no citation (inst_016).
const NEEDS_VERIFICATION_MARKER = '[NEEDS VERIFICATION]';

// Statistics to tag: percentages and "<number> million/billion/thousand".
// The negative lookahead only inspects the text immediately after the match,
// so a figure is skipped only when it is itself already tagged (which keeps
// repeated runs idempotent) and not merely because another marker appears
// later on the same line.
const UNVERIFIED_STAT_PATTERNS = [
  /\b\d+%(?!\s*\[NEEDS VERIFICATION\])/g,
  /\b\d+\s*(?:million|billion|thousand)\b(?!\s*\[NEEDS VERIFICATION\])/gi
];

/** Escape regex metacharacters so a phrase can be embedded in a RegExp. */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Append the verification marker to every untagged statistic in `text`. */
function markUnverifiedStats(text) {
  let count = 0;
  let result = text;
  for (const pattern of UNVERIFIED_STAT_PATTERNS) {
    result = result.replace(pattern, (match) => {
      count += 1;
      return `${match} ${NEEDS_VERIFICATION_MARKER}`;
    });
  }
  return { text: result, count };
}

/**
 * Replace every whole-word, case-insensitive occurrence of each key of
 * `replacements` with its value, capitalising the replacement when the
 * matched text starts with an upper-case character.
 */
function replaceTerms(text, replacements) {
  let count = 0;
  let result = text;
  for (const [bad, good] of Object.entries(replacements)) {
    const pattern = new RegExp(`\\b${escapeRegExp(bad)}\\b`, 'gi');
    result = result.replace(pattern, (match) => {
      count += 1;
      if (match[0] === match[0].toUpperCase()) {
        return good.charAt(0).toUpperCase() + good.slice(1);
      }
      return good;
    });
  }
  return { text: result, count };
}

/** Apply the inst_017 and then the inst_018 term replacements to `text`. */
function fixTerms(text) {
  const assurance = replaceTerms(text, ASSURANCE_REPLACEMENTS);
  const maturity = replaceTerms(assurance.text, MATURITY_REPLACEMENTS);
  return { text: maturity.text, count: assurance.count + maturity.count };
}

/** Apply all three fixes (statistics marker, then term replacements) to `text`. */
function fixBodyText(text) {
  const stats = markUnverifiedStats(text);
  const terms = fixTerms(stats.text);
  return { text: terms.text, count: stats.count + terms.count };
}

/**
 * Run `transform` over the text between HTML tags only. Tags themselves are
 * copied through untouched so attributes such as ids, hrefs and inline
 * styles are never rewritten.
 */
function mapHtmlText(html, transform) {
  // split() with a capturing group alternates text (even index) and tags (odd index).
  return html
    .split(/(<[^>]*>)/)
    .map((segment, index) => (index % 2 === 1 ? segment : transform(segment)))
    .join('');
}

/**
 * Fix inst_016/017/018 violations in one document and persist the result.
 *
 * - content_markdown and content_html receive the statistics marker and the
 *   term replacements (HTML: text between tags only).
 * - search_index and excerpt receive the term replacements only.
 *
 * Each field is processed independently and only when it holds a string.
 * The document is written back (with a fresh `updated_at`) only if at least
 * one field actually changed, and only the changed fields are set.
 *
 * @param {object} collection - Collection-like object; only `findOne` and
 *   `updateOne` are called on it.
 * @param {string} slug - Slug of the document to fix.
 * @returns {Promise<{fixed: number, errors: number}>} `fixed` is the number
 *   of violations fixed in content_markdown (counted there only, so the same
 *   violation mirrored in the other fields is not counted more than once);
 *   `errors` is 1 when the document does not exist, otherwise 0.
 */
async function fixDocument(collection, slug) {
  const doc = await collection.findOne({ slug });
  if (!doc) {
    console.log(` ⚠️ NOT FOUND: ${slug}`);
    return { fixed: 0, errors: 1 };
  }

  const updates = {};
  let fixCount = 0;

  if (typeof doc.content_markdown === 'string') {
    const fixedMarkdown = fixBodyText(doc.content_markdown);
    fixCount = fixedMarkdown.count;
    if (fixedMarkdown.text !== doc.content_markdown) {
      updates.content_markdown = fixedMarkdown.text;
    }
  }

  if (typeof doc.content_html === 'string') {
    const fixedHtml = mapHtmlText(doc.content_html, (chunk) => fixBodyText(chunk).text);
    if (fixedHtml !== doc.content_html) {
      updates.content_html = fixedHtml;
    }
  }

  for (const field of ['search_index', 'excerpt']) {
    if (typeof doc[field] !== 'string') continue;
    const fixedText = fixTerms(doc[field]).text;
    if (fixedText !== doc[field]) {
      updates[field] = fixedText;
    }
  }

  // Skip the write entirely when nothing changed so updated_at stays truthful.
  if (Object.keys(updates).length > 0) {
    updates.updated_at = new Date();
    await collection.updateOne({ slug }, { $set: updates });
  }

  return { fixed: fixCount, errors: 0 };
}
/**
 * CLI entry point: walk the hard-coded list of document slugs in the
 * `documents` collection of the local `tractatus_dev` database and fix each
 * one via fixDocument().
 *
 * With `--dry-run` on the command line, documents are looked up and listed
 * but fixDocument() is never called, so nothing is written.
 *
 * The MongoDB client is closed in a `finally` block so the connection is
 * released even when connecting or processing throws.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  const DOCUMENTS_TO_FIX = [
    'real-world-ai-governance-a-case-study-in-framework-failure-and-recovery',
    'executive-summary-tractatus-inflection-point',
    'tractatus-framework-research',
    'research-topic-concurrent-session-architecture',
    'introduction',
    'pluralistic-values-research-foundations',
    'core-concepts',
    'research-topic-rule-proliferation-transactional-overhead',
    'business-case-tractatus-framework',
    'implementation-guide-v1.1',
    'the-27027-incident-a-case-study-in-pattern-recognition-bias',
    'llm-integration-feasibility-research-scope',
    'tractatus-ai-safety-framework-core-values-and-principles'
  ];
  const client = new MongoClient('mongodb://localhost:27017');
  try {
    await client.connect();
    const db = client.db('tractatus_dev');
    const collection = db.collection('documents');
    console.log('═══════════════════════════════════════════════════════════');
    console.log(' FIXING DOCUMENT VIOLATIONS');
    console.log('\n');
    console.log(`Mode: ${dryRun ? 'DRY RUN (no changes)' : 'LIVE FIX'}`);
    console.log(`Documents to process: ${DOCUMENTS_TO_FIX.length}\n`);
    const stats = { fixed: 0, errors: 0 };
    for (const slug of DOCUMENTS_TO_FIX) {
      // Looked up here (as well as inside fixDocument) to print the title
      // and to report missing documents in dry-run mode too.
      const doc = await collection.findOne({ slug });
      if (!doc) {
        console.log(`⚠️ NOT FOUND: ${slug}\n`);
        stats.errors++;
        continue;
      }
      console.log(`Processing: ${doc.title}`);
      if (dryRun) {
        console.log(` [DRY RUN] Would fix violations in this document\n`);
      } else {
        const result = await fixDocument(collection, slug);
        stats.fixed += result.fixed;
        stats.errors += result.errors;
        console.log(` ✓ Fixed ${result.fixed} violations\n`);
      }
    }
    console.log('');
    console.log(' SUMMARY');
    console.log('\n');
    console.log(` Total fixes applied: ${stats.fixed}`);
    console.log(` Errors: ${stats.errors}\n`);
    if (!dryRun) {
      console.log(' Next steps:');
      console.log(' 1. Review changes: node scripts/analyze-doc-violations.js');
      console.log(' 2. Regenerate PDFs: node scripts/generate-public-pdfs.js');
      console.log(' 3. Export for production: node scripts/export-for-production.js\n');
    }
  } finally {
    // Always release the connection, including on failure paths.
    await client.close();
  }
}
// Start the script. On failure, log the error and make the process exit with
// a non-zero status so shells and CI do not treat a failed run as a success.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});