tractatus/scripts/check-prohibited-terms.js
TheFlow b9a301f2a7 feat(security): implement attack surface exposure prevention (inst_084)
Adds comprehensive protection against exposing internal implementation
details in public-facing documentation.

New Governance Rule (inst_084):
- Quadrant: SYSTEM
- Persistence: HIGH
- Scope: Public documents (confidential:false)
- Enforcement: Pre-commit hooks (mandatory)

Implementation:
1. attack-surface-validator.util.js
   - Pattern detection for file paths, API endpoints, admin URLs, ports
   - Frontmatter parsing (respects confidential:true exemption)
   - Code block exemption (doesn't flag technical examples)
   - Intelligent line numbering for violation reporting

2. check-attack-surface.js
   - Pre-commit script that scans staged documents
   - User-friendly violation reporting with suggestions
   - Integration with git workflow

3. Pre-commit hook integration
   - Added as Check #3 in git hooks
   - Runs after prohibited terms, before test requirements
   - Blocks commits with attack surface exposures

Detection Patterns:
- File paths: src/*, public/*, scripts/*
- API endpoints: /api/*, /admin/*
- File naming patterns: *.util.js, *.service.js
- Port numbers in prose
- Connection strings

Exemptions:
- Code blocks (```)
- Inline code (`)
- Confidential documents (confidential:true)
- Internal technical documentation

Security Rationale (Defense-in-Depth):
- Prevents reconnaissance by obscuring architecture
- Reduces attack surface by hiding implementation paths
- Complements credential protection (inst_069/070)
- Part of layered security strategy (inst_072)

Testing:
- Validated against test document with known exposures
- 7 violations detected correctly
- Code block exemption verified
- All expected pattern types detected

Example Violations Blocked (each followed by its suggested replacement):
 Blocked:   "Dashboard at /admin/audit-analytics.html"
 Suggested: "Administrative Dashboard"
 Blocked:   "GET /api/admin/audit-logs endpoint"
 Suggested: "Authenticated API for audit data"
 Blocked:   "In activity-classifier.util.js"
 Suggested: "The activity classifier"

This enforcement prevented the exact security issue discovered in
governance-bi-tools.md which exposed admin paths and API endpoints.

Also fixed prohibited terms checker to exempt instruction-history.json
(which contains prohibited term DEFINITIONS, not violations).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-27 12:11:43 +13:00

170 lines
5.1 KiB
JavaScript
Executable file

#!/usr/bin/env node
/**
* Prohibited Terms Scanner - Enforces inst_016, inst_017, inst_018
*
* Scans files for prohibited language that violates governance rules
*/
const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');
/**
 * inst_017 - absolute assurance language.
 *
 * Case-insensitive patterns for wording that promises an absolute outcome
 * (guarantees, "never fails", "100% secure", ...).
 */
const PROHIBITED_ABSOLUTE_TERMS = [
  /\bguarantee(s|d)?\b/i,
  /\bensures?\s+(100%|complete|total|absolute)/i,
  /\beliminates?\s+all\b/i,
  /\bcompletely\s+(prevents?|eliminates?|removes?)\b/i,
  /\bnever\s+fails?\b/i,
  /\b100%\s+(safe|secure|reliable|accurate)\b/i,
  /\babsolutely\s+(prevents?|guarantees?)\b/i,
];
/**
 * inst_018 - maturity claims made without evidence.
 *
 * Case-insensitive patterns for marketing-style statements about adoption
 * or production maturity.
 */
const PROHIBITED_MATURITY_CLAIMS = [
  /\b(production-ready|battle-tested|enterprise-proven)\b/i,
  /\bvalidated\s+by\s+\d+\s+(companies|organizations|teams)\b/i,
  /\bwidely\s+adopted\b/i,
  /\bmarket\s+(leader|validated)\b/i,
  /\bcustomer\s+base\s+of\b/i,
];
/**
 * inst_016 - statistics that need a citation or a [NEEDS VERIFICATION] marker.
 *
 * Case-insensitive patterns for quantitative claims such as percentages,
 * "Nx faster" multipliers, ROI figures and cost/time/effort reductions.
 */
const STATS_PATTERNS = [
  /\d+%\s+(improvement|increase|reduction|faster|better)/i,
  /\d+x\s+(faster|better|more)/i,
  /ROI\s+of\s+\d+/i,
  /reduces?\s+(cost|time|effort)\s+by\s+\d+/i,
];
/**
 * Scan a single file, line by line, for governance violations.
 *
 * Every line is tested against the inst_017, inst_018 and inst_016 pattern
 * lists, in that order. One violation is recorded per matching pattern, so a
 * single line may produce several entries. inst_016 matches are not reported
 * when the line contains '[', '(source:' or 'Citation:'.
 *
 * @param {string} filePath - Path of the file to read as UTF-8.
 * @returns {Array<{file: string, line: number, type: string, severity: string, text: string, rule: string}>}
 *   Violations in line order; `line` is 1-based and `text` is the trimmed line.
 */
function checkFile(filePath) {
  // Rule groups in the order they are applied to each line.
  const ruleSets = [
    {
      patterns: PROHIBITED_ABSOLUTE_TERMS,
      type: 'inst_017',
      severity: 'HIGH',
      rule: 'Prohibited absolute assurance term detected',
      citable: false,
    },
    {
      patterns: PROHIBITED_MATURITY_CLAIMS,
      type: 'inst_018',
      severity: 'HIGH',
      rule: 'Prohibited maturity claim without evidence',
      citable: false,
    },
    {
      patterns: STATS_PATTERNS,
      type: 'inst_016',
      severity: 'MEDIUM',
      rule: 'Statistic without citation or [NEEDS VERIFICATION] marker',
      citable: true,
    },
  ];

  const found = [];
  const rows = fs.readFileSync(filePath, 'utf8').split('\n');

  for (const [index, row] of rows.entries()) {
    // A bracket or an explicit source/citation tag counts as a citation marker.
    const cited =
      row.includes('[') || row.includes('(source:') || row.includes('Citation:');

    for (const { patterns, type, severity, rule, citable } of ruleSets) {
      for (const pattern of patterns) {
        if (!pattern.test(row)) continue;
        if (citable && cited) continue;
        found.push({
          file: filePath,
          line: index + 1,
          type,
          severity,
          text: row.trim(),
          rule,
        });
      }
    }
  }

  return found;
}
/**
 * Run checkFile over every eligible path and merge the results.
 *
 * A path is skipped when it does not exist on disk, when it contains
 * 'instruction-history.json' (that file holds the prohibited-term
 * DEFINITIONS, not violations), or when its extension is not one of
 * .md, .html, .txt or .json (compared case-insensitively).
 *
 * @param {string[]} files - Candidate file paths.
 * @returns {Array<Object>} Violations from all scanned files, in input order.
 */
function scanFiles(files) {
  const scannableExtensions = ['.md', '.html', '.txt', '.json'];
  const collected = [];

  for (const candidate of files) {
    const eligible =
      fs.existsSync(candidate) &&
      !candidate.includes('instruction-history.json') &&
      scannableExtensions.includes(path.extname(candidate).toLowerCase());

    if (eligible) {
      collected.push(...checkFile(candidate));
    }
  }

  return collected;
}
/**
 * CLI entry point.
 *
 * With no command-line arguments, scans the files currently staged in git
 * (added, copied or modified); otherwise scans the paths given as arguments.
 * Prints a report to stdout and terminates the process: exit status 1 when
 * violations were found, 0 otherwise (including when git cannot be queried).
 */
function main() {
  const args = process.argv.slice(2);
  let files = [];

  if (args.length === 0) {
    // Scan staged git files
    try {
      // -z makes git emit NUL-terminated, unquoted paths. Without it, names
      // containing non-ASCII or special characters are printed C-quoted
      // (e.g. "caf\303\251.md"), would not be found on disk, and would be
      // silently skipped by the scan.
      const staged = execSync('git diff --cached --name-only --diff-filter=ACM -z', {
        encoding: 'utf8'
      });
      files = staged.split('\0').filter(f => f.length > 0);
    } catch (err) {
      // Deliberately non-blocking: outside a repository there is nothing to check.
      console.error('Not in a git repository or no staged files');
      process.exit(0);
    }
  } else {
    files = args;
  }

  if (files.length === 0) {
    console.log('✅ No files to scan');
    process.exit(0);
  }

  console.log(`\n🔍 Scanning ${files.length} file(s) for prohibited terms...\n`);
  const violations = scanFiles(files);

  if (violations.length === 0) {
    console.log('✅ No prohibited terms detected\n');
    process.exit(0);
  }

  // Report violations
  console.log(`❌ Found ${violations.length} violation(s):\n`);
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n');
  violations.forEach(v => {
    console.log(`${v.severity === 'HIGH' ? '🔴' : '🟡'} ${v.file}:${v.line}`);
    console.log(` Rule: ${v.type} - ${v.rule}`);
    // Keep the report readable: show at most the first 80 characters of the line.
    console.log(` Text: ${v.text.substring(0, 80)}${v.text.length > 80 ? '...' : ''}`);
    console.log('');
  });
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n');
  console.log('Fix violations before committing/deploying:\n');
  console.log(' inst_016: Add citation or [NEEDS VERIFICATION] to statistics');
  console.log(' inst_017: Replace absolute terms with evidence-based language');
  console.log(' inst_018: Remove maturity claims or add documented evidence\n');

  // Non-zero status signals failure to the caller.
  process.exit(1);
}
// Run the scanner immediately when this script is loaded.
main();