tractatus/scripts/framework-components/ProhibitedTermsScanner.js
TheFlow 2298d36bed fix(submissions): restructure Economist package and fix article display
- Create Economist SubmissionTracking package correctly:
  * mainArticle = full blog post content
  * coverLetter = 216-word SIR— letter
  * Links to blog post via blogPostId
- Archive 'Letter to The Economist' from blog posts (it's the cover letter)
- Fix date display on article cards (use published_at)
- Target publication already displaying via blue badge

Database changes:
- Make blogPostId optional in SubmissionTracking model
- Economist package ID: 68fa85ae49d4900e7f2ecd83
- Le Monde package ID: 68fa2abd2e6acd5691932150

Next: Enhanced modal with tabs, validation, export

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-24 08:47:42 +13:00

440 lines
13 KiB
JavaScript

/**
* ProhibitedTermsScanner
*
* Proactively scans codebase for violations of inst_016/017/018
* Part of Framework Improvement Phase 1: Proactive Content Scanning
*
* Usage:
* const scanner = new ProhibitedTermsScanner();
* const violations = await scanner.scan();
* const fixed = await scanner.autoFix(violations);
*
* CLI:
* node scripts/framework-components/ProhibitedTermsScanner.js [--details] [--fix] [--staged]
*/
const fs = require('fs').promises;
const path = require('path');
const { glob } = require('glob');
const { execSync } = require('child_process');
/**
 * Scans text files for wording prohibited by rules inst_016, inst_017 and
 * inst_018, reports each hit as a violation object, and can rewrite the
 * simple cases in place.
 */
class ProhibitedTermsScanner {
  /**
   * @param {Object} [options] - Scanner options; unknown keys are kept on this.options
   * @param {boolean} [options.silent=false] - Suppress the progress line printed by scan()
   * @param {boolean} [options.fixMode=false] - Stored on this.options; not read inside the class
   * @param {boolean} [options.staged=false] - Scan only the files git reports as staged
   * @param {string} [options.basePath=process.cwd()] - Directory the include globs are resolved against
   */
  constructor(options = {}) {
    // Spread the caller's object first and normalise the known keys afterwards,
    // so an explicit `undefined` (e.g. { basePath: undefined }) cannot erase a
    // default and later break path.join().
    this.options = {
      ...options,
      silent: options.silent || false,
      fixMode: options.fixMode || false,
      staged: options.staged || false,
      basePath: options.basePath || process.cwd()
    };

    // Pattern definitions from inst_016/017/018
    this.patterns = [
      {
        id: 'inst_017',
        name: 'Absolute Assurance Terms',
        severity: 'HIGH',
        patterns: [
          /\bguarantee(?:s|d|ing)?\b/gi,
          /ensures?\s+100%/gi,
          /eliminates?\s+all\b/gi,
          /completely\s+prevents?\b/gi,
          /never\s+fails?\b/gi,
          /always\s+works?\b/gi
        ],
        suggestions: {
          'guarantee': 'enforcement',
          'guarantees': 'enforces',
          'guaranteed': 'enforced',
          'guaranteeing': 'enforcing',
          'ensures 100%': 'helps ensure',
          'ensure 100%': 'help ensure',
          'eliminates all': 'reduces',
          'eliminate all': 'reduce',
          'completely prevents': 'designed to prevent',
          'completely prevent': 'designed to prevent',
          'never fails': 'designed to prevent failures',
          'never fail': 'designed to prevent failures',
          'always works': 'designed to work',
          'always work': 'designed to work'
        }
      },
      {
        id: 'inst_016',
        name: 'Fabricated Statistics',
        severity: 'HIGH',
        patterns: [
          // Match percentage claims without [NEEDS VERIFICATION] or source citations
          /\b\d+%\s+(?:faster|better|improvement|increase|decrease|reduction|more|less)\b(?!\s*\[NEEDS VERIFICATION\]|\s*\(source:|\s*\[source:)/gi,
          /\b(?:faster|better|improvement|increase|decrease|reduction)\s+of\s+\d+%\b(?!\s*\[NEEDS VERIFICATION\]|\s*\(source:|\s*\[source:)/gi
        ],
        suggestions: {
          'default': 'Add [NEEDS VERIFICATION] or cite source'
        }
      },
      {
        id: 'inst_018',
        name: 'Unverified Readiness Claims',
        severity: 'MEDIUM',
        patterns: [
          /\bproduction-ready\b(?!\s+development\s+tool|\s+proof-of-concept)/gi,
          /\bbattle-tested\b/gi,
          /\benterprise-proven\b/gi,
          /\bwidespread\s+adoption\b/gi,
          /\bcustomer\s+base\b(?!\s+of\s+zero|\s+\(none\))/gi,
          /\bmarket\s+validation\b(?!\s+pending|\s+not\s+yet)/gi
        ],
        suggestions: {
          'production-ready': 'proof-of-concept',
          'battle-tested': 'in development',
          'enterprise-proven': 'designed for',
          'widespread adoption': 'early development',
          'customer base': 'development project',
          'market validation': 'internal validation'
        }
      }
    ];

    // File inclusion patterns
    this.includePatterns = [
      '**/*.md',
      '**/*.html',
      '**/*.js',
      '**/*.json',
      '**/*.jsx',
      '**/*.tsx'
    ];

    // File exclusion patterns
    this.excludePatterns = [
      '**/node_modules/**',
      '**/.git/**',
      '**/.claude/**',
      '**/tests/**/*.test.js',
      '**/tests/**/*.spec.js',
      '**/docs/case-studies/**',
      '**/GOVERNANCE-RULE-LIBRARY.md',
      '**/.claude/instruction-history.json',
      '**/dist/**',
      '**/build/**',
      '**/.next/**'
    ];
  }

  /**
   * Scan files for prohibited terms.
   *
   * Each file is read as UTF-8, split on '\n', and every line is tested
   * against every pattern of every rule. Matches accepted by
   * isAllowedContext() are dropped.
   *
   * @param {Object} options - Per-call overrides merged over this.options
   * @returns {Promise<Array>} Violations, each shaped as
   *   { file, line (1-based), match, rule, ruleName, severity, context, suggestion }
   */
  async scan(options = {}) {
    const scanOptions = { ...this.options, ...options };
    const violations = [];

    const files = await this.getFilesToScan(scanOptions.staged);
    if (!scanOptions.silent) {
      console.log(`\n🔍 Scanning ${files.length} files for prohibited terms...`);
    }

    for (const file of files) {
      try {
        const content = await fs.readFile(file, 'utf8');
        const lines = content.split('\n');

        for (const patternSet of this.patterns) {
          for (const pattern of patternSet.patterns) {
            lines.forEach((line, index) => {
              // String.prototype.match with a /g regex returns every match on
              // the line (or null) and does not depend on pattern.lastIndex.
              const matches = line.match(pattern);
              if (!matches) {
                return;
              }
              for (const match of matches) {
                if (this.isAllowedContext(line, match, file)) {
                  continue;
                }
                violations.push({
                  file,
                  line: index + 1,
                  match,
                  rule: patternSet.id,
                  ruleName: patternSet.name,
                  severity: patternSet.severity,
                  context: line.trim(),
                  suggestion: this.getSuggestion(match, patternSet.suggestions)
                });
              }
            });
          }
        }
      } catch (err) {
        // Missing files (e.g. staged deletions) are skipped quietly; any other
        // read failure is reported but does not abort the scan.
        if (err.code !== 'ENOENT') {
          console.error(`⚠ Error reading ${file}: ${err.message}`);
        }
      }
    }

    return violations;
  }

  /**
   * Auto-fix simple violations.
   *
   * Every violation is applied to the exact line it was reported on: the
   * first remaining occurrence of `violation.match` on line `violation.line`
   * is replaced with `violation.suggestion`. Other lines are never touched,
   * so text that scan() deliberately exempted stays as written.
   *
   * A violation is counted as skipped when its suggestion is only advice
   * (a rule's `default` suggestion or the generic 'Review and revise'), or
   * when the matched text is no longer present on the reported line.
   *
   * @param {Array} violations - Violations produced by scan()
   * @returns {Promise<Object>} { fixed, total, skipped, errors: [{ file, error }] }
   */
  async autoFix(violations) {
    const results = {
      fixed: 0,
      total: violations.length,
      skipped: 0,
      errors: []
    };

    // Suggestions that are instructions for a human, not replacement text.
    const advisory = new Set(['Review and revise']);
    for (const { suggestions } of this.patterns) {
      if (suggestions.default) {
        advisory.add(suggestions.default);
      }
    }

    // Group violations by file so each file is read and written once
    const byFile = new Map();
    for (const violation of violations) {
      if (!byFile.has(violation.file)) {
        byFile.set(violation.file, []);
      }
      byFile.get(violation.file).push(violation);
    }

    for (const [file, fileViolations] of byFile) {
      try {
        const content = await fs.readFile(file, 'utf8');
        const lines = content.split('\n');
        let fixedInFile = 0;

        for (const violation of fileViolations) {
          const { match, suggestion } = violation;
          const lineIndex = violation.line - 1;
          const currentLine = lines[lineIndex];
          const canSearch =
            typeof currentLine === 'string' && typeof match === 'string' && match !== '';
          const position = canSearch ? currentLine.indexOf(match) : -1;

          if (!suggestion || advisory.has(suggestion) || position === -1) {
            results.skipped++;
            continue;
          }

          // Splice by index rather than String.replace so neither the match
          // nor the suggestion is interpreted as a pattern.
          lines[lineIndex] =
            currentLine.slice(0, position) +
            suggestion +
            currentLine.slice(position + match.length);
          fixedInFile++;
        }

        if (fixedInFile > 0) {
          await fs.writeFile(file, lines.join('\n'), 'utf8');
          // Count fixes only once they have actually reached the disk
          results.fixed += fixedInFile;
          console.log(`✓ Fixed ${file}`);
        }
      } catch (err) {
        results.errors.push({ file, error: err.message });
        console.error(`✗ Error fixing ${file}: ${err.message}`);
      }
    }

    return results;
  }

  /**
   * Get files to scan.
   *
   * In staged mode the paths are returned exactly as printed by
   * `git diff --cached --name-only`; they are not filtered through the
   * include/exclude patterns. Otherwise every include pattern is expanded
   * under options.basePath and the results are returned as joined paths.
   *
   * @param {boolean} stagedOnly - Only scan staged files
   * @returns {Promise<Array>} Array of file paths
   */
  async getFilesToScan(stagedOnly = false) {
    if (stagedOnly) {
      try {
        const output = execSync('git diff --cached --name-only', { encoding: 'utf8' });
        return output.split('\n').filter(f => f.trim());
      } catch (err) {
        // Deliberate best-effort: fall through to the full glob scan
        console.error('⚠ Error getting staged files, falling back to all files');
      }
    }

    const files = [];
    for (const pattern of this.includePatterns) {
      try {
        const matches = await glob(pattern, {
          ignore: this.excludePatterns,
          nodir: true,
          cwd: this.options.basePath
        });
        if (Array.isArray(matches)) {
          // Prepend base path so the paths do not depend on the process cwd
          files.push(...matches.map(f => path.join(this.options.basePath, f)));
        }
      } catch (err) {
        // Keep scanning the other patterns, but say so: a silently failed
        // expansion would make the scan look clean when nothing was checked.
        console.error(`⚠ Error expanding ${pattern}: ${err.message}`);
      }
    }

    // Remove duplicates
    return [...new Set(files)];
  }

  /**
   * Check if context allows the term.
   *
   * @param {string} line - Line containing match
   * @param {string} match - Matched term
   * @param {string} file - File path
   * @returns {boolean} True if the match should not be reported
   */
  isAllowedContext(line, match, file) {
    // Lines that cite a rule id are talking about the rules themselves
    if (line.includes('inst_017') || line.includes('inst_016') || line.includes('inst_018')) {
      return true;
    }

    // The rule library and case studies are exempt wherever they are found
    if (file.includes('GOVERNANCE-RULE-LIBRARY.md')) {
      return true;
    }
    if (file.includes('case-studies')) {
      return true;
    }

    // Test files (normally excluded by the globs; staged mode bypasses those)
    if (file.includes('.test.') || file.includes('.spec.')) {
      return true;
    }

    // Allow "production-ready development tool" or "production-ready proof-of-concept"
    if (match.toLowerCase() === 'production-ready') {
      if (line.includes('development tool') || line.includes('proof-of-concept')) {
        return true;
      }
    }

    return false;
  }

  /**
   * Get suggestion for a match.
   *
   * The match is lower-cased and its whitespace runs are collapsed to single
   * spaces before lookup, because the patterns accept `\s+` between words
   * while the suggestion keys use exactly one space.
   *
   * @param {string} match - Matched term
   * @param {Object} suggestions - Suggestion map; may carry a `default` entry
   * @returns {string} Replacement text, the map's default, or 'Review and revise'
   */
  getSuggestion(match, suggestions) {
    const lowerMatch = match.toLowerCase().replace(/\s+/g, ' ');

    // Try exact match first
    if (Object.prototype.hasOwnProperty.call(suggestions, lowerMatch) && suggestions[lowerMatch]) {
      return suggestions[lowerMatch];
    }

    // Try partial matches; `default` is a fallback slot, not a term
    for (const [key, value] of Object.entries(suggestions)) {
      if (key !== 'default' && lowerMatch.includes(key)) {
        return value;
      }
    }

    return suggestions.default || 'Review and revise';
  }

  /**
   * Format violations for display.
   *
   * @param {Array} violations - Violations to format
   * @param {boolean} detailed - Show one entry per violation after the summary
   * @returns {string} Formatted output
   */
  formatViolations(violations, detailed = false) {
    if (violations.length === 0) {
      return '\n✅ No prohibited terms found\n';
    }

    // Group by rule
    const byRule = violations.reduce((acc, v) => {
      if (!acc[v.rule]) acc[v.rule] = [];
      acc[v.rule].push(v);
      return acc;
    }, {});

    let output = `\n⚠ Found ${violations.length} violation(s):\n`;

    // Summary
    for (const [rule, items] of Object.entries(byRule)) {
      output += ` ${rule}: ${items.length} violation(s)\n`;
    }

    // Details
    if (detailed) {
      output += '\nDetails:\n';
      for (const v of violations) {
        // Only mark the context as truncated when it really was
        const context = v.context.length > 80 ? `${v.context.substring(0, 80)}...` : v.context;
        output += `\n ${v.file}:${v.line}\n`;
        output += ` Rule: ${v.rule} (${v.severity})\n`;
        output += ` Found: "${v.match}"\n`;
        output += ` Context: ${context}\n`;
        output += ` Suggestion: ${v.suggestion}\n`;
      }
    } else {
      output += '\nRun with --details for full violation list\n';
    }

    output += '\nTo fix: node scripts/framework-components/ProhibitedTermsScanner.js --fix\n';
    return output;
  }

  /**
   * Escape regex special characters.
   *
   * @param {string} str - String to escape
   * @returns {string} Escaped string, safe to embed in a RegExp source
   */
  escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}
// CLI interface
/**
 * Command-line entry point: parses --fix / --staged / --details, runs one
 * scan, prints the report, optionally applies auto-fixes, then terminates
 * the process with status 1 when the scan found anything and 0 otherwise.
 */
async function main() {
  const flags = new Set(process.argv.slice(2));
  const options = {
    silent: false,
    fixMode: flags.has('--fix'),
    staged: flags.has('--staged'),
    details: flags.has('--details')
  };
  const scanner = new ProhibitedTermsScanner(options);

  // Banner
  console.log('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
  console.log(' Tractatus Framework - Prohibited Terms Scanner');
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');

  const found = await scanner.scan();
  console.log(scanner.formatViolations(found, options.details));

  const shouldFix = options.fixMode && found.length > 0;
  if (shouldFix) {
    console.log('\n🔧 Applying auto-fixes...\n');
    const { fixed, skipped, errors } = await scanner.autoFix(found);
    console.log(`\n✓ Fixed: ${fixed}`);
    console.log(`⊘ Skipped: ${skipped} (manual review required)`);
    if (errors.length > 0) {
      console.log(`✗ Errors: ${errors.length}`);
    }
  }

  // Non-zero status lets a pre-commit hook block the commit; it is based on
  // what the scan found, regardless of whether fixes were then applied
  const exitCode = found.length > 0 ? 1 : 0;
  process.exit(exitCode);
}
// Run if called directly
if (require.main === module) {
  // Any rejection escaping main() is fatal for the CLI
  const reportFatal = (failure) => {
    console.error('Error:', failure);
    process.exit(1);
  };
  main().catch(reportFatal);
}

// Programmatic consumers receive the class itself
module.exports = ProhibitedTermsScanner;