/**
 * ProhibitedTermsScanner
 *
 * Proactively scans codebase for violations of inst_016/017/018
 * Part of Framework Improvement Phase 1: Proactive Content Scanning
 *
 * Usage:
 *   const scanner = new ProhibitedTermsScanner();
 *   const violations = await scanner.scan();
 *   const fixed = await scanner.autoFix(violations);
 *
 * CLI:
 *   node scripts/framework-components/ProhibitedTermsScanner.js [--details] [--fix] [--staged]
 */

const fs = require('fs').promises;
const path = require('path');
const glob = require('glob');
const { execSync } = require('child_process');

// Generic advice returned when a rule offers no concrete replacement. It is
// guidance for a human, so autoFix() must never write it into a file.
const FALLBACK_SUGGESTION = 'Review and revise';

// Maximum number of context characters shown per violation in detailed output.
const CONTEXT_PREVIEW_LENGTH = 80;

/**
 * Return a private, global copy of a pattern so that exec() can be looped
 * without touching the lastIndex of the regex stored on the scanner.
 * @param {RegExp|string} pattern - Pattern from a rule definition
 * @returns {RegExp} Fresh regex carrying the original flags plus `g`
 */
function toGlobalRegex(pattern) {
  const isRegex = pattern instanceof RegExp;
  const source = isRegex ? pattern.source : String(pattern);
  const flags = isRegex ? pattern.flags : '';
  return new RegExp(source, flags.includes('g') ? flags : `${flags}g`);
}

/**
 * Collect every match of a global regex in a line, with its position.
 * @param {string} line - Text to search
 * @param {RegExp} regex - Global regex (see toGlobalRegex)
 * @returns {Array<{text: string, index: number}>} Matches in left-to-right order
 */
function findMatches(line, regex) {
  const found = [];
  regex.lastIndex = 0;
  let result = regex.exec(line);
  while (result !== null) {
    found.push({ text: result[0], index: result.index });
    // An empty match does not advance lastIndex; step manually to avoid looping forever.
    if (result[0] === '') {
      regex.lastIndex += 1;
    }
    result = regex.exec(line);
  }
  return found;
}

/**
 * Carry the capitalisation of the matched text over to its replacement:
 * ALL CAPS stays all caps, a leading capital stays a leading capital.
 * @param {string} original - Text that was matched in the file
 * @param {string} replacement - Lower-case suggestion from the rule table
 * @returns {string} Replacement adjusted to the original's casing
 */
function matchCase(original, replacement) {
  const hasLetters = original.toLowerCase() !== original.toUpperCase();
  if (!hasLetters) {
    return replacement;
  }
  if (original.length > 1 && original === original.toUpperCase()) {
    return replacement.toUpperCase();
  }
  const first = original.charAt(0);
  if (first !== first.toLowerCase()) {
    return `${replacement.charAt(0).toUpperCase()}${replacement.slice(1)}`;
  }
  return replacement;
}

/**
 * Replace one flagged occurrence inside `lines`, in place.
 * The recorded column is used when it still points at the matched text;
 * otherwise the first occurrence on the flagged line is used.
 * @param {string[]} lines - File content split on '\n' (mutated)
 * @param {Object} violation - Violation produced by scan()
 * @returns {boolean} True when a replacement was made
 */
function applyFix(lines, violation) {
  const { match, column, suggestion } = violation;
  const lineIndex = violation.line - 1;
  const text = lines[lineIndex];
  if (typeof text !== 'string' || !match) {
    return false;
  }

  const hasColumn = Number.isInteger(column) && column >= 1;
  const start = hasColumn && text.startsWith(match, column - 1)
    ? column - 1
    : text.indexOf(match);
  if (start === -1) {
    // The file changed since it was scanned; leave it for manual review.
    return false;
  }

  const end = start + match.length;
  // Plain string splicing: the suggestion is inserted literally, so a `$`
  // in it is never interpreted as a replacement pattern.
  lines[lineIndex] = `${text.slice(0, start)}${matchCase(match, suggestion)}${text.slice(end)}`;
  return true;
}

class ProhibitedTermsScanner {
  /**
   * @param {Object} [options] - Scanner options; unknown keys are kept on this.options
   * @param {boolean} [options.silent=false] - Suppress progress output
   * @param {boolean} [options.fixMode=false] - Stored on this.options; not read by the class itself
   * @param {boolean} [options.staged=false] - Restrict scan() to files staged in git
   * @param {string} [options.basePath=process.cwd()] - Directory in which globs and git run
   */
  constructor(options = {}) {
    // Spread first so that an explicit `undefined` cannot erase a default.
    this.options = {
      ...options,
      silent: options.silent || false,
      fixMode: options.fixMode || false,
      staged: options.staged || false,
      basePath: options.basePath || process.cwd()
    };

    // Pattern definitions from inst_016/017/018
    this.patterns = [
      {
        id: 'inst_017',
        name: 'Absolute Assurance Terms',
        severity: 'HIGH',
        patterns: [
          /\bguarantee(?:s|d|ing)?\b/gi,
          /ensures?\s+100%/gi,
          /eliminates?\s+all\b/gi,
          /completely\s+prevents?\b/gi,
          /never\s+fails?\b/gi,
          /always\s+works?\b/gi
        ],
        suggestions: {
          'guarantee': 'enforcement',
          'guarantees': 'enforces',
          'guaranteed': 'enforced',
          'guaranteeing': 'enforcing',
          'ensures 100%': 'helps ensure',
          'ensure 100%': 'help ensure',
          'eliminates all': 'reduces',
          'eliminate all': 'reduce',
          'completely prevents': 'designed to prevent',
          'completely prevent': 'designed to prevent',
          'never fails': 'designed to prevent failures',
          'never fail': 'designed to prevent failures',
          'always works': 'designed to work',
          'always work': 'designed to work'
        }
      },
      {
        id: 'inst_016',
        name: 'Fabricated Statistics',
        severity: 'HIGH',
        patterns: [
          // Match percentage claims without [NEEDS VERIFICATION] or source citations
          /\b\d+%\s+(?:faster|better|improvement|increase|decrease|reduction|more|less)\b(?!\s*\[NEEDS VERIFICATION\]|\s*\(source:|\s*\[source:)/gi,
          // No \b after the % sign: "%" is not a word character, so a boundary
          // there would only match when a letter or digit follows directly
          // ("40%x") and would miss the normal "reduction of 40%." case.
          /\b(?:faster|better|improvement|increase|decrease|reduction)\s+of\s+\d+%(?!\s*\[NEEDS VERIFICATION\]|\s*\(source:|\s*\[source:)/gi
        ],
        suggestions: {
          'default': 'Add [NEEDS VERIFICATION] or cite source'
        }
      },
      {
        id: 'inst_018',
        name: 'Unverified Readiness Claims',
        severity: 'MEDIUM',
        patterns: [
          /\bproduction-ready\b(?!\s+development\s+tool|\s+proof-of-concept)/gi,
          /\bbattle-tested\b/gi,
          /\benterprise-proven\b/gi,
          /\bwidespread\s+adoption\b/gi,
          /\bcustomer\s+base\b(?!\s+of\s+zero|\s+\(none\))/gi,
          /\bmarket\s+validation\b(?!\s+pending|\s+not\s+yet)/gi
        ],
        suggestions: {
          'production-ready': 'proof-of-concept',
          'battle-tested': 'in development',
          'enterprise-proven': 'designed for',
          'widespread adoption': 'early development',
          'customer base': 'development project',
          'market validation': 'internal validation'
        }
      }
    ];

    // File inclusion patterns
    this.includePatterns = [
      '**/*.md',
      '**/*.html',
      '**/*.js',
      '**/*.json',
      '**/*.jsx',
      '**/*.tsx'
    ];

    // File exclusion patterns
    this.excludePatterns = [
      '**/node_modules/**',
      '**/.git/**',
      '**/.claude/**',
      '**/tests/**/*.test.js',
      '**/tests/**/*.spec.js',
      '**/docs/case-studies/**',
      '**/GOVERNANCE-RULE-LIBRARY.md',
      '**/.claude/instruction-history.json',
      '**/dist/**',
      '**/build/**',
      '**/.next/**',
      // The scanner's own suggestion table spells out every prohibited term;
      // without this entry it reports itself and --fix rewrites the table keys.
      '**/ProhibitedTermsScanner.js'
    ];
  }

  /**
   * Scan files for prohibited terms
   * @param {Object} options - Scan options, merged over the constructor options
   * @returns {Promise<Array<Object>>} Violations, each with file, line and column
   *   (both 1-based), match, rule, ruleName, severity, context and suggestion
   */
  async scan(options = {}) {
    const scanOptions = { ...this.options, ...options };
    const violations = [];

    // Get files to scan
    const files = await this.getFilesToScan(scanOptions.staged);

    if (!scanOptions.silent) {
      console.log(`\n🔍 Scanning ${files.length} files for prohibited terms...`);
    }

    // Compile private global copies once per scan rather than once per line.
    const rules = this.patterns.map(patternSet => ({
      patternSet,
      regexes: patternSet.patterns.map(toGlobalRegex)
    }));

    // Scan each file
    for (const file of files) {
      try {
        const content = await fs.readFile(file, 'utf8');
        const lines = content.split('\n');

        // Check each pattern type
        for (const { patternSet, regexes } of rules) {
          for (const regex of regexes) {
            lines.forEach((line, index) => {
              for (const found of findMatches(line, regex)) {
                // Skip if in allowed context
                if (this.isAllowedContext(line, found.text, file)) {
                  continue;
                }

                violations.push({
                  file,
                  line: index + 1,
                  // Position of the match within the line; lets autoFix()
                  // replace exactly this occurrence.
                  column: found.index + 1,
                  match: found.text,
                  rule: patternSet.id,
                  ruleName: patternSet.name,
                  severity: patternSet.severity,
                  context: line.trim(),
                  suggestion: this.getSuggestion(found.text, patternSet.suggestions)
                });
              }
            });
          }
        }
      } catch (err) {
        // Files that vanished between listing and reading are skipped quietly;
        // anything else is reported and the scan continues.
        if (err.code !== 'ENOENT') {
          console.error(`⚠ Error reading ${file}: ${err.message}`);
        }
      }
    }

    return violations;
  }

  /**
   * Auto-fix simple violations.
   *
   * Only the flagged occurrence on the flagged line is replaced, so text in
   * exempt lines or inside longer words is left alone. Violations whose
   * suggestion is advice rather than replacement text, or whose text can no
   * longer be found, are counted as skipped. When a file cannot be read or
   * written it is listed in `errors` and its violations are counted in
   * neither `fixed` nor `skipped`.
   *
   * @param {Array} violations - Violations to fix (not mutated)
   * @returns {Promise<{fixed: number, total: number, skipped: number, errors: Array}>} Fix results
   */
  async autoFix(violations) {
    const results = {
      fixed: 0,
      total: violations.length,
      skipped: 0,
      errors: []
    };

    // Group violations by file
    const byFile = new Map();
    for (const violation of violations) {
      if (!byFile.has(violation.file)) {
        byFile.set(violation.file, []);
      }
      byFile.get(violation.file).push(violation);
    }

    // Fix each file
    for (const [file, fileViolations] of byFile) {
      try {
        const content = await fs.readFile(file, 'utf8');
        // Splitting on '\n' only keeps any '\r' attached to its line, so the
        // file's line endings survive the join below.
        const lines = content.split('\n');

        // Work right-to-left within a line so the recorded columns of the
        // remaining violations stay valid after each replacement.
        const ordered = [...fileViolations].sort(
          (a, b) => (b.line - a.line) || ((b.column || 0) - (a.column || 0))
        );

        let fixed = 0;
        let skipped = 0;
        for (const violation of ordered) {
          if (this.isAutoFixable(violation) && applyFix(lines, violation)) {
            fixed += 1;
          } else {
            skipped += 1;
          }
        }

        // Write file if modified
        if (fixed > 0) {
          await fs.writeFile(file, lines.join('\n'), 'utf8');
          if (!this.options.silent) {
            console.log(`✓ Fixed ${file}`);
          }
        }

        results.fixed += fixed;
        results.skipped += skipped;
      } catch (err) {
        results.errors.push({ file, error: err.message });
        console.error(`✗ Error fixing ${file}: ${err.message}`);
      }
    }

    return results;
  }

  /**
   * Decide whether a violation's suggestion is literal replacement text.
   * @param {Object} violation - Violation produced by scan()
   * @returns {boolean} False for missing suggestions, the generic fallback,
   *   and any rule's `default` advice message
   */
  isAutoFixable(violation) {
    const { suggestion } = violation;
    if (!suggestion || suggestion === FALLBACK_SUGGESTION) {
      return false;
    }
    return !this.patterns.some(set => set.suggestions.default === suggestion);
  }

  /**
   * Get files to scan.
   *
   * Staged mode returns the staged files that also pass the include/exclude
   * patterns, as absolute paths, so both modes apply the same rules. If git
   * cannot be queried the full file list is returned instead.
   *
   * @param {boolean} stagedOnly - Only scan staged files
   * @returns {Promise<string[]>} Array of absolute file paths
   */
  async getFilesToScan(stagedOnly = false) {
    const candidates = this.findCandidateFiles();
    if (!stagedOnly) {
      return candidates;
    }

    let staged;
    try {
      staged = new Set(this.listStagedFiles());
    } catch (err) {
      console.error('⚠ Error getting staged files, falling back to all files');
      return candidates;
    }

    return candidates.filter(file => staged.has(file));
  }

  /**
   * Expand the include patterns under basePath, honouring the exclusions.
   * @returns {string[]} De-duplicated absolute file paths
   */
  findCandidateFiles() {
    const { basePath } = this.options;
    const files = [];

    for (const pattern of this.includePatterns) {
      try {
        const matches = glob.sync(pattern, {
          ignore: this.excludePatterns,
          nodir: true,
          cwd: basePath
        });
        // Prepend base path to make absolute paths
        files.push(...matches.map(match => path.join(basePath, match)));
      } catch (err) {
        // Keep going with the other patterns, but do not hide the failure.
        console.error(`⚠ Error expanding ${pattern}: ${err.message}`);
      }
    }

    // Remove duplicates
    return [...new Set(files)];
  }

  /**
   * List files staged in git under basePath.
   *
   * --relative makes git print paths relative to basePath (and drop files
   * outside it), --diff-filter=ACMR leaves out deletions, and -z gives
   * NUL-separated names without quoting of unusual characters.
   *
   * @returns {string[]} Absolute paths, built the same way as findCandidateFiles()
   * @throws {Error} When git fails, e.g. basePath is not inside a repository
   */
  listStagedFiles() {
    const { basePath } = this.options;
    const output = execSync(
      'git diff --cached --name-only --relative --diff-filter=ACMR -z',
      { encoding: 'utf8', cwd: basePath }
    );
    return output
      .split('\0')
      .filter(name => name.length > 0)
      .map(name => path.join(basePath, name));
  }

  /**
   * Check if context allows the term
   * @param {string} line - Line containing match
   * @param {string} match - Matched term
   * @param {string} file - File path
   * @returns {boolean} True if allowed
   */
  isAllowedContext(line, match, file) {
    // Allow in comments about the rules themselves
    if (line.includes('inst_017') || line.includes('inst_016') || line.includes('inst_018')) {
      return true;
    }

    // Allow in GOVERNANCE-RULE-LIBRARY.md
    if (file.includes('GOVERNANCE-RULE-LIBRARY.md')) {
      return true;
    }

    // Allow in case studies
    if (file.includes('case-studies')) {
      return true;
    }

    // Allow in test files (shouldn't reach here but double-check)
    if (file.includes('.test.') || file.includes('.spec.')) {
      return true;
    }

    // Allow "production-ready development tool" or "production-ready proof-of-concept"
    if (match.toLowerCase() === 'production-ready') {
      if (line.includes('development tool') || line.includes('proof-of-concept')) {
        return true;
      }
    }

    return false;
  }

  /**
   * Get suggestion for a match
   * @param {string} match - Matched term
   * @param {Object} suggestions - Suggestion map; an optional `default` key
   *   holds advice used when no specific entry applies
   * @returns {string} Suggestion
   */
  getSuggestion(match, suggestions) {
    // The patterns accept any whitespace run (\s+) between words, whereas the
    // table keys use a single space; normalise before looking up.
    const key = match.toLowerCase().replace(/\s+/g, ' ').trim();

    // Try exact match first
    if (Object.prototype.hasOwnProperty.call(suggestions, key)) {
      return suggestions[key];
    }

    // Try partial matches
    for (const [term, value] of Object.entries(suggestions)) {
      // `default` is the fallback slot, not a term to search for.
      if (term !== 'default' && key.includes(term)) {
        return value;
      }
    }

    return suggestions.default || FALLBACK_SUGGESTION;
  }

  /**
   * Format violations for display
   * @param {Array} violations - Violations to format
   * @param {boolean} detailed - Show detailed output
   * @returns {string} Formatted output
   */
  formatViolations(violations, detailed = false) {
    if (violations.length === 0) {
      return '\n✅ No prohibited terms found\n';
    }

    // Group by rule
    const byRule = violations.reduce((acc, v) => {
      if (!acc[v.rule]) acc[v.rule] = [];
      acc[v.rule].push(v);
      return acc;
    }, {});

    let output = `\n⚠ Found ${violations.length} violation(s):\n`;

    // Summary
    for (const [rule, items] of Object.entries(byRule)) {
      output += ` ${rule}: ${items.length} violation(s)\n`;
    }

    // Details
    if (detailed) {
      output += '\nDetails:\n';
      for (const v of violations) {
        // Show the ellipsis only when the context was actually cut short.
        const context = v.context.length > CONTEXT_PREVIEW_LENGTH
          ? `${v.context.substring(0, CONTEXT_PREVIEW_LENGTH)}...`
          : v.context;
        output += `\n ${v.file}:${v.line}\n`;
        output += ` Rule: ${v.rule} (${v.severity})\n`;
        output += ` Found: "${v.match}"\n`;
        output += ` Context: ${context}\n`;
        output += ` Suggestion: ${v.suggestion}\n`;
      }
    } else {
      output += '\nRun with --details for full violation list\n';
    }

    output += '\nTo fix: node scripts/framework-components/ProhibitedTermsScanner.js --fix\n';

    return output;
  }

  /**
   * Escape regex special characters
   * @param {string} str - String to escape
   * @returns {string} Escaped string
   */
  escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}

/**
 * CLI interface: scan, print the report, optionally apply fixes, then exit.
 * Flags: --details (full list), --fix (apply auto-fixes), --staged (staged files only).
 * Exits with status 1 whenever the scan found violations, even if fixes were
 * applied afterwards.
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const options = {
    silent: false,
    fixMode: args.includes('--fix'),
    staged: args.includes('--staged'),
    details: args.includes('--details')
  };

  const scanner = new ProhibitedTermsScanner(options);

  console.log('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
  console.log(' Tractatus Framework - Prohibited Terms Scanner');
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');

  const violations = await scanner.scan();
  console.log(scanner.formatViolations(violations, options.details));

  if (options.fixMode && violations.length > 0) {
    console.log('\n🔧 Applying auto-fixes...\n');
    const results = await scanner.autoFix(violations);
    console.log(`\n✓ Fixed: ${results.fixed}`);
    console.log(`⊘ Skipped: ${results.skipped} (manual review required)`);
    if (results.errors.length > 0) {
      console.log(`✗ Errors: ${results.errors.length}`);
    }
  }

  // Exit with error code if violations found (for pre-commit hooks)
  process.exit(violations.length > 0 ? 1 : 0);
}

// Run if called directly
if (require.main === module) {
  main().catch(err => {
    console.error('Error:', err);
    process.exit(1);
  });
}

module.exports = ProhibitedTermsScanner;