#!/usr/bin/env node
/**
 * CLAUDE.md Extraction & Analysis Script
 *
 * Extracts governance rules from CLAUDE.md files for Tractatus framework integration.
 *
 * Focuses on TWO rule layers:
 * 1. Development Environment Rules - Framework governance for Claude Code sessions
 * 2. Architectural Constraints - System-wide rules enforced at code level
 *
 * IGNORES:
 * - Tenant-specific configuration (belongs in MongoDB)
 * - Product defaults for new tenants (code constants, separate design)
 * - Credentials (belong in .env or credential vault)
 *
 * Usage: node <this script> [path/to/CLAUDE.md]
 * When no path is given, ../CLAUDE.md relative to this script is analysed.
 * Results are printed to the console and written next to the input file as
 * <name>_extracted_rules.json.
 */

'use strict';

const fs = require('fs');
const path = require('path');

// Color output (ANSI escape sequences)
const colors = {
  reset: '\x1b[0m',
  green: '\x1b[32m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  red: '\x1b[31m',
  cyan: '\x1b[36m',
  bold: '\x1b[1m',
  gray: '\x1b[90m'
};

// Matches "concrete parameter" hints: a 4-5 digit number, a slash-prefixed
// path, or a run of 3+ uppercase/underscore characters.
// NOTE(review): the last alternative also matches an upper-case "MUST",
// "NEVER", etc. in the rule text itself - confirm this is intended.
const PARAMETER_PATTERN = /\d{4,5}|\/[\w/-]+|[A-Z_]{3,}/;

const HIGH_QUALITY_THRESHOLD = 70;

/**
 * Print a message wrapped in an ANSI color.
 *
 * @param {string} message - Text to print.
 * @param {string} [color='reset'] - Key of `colors`; unknown keys fall back to reset.
 */
function log(message, color = 'reset') {
  const code = colors[color] || colors.reset;
  console.log(`${code}${message}${colors.reset}`);
}

/**
 * Print a banner-style header framed by two 80-character rules.
 *
 * @param {string} message - Header text.
 */
function header(message) {
  console.log('');
  log('═'.repeat(80), 'cyan');
  log(` ${message}`, 'bold');
  log('═'.repeat(80), 'cyan');
  console.log('');
}

/**
 * Print a section title preceded by a blank line.
 *
 * @param {string} message - Section title.
 */
function section(message) {
  console.log('');
  log(`▶ ${message}`, 'blue');
}

/**
 * Shorten text for single-line display, appending "..." only when text was cut.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum number of characters kept.
 * @returns {string} The original text, or its prefix followed by "...".
 */
function truncate(text, maxLength) {
  return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
}

/**
 * Flag every line that belongs to a fenced code block (``` or ~~~),
 * including the opening and closing fence lines themselves.
 *
 * @param {string[]} allLines - Document lines.
 * @returns {boolean[]} Parallel array; true where the line is fence or fenced content.
 */
function markFencedLines(allLines) {
  let openFence = null; // fence character of the currently open block, or null
  return allLines.map(line => {
    const marker = line.trim().match(/^(`{3,}|~{3,})/);
    if (openFence === null) {
      if (marker) {
        openFence = marker[1][0];
        return true;
      }
      return false;
    }
    // Only a fence of the same character closes the block.
    if (marker && marker[1][0] === openFence) {
      openFence = null;
    }
    return true;
  });
}

/**
 * Derive the JSON output path from the input path.
 *
 * Only a trailing ".md" extension (any case) is stripped, so directory names
 * containing ".md" are untouched and an input without that extension can
 * never resolve to the input file itself.
 *
 * @param {string} sourcePath - Path of the analysed markdown file.
 * @returns {string} Path of the JSON file to write.
 */
function buildOutputPath(sourcePath) {
  const parsed = path.parse(sourcePath);
  const stem = parsed.ext.toLowerCase() === '.md' ? parsed.name : parsed.base;
  return path.format({
    root: parsed.root,
    dir: parsed.dir,
    base: `${stem}_extracted_rules.json`
  });
}

/**
 * Average the `score` of the given rules, formatted with one decimal.
 *
 * @param {Array<{score: number}>} rules - Scored rules.
 * @returns {string} Average as a string; '0.0' when there are no rules.
 */
function formatAverageScore(rules) {
  if (rules.length === 0) {
    return '0.0';
  }
  const total = rules.reduce((sum, r) => sum + r.score, 0);
  return (total / rules.length).toFixed(1);
}

/**
 * Print a numbered list of rules with their line, imperative and patterns.
 *
 * @param {Array<object>} rules - Rules to display.
 */
function printRules(rules) {
  rules.forEach((rule, idx) => {
    log(` ${idx + 1}. [Line ${rule.line}] ${rule.imperative}`, 'cyan');
    log(` ${rule.text}`, 'gray');
    log(` Patterns: ${rule.patterns.join(', ')}`, 'yellow');
    console.log('');
  });
}

// Parse command line arguments
const args = process.argv.slice(2);
const claudeMdPath = args[0] || path.join(__dirname, '../CLAUDE.md');

if (!fs.existsSync(claudeMdPath)) {
  log(`Error: File not found: ${claudeMdPath}`, 'red');
  process.exit(1);
}

let content;
try {
  content = fs.readFileSync(claudeMdPath, 'utf8');
} catch (err) {
  // e.g. the path is a directory or is not readable
  log(`Error: Unable to read ${claudeMdPath}: ${err.message}`, 'red');
  process.exit(1);
}

// Accept both LF and CRLF so a trailing "\r" never defeats the heading regex.
const lines = content.split(/\r?\n/);
const fenced = markFencedLines(lines);

header('CLAUDE.md Rule Extraction & Analysis');
log(`File: ${claudeMdPath}`, 'cyan');
log(`Lines: ${lines.length}`, 'cyan');

// Rule patterns to detect
const patterns = {
  // Imperative language
  must: /\b(MUST|ALWAYS|NEVER|REQUIRED|SHALL|PROHIBITED)\b/i,
  should: /\b(SHOULD|RECOMMENDED|AVOID|PREFER)\b/i,
  may: /\b(MAY|CAN|OPTIONAL|CONSIDER)\b/i,
  // Multi-tenant specific
  tenant: /\b(tenant|multi-tenant|tenantId|isolation)\b/i,
  gdpr: /\b(GDPR|privacy|consent|retention|data protection)\b/i,
  // Architecture patterns
  port: /\b(port\s+\d+|:\d{4,5})\b/i,
  database: /\b(MongoDB|database|collection|query)\b/i,
  deployment: /\b(deploy|deployment|production|systemd|pm2)\b/i,
  // Security patterns
  security: /\b(security|auth|credential|password|token|api key)\b/i,
  // Development patterns
  testing: /\b(test|testing|local|development|dev)\b/i,
  session: /\b(session|handoff)\b/i
};

// Pattern key -> label stored on the rule, in detection order.
const patternLabels = [
  ['tenant', 'multi-tenant'],
  ['gdpr', 'GDPR'],
  ['port', 'port'],
  ['database', 'database'],
  ['deployment', 'deployment'],
  ['security', 'security'],
  ['testing', 'testing'],
  ['session', 'session']
];

// Extract sections. Kept as an ordered list so repeated heading titles do not
// overwrite each other. Sections with no body lines are not recorded.
const sections = [];
let currentSection = 'preamble';
let sectionContent = [];

function flushSection() {
  if (sectionContent.length > 0) {
    sections.push({ title: currentSection, body: sectionContent.join('\n') });
  }
}

lines.forEach((line, idx) => {
  // "# ..." inside a fenced code block is a comment, not a heading.
  const heading = fenced[idx] ? null : line.match(/^#+\s+(.+)$/);
  if (heading) {
    flushSection();
    currentSection = heading[1];
    sectionContent = [];
  } else {
    sectionContent.push(line);
  }
});
flushSection();

// Analyze sections
section('1. Document Structure');
log(` Sections found: ${sections.length}`, 'cyan');
sections.forEach(({ title, body }) => {
  const lineCount = body.split('\n').length;
  log(` - ${title} (${lineCount} lines)`, 'gray');
});

// Extract candidate rules
section('2. Candidate Rules Extraction');

const candidates = {
  layer1_dev: [], // Development environment rules
  layer2_arch: [], // Architectural constraints
  ignored_creds: [], // Credentials (should be in .env)
  ignored_config: [], // Tenant config (should be in MongoDB); not populated by the classifier below
  ignored_vague: [] // Too vague to be rules
};

lines.forEach((line, idx) => {
  const trimmed = line.trim();

  // Skip empty lines, code blocks (fences and their content), comments, headings
  if (!trimmed || fenced[idx] || trimmed.startsWith('//') || trimmed.startsWith('#')) {
    return;
  }

  // Detect imperative statements
  const hasMust = patterns.must.test(trimmed);
  const hasShould = patterns.should.test(trimmed);
  const hasMay = patterns.may.test(trimmed);

  if (!hasMust && !hasShould && !hasMay) {
    return; // Not a rule candidate
  }

  // Strongest imperative wins when several are present.
  const rule = {
    line: idx + 1,
    text: trimmed,
    imperative: hasMust ? 'MUST' : hasShould ? 'SHOULD' : 'MAY',
    patterns: patternLabels
      .filter(([key]) => patterns[key].test(trimmed))
      .map(([, label]) => label)
  };

  // Classification logic (first match wins)

  // Credentials → ignore
  if (trimmed.match(/password|credential|admin.*@|test.*@.*:/i)) {
    candidates.ignored_creds.push(rule);
    return;
  }

  // Layer 2: Architectural constraints (multi-tenant, GDPR, security boundaries)
  if (
    rule.patterns.includes('multi-tenant') ||
    rule.patterns.includes('GDPR') ||
    (hasMust && rule.patterns.includes('database'))
  ) {
    candidates.layer2_arch.push(rule);
    return;
  }

  // Layer 1: Development environment (ports, deployment, testing, sessions)
  if (
    rule.patterns.includes('port') ||
    rule.patterns.includes('deployment') ||
    rule.patterns.includes('testing') ||
    rule.patterns.includes('session')
  ) {
    candidates.layer1_dev.push(rule);
    return;
  }

  // Remaining MUST statements default to Layer 1; everything else is too vague.
  if (hasMust) {
    candidates.layer1_dev.push(rule);
  } else {
    candidates.ignored_vague.push(rule);
  }
});

// Display Layer 1 (Development Environment)
section('3. Layer 1: Development Environment Rules');
log(` Found ${candidates.layer1_dev.length} development rules`, 'green');
console.log('');
printRules(candidates.layer1_dev);

// Display Layer 2 (Architectural Constraints)
section('4. Layer 2: Architectural Constraints');
log(` Found ${candidates.layer2_arch.length} architectural rules`, 'green');
console.log('');
printRules(candidates.layer2_arch);

// Display ignored items
section('5. Ignored Items');
log(` Credentials (${candidates.ignored_creds.length}) - belong in .env or vault:`, 'yellow');
candidates.ignored_creds.forEach(rule => {
  log(` [Line ${rule.line}] ${truncate(rule.text, 80)}`, 'gray');
});
console.log('');
log(` Vague statements (${candidates.ignored_vague.length}) - not actionable rules:`, 'yellow');
candidates.ignored_vague.forEach(rule => {
  log(` [Line ${rule.line}] ${truncate(rule.text, 80)}`, 'gray');
});

// Rule quality scoring
section('6. Rule Quality Analysis');

/**
 * Score a rule from 0 to 100 based on imperative strength, number of matched
 * patterns, word count and presence of concrete parameters.
 *
 * @param {{imperative: string, patterns: string[], text: string}} rule - Rule to score.
 * @returns {number} Score capped at 100.
 */
function scoreRule(rule) {
  let score = 0;

  // Imperative strength
  if (rule.imperative === 'MUST') score += 40;
  else if (rule.imperative === 'SHOULD') score += 20;
  else score += 10;

  // Specificity (has patterns)
  score += rule.patterns.length * 10;

  // Length (not too short, not too long)
  const wordCount = rule.text.split(/\s+/).length;
  if (wordCount >= 5 && wordCount <= 20) score += 20;
  else if (wordCount > 20) score += 10;

  // Has parameters (ports, paths, etc.)
  if (PARAMETER_PATTERN.test(rule.text)) score += 10;

  return Math.min(100, score);
}

const allRules = [...candidates.layer1_dev, ...candidates.layer2_arch];
const scored = allRules
  .map(rule => ({ ...rule, score: scoreRule(rule) }))
  .sort((a, b) => b.score - a.score);

// Computed once and guarded against an empty rule set (previously NaN).
const averageScore = formatAverageScore(scored);

log(` Average quality score: ${averageScore}/100`, 'cyan');
console.log('');

log(` High-quality rules (score ≥ 70):`, 'green');
const highQuality = scored.filter(r => r.score >= HIGH_QUALITY_THRESHOLD);
highQuality.forEach(rule => {
  log(` [${rule.score}] ${truncate(rule.text, 70)}`, 'gray');
});
console.log('');

log(` Needs improvement (score < 70):`, 'yellow');
const needsWork = scored.filter(r => r.score < HIGH_QUALITY_THRESHOLD);
needsWork.forEach(rule => {
  log(` [${rule.score}] ${truncate(rule.text, 70)}`, 'gray');
});

// Suggested improvements
section('7. Suggested Improvements');
needsWork.forEach(rule => {
  const suggestions = [];

  if (rule.imperative !== 'MUST' && rule.patterns.length > 0) {
    suggestions.push(`Change "${rule.imperative}" to "MUST" for stronger enforcement`);
  }
  if (rule.patterns.length === 0) {
    suggestions.push('Add specific parameters (ports, paths, constraints)');
  }
  const wordCount = rule.text.split(/\s+/).length;
  if (wordCount < 5) {
    suggestions.push('Add more context - why is this rule important?');
  }
  if (!PARAMETER_PATTERN.test(rule.text)) {
    suggestions.push('Add concrete values (port numbers, file paths, constants)');
  }

  if (suggestions.length > 0) {
    log(` ${rule.text}`, 'gray');
    suggestions.forEach(s => log(` → ${s}`, 'yellow'));
    console.log('');
  }
});

// Generate instruction-history.json format
section('8. Proposed instruction-history.json Entries');

/**
 * Convert an extracted rule into an instruction-history.json style entry.
 *
 * @param {object} rule - Extracted rule (line, text, imperative, patterns, optional score).
 * @param {string} layer - 'layer1_dev' or 'layer2_arch'.
 * @returns {object} Instruction entry ready for JSON serialisation.
 */
function convertToInstruction(rule, layer) {
  const isOperational =
    rule.patterns.includes('deployment') || rule.patterns.includes('session');
  const quadrant = layer !== 'layer2_arch' && isOperational ? 'OPERATIONAL' : 'SYSTEM';

  const persistence =
    rule.imperative === 'MUST' ? 'HIGH' : rule.imperative === 'SHOULD' ? 'MEDIUM' : 'LOW';

  const category = rule.patterns.includes('multi-tenant')
    ? 'architecture'
    : rule.patterns.includes('security')
      ? 'security'
      : rule.patterns.includes('deployment')
        ? 'deployment'
        : rule.patterns.includes('testing')
          ? 'quality'
          : 'technical';

  // Candidates are passed in unscored, so compute the score when it is absent;
  // otherwise priority would always fall through to 70.
  const score = typeof rule.score === 'number' ? rule.score : scoreRule(rule);

  return {
    id: `fh_${layer}_${Date.now()}_${Math.random().toString(36).slice(2, 7)}`,
    text: rule.text,
    quadrant,
    persistence,
    category,
    temporal_scope: 'PERMANENT',
    priority: score >= HIGH_QUALITY_THRESHOLD ? 90 : 70,
    source: 'claude_md_extraction',
    active: true,
    created_date: new Date().toISOString().split('T')[0],
    extracted_from: claudeMdPath,
    original_line: rule.line,
    patterns: rule.patterns,
    layer: layer === 'layer1_dev' ? 'Development Environment' : 'Architectural Constraint'
  };
}

const instructions = {
  layer1: candidates.layer1_dev.map(r => convertToInstruction(r, 'layer1_dev')),
  layer2: candidates.layer2_arch.map(r => convertToInstruction(r, 'layer2_arch'))
};

log(` Layer 1 (Development): ${instructions.layer1.length} instructions`, 'green');
log(` Layer 2 (Architecture): ${instructions.layer2.length} instructions`, 'green');
console.log('');

// Output JSON
const outputPath = buildOutputPath(claudeMdPath);
const output = {
  metadata: {
    source_file: claudeMdPath,
    extracted_at: new Date().toISOString(),
    total_rules: instructions.layer1.length + instructions.layer2.length,
    layer1_count: instructions.layer1.length,
    layer2_count: instructions.layer2.length,
    average_score: averageScore
  },
  instructions: {
    layer1_development: instructions.layer1,
    layer2_architecture: instructions.layer2
  },
  ignored: {
    credentials: candidates.ignored_creds.length,
    vague_statements: candidates.ignored_vague.length
  }
};

try {
  fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));
} catch (err) {
  log(`Error: Unable to write ${outputPath}: ${err.message}`, 'red');
  process.exit(1);
}
log(`✓ Saved to: ${outputPath}`, 'green');

// Summary
section('9. Summary & Next Steps');
log(` Total rules extracted: ${allRules.length}`, 'bold');
log(` - Layer 1 (Development): ${candidates.layer1_dev.length}`, 'cyan');
log(` - Layer 2 (Architecture): ${candidates.layer2_arch.length}`, 'cyan');
log(` Ignored items: ${candidates.ignored_creds.length + candidates.ignored_vague.length}`, 'yellow');
log(` Average quality: ${averageScore}/100`, 'green');
console.log('');
log(' Next steps:', 'bold');
log(' 1. Review extracted rules in JSON output', 'cyan');
log(' 2. Manually improve low-quality rules (score < 70)', 'cyan');
log(' 3. Add missing rules not detected by patterns', 'cyan');
log(' 4. Import to instruction-history.json', 'cyan');
console.log('');
log('═'.repeat(80), 'cyan');