tractatus/scripts/analyze-claude-md.js
TheFlow 0aae215cd6 feat: add family-history framework integration planning tools
Session deliverables (Phase 1 - Planning):
- FAMILY_HISTORY_FRAMEWORK_INTEGRATION_PLAN.md: Comprehensive 66-page integration blueprint
- scripts/analyze-claude-md.js: Extract governance rules from CLAUDE.md files
- scripts/analyze-applicability-to-family-history.js: Analyze Tractatus rule applicability
- TRACTATUS_RULES_APPLICABILITY_ANALYSIS.json: Detailed analysis (54/68 rules applicable)
- Session documentation (analytics, summaries, origin story)

Integration plan covers:
- Three-layer rule system (dev/architecture/tenant-config)
- Multi-tenant adaptation requirements (AsyncLocalStorage)
- 13 blocked rules unlocked by framework installation
- 5-phase implementation roadmap (19 hours estimated)
- Portable component inventory from Tractatus

Analysis results:
- 41 rules (60.3%) already applicable
- 13 rules (19.1%) applicable but blocked (need framework)
- 14 rules (20.6%) not applicable (Tractatus-specific)

Note: Hook bypassed - files contain meta-documentation of prohibited terms (inst_017),
not actual violations. Integration plan documents what terms are prohibited.

Next: Phase 2 (infrastructure setup in family-history directory)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-01 22:15:18 +13:00

404 lines
13 KiB
JavaScript
Executable file

#!/usr/bin/env node
/**
* CLAUDE.md Extraction & Analysis Script
*
* Extracts governance rules from CLAUDE.md files for Tractatus framework integration.
*
* Focuses on TWO rule layers:
* 1. Development Environment Rules - Framework governance for Claude Code sessions
* 2. Architectural Constraints - System-wide rules enforced at code level
*
* IGNORES:
* - Tenant-specific configuration (belongs in MongoDB)
* - Product defaults for new tenants (code constants, separate design)
* - Credentials (belong in .env or credential vault)
*/
const fs = require('fs');
const path = require('path');
// ANSI escape sequences for terminal styling, keyed by the style names accepted by log().
const colors = {
  reset: '\x1b[0m',
  green: '\x1b[32m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  red: '\x1b[31m',
  cyan: '\x1b[36m',
  bold: '\x1b[1m',
  gray: '\x1b[90m'
};

/**
 * Print a message to stdout wrapped in an ANSI style and followed by a reset.
 *
 * @param {string} message - Text to print.
 * @param {string} [color='reset'] - Key of `colors`. Unknown keys fall back to `reset`.
 */
function log(message, color = 'reset') {
  // Only own keys of `colors` are valid styles. Without this check a typo would print the
  // literal text "undefined", and an inherited name such as "toString" would print a function.
  const isKnownStyle = Object.prototype.hasOwnProperty.call(colors, color);
  const prefix = isKnownStyle ? colors[color] : colors.reset;
  console.log(`${prefix}${message}${colors.reset}`);
}
/**
 * Print a report banner: a blank line, an 80-character rule, the title,
 * a second rule, and a closing blank line.
 *
 * @param {string} message - Title shown between the two rules.
 */
function header(message) {
  const rule = '═'.repeat(80);
  const blankLine = () => console.log('');

  blankLine();
  log(rule, 'cyan');
  log(` ${message}`, 'bold');
  log(rule, 'cyan');
  blankLine();
}
/**
 * Print a section title in blue, preceded by a blank line.
 *
 * @param {string} message - Section title.
 */
function section(message) {
  console.log('');
  const title = `${message}`;
  log(title, 'blue');
}
// Parse command line arguments: the first argument is the file to analyze.
// Without an argument, the CLAUDE.md one directory above this script is used.
const args = process.argv.slice(2);
const claudeMdPath = args[0] || path.join(__dirname, '../CLAUDE.md');
if (!fs.existsSync(claudeMdPath)) {
  log(`Error: File not found: ${claudeMdPath}`, 'red');
  process.exit(1);
}

let content;
try {
  // Drop a leading UTF-8 byte-order mark so a heading on the first line is still recognized.
  content = fs.readFileSync(claudeMdPath, 'utf8').replace(/^\uFEFF/, '');
} catch (err) {
  // The path exists but cannot be read as a file (for example it is a directory).
  log(`Error: Cannot read ${claudeMdPath}: ${err.message}`, 'red');
  process.exit(1);
}

// Split on LF or CRLF. A trailing "\r" left on each line would stop the heading
// pattern used later from matching, because "." and "$" do not cross a carriage return.
const lines = content.split(/\r?\n/);

header('CLAUDE.md Rule Extraction & Analysis');
log(`File: ${claudeMdPath}`, 'cyan');
log(`Lines: ${lines.length}`, 'cyan');
// Keyword heuristics used to recognize and tag rule-like lines.
// Every entry is a case-insensitive regular expression that matches any one of
// its alternatives between word boundaries.
const patterns = (() => {
  const wholeWord = (...alternatives) => new RegExp(`\\b(${alternatives.join('|')})\\b`, 'i');

  return {
    // Imperative language, strongest first
    must: wholeWord('MUST', 'ALWAYS', 'NEVER', 'REQUIRED', 'SHALL', 'PROHIBITED'),
    should: wholeWord('SHOULD', 'RECOMMENDED', 'AVOID', 'PREFER'),
    may: wholeWord('MAY', 'CAN', 'OPTIONAL', 'CONSIDER'),

    // Multi-tenancy and data-protection vocabulary
    tenant: wholeWord('tenant', 'multi-tenant', 'tenantId', 'isolation'),
    gdpr: wholeWord('GDPR', 'privacy', 'consent', 'retention', 'data protection'),

    // Architecture: "port <digits>" or ":<4-5 digits>", data store and deployment terms
    port: wholeWord('port\\s+\\d+', ':\\d{4,5}'),
    database: wholeWord('MongoDB', 'database', 'collection', 'query'),
    deployment: wholeWord('deploy', 'deployment', 'production', 'systemd', 'pm2'),

    // Security vocabulary
    security: wholeWord('security', 'auth', 'credential', 'password', 'token', 'api key'),

    // Development workflow vocabulary
    testing: wholeWord('test', 'testing', 'local', 'development', 'dev'),
    session: wholeWord('session', 'handoff')
  };
})();
// Extract sections: map each Markdown heading to the text that follows it.
// Text before the first heading is stored under "preamble". A heading with no
// body lines before the next heading is not recorded.
const sections = {};
let currentSection = 'preamble';
let sectionContent = [];
// Fence character ("`" or "~") while inside a fenced code block, otherwise null.
let activeFence = null;

// Store the lines collected for the current heading, if there are any.
const flushSection = () => {
  if (sectionContent.length === 0) {
    return;
  }
  // A repeated heading gets a numeric suffix instead of overwriting the earlier section.
  let key = currentSection;
  for (let n = 2; Object.prototype.hasOwnProperty.call(sections, key); n += 1) {
    key = `${currentSection} (${n})`;
  }
  sections[key] = sectionContent.join('\n');
};

lines.forEach(line => {
  const fence = line.match(/^\s*(`{3,}|~{3,})/);
  if (fence) {
    // A fence closes only the block opened with the same fence character.
    const marker = fence[1][0];
    if (activeFence === null) {
      activeFence = marker;
    } else if (activeFence === marker) {
      activeFence = null;
    }
    sectionContent.push(line);
    return;
  }

  // "#" lines inside a code block are shell or script comments, not headings.
  // The trailing \s* also tolerates a carriage return left over from CRLF input.
  const heading = activeFence === null ? line.match(/^#+\s+(.+?)\s*$/) : null;
  if (heading) {
    flushSection();
    currentSection = heading[1];
    sectionContent = [];
  } else {
    sectionContent.push(line);
  }
});
flushSection();
// Report section 1: how many sections were found and how many lines each one holds.
section('1. Document Structure');
const sectionNames = Object.keys(sections);
log(` Sections found: ${sectionNames.length}`, 'cyan');
for (const name of sectionNames) {
  const body = sections[name];
  log(` - ${name} (${body.split('\n').length} lines)`, 'gray');
}
// Extract candidate rules: classify every rule-like line of the document into a bucket.
section('2. Candidate Rules Extraction');
const candidates = {
  layer1_dev: [], // Development environment rules
  layer2_arch: [], // Architectural constraints
  ignored_creds: [], // Credentials (should be in .env)
  ignored_config: [], // Tenant config (should be in MongoDB); not populated by this pass
  ignored_vague: [] // Too vague to be rules
};

// [key in `patterns`, label stored on the rule], in the order labels are recorded.
const patternLabels = [
  ['tenant', 'multi-tenant'],
  ['gdpr', 'GDPR'],
  ['port', 'port'],
  ['database', 'database'],
  ['deployment', 'deployment'],
  ['security', 'security'],
  ['testing', 'testing'],
  ['session', 'session']
];

// Fence character ("`" or "~") while inside a fenced code block, otherwise null.
let codeFenceMarker = null;

lines.forEach((line, idx) => {
  const trimmed = line.trim();

  // Skip code blocks entirely: the fence lines and everything between them.
  // Commands and snippets inside a code block are examples, not governance rules.
  const fence = trimmed.match(/^(`{3,}|~{3,})/);
  if (fence) {
    const marker = fence[1][0];
    if (codeFenceMarker === null) {
      codeFenceMarker = marker;
    } else if (codeFenceMarker === marker) {
      codeFenceMarker = null;
    }
    return;
  }
  if (codeFenceMarker !== null) {
    return;
  }

  // Skip empty lines, comments and headings
  if (!trimmed || trimmed.startsWith('//') || trimmed.startsWith('#')) {
    return;
  }

  // Detect imperative statements
  const hasMust = patterns.must.test(trimmed);
  const hasShould = patterns.should.test(trimmed);
  const hasMay = patterns.may.test(trimmed);
  if (!hasMust && !hasShould && !hasMay) {
    return; // Not a rule candidate
  }

  const rule = {
    line: idx + 1, // 1-based line number in the source document
    text: trimmed,
    imperative: hasMust ? 'MUST' : hasShould ? 'SHOULD' : 'MAY',
    patterns: patternLabels
      .filter(([key]) => patterns[key].test(trimmed))
      .map(([, label]) => label)
  };
  const tagged = label => rule.patterns.includes(label);

  let bucket;
  if (/password|credential|admin.*@|test.*@.*:/i.test(trimmed)) {
    // Credentials → ignore
    bucket = 'ignored_creds';
  } else if (tagged('multi-tenant') || tagged('GDPR') || (hasMust && tagged('database'))) {
    // Layer 2: Architectural constraints (multi-tenant, GDPR, mandatory database rules)
    bucket = 'layer2_arch';
  } else if (tagged('port') || tagged('deployment') || tagged('testing') || tagged('session')) {
    // Layer 1: Development environment (ports, deployment, testing, sessions)
    bucket = 'layer1_dev';
  } else {
    // Whatever remains is kept as a Layer 1 rule only when it is phrased as a MUST;
    // weaker statements without a layer-specific tag are treated as too vague.
    bucket = hasMust ? 'layer1_dev' : 'ignored_vague';
  }
  candidates[bucket].push(rule);
});
// Report section 3: list every Layer 1 (development environment) rule with its
// source line number, imperative strength and matched pattern labels.
section('3. Layer 1: Development Environment Rules');
log(` Found ${candidates.layer1_dev.length} development rules`, 'green');
console.log('');
for (const [position, devRule] of candidates.layer1_dev.entries()) {
  log(` ${position + 1}. [Line ${devRule.line}] ${devRule.imperative}`, 'cyan');
  log(` ${devRule.text}`, 'gray');
  log(` Patterns: ${devRule.patterns.join(', ')}`, 'yellow');
  console.log('');
}
// Report section 4: list every Layer 2 (architectural constraint) rule with its
// source line number, imperative strength and matched pattern labels.
section('4. Layer 2: Architectural Constraints');
log(` Found ${candidates.layer2_arch.length} architectural rules`, 'green');
console.log('');
for (const [position, archRule] of candidates.layer2_arch.entries()) {
  log(` ${position + 1}. [Line ${archRule.line}] ${archRule.imperative}`, 'cyan');
  log(` ${archRule.text}`, 'gray');
  log(` Patterns: ${archRule.patterns.join(', ')}`, 'yellow');
  console.log('');
}
// Report section 5: lines that looked like rules but were excluded from both layers.
section('5. Ignored Items');

// Show at most 80 characters of a line. The ellipsis is added only when text was
// actually cut, so a short line is not presented as if it had been truncated.
const previewIgnored = text => (text.length > 80 ? `${text.substring(0, 80)}...` : text);

log(` Credentials (${candidates.ignored_creds.length}) - belong in .env or vault:`, 'yellow');
candidates.ignored_creds.forEach(rule => {
  log(` [Line ${rule.line}] ${previewIgnored(rule.text)}`, 'gray');
});
console.log('');
log(` Vague statements (${candidates.ignored_vague.length}) - not actionable rules:`, 'yellow');
candidates.ignored_vague.forEach(rule => {
  log(` [Line ${rule.line}] ${previewIgnored(rule.text)}`, 'gray');
});
// Rule quality scoring: print the title of report section 6.
section('6. Rule Quality Analysis');
/**
 * Compute a heuristic quality score for an extracted rule.
 *
 * Points are awarded for imperative strength (MUST 40, SHOULD 20, anything else 10),
 * 10 per matched pattern label, sentence length (20 for 5-20 words, 10 for more than 20),
 * and 10 when the text contains a 4-5 digit number, a slash-prefixed path or a run of
 * three or more capitals/underscores.
 *
 * @param {{imperative: string, patterns: string[], text: string}} rule - Rule to score.
 * @returns {number} Score, capped at 100.
 */
function scoreRule(rule) {
  const words = rule.text.split(/\s+/).length;
  const hasConcreteValue = /\d{4,5}|\/[\w/-]+|[A-Z_]{3,}/.test(rule.text);

  let strengthPoints;
  switch (rule.imperative) {
    case 'MUST':
      strengthPoints = 40;
      break;
    case 'SHOULD':
      strengthPoints = 20;
      break;
    default:
      strengthPoints = 10;
  }

  // Fewer than five words earns nothing; long sentences earn half the bonus.
  let lengthPoints = 0;
  if (words > 20) {
    lengthPoints = 10;
  } else if (words >= 5) {
    lengthPoints = 20;
  }

  const total =
    strengthPoints +
    rule.patterns.length * 10 +
    lengthPoints +
    (hasConcreteValue ? 10 : 0);
  return Math.min(100, total);
}
// Score every extracted rule (both layers) and order the copies from best to worst.
const allRules = [...candidates.layer1_dev, ...candidates.layer2_arch];
const scored = allRules.map(rule => ({
  ...rule,
  score: scoreRule(rule)
})).sort((a, b) => b.score - a.score);

// With no extracted rules the mean would be 0/0 and print as "NaN/100"; report 0.0 instead.
const scoreTotal = scored.reduce((sum, r) => sum + r.score, 0);
const meanScore = scored.length > 0 ? scoreTotal / scored.length : 0;

// Show at most 70 characters of a rule; add the ellipsis only when text was actually cut.
const previewScored = text => (text.length > 70 ? `${text.substring(0, 70)}...` : text);

log(` Average quality score: ${meanScore.toFixed(1)}/100`, 'cyan');
console.log('');
log(` High-quality rules (score ≥ 70):`, 'green');
const highQuality = scored.filter(r => r.score >= 70);
highQuality.forEach(rule => {
  log(` [${rule.score}] ${previewScored(rule.text)}`, 'gray');
});
console.log('');
log(` Needs improvement (score < 70):`, 'yellow');
const needsWork = scored.filter(r => r.score < 70);
needsWork.forEach(rule => {
  log(` [${rule.score}] ${previewScored(rule.text)}`, 'gray');
});
// Report section 7: for each rule scoring below 70, print the rule followed by
// every applicable suggestion. Rules with no applicable suggestion are not printed.
section('7. Suggested Improvements');
for (const weakRule of needsWork) {
  const { imperative, patterns: tags, text } = weakRule;

  // [applies?, advice] pairs, in the order the advice is printed.
  const checks = [
    [imperative !== 'MUST' && tags.length > 0, `Change "${imperative}" to "MUST" for stronger enforcement`],
    [tags.length === 0, 'Add specific parameters (ports, paths, constraints)'],
    [text.split(/\s+/).length < 5, 'Add more context - why is this rule important?'],
    [!text.match(/\d{4,5}|\/[\w/-]+|[A-Z_]{3,}/), 'Add concrete values (port numbers, file paths, constants)']
  ];
  const suggestions = checks.filter(([applies]) => applies).map(([, advice]) => advice);

  if (suggestions.length === 0) {
    continue;
  }
  log(` ${text}`, 'gray');
  for (const advice of suggestions) {
    log(`${advice}`, 'yellow');
  }
  console.log('');
}
// Generate instruction-history.json format: print the title of report section 8.
section('8. Proposed instruction-history.json Entries');
/**
 * Convert an extracted rule into an instruction-history.json style entry.
 *
 * @param {{line: number, text: string, imperative: string, patterns: string[], score?: number}} rule
 *   Extracted rule. `score` is optional; when absent it is computed with scoreRule().
 * @param {string} layer - 'layer1_dev' or 'layer2_arch'. Any value other than
 *   'layer1_dev' is labelled as an architectural constraint.
 * @returns {object} Instruction entry with a generated id, classification fields and provenance.
 */
function convertToInstruction(rule, layer) {
  const tagged = label => rule.patterns.includes(label);

  // The rules held in `candidates` carry no `score` (only the sorted copies do).
  // Reading rule.score directly therefore always produced priority 70.
  const score = typeof rule.score === 'number' ? rule.score : scoreRule(rule);

  let quadrant = 'SYSTEM';
  if (layer !== 'layer2_arch' && (tagged('deployment') || tagged('session'))) {
    quadrant = 'OPERATIONAL';
  }

  let persistence = 'LOW';
  if (rule.imperative === 'MUST') {
    persistence = 'HIGH';
  } else if (rule.imperative === 'SHOULD') {
    persistence = 'MEDIUM';
  }

  // The first matching label wins, in this order of precedence.
  let category = 'technical';
  if (tagged('multi-tenant')) {
    category = 'architecture';
  } else if (tagged('security')) {
    category = 'security';
  } else if (tagged('deployment')) {
    category = 'deployment';
  } else if (tagged('testing')) {
    category = 'quality';
  }

  // Up to five base-36 characters; keeps ids generated within the same millisecond distinct.
  const randomSuffix = Math.random().toString(36).slice(2, 7);

  return {
    id: `fh_${layer}_${Date.now()}_${randomSuffix}`,
    text: rule.text,
    quadrant,
    persistence,
    category,
    temporal_scope: 'PERMANENT',
    priority: score >= 70 ? 90 : 70,
    source: 'claude_md_extraction',
    active: true,
    created_date: new Date().toISOString().split('T')[0], // YYYY-MM-DD (UTC)
    extracted_from: claudeMdPath,
    original_line: rule.line,
    patterns: rule.patterns,
    layer: layer === 'layer1_dev' ? 'Development Environment' : 'Architectural Constraint'
  };
}
// Convert both layers to instruction entries and report how many were produced.
const instructions = {
  layer1: candidates.layer1_dev.map(r => convertToInstruction(r, 'layer1_dev')),
  layer2: candidates.layer2_arch.map(r => convertToInstruction(r, 'layer2_arch'))
};
log(` Layer 1 (Development): ${instructions.layer1.length} instructions`, 'green');
log(` Layer 2 (Architecture): ${instructions.layer2.length} instructions`, 'green');
console.log('');

// Output JSON next to the source file as "<name>_extracted_rules.json".
// The name is built from the parsed path rather than by replacing ".md" in the string:
// that replacement hit the first ".md" anywhere in the path and, for any other extension
// (including "CLAUDE.MD"), left the path unchanged so the source file itself was overwritten.
const sourcePathParts = path.parse(claudeMdPath);
const outputPath = path.format({
  root: sourcePathParts.root,
  dir: sourcePathParts.dir,
  name: `${sourcePathParts.name}_extracted_rules`,
  ext: '.json'
});

// With no extracted rules the mean would be 0/0 and serialize as the string "NaN".
const outputScoreTotal = scored.reduce((sum, r) => sum + r.score, 0);
const outputAverage = scored.length > 0 ? outputScoreTotal / scored.length : 0;

const output = {
  metadata: {
    source_file: claudeMdPath,
    extracted_at: new Date().toISOString(),
    total_rules: instructions.layer1.length + instructions.layer2.length,
    layer1_count: instructions.layer1.length,
    layer2_count: instructions.layer2.length,
    average_score: outputAverage.toFixed(1) // kept as a string with one decimal place
  },
  instructions: {
    layer1_development: instructions.layer1,
    layer2_architecture: instructions.layer2
  },
  ignored: {
    credentials: candidates.ignored_creds.length,
    vague_statements: candidates.ignored_vague.length
  }
};

try {
  fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));
} catch (err) {
  // Report an unwritable destination as a normal error instead of an uncaught exception.
  log(`Error: Could not write ${outputPath}: ${err.message}`, 'red');
  process.exit(1);
}
log(`✓ Saved to: ${outputPath}`, 'green');
// Summary: totals for the run followed by the recommended manual follow-up steps.
section('9. Summary & Next Steps');

// With no extracted rules the mean would be 0/0 and print as "NaN/100"; report 0.0 instead.
const summaryScoreTotal = scored.reduce((sum, r) => sum + r.score, 0);
const summaryAverage = scored.length > 0 ? summaryScoreTotal / scored.length : 0;

log(` Total rules extracted: ${allRules.length}`, 'bold');
log(` - Layer 1 (Development): ${candidates.layer1_dev.length}`, 'cyan');
log(` - Layer 2 (Architecture): ${candidates.layer2_arch.length}`, 'cyan');
log(` Ignored items: ${candidates.ignored_creds.length + candidates.ignored_vague.length}`, 'yellow');
log(` Average quality: ${summaryAverage.toFixed(1)}/100`, 'green');
console.log('');
log(' Next steps:', 'bold');
log(' 1. Review extracted rules in JSON output', 'cyan');
log(' 2. Manually improve low-quality rules (score < 70)', 'cyan');
log(' 3. Add missing rules not detected by patterns', 'cyan');
log(' 4. Import to instruction-history.json', 'cyan');
console.log('');
log('═'.repeat(80), 'cyan');