tractatus/scripts/analyze-instruction-database.js

#!/usr/bin/env node

/**
 * Instruction Database Analysis
 *
 * Analyzes instruction-history.json for optimization opportunities:
 * - Identifies obsolete instructions
 * - Finds redundancies and consolidation candidates
 * - Suggests archival opportunities
 */

const fs = require('fs');
const path = require('path');

// Location of the instruction history JSON file, resolved relative to the
// directory containing this script.
const INSTRUCTION_HISTORY_PATH = path.join(__dirname, '../.claude/instruction-history.json');

// ANSI escape sequences keyed by name. log() looks up its `color` argument
// here and always appends `reset` after the message.
const colors = {
  reset: '\x1b[0m',
  green: '\x1b[32m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  red: '\x1b[31m',
  cyan: '\x1b[36m',
  bold: '\x1b[1m'
};

/**
 * Prints a message to stdout, prefixed with an escape sequence from the
 * `colors` table and followed by the `reset` sequence.
 *
 * @param {string} message - Text to print.
 * @param {string} [color='reset'] - Key of the `colors` table. A key that is
 *   not defined on the table falls back to `reset` so the literal text
 *   "undefined" is never printed in front of the message.
 */
function log(message, color = 'reset') {
  // Own-property check so inherited names (e.g. "constructor") are not
  // mistaken for color entries.
  const isKnownColor = Object.prototype.hasOwnProperty.call(colors, color);
  const prefix = isKnownColor ? colors[color] : colors.reset;
  console.log(`${prefix}${message}${colors.reset}`);
}

/**
 * Prints a report banner: a blank line, a 70-character rule, the indented
 * title, a second rule, and a closing blank line.
 *
 * @param {string} message - Title shown between the two rules.
 */
function header(message) {
  const rule = '═'.repeat(70);
  const bannerLines = [
    [rule, 'cyan'],
    [`  ${message}`, 'bold'],
    [rule, 'cyan'],
  ];

  console.log('');
  for (const [line, tone] of bannerLines) {
    log(line, tone);
  }
  console.log('');
}

/**
 * Prints a section heading: a blank line followed by the title with a
 * leading arrow marker, in blue.
 *
 * @param {string} message - Section title.
 */
function section(message) {
  console.log('');
  const heading = '▶ '.concat(message);
  log(heading, 'blue');
}

// Load instruction history

/**
 * Reads and parses the instruction history file.
 *
 * @param {string} filePath - Path of the JSON file to load.
 * @returns {{instructions: Array<Object>}} The parsed history object.
 * @throws {Error} If the file cannot be read or parsed, or if the parsed
 *   value has no `instructions` array.
 */
function loadInstructionHistory(filePath) {
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  } catch (err) {
    throw new Error(`Cannot load instruction history from ${filePath}: ${err.message}`);
  }

  // Every later step iterates `instructions`, so reject anything else up front.
  if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.instructions)) {
    throw new Error(`Invalid instruction history in ${filePath}: expected an "instructions" array`);
  }
  return parsed;
}

let history;
try {
  history = loadInstructionHistory(INSTRUCTION_HISTORY_PATH);
} catch (err) {
  // Report a one-line reason on stderr instead of an uncaught-exception stack
  // trace; the exit code stays non-zero.
  console.error(`Error: ${err.message}`);
  process.exit(1);
}

// Partition the instructions on their `active` flag (truthy = active).
const active = history.instructions.filter(i => i.active);
const inactive = history.instructions.filter(i => !i.active);

header('Instruction Database Analysis');

// 1. Summary Statistics
section('1. Database Statistics');
log(`  Total instructions: ${history.instructions.length}`, 'cyan');
log(`  Active instructions: ${active.length}`, 'cyan');
log(`  Inactive instructions: ${inactive.length}`, 'cyan');
console.log('');

// Number of active instructions whose `field` is strictly equal to `value`.
const countActiveWhere = (field, value) => active.filter(i => i[field] === value).length;

// Share of the active set as a one-decimal percentage string. With no active
// instructions the share is reported as 0.0 instead of NaN (0 / 0).
const percentOfActive = count =>
  (active.length === 0 ? 0 : count / active.length * 100).toFixed(1);

// By persistence
const byPersistence = {
  HIGH: countActiveWhere('persistence', 'HIGH'),
  MEDIUM: countActiveWhere('persistence', 'MEDIUM'),
  LOW: countActiveWhere('persistence', 'LOW')
};
log(`  By persistence:`, 'bold');
log(`    HIGH: ${byPersistence.HIGH} (${percentOfActive(byPersistence.HIGH)}%)`, 'green');
log(`    MEDIUM: ${byPersistence.MEDIUM} (${percentOfActive(byPersistence.MEDIUM)}%)`, 'yellow');
log(`    LOW: ${byPersistence.LOW} (${percentOfActive(byPersistence.LOW)}%)`, 'yellow');
console.log('');

// By quadrant
const byQuadrant = {
  STRATEGIC: countActiveWhere('quadrant', 'STRATEGIC'),
  SYSTEM: countActiveWhere('quadrant', 'SYSTEM'),
  OPERATIONAL: countActiveWhere('quadrant', 'OPERATIONAL'),
  TACTICAL: countActiveWhere('quadrant', 'TACTICAL')
};
log(`  By quadrant:`, 'bold');
log(`    STRATEGIC: ${byQuadrant.STRATEGIC}`, 'cyan');
log(`    SYSTEM: ${byQuadrant.SYSTEM}`, 'cyan');
log(`    OPERATIONAL: ${byQuadrant.OPERATIONAL}`, 'cyan');
log(`    TACTICAL: ${byQuadrant.TACTICAL}`, 'cyan');
console.log('');

// By temporal scope
const byScope = {
  PERMANENT: countActiveWhere('temporal_scope', 'PERMANENT'),
  PROJECT: countActiveWhere('temporal_scope', 'PROJECT'),
  SESSION: countActiveWhere('temporal_scope', 'SESSION')
};
log(`  By temporal scope:`, 'bold');
log(`    PERMANENT: ${byScope.PERMANENT}`, 'green');
log(`    PROJECT: ${byScope.PROJECT}`, 'yellow');
log(`    SESSION: ${byScope.SESSION}`, 'red');

// 2. Consolidation Candidates
section('2. Instruction Series (Consolidation Candidates)');

// Find instruction series: ids made of a numeric base plus one lowercase
// letter suffix (e.g., inst_024a, inst_024b) are grouped under the base id
// (inst_024).
const series = {};
active.forEach(inst => {
  const match = inst.id.match(/^(inst_\d+)[a-z]$/);
  if (match) {
    const base = match[1];
    if (!series[base]) series[base] = [];
    series[base].push(inst);
  }
});

if (Object.keys(series).length > 0) {
  for (const [base, instructions] of Object.entries(series)) {
    log(`  ${base} series: ${instructions.length} instructions`, 'yellow');
    instructions.forEach(inst => {
      // Split on whitespace runs and drop empty fragments so repeated spaces,
      // tabs and newlines do not distort the word count.
      const wordCount = inst.text.split(/\s+/).filter(Boolean).length;
      log(`    - ${inst.id}: ${wordCount} words - ${inst.text.substring(0, 60)}...`, 'cyan');
    });
    console.log('');
  }

  log(`  Recommendation: Consider consolidating instruction series into single comprehensive instructions`, 'yellow');
} else {
  log(`  No instruction series found`, 'green');
}

// 3. PROJECT-scoped instructions
section('3. PROJECT-Scoped Instructions (May Be Obsolete)');

// Active instructions whose temporal scope is exactly 'PROJECT'.
const projectScoped = active.filter(({ temporal_scope: scope }) => scope === 'PROJECT');
log(`  Found ${projectScoped.length} PROJECT-scoped instructions`, 'cyan');
console.log('');

// One line per instruction: id, creation date (or 'unknown' when absent),
// and the first 70 characters of its text.
for (const inst of projectScoped) {
  const createdOn = inst.created_date ? inst.created_date : 'unknown';
  const preview = inst.text.substring(0, 70);
  log(`  ${inst.id} (${createdOn}): ${preview}...`, 'yellow');
}

if (projectScoped.length !== 0) {
  console.log('');
  log(`  Recommendation: Review PROJECT-scoped instructions - convert to PERMANENT or archive if no longer needed`, 'yellow');
}

// 4. Non-HIGH persistence
section('4. Lower Persistence Instructions (Archival Candidates)');

// Every active instruction whose persistence is anything other than 'HIGH'
// (this includes instructions with no persistence value at all).
const lowerPersistence = active.filter(({ persistence }) => !(persistence === 'HIGH'));
log(`  Found ${lowerPersistence.length} non-HIGH persistence instructions`, 'cyan');
console.log('');

// One line per instruction: id, persistence level, first 70 characters of text.
for (const { id, persistence, text } of lowerPersistence) {
  const preview = text.substring(0, 70);
  log(`  ${id} (${persistence}): ${preview}...`, 'yellow');
}

if (lowerPersistence.length !== 0) {
  console.log('');
  log(`  Recommendation: Review MEDIUM/LOW persistence - promote to HIGH or archive`, 'yellow');
}

// 5. Keyword-based redundancy detection
section('5. Potential Redundancies (Keyword Analysis)');

// Group instruction ids by keyword. The table has no prototype, so words that
// happen to match inherited Object members (e.g. "constructor") are treated
// like any other keyword instead of resolving to an inherited function.
const keywords = Object.create(null);
active.forEach(inst => {
  const words = inst.text.toLowerCase()
    .replace(/[^a-z\s]/g, '')
    .split(/\s+/)
    .filter(w => w.length > 5); // Only words longer than 5 chars

  // De-duplicate per instruction so each id is recorded at most once per
  // keyword; the list length is then a true count of distinct instructions.
  new Set(words).forEach(word => {
    if (!keywords[word]) keywords[word] = [];
    keywords[word].push(inst.id);
  });
});

// Keep keywords shared by at least 3 instructions, most widely shared first,
// limited to the top 10.
const commonKeywords = Object.entries(keywords)
  .filter(([, ids]) => ids.length >= 3)
  .sort((a, b) => b[1].length - a[1].length)
  .slice(0, 10);

if (commonKeywords.length > 0) {
  log(`  Top keywords appearing in multiple instructions:`, 'cyan');
  console.log('');

  commonKeywords.forEach(([word, ids]) => {
    // Show at most the first five ids to keep each line short.
    log(`  "${word}": ${ids.length} instructions (${ids.slice(0, 5).join(', ')})`, 'yellow');
  });

  console.log('');
  log(`  Recommendation: Review instructions with shared keywords for potential consolidation`, 'yellow');
}

// 6. Long instructions (>100 words)
section('6. Very Long Instructions (Simplification Candidates)');

// Counts words by splitting on whitespace runs and dropping empty fragments,
// so repeated spaces, tabs and newlines do not distort the result.
const wordCountOf = text => text.split(/\s+/).filter(Boolean).length;

// Pair each instruction with its word count once, then keep those over 100.
const longEntries = active
  .map(inst => ({ inst, wordCount: wordCountOf(inst.text) }))
  .filter(entry => entry.wordCount > 100);
const longInstructions = longEntries.map(entry => entry.inst);
log(`  Found ${longInstructions.length} instructions > 100 words`, 'cyan');
console.log('');

longEntries.forEach(({ inst, wordCount }) => {
  log(`  ${inst.id} (${wordCount} words): ${inst.text.substring(0, 70)}...`, 'yellow');
});

if (longInstructions.length > 0) {
  console.log('');
  log(`  Recommendation: Consider splitting long instructions or consolidating into procedures`, 'yellow');
}

// 7. Optimization Summary
section('7. Optimization Recommendations');

// Size the active set should drop below for the summary to report success.
const TARGET_ACTIVE_COUNT = 50;

const recommendations = [];

if (Object.keys(series).length > 0) {
  const seriesCount = Object.values(series).reduce((sum, arr) => sum + arr.length, 0);
  // Each series would collapse into a single instruction.
  const consolidatedCount = Object.keys(series).length;
  const savings = seriesCount - consolidatedCount;
  recommendations.push({
    action: `Consolidate instruction series`,
    savings,
    details: `${consolidatedCount} series with ${seriesCount} total instructions`
  });
}

if (projectScoped.length > 5) {
  recommendations.push({
    action: `Review PROJECT-scoped instructions`,
    savings: Math.floor(projectScoped.length * 0.3),
    details: `${projectScoped.length} PROJECT-scoped (estimate 30% can be archived)`
  });
}

if (lowerPersistence.length > 0) {
  recommendations.push({
    action: `Archive/promote MEDIUM/LOW persistence`,
    savings: Math.floor(lowerPersistence.length * 0.5),
    details: `${lowerPersistence.length} non-HIGH persistence (estimate 50% can be archived)`
  });
}

if (recommendations.length === 0) {
  log(`  No obvious optimization opportunities found`, 'green');
  log(`  Database appears well-optimized at ${active.length} active instructions`, 'green');
} else {
  recommendations.forEach((rec, idx) => {
    log(`  ${idx + 1}. ${rec.action}`, 'yellow');
    log(`     Potential savings: ${rec.savings} instructions`, 'cyan');
    log(`     Details: ${rec.details}`, 'cyan');
    console.log('');
  });

  // The same instruction can be counted by several recommendations, so the
  // summed estimate may exceed the active set; cap it so the projected total
  // never goes negative.
  const summedSavings = recommendations.reduce((sum, rec) => sum + rec.savings, 0);
  const totalSavings = Math.min(summedSavings, active.length);
  const target = active.length - totalSavings;
  log(`  Estimated result: ${active.length} → ${target} active instructions (${totalSavings} reduction)`, 'bold');

  if (target < TARGET_ACTIVE_COUNT) {
    log(`  ✓ Target <${TARGET_ACTIVE_COUNT} achievable!`, 'green');
  } else {
    log(`  ⚠ May need additional optimization to reach <${TARGET_ACTIVE_COUNT}`, 'yellow');
  }
}

console.log('');
log('═'.repeat(70), 'cyan');