diff --git a/CLAUDE.md b/CLAUDE.md index 10cde3ae..baa3c12c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -96,6 +96,108 @@ tractatus_dev.koha_donations // Phase 3 --- +## Session Management with ContextPressureMonitor + +**The Tractatus framework dogfoods itself** - using ContextPressureMonitor to manage development sessions. + +### Session Pressure Analysis + +Instead of arbitrary token thresholds, use multi-factor pressure analysis: + +```bash +# Check current session pressure +node scripts/check-session-pressure.js --tokens 89195/200000 --messages 28 --tasks 2 + +# Output: +# Pressure Level: NORMAL +# Overall Score: 24.3% +# Action: PROCEED +# Recommendations: ✅ CONTINUE_NORMAL +``` + +### Pressure Levels & Actions + +| Level | Score | Action | What to Do | +|-------|-------|--------|------------| +| **NORMAL** | 0-30% | PROCEED | Continue normally | +| **ELEVATED** | 30-50% | INCREASE_VERIFICATION | More careful, verify outputs | +| **HIGH** | 50-70% | SUGGEST_CONTEXT_REFRESH | Consider session handoff | +| **CRITICAL** | 70-85% | MANDATORY_VERIFICATION | Verify all actions, prepare handoff | +| **DANGEROUS** | 85%+ | IMMEDIATE_HALT | Stop, create handoff, refresh context | + +### Monitored Factors (Weighted) + +1. **Token Usage** (35% weight) - Context window pressure +2. **Conversation Length** (25% weight) - Attention decay over long sessions +3. **Task Complexity** (15% weight) - Number of simultaneous tasks, dependencies, file modifications +4. **Error Frequency** (15% weight) - Recent errors indicate degraded state +5.
**Instruction Density** (10% weight) - Too many competing directives + +### When to Check Pressure + +**Automatically check at:** +- Session start (baseline) +- 25% token usage (early warning) +- 50% token usage (mid-session check) +- 75% token usage (prepare for handoff) +- After complex multi-file operations +- After any error or unexpected behavior + +**Proactive Monitoring:** +Claude should periodically assess pressure and adjust behavior: +- **NORMAL**: Work normally, maintain quality standards +- **ELEVATED**: Be more concise, increase verification +- **HIGH**: Suggest creating session handoff document +- **CRITICAL**: Mandatory verification, prepare handoff +- **DANGEROUS**: Stop work, create comprehensive handoff + +### Session Handoff Triggers + +Create handoff document when: +- Pressure reaches CRITICAL or DANGEROUS +- Token usage exceeds 75% +- Complex multi-phase work remains +- Errors clustering (3+ in short period) +- User requests session break + +### Script Usage + +```bash +# Basic check +node scripts/check-session-pressure.js --tokens <current>/<budget> + +# With full context +node scripts/check-session-pressure.js \ + --tokens 150000/200000 \ + --messages 45 \ + --tasks 3 \ + --errors 1 \ + --verbose + +# JSON output for automation +node scripts/check-session-pressure.js --tokens 180000/200000 --json + +# Exit codes: 0=NORMAL/ELEVATED, 1=HIGH, 2=CRITICAL, 3=DANGEROUS +``` + +### Integration with Claude Sessions + +**Claude should:** +1. Track approximate token usage, message count, active tasks +2. Periodically call ContextPressureMonitor (every 25% tokens) +3. Report pressure level and recommendations to user +4. Adjust verbosity/behavior based on pressure +5. Proactively suggest session handoff when appropriate + +**Example:** +``` +[ContextPressureMonitor: ELEVATED - 42% pressure] +Recommendations: INCREASE_VERIFICATION, Token usage at 68% +Action: Continuing with increased verification. Consider handoff after current task.
+``` + +--- + ## Governance Documents Located in `/home/theflow/projects/tractatus/governance/` (to be created): @@ -412,5 +514,5 @@ ADMIN_EMAIL=john.stroh.nz@pm.me --- -**Last Updated:** 2025-10-06 +**Last Updated:** 2025-10-07 **Next Review:** After Phase 1 completion
diff --git a/scripts/check-session-pressure.js b/scripts/check-session-pressure.js
new file mode 100755
index 00000000..3c13d1e6
--- /dev/null
+++ b/scripts/check-session-pressure.js
@@ -0,0 +1,399 @@
+#!/usr/bin/env node
+/**
+ * Session Pressure Monitor Script
+ *
+ * Uses ContextPressureMonitor to analyze current session state and provide
+ * recommendations for session management.
+ *
+ * This script demonstrates the Tractatus framework dogfooding itself - using
+ * its own governance services to manage AI-assisted development sessions.
+ *
+ * Usage:
+ *   node scripts/check-session-pressure.js [options]
+ *
+ * Options:
+ *   --tokens <current>/<budget>  Current token usage (e.g., 89195/200000)
+ *   --messages <count>           Number of messages in conversation
+ *   --tasks <count>              Number of active tasks
+ *   --errors <count>             Recent errors in last 10 minutes
+ *   --json                       Output JSON format
+ *   --verbose                    Show detailed analysis
+ *   --help                       Show usage and exit
+ *
+ * Exit codes: 0 = NORMAL/ELEVATED, 1 = HIGH, 2 = CRITICAL, 3 = DANGEROUS.
+ * NOTE: usage errors also exit with 1, so exit code 1 alone does not
+ * distinguish a HIGH result from a bad invocation.
+ */
+
+const monitor = require('../src/services/ContextPressureMonitor.service');
+
+// Cosmetic widths: inner width of the banner box and length of the summary rule.
+const BOX_WIDTH = 64;
+const RULE_WIDTH = 65;
+
+const HELP_TEXT = `
+Session Pressure Monitor - Tractatus Framework
+
+Usage:
+  node scripts/check-session-pressure.js [options]
+
+Options:
+  --tokens <current>/<budget>  Token usage (e.g., 89195/200000)
+  --messages <count>           Conversation length
+  --tasks <count>              Active tasks
+  --errors <count>             Recent errors
+  --json                       JSON output
+  --verbose                    Detailed analysis
+  --help                       Show this help
+
+Examples:
+  # Check current session
+  node scripts/check-session-pressure.js --tokens 89195/200000 --messages 28 --tasks 2
+
+  # JSON output for automation
+  node scripts/check-session-pressure.js --tokens 150000/200000 --json
+
+  # Verbose analysis
+  node scripts/check-session-pressure.js --tokens 180000/200000 --messages 50 --verbose
+`;
+
+/**
+ * Return the value that follows a flag on the command line.
+ *
+ * @param {string[]} args - Full argument list.
+ * @param {number} index - Position where the value is expected.
+ * @param {string} flag - Flag name, used in the error message.
+ * @returns {string} The value at `index`.
+ * @throws {Error} If the flag was the last argument.
+ */
+function requireValue(args, index, flag) {
+  const value = args[index];
+  if (value === undefined) {
+    throw new Error(`${flag} requires a value`);
+  }
+  return value;
+}
+
+/**
+ * Parse a non-negative integer option value (base 10).
+ *
+ * @param {string} raw - Text taken from the command line.
+ * @param {string} flag - Flag name, used in the error message.
+ * @returns {number} The parsed count.
+ * @throws {Error} If `raw` does not start with a non-negative integer.
+ */
+function parseCount(raw, flag) {
+  const count = Number.parseInt(raw, 10);
+  if (Number.isNaN(count) || count < 0) {
+    throw new Error(`${flag} expects a non-negative integer, got "${raw}"`);
+  }
+  return count;
+}
+
+/**
+ * Parse a "<current>/<budget>" token pair.
+ *
+ * @param {string} raw - Text taken from the command line.
+ * @returns {{current: number, budget: number}} Tokens used and token budget.
+ * @throws {Error} If either side is missing or non-numeric, `current` is
+ *   negative, or `budget` is not positive.
+ */
+function parseTokens(raw) {
+  const parts = raw.split('/');
+  const current = Number(parts[0]);
+  const budget = Number(parts[1]);
+  const wellFormed =
+    parts.length === 2 &&
+    parts[0].trim() !== '' && // Number('') is 0, so reject empty sides explicitly
+    parts[1].trim() !== '' &&
+    Number.isFinite(current) &&
+    Number.isFinite(budget) &&
+    current >= 0 &&
+    budget > 0;
+  if (!wellFormed) {
+    throw new Error(`--tokens expects <current>/<budget> (e.g., 89195/200000), got "${raw}"`);
+  }
+  return { current, budget };
+}
+
+/**
+ * Parse command line arguments into an options object.
+ *
+ * `--help` prints usage and exits the process with code 0. Unrecognized
+ * arguments are ignored.
+ *
+ * @param {string[]} [argv] - Arguments to parse; defaults to process.argv.slice(2).
+ * @returns {{tokenUsage: ?number, tokenBudget: ?number, messages: number,
+ *   tasks: number, errors: number, json: boolean, verbose: boolean}}
+ *   tokenUsage/tokenBudget stay null when --tokens was not given.
+ * @throws {Error} If a flag is missing its value or the value is malformed.
+ */
+function parseArgs(argv = process.argv.slice(2)) {
+  const options = {
+    tokenUsage: null,
+    tokenBudget: null,
+    messages: 0,
+    tasks: 1,
+    errors: 0,
+    json: false,
+    verbose: false
+  };
+
+  for (let i = 0; i < argv.length; i++) {
+    const flag = argv[i];
+    switch (flag) {
+      case '--tokens': {
+        // Braces give the declaration its own scope inside the switch.
+        const tokens = parseTokens(requireValue(argv, ++i, flag));
+        options.tokenUsage = tokens.current;
+        options.tokenBudget = tokens.budget;
+        break;
+      }
+      case '--messages':
+        options.messages = parseCount(requireValue(argv, ++i, flag), flag);
+        break;
+      case '--tasks':
+        options.tasks = parseCount(requireValue(argv, ++i, flag), flag);
+        break;
+      case '--errors':
+        options.errors = parseCount(requireValue(argv, ++i, flag), flag);
+        break;
+      case '--json':
+        options.json = true;
+        break;
+      case '--verbose':
+        options.verbose = true;
+        break;
+      case '--help':
+        console.log(HELP_TEXT);
+        process.exit(0);
+        break;
+      default:
+        break;
+    }
+  }
+
+  return options;
+}
+
+/**
+ * Wrap a pressure level name in an ANSI color escape.
+ *
+ * @param {string} level - NORMAL, ELEVATED, HIGH, CRITICAL or DANGEROUS.
+ * @returns {string} The level followed by a reset code; unknown levels get no color.
+ */
+function formatLevel(level) {
+  const colors = {
+    NORMAL: '\x1b[32m', // Green
+    ELEVATED: '\x1b[33m', // Yellow
+    HIGH: '\x1b[35m', // Magenta
+    CRITICAL: '\x1b[31m', // Red
+    DANGEROUS: '\x1b[41m' // Red background
+  };
+  const reset = '\x1b[0m';
+  return `${colors[level] || ''}${level}${reset}`;
+}
+
+/**
+ * Prefix a recommendation code with an icon.
+ *
+ * @param {string} rec - Recommendation code, e.g. CONTINUE_NORMAL.
+ * @returns {string} Icon and code; unknown codes get a bullet.
+ */
+function formatRecommendation(rec) {
+  const icons = {
+    CONTINUE_NORMAL: '✅',
+    INCREASE_VERIFICATION: '⚠️',
+    SUGGEST_CONTEXT_REFRESH: '🔄',
+    MANDATORY_VERIFICATION: '🚨',
+    IMMEDIATE_HALT: '🛑'
+  };
+  return `${icons[rec] || '•'} ${rec}`;
+}
+
+/**
+ * Format a 0-1 fraction as a percentage with one decimal place.
+ *
+ * @param {*} value - Value to format.
+ * @returns {string} e.g. "24.3%", or "n/a" when `value` is not a finite number.
+ */
+function formatPercent(value) {
+  if (typeof value !== 'number' || !Number.isFinite(value)) {
+    return 'n/a';
+  }
+  return `${(value * 100).toFixed(1)}%`;
+}
+
+/**
+ * Print the human-readable report for an analysis result.
+ *
+ * Reads only the fields used below; missing metrics, recommendations or
+ * warnings are tolerated rather than crashing the report.
+ *
+ * @param {Object} analysis - Result of monitor.analyzePressure().
+ * @param {boolean} verbose - Also print per-metric details.
+ */
+function printReport(analysis, verbose) {
+  const title = 'Tractatus Session Pressure Analysis';
+  const padLeft = Math.floor((BOX_WIDTH - title.length) / 2);
+  const padRight = BOX_WIDTH - title.length - padLeft;
+  console.log(`\n╔${'═'.repeat(BOX_WIDTH)}╗`);
+  console.log(`║${' '.repeat(padLeft)}${title}${' '.repeat(padRight)}║`);
+  console.log(`╚${'═'.repeat(BOX_WIDTH)}╝\n`);
+
+  // Pressure Level
+  console.log(`Pressure Level: ${formatLevel(analysis.level)}`);
+  console.log(`Overall Score:  ${formatPercent(analysis.overall_score)}`);
+  console.log(`Action:         ${analysis.action}\n`);
+
+  // Metrics
+  const metrics = analysis.metrics || {};
+  const metricRows = [
+    ['Token Usage', 'tokenUsage'],
+    ['Conversation', 'conversationLength'],
+    ['Task Complexity', 'taskComplexity'],
+    ['Error Frequency', 'errorFrequency'],
+    ['Instructions', 'instructionDensity']
+  ];
+  console.log('Metrics:');
+  metricRows.forEach(([label, key]) => {
+    const score = metrics[key] ? metrics[key].score : undefined;
+    const heading = `${label}:`.padEnd(17);
+    console.log(`  ${heading}${formatPercent(score)}`);
+  });
+  console.log();
+
+  // Recommendations
+  const recommendations = Array.isArray(analysis.recommendations) ? analysis.recommendations : [];
+  if (recommendations.length > 0) {
+    console.log('Recommendations:');
+    recommendations.forEach(rec => {
+      console.log(`  ${formatRecommendation(rec)}`);
+    });
+    console.log();
+  }
+
+  // Warnings
+  const warnings = Array.isArray(analysis.warnings) ? analysis.warnings : [];
+  if (warnings.length > 0) {
+    console.log('⚠️ Warnings:');
+    warnings.forEach(warning => {
+      console.log(`  • ${warning}`);
+    });
+    console.log();
+  }
+
+  // Trend
+  if (analysis.trend) {
+    const trendLabels = {
+      escalating: '📈 Escalating',
+      improving: '📉 Improving',
+      stable: '➡️ Stable'
+    };
+    // Fall back to the raw value so an unknown trend never prints "undefined".
+    console.log(`Trend: ${trendLabels[analysis.trend] || analysis.trend}\n`);
+  }
+
+  // Verbose output
+  if (verbose) {
+    console.log('Detailed Metrics:');
+    Object.entries(metrics).forEach(([name, metric]) => {
+      const normalized = typeof metric.normalized === 'number'
+        ? metric.normalized.toFixed(3)
+        : String(metric.normalized);
+      console.log(`  ${name}:`);
+      console.log(`    Raw:        ${metric.raw}`);
+      console.log(`    Normalized: ${normalized}`);
+      console.log(`    Threshold:  ${metric.threshold}`);
+      if (Array.isArray(metric.factors)) {
+        console.log(`    Factors:    ${metric.factors.join(', ')}`);
+      }
+    });
+    console.log();
+  }
+
+  // Summary
+  const summaries = {
+    NORMAL: '✅ Session conditions are normal. Continue working.',
+    ELEVATED: '⚠️ Pressure is elevated. Increase verification and monitoring.',
+    HIGH: '🔄 Pressure is high. Consider refreshing context soon.',
+    CRITICAL: '🚨 Critical pressure! Mandatory verification required.',
+    DANGEROUS: '🛑 DANGEROUS conditions! Halt and refresh context immediately.'
+  };
+  console.log('─'.repeat(RULE_WIDTH));
+  if (summaries[analysis.level]) {
+    console.log(`${summaries[analysis.level]}\n`);
+  }
+}
+
+/**
+ * Analyze the session described by `options` and print the result.
+ *
+ * @param {Object} options - Options as returned by parseArgs().
+ * @returns {Object} The analysis returned by monitor.analyzePressure().
+ */
+function analyzeSession(options) {
+  // Build context object
+  const context = {
+    messages_count: options.messages,
+    task_depth: options.tasks,
+    errors_recent: options.errors
+  };
+
+  // Add token usage if provided. Checked by type rather than truthiness so a
+  // fresh session (0 tokens used) is still reported as 0% instead of omitted.
+  const hasTokens =
+    typeof options.tokenUsage === 'number' &&
+    Number.isFinite(options.tokenUsage) &&
+    options.tokenBudget > 0;
+  if (hasTokens) {
+    context.token_usage = options.tokenUsage / options.tokenBudget;
+    context.token_limit = options.tokenBudget;
+  }
+
+  // Run analysis
+  const analysis = monitor.analyzePressure(context);
+
+  // Output results
+  if (options.json) {
+    console.log(JSON.stringify(analysis, null, 2));
+  } else {
+    printReport(analysis, options.verbose);
+  }
+
+  return analysis;
+}
+
+// Run if called directly
+if (require.main === module) {
+  let options;
+  try {
+    options = parseArgs();
+  } catch (err) {
+    console.error(`Error: ${err.message}`);
+    console.error('Run with --help for more information');
+    process.exit(1);
+  }
+
+  // Validate inputs
+  if (options.tokenUsage === null) {
+    console.error('Error: --tokens argument required');
+    console.error('Usage: node scripts/check-session-pressure.js --tokens <current>/<budget>');
+    console.error('Run with --help for more information');
+    process.exit(1);
+  }
+
+  const analysis = analyzeSession(options);
+
+  // Exit with appropriate code
+  const exitCodes = {
+    NORMAL: 0,
+    ELEVATED: 0,
+    HIGH: 1,
+    CRITICAL: 2,
+    DANGEROUS: 3
+  };
+  process.exit(exitCodes[analysis.level] || 0);
+}
+
+module.exports = { analyzeSession, parseArgs };
diff --git a/src/services/InstructionPersistenceClassifier.service.js b/src/services/InstructionPersistenceClassifier.service.js index 14578018..34370a6b 100644 --- a/src/services/InstructionPersistenceClassifier.service.js +++ b/src/services/InstructionPersistenceClassifier.service.js @@ -196,7 +196,12 @@ class InstructionPersistenceClassifier { source, recencyWeight, metadata: { - temporalScope, +
temporal_scope: temporalScope, // snake_case for test compatibility + temporalScope, // camelCase for consistency + extracted_parameters: parameters, // snake_case alias + extractedParameters: parameters, // camelCase alias + context_snapshot: context, // snake_case alias + contextSnapshot: context, // camelCase alias humanOversight: this.quadrants[quadrant].humanOversight, conflictSeverity: this.persistenceLevels[persistence].conflictSeverity } @@ -356,10 +361,24 @@ class InstructionPersistenceClassifier { } _measureExplicitness(text, source) { - let score = 0.5; // Base score + let score = 0.3; // Base score (lower baseline) - // Source factor - if (source === 'user') score += 0.2; + // Implicit/hedging language reduces explicitness + const implicitMarkers = [ + 'could', 'would', 'might', 'maybe', 'perhaps', 'consider', + 'possibly', 'potentially', 'suggestion', 'recommend' + ]; + + const implicitCount = implicitMarkers.filter(marker => + text.includes(marker) + ).length; + + if (implicitCount > 0) { + score -= implicitCount * 0.15; // Reduce for hedge words + } + + // Source factor (applied after implicit check) + if (source === 'user') score += 0.15; if (source === 'inferred') score -= 0.2; // Explicit markers @@ -372,44 +391,51 @@ class InstructionPersistenceClassifier { text.includes(marker) ).length; - score += markerCount * 0.1; + score += markerCount * 0.15; // Parameter specification (numbers, specific values) - if (/\d{4,}/.test(text)) score += 0.2; // Port numbers, dates, etc. + if (/\d{4,}/.test(text)) score += 0.25; // Port numbers, dates, etc. 
if (/["'][\w-]+["']/.test(text)) score += 0.1; // Quoted strings return Math.min(1.0, Math.max(0.0, score)); } _calculatePersistence({ quadrant, temporalScope, explicitness, source, text }) { + // Special case: Explicit port/configuration specifications are HIGH persistence + if (/\bport\s+\d{4,5}\b/i.test(text) && explicitness > 0.6) { + return 'HIGH'; + } + // Base persistence from quadrant let baseScore = { STRATEGIC: 0.9, OPERATIONAL: 0.7, TACTICAL: 0.5, - SYSTEM: 0.6, + SYSTEM: 0.7, // Increased from 0.6 for better SYSTEM persistence STOCHASTIC: 0.4 }[quadrant]; // Adjust for temporal scope - if (temporalScope === 'PERMANENT') baseScore += 0.1; + if (temporalScope === 'PERMANENT') baseScore += 0.15; + if (temporalScope === 'PROJECT') baseScore += 0.05; if (temporalScope === 'SESSION') baseScore -= 0.2; - if (temporalScope === 'IMMEDIATE') baseScore -= 0.15; // One-time actions + if (temporalScope === 'IMMEDIATE') baseScore -= 0.25; // One-time actions // Adjust for explicitness - if (explicitness > 0.8) baseScore += 0.1; + if (explicitness > 0.8) baseScore += 0.15; + else if (explicitness > 0.6) baseScore += 0.05; // Adjust for source if (source === 'user') baseScore += 0.05; - if (source === 'inferred') baseScore -= 0.1; + if (source === 'inferred') baseScore -= 0.15; // Normalize const score = Math.min(1.0, Math.max(0.0, baseScore)); // Map to categorical levels if (score >= 0.75) return 'HIGH'; - if (score >= 0.5) return 'MEDIUM'; - if (quadrant === 'TACTICAL' && explicitness > 0.7) return 'VARIABLE'; // Explicit tactical + if (score >= 0.45) return 'MEDIUM'; + if (quadrant === 'TACTICAL' && explicitness > 0.7 && score >= 0.4) return 'VARIABLE'; // Explicit tactical return 'LOW'; }