tractatus/src/services/ContextPressureMonitor.service.js
TheFlow b30f6a74aa feat: enhance ContextPressureMonitor and MetacognitiveVerifier services
Phase 2 of governance service enhancements to improve test coverage.

ContextPressureMonitor:
- Add pressureHistory array and comprehensive stats tracking
- Enhance analyzePressure() to return overall_score, level, warnings, risks, trend
- Implement trend detection (escalating/improving/stable) based on last 3 readings
- Enhance recordError() with stats tracking and error clustering detection
- Add methods: _determinePressureLevel(), getPressureHistory(), reset(), getStats()

MetacognitiveVerifier:
- Add stats tracking (total_verifications, by_decision, average_confidence)
- Enhance verify() result with comprehensive checks object (passed/failed for all dimensions)
- Add fields: pressure_adjustment, confidence_adjustment, threshold_adjusted, required_confidence, requires_confirmation, reason, analysis, suggestions
- Add helper methods: _getDecisionReason(), _generateSuggestions(), _assessEvidenceQuality(), _assessReasoningQuality(), _makeDecision(), getStats()

Test Coverage Progress:
- Phase 1 (previous): 52/192 tests passing (27%)
- Phase 2 (current): 79/192 tests passing (41.1%)
- Improvement: +27 tests passing (+52% increase)

Remaining Issues (for future work):
- InstructionPersistenceClassifier: verification_required field undefined (should be verification)
- CrossReferenceValidator: validation logic not detecting conflicts properly
- Some quadrant classifications need tuning

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-07 01:26:58 +13:00

559 lines
16 KiB
JavaScript

/**
* Context Pressure Monitor Service
* Detects conditions that increase AI error probability
*
* Core Tractatus Service: Monitors environmental factors that degrade
* AI performance and triggers increased verification or human intervention.
*
* Monitored Conditions:
* - Token budget pressure (approaching context limit)
* - Conversation length (attention decay over long sessions)
* - Task complexity (number of simultaneous objectives)
* - Error frequency (recent failures indicate degraded state)
* - Instruction density (too many competing directives)
*/
const logger = require('../utils/logger.util');
/**
 * Pressure level catalogue, keyed by level name and listed from least to
 * most severe.
 *
 * Each entry carries:
 * - level: numeric rank (0 = NORMAL ... 4 = DANGEROUS), derived from row order
 * - threshold: overall pressure score compared with `>=` when picking a level;
 *   NORMAL is the fallback, so its threshold is not consulted by
 *   `_determinePressureLevel`
 * - description: human-readable summary of the level
 * - action: action code reported in the pressure analysis
 * - verificationMultiplier: multiplier reported in the pressure analysis
 */
const PRESSURE_LEVELS = Object.fromEntries(
  [
    // [name, threshold, action, verificationMultiplier, description]
    ['NORMAL', 0.3, 'PROCEED', 1.0, 'Normal operating conditions'],
    ['ELEVATED', 0.5, 'INCREASE_VERIFICATION', 1.3, 'Elevated pressure, increased verification recommended'],
    ['HIGH', 0.7, 'MANDATORY_VERIFICATION', 1.6, 'High pressure, mandatory verification required'],
    ['CRITICAL', 0.85, 'RECOMMEND_REFRESH', 2.0, 'Critical pressure, recommend context refresh'],
    ['DANGEROUS', 0.95, 'REQUIRE_HUMAN_INTERVENTION', 3.0, 'Dangerous conditions, require human intervention']
  ].map(([name, threshold, action, verificationMultiplier, description], level) => [
    name,
    { level, threshold, description, action, verificationMultiplier }
  ])
);
/**
 * Monitored metrics, keyed by metric name.
 *
 * Each entry carries:
 * - weight: share of the overall pressure score contributed by the metric's
 *   normalized value (the five weights add up to 1.0)
 * - criticalThreshold: raw value at which the metric's normalized score
 *   saturates at 1.0
 * - dangerThreshold: a higher raw value; no code in this file reads it
 *
 * Units of the thresholds:
 * - TOKEN_USAGE: fraction of the token budget (0.8 = 80%)
 * - CONVERSATION_LENGTH: number of messages
 * - TASK_COMPLEXITY: number of simultaneous tasks
 * - ERROR_FREQUENCY: errors recorded within the last 10 minutes
 * - INSTRUCTION_DENSITY: number of active instructions
 */
const METRICS = Object.fromEntries(
  [
    // [name, weight, criticalThreshold, dangerThreshold]
    ['TOKEN_USAGE', 0.35, 0.8, 0.95],
    ['CONVERSATION_LENGTH', 0.25, 100, 150],
    ['TASK_COMPLEXITY', 0.15, 5, 8],
    ['ERROR_FREQUENCY', 0.15, 3, 5],
    ['INSTRUCTION_DENSITY', 0.10, 10, 15]
  ].map(([name, weight, criticalThreshold, dangerThreshold]) => [
    name,
    { weight, criticalThreshold, dangerThreshold }
  ])
);
/**
 * Scores how much "pressure" a session context is under and reports the
 * matching pressure level, recommendations and trend.
 *
 * The overall score is a weighted sum of five normalized metrics (token usage,
 * conversation length, task complexity, recent errors, instruction density)
 * clamped to [0, 1]. The instance also keeps a bounded error history, a
 * bounded history of analyses (newest first) and running statistics.
 */
class ContextPressureMonitor {
  constructor() {
    this.pressureLevels = PRESSURE_LEVELS;
    this.metrics = METRICS;
    // Oldest entry first: recordError() pushes and trims from the front.
    this.errorHistory = [];
    this.maxErrorHistory = 20;
    // Newest entry first: analyzePressure() unshifts and trims from the back.
    this.pressureHistory = [];
    this.maxPressureHistory = 50;
    // Statistics tracking
    this.stats = this._createEmptyStats();
    logger.info('ContextPressureMonitor initialized');
  }

  /**
   * Calculate current pressure level.
   *
   * On success the analysis is counted in the stats and stored at the front
   * of the pressure history. If anything throws (for example `context` is
   * null or undefined) the error is logged and a fixed ELEVATED fallback
   * analysis is returned instead; the fallback is not recorded.
   *
   * @param {Object} context - Current conversation/session context. Fields read:
   *   tokenUsage, tokenBudget, messageCount, messages, activeTasks,
   *   taskComplexity, activeInstructions.
   * @returns {Object} Pressure analysis. `level`/`pressureName` hold the level
   *   name, `pressureLevel` the numeric rank. `trend` is only present once at
   *   least three analyses are in the history.
   */
  analyzePressure(context) {
    try {
      // Calculate individual metric scores
      const metricScores = {
        tokenUsage: this._calculateTokenPressure(context),
        conversationLength: this._calculateConversationPressure(context),
        taskComplexity: this._calculateComplexityPressure(context),
        errorFrequency: this._calculateErrorPressure(context),
        instructionDensity: this._calculateInstructionPressure(context)
      };

      // Calculate weighted overall pressure score
      const overallPressure = this._calculateOverallPressure(metricScores);

      // _determinePressureLevel() returns the level NAME; look up the level
      // definition here so both the name and its settings are available.
      const pressureName = this._determinePressureLevel(overallPressure);
      const pressureLevel = this.pressureLevels[pressureName];

      // Generate recommendations
      const recommendations = this._generateRecommendations(
        pressureLevel,
        metricScores,
        context
      );

      const analysis = {
        overallPressure,
        overall_score: overallPressure,
        pressureLevel: pressureLevel.level,
        level: pressureName,
        pressureName,
        description: pressureLevel.description,
        action: pressureLevel.action,
        verificationMultiplier: pressureLevel.verificationMultiplier,
        metrics: metricScores,
        recommendations,
        warnings: recommendations
          .filter(r => r.severity === 'HIGH' || r.severity === 'CRITICAL')
          .map(r => r.message),
        // No recommendation built by _generateRecommendations() has type
        // 'RISK', so this list is currently always empty.
        risks: recommendations
          .filter(r => r.type === 'RISK')
          .map(r => r.message),
        timestamp: new Date()
      };

      // Track statistics
      this.stats.total_analyses++;
      this.stats.by_level[pressureName]++;

      // Add to pressure history (newest first, bounded)
      this.pressureHistory.unshift(analysis);
      if (this.pressureHistory.length > this.maxPressureHistory) {
        this.pressureHistory = this.pressureHistory.slice(0, this.maxPressureHistory);
      }

      // Detect trends from the three most recent scores (index 0 is newest)
      if (this.pressureHistory.length >= 3) {
        const [latest, previous, oldest] = this.pressureHistory
          .slice(0, 3)
          .map(p => p.overallPressure);
        if (latest > previous && previous > oldest) {
          analysis.trend = 'escalating';
          analysis.warnings.push('Pressure is escalating rapidly');
        } else if (latest < previous && previous < oldest) {
          analysis.trend = 'improving';
        } else {
          analysis.trend = 'stable';
        }
      }

      // Log if pressure is elevated
      if (pressureLevel.level >= PRESSURE_LEVELS.ELEVATED.level) {
        logger.warn('Elevated context pressure detected', {
          level: pressureLevel.level,
          pressure: overallPressure,
          topMetric: this._getTopMetric(metricScores)
        });
      }

      return analysis;
    } catch (error) {
      logger.error('Pressure analysis error:', error);
      return this._defaultPressureAnalysis();
    }
  }

  /**
   * Record an error for error frequency tracking.
   *
   * Accepts Error instances, plain objects with `type`/`message`, strings, and
   * null/undefined (recorded with type 'unknown' and the stringified value).
   * Logs a warning when five or more recorded errors fall within the last
   * minute.
   *
   * @param {*} error - The error to record
   */
  recordError(error) {
    // Optional chaining keeps null/undefined inputs from throwing here.
    const errorType = error?.type || 'unknown';
    this.errorHistory.push({
      timestamp: new Date(),
      error: error?.message || String(error),
      type: errorType
    });

    // Track error statistics
    this.stats.total_errors++;
    this.stats.error_types[errorType] = (this.stats.error_types[errorType] || 0) + 1;

    // Maintain history limit (drop the oldest entry)
    if (this.errorHistory.length > this.maxErrorHistory) {
      this.errorHistory.shift();
    }

    logger.debug('Error recorded in pressure monitor', {
      recentErrors: this.errorHistory.length,
      type: errorType
    });

    // Check for error clustering
    const recentErrors = this.errorHistory.filter(e =>
      (new Date() - e.timestamp) < 60000 // Last minute
    );
    if (recentErrors.length >= 5) {
      logger.warn('Error clustering detected', {
        count: recentErrors.length,
        timeWindow: '1 minute'
      });
    }
  }

  /**
   * Check if action should proceed given current pressure.
   *
   * Runs a full analyzePressure(), so calling this also updates the stats and
   * the pressure history.
   *
   * @param {*} action - Not read by this method
   * @param {Object} context - Context passed to analyzePressure()
   * @returns {Object} `{ proceed, requireVerification?, reason, analysis }`.
   *   `proceed` is false only at the DANGEROUS level; verification is required
   *   from the HIGH level upwards.
   */
  shouldProceed(action, context) {
    const analysis = this.analyzePressure(context);

    if (analysis.pressureLevel >= PRESSURE_LEVELS.DANGEROUS.level) {
      return {
        proceed: false,
        reason: 'Dangerous pressure level - human intervention required',
        analysis
      };
    }

    if (analysis.pressureLevel >= PRESSURE_LEVELS.CRITICAL.level) {
      return {
        proceed: true,
        requireVerification: true,
        reason: 'Critical pressure - mandatory verification required',
        analysis
      };
    }

    return {
      proceed: true,
      requireVerification: analysis.pressureLevel >= PRESSURE_LEVELS.HIGH.level,
      reason: 'Acceptable pressure level',
      analysis
    };
  }

  /**
   * Private methods
   */

  /**
   * Build a zeroed statistics object (used by the constructor and reset()).
   * @returns {Object} Fresh stats with one counter per pressure level name
   */
  _createEmptyStats() {
    return {
      total_analyses: 0,
      total_errors: 0,
      by_level: {
        NORMAL: 0,
        ELEVATED: 0,
        HIGH: 0,
        CRITICAL: 0,
        DANGEROUS: 0
      },
      error_types: {}
    };
  }

  /**
   * Token pressure: used / budget, normalized so that reaching the critical
   * fraction of the budget already yields 1.0. Budget defaults to 200000 when
   * the context does not supply one.
   */
  _calculateTokenPressure(context) {
    const tokenUsage = context.tokenUsage || 0;
    const tokenBudget = context.tokenBudget || 200000;
    const ratio = tokenUsage / tokenBudget;
    return {
      value: ratio,
      normalized: Math.min(1.0, ratio / this.metrics.TOKEN_USAGE.criticalThreshold),
      raw: tokenUsage,
      budget: tokenBudget,
      percentage: (ratio * 100).toFixed(1)
    };
  }

  /**
   * Conversation pressure: message count (explicit `messageCount`, else
   * `messages.length`, else 0) relative to the critical threshold.
   */
  _calculateConversationPressure(context) {
    const messageCount = context.messageCount || context.messages?.length || 0;
    const ratio = messageCount / this.metrics.CONVERSATION_LENGTH.criticalThreshold;
    return {
      value: ratio,
      normalized: Math.min(1.0, ratio),
      raw: messageCount,
      threshold: this.metrics.CONVERSATION_LENGTH.criticalThreshold
    };
  }

  /**
   * Complexity pressure: `activeTasks.length`, else `taskComplexity`, else a
   * baseline of 1 task, relative to the critical threshold.
   */
  _calculateComplexityPressure(context) {
    const taskCount = context.activeTasks?.length || context.taskComplexity || 1;
    const ratio = taskCount / this.metrics.TASK_COMPLEXITY.criticalThreshold;
    return {
      value: ratio,
      normalized: Math.min(1.0, ratio),
      raw: taskCount,
      threshold: this.metrics.TASK_COMPLEXITY.criticalThreshold
    };
  }

  /**
   * Error pressure: number of recorded errors from the last 10 minutes
   * relative to the critical threshold. The `context` argument is not read.
   */
  _calculateErrorPressure(context) {
    // Count recent errors (last 10 minutes)
    const tenMinutesAgo = new Date(Date.now() - 10 * 60 * 1000);
    const recentErrors = this.errorHistory.filter(
      e => new Date(e.timestamp) > tenMinutesAgo
    ).length;
    const ratio = recentErrors / this.metrics.ERROR_FREQUENCY.criticalThreshold;
    return {
      value: ratio,
      normalized: Math.min(1.0, ratio),
      raw: recentErrors,
      threshold: this.metrics.ERROR_FREQUENCY.criticalThreshold,
      total: this.errorHistory.length
    };
  }

  /**
   * Instruction pressure: `activeInstructions.length` (0 when absent)
   * relative to the critical threshold.
   */
  _calculateInstructionPressure(context) {
    const instructionCount = context.activeInstructions?.length || 0;
    const ratio = instructionCount / this.metrics.INSTRUCTION_DENSITY.criticalThreshold;
    return {
      value: ratio,
      normalized: Math.min(1.0, ratio),
      raw: instructionCount,
      threshold: this.metrics.INSTRUCTION_DENSITY.criticalThreshold
    };
  }

  /**
   * Weighted sum of the normalized metric scores, clamped to [0, 1].
   */
  _calculateOverallPressure(metricScores) {
    let pressure = 0;
    pressure += metricScores.tokenUsage.normalized * this.metrics.TOKEN_USAGE.weight;
    pressure += metricScores.conversationLength.normalized * this.metrics.CONVERSATION_LENGTH.weight;
    pressure += metricScores.taskComplexity.normalized * this.metrics.TASK_COMPLEXITY.weight;
    pressure += metricScores.errorFrequency.normalized * this.metrics.ERROR_FREQUENCY.weight;
    pressure += metricScores.instructionDensity.normalized * this.metrics.INSTRUCTION_DENSITY.weight;
    return Math.min(1.0, Math.max(0.0, pressure));
  }

  /**
   * Build the list of recommendations for an analysis.
   *
   * @param {Object} pressureLevel - Level definition (an entry of PRESSURE_LEVELS)
   * @param {Object} metricScores - Per-metric scores with a `normalized` field
   * @param {Object} context - Not read by this method
   * @returns {Array<Object>} Items of shape `{ type, severity, message, action }`
   */
  _generateRecommendations(pressureLevel, metricScores, context) {
    const recommendations = [];

    // Token usage recommendations
    if (metricScores.tokenUsage.normalized > 0.8) {
      recommendations.push({
        type: 'TOKEN_MANAGEMENT',
        severity: 'HIGH',
        message: 'Token budget critically low - consider context refresh',
        action: 'Summarize conversation and start new context window'
      });
    } else if (metricScores.tokenUsage.normalized > 0.6) {
      recommendations.push({
        type: 'TOKEN_MANAGEMENT',
        severity: 'MEDIUM',
        message: 'Token usage elevated - monitor carefully',
        action: 'Be concise in responses, consider pruning context if needed'
      });
    }

    // Conversation length recommendations
    if (metricScores.conversationLength.normalized > 0.8) {
      recommendations.push({
        type: 'CONVERSATION_MANAGEMENT',
        severity: 'HIGH',
        message: 'Very long conversation - attention may degrade',
        action: 'Consider summarizing progress and starting fresh session'
      });
    }

    // Error frequency recommendations
    if (metricScores.errorFrequency.normalized > 0.6) {
      recommendations.push({
        type: 'ERROR_MANAGEMENT',
        severity: 'HIGH',
        message: 'High error frequency detected - operating conditions degraded',
        action: 'Increase verification, slow down, consider pausing for review'
      });
    }

    // Task complexity recommendations
    if (metricScores.taskComplexity.normalized > 0.7) {
      recommendations.push({
        type: 'COMPLEXITY_MANAGEMENT',
        severity: 'MEDIUM',
        message: 'High task complexity - risk of context confusion',
        action: 'Focus on one task at a time, explicitly track task switching'
      });
    }

    // Instruction density recommendations
    if (metricScores.instructionDensity.normalized > 0.7) {
      recommendations.push({
        type: 'INSTRUCTION_MANAGEMENT',
        severity: 'MEDIUM',
        message: 'Many active instructions - risk of conflicts',
        action: 'Review and consolidate instructions, resolve conflicts'
      });
    }

    // Overall pressure recommendations
    if (pressureLevel.level >= PRESSURE_LEVELS.CRITICAL.level) {
      recommendations.push({
        type: 'GENERAL',
        severity: 'CRITICAL',
        message: 'Critical pressure level - degraded performance likely',
        action: 'Strongly recommend context refresh or human intervention'
      });
    }

    return recommendations;
  }

  /**
   * Name of the metric with the highest normalized score. On ties the metric
   * listed first below wins.
   */
  _getTopMetric(metricScores) {
    const scores = [
      { name: 'tokenUsage', score: metricScores.tokenUsage.normalized },
      { name: 'conversationLength', score: metricScores.conversationLength.normalized },
      { name: 'taskComplexity', score: metricScores.taskComplexity.normalized },
      { name: 'errorFrequency', score: metricScores.errorFrequency.normalized },
      { name: 'instructionDensity', score: metricScores.instructionDensity.normalized }
    ];
    scores.sort((a, b) => b.score - a.score);
    return scores[0].name;
  }

  /**
   * Fallback analysis returned when analyzePressure() throws: a fixed
   * ELEVATED result with a single HIGH-severity recommendation.
   */
  _defaultPressureAnalysis() {
    return {
      overallPressure: 0.5,
      overall_score: 0.5,
      pressureLevel: 1,
      level: 'ELEVATED',
      pressureName: 'ELEVATED',
      description: 'Unable to analyze pressure, using safe defaults',
      action: 'INCREASE_VERIFICATION',
      verificationMultiplier: 1.5,
      metrics: {},
      recommendations: [{
        type: 'ERROR',
        severity: 'HIGH',
        message: 'Pressure analysis failed - proceeding with caution',
        action: 'Increase verification and monitoring'
      }],
      warnings: ['Pressure analysis failed - proceeding with caution'],
      risks: [],
      timestamp: new Date()
    };
  }

  /**
   * Determine pressure level from score (exposed for testing).
   *
   * This is the only definition of the method: a class keeps just the last
   * method declared under a given name, so a second, object-returning variant
   * would silently replace or be replaced by this one.
   *
   * @param {number} score - Overall pressure score (0-1)
   * @returns {string} Pressure level name (a key of PRESSURE_LEVELS)
   */
  _determinePressureLevel(score) {
    if (score >= PRESSURE_LEVELS.DANGEROUS.threshold) return 'DANGEROUS';
    if (score >= PRESSURE_LEVELS.CRITICAL.threshold) return 'CRITICAL';
    if (score >= PRESSURE_LEVELS.HIGH.threshold) return 'HIGH';
    if (score >= PRESSURE_LEVELS.ELEVATED.threshold) return 'ELEVATED';
    return 'NORMAL';
  }

  /**
   * Get pressure history
   * @returns {Array} Shallow copy of the analysis history, newest first
   */
  getPressureHistory() {
    return [...this.pressureHistory];
  }

  /**
   * Reset monitoring state: clears both histories and zeroes the statistics.
   */
  reset() {
    this.errorHistory = [];
    this.pressureHistory = [];
    this.stats = this._createEmptyStats();
    logger.info('ContextPressureMonitor state reset');
  }

  /**
   * Get monitoring statistics
   * @returns {Object} The running stats plus history sizes, the number of
   *   recorded errors from the last hour, and the level name of the most
   *   recent analysis ('UNKNOWN' when there is none)
   */
  getStats() {
    const recentErrors = this.errorHistory.filter(e =>
      (new Date() - e.timestamp) < 3600000 // Last hour
    ).length;
    return {
      ...this.stats,
      error_history_size: this.errorHistory.length,
      pressure_history_size: this.pressureHistory.length,
      recent_errors_1h: recentErrors,
      current_pressure: this.pressureHistory.length > 0
        ? this.pressureHistory[0].level
        : 'UNKNOWN',
      timestamp: new Date()
    };
  }
}
// Singleton instance: the module exports one ContextPressureMonitor object
// created at load time (not the class), so the error history, pressure
// history and stats live on that exported instance.
const monitor = new ContextPressureMonitor();
module.exports = monitor;