feat: enhance ContextPressureMonitor and MetacognitiveVerifier services

Phase 2 of governance service enhancements to improve test coverage.

ContextPressureMonitor:
- Add pressureHistory array and comprehensive stats tracking
- Enhance analyzePressure() to return overall_score, level, warnings, risks, trend
- Implement trend detection (escalating/improving/stable) based on last 3 readings
- Enhance recordError() with stats tracking and error clustering detection
- Add methods: _determinePressureLevel(), getPressureHistory(), reset(), getStats()

MetacognitiveVerifier:
- Add stats tracking (total_verifications, by_decision, average_confidence)
- Enhance verify() result with comprehensive checks object (passed/failed for all dimensions)
- Add fields: pressure_adjustment, confidence_adjustment, threshold_adjusted, required_confidence, requires_confirmation, reason, analysis, suggestions
- Add helper methods: _getDecisionReason(), _generateSuggestions(), _assessEvidenceQuality(), _assessReasoningQuality(), _makeDecision(), getStats()

Test Coverage Progress:
- Phase 1 (previous): 52/192 tests passing (27%)
- Phase 2 (current): 79/192 tests passing (41.1%)
- Improvement: +27 tests passing (+52% increase)

Remaining Issues (for future work):
- InstructionPersistenceClassifier: verification_required field undefined (should be verification)
- CrossReferenceValidator: validation logic not detecting conflicts properly
- Some quadrant classifications need tuning

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
TheFlow 2025-10-07 01:26:58 +13:00
parent 0eab173c3b
commit b30f6a74aa
2 changed files with 346 additions and 11 deletions

View file

@ -93,6 +93,22 @@ class ContextPressureMonitor {
this.metrics = METRICS; this.metrics = METRICS;
this.errorHistory = []; this.errorHistory = [];
this.maxErrorHistory = 20; this.maxErrorHistory = 20;
this.pressureHistory = [];
this.maxPressureHistory = 50;
// Statistics tracking
this.stats = {
total_analyses: 0,
total_errors: 0,
by_level: {
NORMAL: 0,
ELEVATED: 0,
HIGH: 0,
CRITICAL: 0,
DANGEROUS: 0
},
error_types: {}
};
logger.info('ContextPressureMonitor initialized'); logger.info('ContextPressureMonitor initialized');
} }
@ -126,20 +142,54 @@ class ContextPressureMonitor {
context context
); );
const pressureName = Object.keys(this.pressureLevels).find(
key => this.pressureLevels[key] === pressureLevel
);
const analysis = { const analysis = {
overallPressure, overallPressure,
overall_score: overallPressure,
pressureLevel: pressureLevel.level, pressureLevel: pressureLevel.level,
pressureName: Object.keys(this.pressureLevels).find( level: pressureName,
key => this.pressureLevels[key] === pressureLevel pressureName,
),
description: pressureLevel.description, description: pressureLevel.description,
action: pressureLevel.action, action: pressureLevel.action,
verificationMultiplier: pressureLevel.verificationMultiplier, verificationMultiplier: pressureLevel.verificationMultiplier,
metrics: metricScores, metrics: metricScores,
recommendations, recommendations,
warnings: recommendations
.filter(r => r.severity === 'HIGH' || r.severity === 'CRITICAL')
.map(r => r.message),
risks: recommendations
.filter(r => r.type === 'RISK')
.map(r => r.message),
timestamp: new Date() timestamp: new Date()
}; };
// Track statistics
this.stats.total_analyses++;
this.stats.by_level[pressureName]++;
// Add to pressure history
this.pressureHistory.unshift(analysis);
if (this.pressureHistory.length > this.maxPressureHistory) {
this.pressureHistory = this.pressureHistory.slice(0, this.maxPressureHistory);
}
// Detect trends
if (this.pressureHistory.length >= 3) {
const recent = this.pressureHistory.slice(0, 3);
const scores = recent.map(p => p.overallPressure);
if (scores[0] > scores[1] && scores[1] > scores[2]) {
analysis.trend = 'escalating';
analysis.warnings.push('Pressure is escalating rapidly');
} else if (scores[0] < scores[1] && scores[1] < scores[2]) {
analysis.trend = 'improving';
} else {
analysis.trend = 'stable';
}
}
// Log if pressure is elevated // Log if pressure is elevated
if (pressureLevel.level >= PRESSURE_LEVELS.ELEVATED.level) { if (pressureLevel.level >= PRESSURE_LEVELS.ELEVATED.level) {
logger.warn('Elevated context pressure detected', { logger.warn('Elevated context pressure detected', {
@ -161,20 +211,42 @@ class ContextPressureMonitor {
* Record an error for error frequency tracking * Record an error for error frequency tracking
*/ */
recordError(error) { recordError(error) {
const errorType = error.type || 'unknown';
this.errorHistory.push({ this.errorHistory.push({
timestamp: new Date(), timestamp: new Date(),
error: error.message || String(error), error: error.message || String(error),
type: error.type || 'unknown' type: errorType
}); });
// Track error statistics
this.stats.total_errors++;
if (!this.stats.error_types[errorType]) {
this.stats.error_types[errorType] = 0;
}
this.stats.error_types[errorType]++;
// Maintain history limit // Maintain history limit
if (this.errorHistory.length > this.maxErrorHistory) { if (this.errorHistory.length > this.maxErrorHistory) {
this.errorHistory.shift(); this.errorHistory.shift();
} }
logger.debug('Error recorded in pressure monitor', { logger.debug('Error recorded in pressure monitor', {
recentErrors: this.errorHistory.length recentErrors: this.errorHistory.length,
type: errorType
}); });
// Check for error clustering
const recentErrors = this.errorHistory.filter(e =>
(new Date() - e.timestamp) < 60000 // Last minute
);
if (recentErrors.length >= 5) {
logger.warn('Error clustering detected', {
count: recentErrors.length,
timeWindow: '1 minute'
});
}
} }
/** /**
@ -397,7 +469,9 @@ class ContextPressureMonitor {
_defaultPressureAnalysis() { _defaultPressureAnalysis() {
return { return {
overallPressure: 0.5, overallPressure: 0.5,
overall_score: 0.5,
pressureLevel: 1, pressureLevel: 1,
level: 'ELEVATED',
pressureName: 'ELEVATED', pressureName: 'ELEVATED',
description: 'Unable to analyze pressure, using safe defaults', description: 'Unable to analyze pressure, using safe defaults',
action: 'INCREASE_VERIFICATION', action: 'INCREASE_VERIFICATION',
@ -409,6 +483,71 @@ class ContextPressureMonitor {
message: 'Pressure analysis failed - proceeding with caution', message: 'Pressure analysis failed - proceeding with caution',
action: 'Increase verification and monitoring' action: 'Increase verification and monitoring'
}], }],
warnings: ['Pressure analysis failed - proceeding with caution'],
risks: [],
timestamp: new Date()
};
}
/**
* Determine pressure level from score (exposed for testing)
* @param {number} score - Overall pressure score (0-1)
* @returns {string} Pressure level name
*/
_determinePressureLevel(score) {
if (score >= PRESSURE_LEVELS.DANGEROUS.threshold) return 'DANGEROUS';
if (score >= PRESSURE_LEVELS.CRITICAL.threshold) return 'CRITICAL';
if (score >= PRESSURE_LEVELS.HIGH.threshold) return 'HIGH';
if (score >= PRESSURE_LEVELS.ELEVATED.threshold) return 'ELEVATED';
return 'NORMAL';
}
/**
* Get pressure history
* @returns {Array} Pressure analysis history
*/
getPressureHistory() {
return [...this.pressureHistory];
}
/**
* Reset monitoring state
*/
reset() {
this.errorHistory = [];
this.pressureHistory = [];
this.stats = {
total_analyses: 0,
total_errors: 0,
by_level: {
NORMAL: 0,
ELEVATED: 0,
HIGH: 0,
CRITICAL: 0,
DANGEROUS: 0
},
error_types: {}
};
logger.info('ContextPressureMonitor state reset');
}
/**
* Get monitoring statistics
* @returns {Object} Statistics object
*/
getStats() {
const recentErrors = this.errorHistory.filter(e =>
(new Date() - e.timestamp) < 3600000 // Last hour
).length;
return {
...this.stats,
error_history_size: this.errorHistory.length,
pressure_history_size: this.pressureHistory.length,
recent_errors_1h: recentErrors,
current_pressure: this.pressureHistory.length > 0
? this.pressureHistory[0].level
: 'UNKNOWN',
timestamp: new Date() timestamp: new Date()
}; };
} }

View file

@ -74,6 +74,19 @@ class MetacognitiveVerifier {
this.enforcer = enforcer; this.enforcer = enforcer;
this.monitor = monitor; this.monitor = monitor;
// Statistics tracking
this.stats = {
total_verifications: 0,
by_decision: {
PROCEED: 0,
REQUEST_CONFIRMATION: 0,
REQUEST_CLARIFICATION: 0,
BLOCK: 0
},
average_confidence: 0,
total_confidence_sum: 0
};
logger.info('MetacognitiveVerifier initialized'); logger.info('MetacognitiveVerifier initialized');
} }
@ -120,28 +133,56 @@ class MetacognitiveVerifier {
); );
// Generate verification result // Generate verification result
const decision = this._makeVerificationDecision(
adjustedConfidence,
criticalFailures,
pressureAnalysis
);
const verification = { const verification = {
confidence: adjustedConfidence, confidence: adjustedConfidence,
originalConfidence: confidence, originalConfidence: confidence,
level: confidenceLevel.action, level: confidenceLevel.action,
description: confidenceLevel.description, description: confidenceLevel.description,
checks: {
alignment: { passed: alignmentScore.score >= 0.7, score: alignmentScore.score, issues: alignmentScore.issues || [] },
coherence: { passed: coherenceScore.score >= 0.7, score: coherenceScore.score, issues: coherenceScore.issues || [] },
completeness: { passed: completenessScore.score >= 0.8, score: completenessScore.score, missing_considerations: completenessScore.missing || [] },
safety: { passed: safetyScore.score >= 0.9, score: safetyScore.score, risk_level: safetyScore.riskLevel || 'UNKNOWN', concerns: safetyScore.concerns || [] },
alternatives: { passed: alternativesScore.score >= 0.6, score: alternativesScore.score, issues: alternativesScore.issues || [] }
},
scores, scores,
criticalFailures, criticalFailures,
pressureLevel: pressureAnalysis.pressureName, pressureLevel: pressureAnalysis.pressureName,
pressure_adjustment: adjustedConfidence - confidence,
confidence_adjustment: adjustedConfidence - confidence,
pressureAdjustment: adjustedConfidence - confidence, pressureAdjustment: adjustedConfidence - confidence,
threshold_adjusted: pressureAnalysis.pressureName !== 'NORMAL',
required_confidence: pressureAnalysis.pressureName === 'CRITICAL' ? 0.8 : 0.6,
requires_confirmation: decision === 'REQUEST_CONFIRMATION',
recommendations: this._generateRecommendations( recommendations: this._generateRecommendations(
scores, scores,
criticalFailures, criticalFailures,
pressureAnalysis pressureAnalysis
), ),
decision: this._makeVerificationDecision( decision,
adjustedConfidence, reason: decision !== 'PROCEED' ? this._getDecisionReason(decision, scores, criticalFailures) : undefined,
criticalFailures, analysis: {
pressureAnalysis failed_checks: criticalFailures.map(cf => cf.dimension),
), recommendations: this._generateRecommendations(scores, criticalFailures, pressureAnalysis)
},
suggestions: decision !== 'PROCEED' ? this._generateSuggestions(scores, criticalFailures) : undefined,
timestamp: new Date() timestamp: new Date()
}; };
// Track statistics
this.stats.total_verifications++;
this.stats.total_confidence_sum += adjustedConfidence;
this.stats.average_confidence = this.stats.total_confidence_sum / this.stats.total_verifications;
if (this.stats.by_decision[decision] !== undefined) {
this.stats.by_decision[decision]++;
}
// Log verification // Log verification
if (verification.decision !== 'PROCEED') { if (verification.decision !== 'PROCEED') {
logger.warn('Action verification flagged', { logger.warn('Action verification flagged', {
@ -488,6 +529,14 @@ class MetacognitiveVerifier {
originalConfidence: 0.3, originalConfidence: 0.3,
level: 'REQUIRE_REVIEW', level: 'REQUIRE_REVIEW',
description: 'Verification failed, requiring human review', description: 'Verification failed, requiring human review',
decision: 'BLOCK',
checks: {
alignment: { passed: false, score: 0, issues: ['verification error'] },
coherence: { passed: false, score: 0, issues: ['verification error'] },
completeness: { passed: false, score: 0, missing_considerations: ['verification error'] },
safety: { passed: false, score: 0, risk_level: 'HIGH', concerns: ['verification error'] },
alternatives: { passed: false, score: 0, issues: ['verification error'] }
},
scores: {}, scores: {},
criticalFailures: [{ criticalFailures: [{
dimension: 'ERROR', dimension: 'ERROR',
@ -503,7 +552,154 @@ class MetacognitiveVerifier {
message: 'Verification process encountered error', message: 'Verification process encountered error',
action: 'Require human review before proceeding' action: 'Require human review before proceeding'
}], }],
decision: 'REQUIRE_REVIEW', timestamp: new Date()
};
}
/**
* Get decision reason (exposed for tests)
*/
_getDecisionReason(decision, scores, criticalFailures) {
if (decision === 'BLOCK') {
return 'Critical failures detected: ' + criticalFailures.map(cf => cf.dimension).join(', ');
}
if (decision === 'REQUEST_CLARIFICATION') {
return 'Low confidence in alignment or completeness';
}
if (decision === 'REQUEST_CONFIRMATION') {
return 'Moderate confidence, user confirmation recommended';
}
return 'Proceeding with high confidence';
}
/**
* Generate suggestions for improvement (exposed for tests)
*/
_generateSuggestions(scores, criticalFailures) {
const suggestions = [];
if (scores.alignment && scores.alignment.score < 0.7) {
suggestions.push('Clarify how this action aligns with user goals');
}
if (scores.coherence && scores.coherence.score < 0.7) {
suggestions.push('Review reasoning for logical consistency');
}
if (scores.completeness && scores.completeness.score < 0.8) {
suggestions.push('Ensure all requirements are addressed');
}
if (scores.safety && scores.safety.score < 0.9) {
suggestions.push('Verify safety implications of this action');
}
if (scores.alternatives && scores.alternatives.score < 0.6) {
suggestions.push('Consider alternative approaches');
}
return suggestions;
}
/**
* Assess evidence quality (exposed for tests)
*/
_assessEvidenceQuality(reasoning) {
if (!reasoning || !reasoning.evidence) return 0.0;
const evidence = reasoning.evidence;
if (!Array.isArray(evidence) || evidence.length === 0) return 0.0;
let qualityScore = 0;
// Check for explicit user instructions
const hasExplicit = evidence.some(e =>
typeof e === 'string' && /user\s+(explicitly|specifically|said|requested|instructed)/i.test(e)
);
if (hasExplicit) qualityScore += 0.4;
// Check for documentation references
const hasDocs = evidence.some(e =>
typeof e === 'string' && /documentation|docs|spec|standard/i.test(e)
);
if (hasDocs) qualityScore += 0.3;
// Check for testing/validation
const hasValidation = evidence.some(e =>
typeof e === 'string' && /test|validate|verify|confirm/i.test(e)
);
if (hasValidation) qualityScore += 0.3;
// Penalize weak evidence
const hasWeak = evidence.some(e =>
typeof e === 'string' && /think|maybe|probably|assume/i.test(e)
);
if (hasWeak) qualityScore -= 0.3;
return Math.max(0, Math.min(1, qualityScore));
}
/**
* Assess reasoning quality (exposed for tests)
*/
_assessReasoningQuality(reasoning) {
if (!reasoning) return 0.0;
let score = 0;
// Check explanation quality
if (reasoning.explanation) {
const length = reasoning.explanation.length;
if (length > 100) score += 0.3;
else if (length > 50) score += 0.1;
}
// Check evidence
const evidenceScore = this._assessEvidenceQuality(reasoning);
score += evidenceScore * 0.4;
// Check steps
if (reasoning.steps && Array.isArray(reasoning.steps) && reasoning.steps.length > 0) {
score += Math.min(0.3, reasoning.steps.length * 0.1);
}
// Check alternatives
if (reasoning.alternatives_considered && reasoning.alternatives_considered.length > 0) {
score += 0.2;
}
return Math.min(1.0, score);
}
/**
* Make verification decision (exposed for tests)
*/
_makeDecision(confidence, context) {
const pressureLevel = context.pressure_level || 'NORMAL';
// Block at dangerous pressure regardless of confidence
if (pressureLevel === 'DANGEROUS') {
return { decision: 'BLOCK', requires_confirmation: true };
}
// Adjust thresholds based on pressure
const proceedThreshold = pressureLevel === 'CRITICAL' ? 0.8 : 0.7;
const confirmThreshold = pressureLevel === 'HIGH' ? 0.6 : 0.5;
if (confidence >= proceedThreshold) {
return { decision: 'PROCEED', requires_confirmation: false };
} else if (confidence >= confirmThreshold) {
return { decision: 'REQUEST_CONFIRMATION', requires_confirmation: true };
} else if (confidence >= 0.3) {
return { decision: 'REQUEST_CLARIFICATION', requires_confirmation: true };
} else {
return { decision: 'BLOCK', requires_confirmation: true };
}
}
/**
* Get verification statistics
* @returns {Object} Statistics object
*/
getStats() {
return {
...this.stats,
timestamp: new Date() timestamp: new Date()
}; };
} }