feat: Add performance degradation detection to context pressure monitoring
Implements 5-metric weighted degradation score to detect performance issues: - Error patterns (30%): Consecutive errors, clustering, severity - Framework fade (25%): Component staleness detection - Context quality (20%): Post-compaction degradation, session age - Behavioral indicators (15%): Tool retry patterns - Task completion (10%): Recent error rate Degradation levels: LOW (<20%), MODERATE (20-40%), HIGH (40-60%), CRITICAL (60%+) Displayed in 'ffs' command output with breakdown and recommendations. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
4833ee1ff9
commit
b5d17f9dbc
4 changed files with 829 additions and 5 deletions
333
docs/plans/DEGRADATION_SCORE_IMPLEMENTATION.md
Normal file
333
docs/plans/DEGRADATION_SCORE_IMPLEMENTATION.md
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
# Degradation Score Implementation Plan
|
||||
|
||||
**Problem**: Pressure gauge showed 3% but performance severely degraded
|
||||
**Root Cause**: Missing behavioral/quality metrics
|
||||
**Framework Audit**: 690964aa9eac658bf5f14cb4
|
||||
|
||||
---
|
||||
|
||||
## Missing Metrics Identified
|
||||
|
||||
### 1. ERROR PATTERN ANALYSIS (30% weight)
|
||||
- **Consecutive errors**: Track errors in sequence
|
||||
- **Error clustering**: Detect error bursts (3+ in 10-minute window)
|
||||
- **Error severity**: Weight by impact (blocked=3, warning=1)
|
||||
- **Repeated failures**: Same tool/operation failing multiple times
|
||||
|
||||
### 2. FRAMEWORK FADE (25% weight)
|
||||
- **Component staleness**: Time since MetacognitiveVerifier last used
|
||||
- **BoundaryEnforcer usage**: Should be invoked for values decisions
|
||||
- **Framework invocation rate**: Declining usage = fade
|
||||
|
||||
### 3. CONTEXT QUALITY (20% weight)
|
||||
- **Post-compaction flag**: Session continued after compaction = quality loss
|
||||
- **Knowledge domain shift**: Sudden change in task types
|
||||
- **Session age**: Very long sessions = accumulated drift
|
||||
|
||||
### 4. BEHAVIORAL INDICATORS (15% weight)
|
||||
- **Tool retry rate**: Same tool called 3+ times consecutively
|
||||
- **Read without action**: Files read but not edited/used
|
||||
- **Deployment thrashing**: Multiple restarts in short period
|
||||
|
||||
### 5. TASK COMPLETION (10% weight)
|
||||
- **Time since last success**: How long since error-free completion
|
||||
- **Success rate trend**: Declining completion rate
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
### File: `scripts/framework-components/ContextPressureMonitor.js`
|
||||
|
||||
Add new method:
|
||||
|
||||
```javascript
|
||||
/**
|
||||
* Calculate degradation score (0-100)
|
||||
* Combines behavioral and quality metrics
|
||||
*/
|
||||
async calculateDegradationScore() {
|
||||
const scores = {
|
||||
errorPattern: await this._analyzeErrorPatterns(), // 30%
|
||||
frameworkFade: await this._detectFrameworkFade(), // 25%
|
||||
contextQuality: await this._assessContextQuality(), // 20%
|
||||
behavioral: await this._analyzeBehavior(), // 15%
|
||||
taskCompletion: await this._measureTaskCompletion() // 10%
|
||||
};
|
||||
|
||||
const degradationScore =
|
||||
scores.errorPattern * 0.30 +
|
||||
scores.frameworkFade * 0.25 +
|
||||
scores.contextQuality * 0.20 +
|
||||
scores.behavioral * 0.15 +
|
||||
scores.taskCompletion * 0.10;
|
||||
|
||||
return {
|
||||
score: Math.round(degradationScore),
|
||||
level: this._getDegradationLevel(degradationScore),
|
||||
breakdown: scores,
|
||||
recommendation: this._getRecommendation(degradationScore)
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze error patterns (returns 0-100)
|
||||
*/
|
||||
async _analyzeErrorPatterns() {
|
||||
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||
limit: 50,
|
||||
filter: { hasError: true }
|
||||
});
|
||||
|
||||
// Consecutive errors
let maxConsecutive = 0;
let currentStreak = 0;
|
||||
|
||||
recentErrors.forEach((e) => {
|
||||
if (e.decision?.blocked || e.decision?.errors) {
|
||||
currentStreak++;
|
||||
maxConsecutive = Math.max(maxConsecutive, currentStreak);
|
||||
} else {
|
||||
currentStreak = 0;
|
||||
}
|
||||
});
|
||||
|
||||
// Error clustering (3+ errors in 10-minute windows)
|
||||
const errorClusters = this._detectErrorClusters(recentErrors, 10 * 60 * 1000);
|
||||
|
||||
// Error severity weighting
|
||||
const severityScore = recentErrors.reduce((sum, e) => {
|
||||
if (e.decision?.blocked) return sum + 3;
|
||||
if (e.decision?.errors) return sum + 1;
|
||||
return sum;
|
||||
}, 0);
|
||||
|
||||
// Combine metrics
|
||||
const consecutiveScore = Math.min(maxConsecutive * 10, 100);
|
||||
const clusterScore = Math.min(errorClusters.length * 15, 100);
|
||||
const severityScoreNormalized = Math.min(severityScore * 2, 100);
|
||||
|
||||
return Math.round((consecutiveScore + clusterScore + severityScoreNormalized) / 3);
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect framework fade (returns 0-100)
|
||||
*/
|
||||
async _detectFrameworkFade() {
|
||||
const criticalComponents = [
|
||||
'MetacognitiveVerifier',
|
||||
'BoundaryEnforcer',
|
||||
'PluralisticDeliberationOrchestrator'
|
||||
];
|
||||
|
||||
const componentActivity = await Promise.all(
|
||||
criticalComponents.map(async (service) => {
|
||||
const logs = await this.memoryProxy.getRecentAuditLogs({
|
||||
limit: 1,
|
||||
filter: { service }
|
||||
});
|
||||
|
||||
if (logs.length === 0) return { service, ageMinutes: Infinity };
|
||||
|
||||
const age = (Date.now() - logs[0].timestamp) / 1000 / 60;
|
||||
return { service, ageMinutes: age };
|
||||
})
|
||||
);
|
||||
|
||||
// Score: minutes since last use
|
||||
// 0-30 min = 0 points
|
||||
// 30-60 min = 50 points
|
||||
// 60+ min = 100 points
|
||||
const scores = componentActivity.map(c => {
|
||||
if (c.ageMinutes === Infinity) return 100;
|
||||
if (c.ageMinutes < 30) return 0;
|
||||
if (c.ageMinutes < 60) return 50;
|
||||
return 100;
|
||||
});
|
||||
|
||||
return Math.round(scores.reduce((a, b) => a + b, 0) / scores.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Assess context quality (returns 0-100)
|
||||
*/
|
||||
async _assessContextQuality() {
|
||||
const session = await this.memoryProxy.getSessionState();
|
||||
|
||||
let score = 0;
|
||||
|
||||
// Post-compaction flag (major degradation indicator)
|
||||
if (session.autoCompactions && session.autoCompactions.length > 0) {
|
||||
const lastCompaction = session.autoCompactions[session.autoCompactions.length - 1];
|
||||
const timeSinceCompaction = (Date.now() - lastCompaction.timestamp) / 1000 / 60;
|
||||
|
||||
// Within 60 minutes of compaction = high risk
|
||||
if (timeSinceCompaction < 60) {
|
||||
score += 60;
|
||||
} else if (timeSinceCompaction < 120) {
|
||||
score += 30;
|
||||
}
|
||||
}
|
||||
|
||||
// Session age (very long sessions accumulate drift)
|
||||
const sessionAge = (Date.now() - session.startTime) / 1000 / 60 / 60; // hours
|
||||
if (sessionAge > 6) score += 40;
|
||||
else if (sessionAge > 4) score += 20;
|
||||
|
||||
return Math.min(score, 100);
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze behavioral indicators (returns 0-100)
|
||||
*/
|
||||
async _analyzeBehavior() {
|
||||
const recentActions = await this.memoryProxy.getRecentAuditLogs({ limit: 50 });
|
||||
|
||||
// Tool retry rate
|
||||
const toolCalls = recentActions.map(a => a.metadata?.tool);
|
||||
let retries = 0;
|
||||
for (let i = 2; i < toolCalls.length; i++) {
|
||||
if (toolCalls[i] === toolCalls[i-1] && toolCalls[i] === toolCalls[i-2]) {
|
||||
retries++;
|
||||
}
|
||||
}
|
||||
|
||||
const retryScore = Math.min(retries * 20, 100);
|
||||
return retryScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* Measure task completion (returns 0-100)
|
||||
*/
|
||||
async _measureTaskCompletion() {
|
||||
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||
limit: 20,
|
||||
filter: { hasError: true }
|
||||
});
|
||||
|
||||
// Simple metric: error rate in last 20 actions
|
||||
const errorRate = (recentErrors.length / 20) * 100;
|
||||
return Math.round(errorRate);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get degradation level
|
||||
*/
|
||||
_getDegradationLevel(score) {
|
||||
if (score >= 60) return 'CRITICAL';
|
||||
if (score >= 40) return 'HIGH';
|
||||
if (score >= 20) return 'MODERATE';
|
||||
return 'LOW';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get recommendation
|
||||
*/
|
||||
_getRecommendation(score) {
|
||||
if (score >= 60) {
|
||||
return 'RECOMMEND SESSION RESTART - Quality severely degraded';
|
||||
}
|
||||
if (score >= 40) {
|
||||
return 'WARN USER - Performance declining, consider checkpoint review';
|
||||
}
|
||||
return 'Monitoring - No action needed';
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Integration Points
|
||||
|
||||
### 1. Add to Pressure Analysis
|
||||
|
||||
Modify `analyzeContextPressure()` to include degradationScore:
|
||||
|
||||
```javascript
|
||||
async analyzeContextPressure(tokenCount = null, tokenBudget = 200000) {
|
||||
// ... existing metrics ...
|
||||
|
||||
const degradation = await this.calculateDegradationScore();
|
||||
|
||||
return {
|
||||
level: this._determineLevel(overallScore),
|
||||
score: overallScore,
|
||||
degradation: degradation.score,
|
||||
degradationLevel: degradation.level,
|
||||
degradationBreakdown: degradation.breakdown,
|
||||
recommendation: degradation.recommendation,
|
||||
// ... rest of response
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Token Checkpoint Reporting
|
||||
|
||||
Update checkpoint messages to include degradation:
|
||||
|
||||
```
|
||||
📊 Context Pressure: NORMAL (4%) | Degradation: HIGH (45%) | Tokens: 50000/200000
|
||||
⚠️ WARNING: Framework fade detected - MetacognitiveVerifier unused for 45 minutes
|
||||
```
|
||||
|
||||
### 3. Framework Stats (ffs)
|
||||
|
||||
Add degradation section to `scripts/framework-stats.js`:
|
||||
|
||||
```
|
||||
⚠️ DEGRADATION ANALYSIS
|
||||
Score: 45%
|
||||
Level: HIGH
|
||||
Breakdown:
|
||||
• Error patterns: 30%
|
||||
• Framework fade: 60% ← CRITICAL
|
||||
• Context quality: 40%
|
||||
• Behavioral: 20%
|
||||
• Task completion: 15%
|
||||
Recommendation: Consider checkpoint review
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
### Test Case 1: Framework Fade Detection
|
||||
- Session runs for 2 hours without MetacognitiveVerifier
|
||||
- Degradation score should be HIGH (40%+)
|
||||
|
||||
### Test Case 2: Post-Compaction
|
||||
- Session continues after compaction
|
||||
- Context quality score should be 60+
|
||||
- Overall degradation should be HIGH
|
||||
|
||||
### Test Case 3: Error Clustering
|
||||
- 5 consecutive errors occur
|
||||
- Error pattern score should be 50+
|
||||
- User should see warning
|
||||
|
||||
---
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1. **Add degradation methods** to ContextPressureMonitor.js
|
||||
2. **Update analyzeContextPressure()** to calculate degradation
|
||||
3. **Modify checkpoint reporting** to show degradation
|
||||
4. **Update framework-stats.js** to display breakdown
|
||||
5. **Test with real session data**
|
||||
6. **Document in CLAUDE_Tractatus_Maintenance_Guide.md**
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- ✅ Degradation score catches "random" performance drops
|
||||
- ✅ Framework fade detected within 30 minutes
|
||||
- ✅ Post-compaction quality loss flagged immediately
|
||||
- ✅ User warned before performance becomes unacceptable
|
||||
- ✅ False positive rate < 5%
|
||||
|
||||
---
|
||||
|
||||
**Estimated Implementation Time**: 4-6 hours
|
||||
**Priority**: HIGH (governance integrity issue)
|
||||
**Framework Audit ID**: 690964aa9eac658bf5f14cb4
|
||||
150
scripts/analyze-degradation.js
Normal file
150
scripts/analyze-degradation.js
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
 * Analyze Performance Degradation Patterns
 * Identifies missing metrics in pressure gauge
 *
 * Standalone diagnostic script: reads the framework_audit and
 * session_state collections from the local MongoDB instance and prints a
 * report of error patterns, framework component activity, and the
 * degradation metrics the current pressure gauge does not track.
 *
 * Environment overrides (optional; defaults reproduce the original
 * hard-coded analysis run):
 *   ANALYZE_SINCE   - ISO date marking the start of the analysis window
 *   ANALYZE_SESSION - session id to inspect
 */

const { MongoClient } = require('mongodb');

async function analyzeDegradation() {
  const client = new MongoClient('mongodb://localhost:27017');
  await client.connect();

  // try/finally so the connection is released even if a query throws
  // (previously an error anywhere would leak the open client).
  try {
    const db = client.db('tractatus_dev');

    // Analysis window start (overridable so the script is reusable).
    const today = new Date(process.env.ANALYZE_SINCE || '2025-11-04T00:00:00Z');

    console.log('\n=== SESSION DEGRADATION ANALYSIS ===\n');

    // 1. Error accumulation pattern (today)
    const errors = await db.collection('framework_audit').find({
      timestamp: { $gte: today },
      $or: [
        { 'decision.blocked': true },
        { 'decision.errors': { $exists: true } }
      ]
    }).toArray();

    console.log(`Total Errors Today: ${errors.length}`);

    // Group by hour
    const errorsByHour = {};
    errors.forEach(e => {
      const hour = new Date(e.timestamp).getHours();
      errorsByHour[hour] = (errorsByHour[hour] || 0) + 1;
    });

    console.log('\nErrors by Hour:');
    Object.entries(errorsByHour).forEach(([hour, count]) => {
      console.log(`  ${hour}:00 - ${count} errors`);
    });

    // 2. Consecutive error detection
    const recentAudits = await db.collection('framework_audit')
      .find({ timestamp: { $gte: today } })
      .sort({ timestamp: 1 })
      .limit(100)
      .toArray();

    let consecutiveErrors = 0;
    let maxConsecutive = 0;
    const errorClusters = [];
    let currentCluster = [];

    recentAudits.forEach((a) => {
      const hasError = a.decision?.blocked || a.decision?.errors;
      if (hasError) {
        consecutiveErrors++;
        currentCluster.push({ service: a.service, time: a.timestamp });
        maxConsecutive = Math.max(maxConsecutive, consecutiveErrors);
      } else {
        if (currentCluster.length > 2) {
          errorClusters.push([...currentCluster]);
        }
        currentCluster = [];
        consecutiveErrors = 0;
      }
    });
    // Flush a trailing cluster: if the stream ends on an error burst the
    // loop above never hits the "else" branch that records it.
    if (currentCluster.length > 2) {
      errorClusters.push([...currentCluster]);
    }

    console.log(`\nConsecutive Error Patterns:`);
    console.log(`  Max consecutive errors: ${maxConsecutive}`);
    console.log(`  Error clusters (3+ errors): ${errorClusters.length}`);

    // 3. Framework component activity
    const componentActivity = await db.collection('framework_audit').aggregate([
      { $match: { timestamp: { $gte: today } } },
      {
        $group: {
          _id: '$service',
          count: { $sum: 1 },
          lastActivity: { $max: '$timestamp' }
        }
      }
    ]).toArray();

    console.log('\nFramework Component Activity:');
    const allComponents = [
      'ContextPressureMonitor',
      'BoundaryEnforcer',
      'MetacognitiveVerifier',
      'CrossReferenceValidator',
      'InstructionPersistenceClassifier',
      'PluralisticDeliberationOrchestrator'
    ];

    allComponents.forEach(comp => {
      const activity = componentActivity.find(c => c._id === comp);
      if (activity) {
        const age = (new Date() - activity.lastActivity) / 1000 / 60;
        console.log(`  ✓ ${comp}: ${activity.count} calls, ${age.toFixed(0)}m ago`);
      } else {
        console.log(`  ✗ ${comp}: NEVER USED (framework fade!)`);
      }
    });

    // 4. Session state analysis
    const sessionId = process.env.ANALYZE_SESSION || '2025-10-07-001';
    const session = await db.collection('session_state').findOne({ sessionId });

    console.log('\n=== CURRENT PRESSURE GAUGE ===');
    console.log(`Metrics tracked:`);
    console.log(`  • tokenUsage: ${session?.contextPressure?.metrics?.tokenUsage?.percentage || 'N/A'}%`);
    console.log(`  • conversationLength: ${session?.contextPressure?.metrics?.conversationLength?.value || 0}`);
    console.log(`  • taskComplexity: ${session?.contextPressure?.metrics?.taskComplexity?.value || 0}`);
    console.log(`  • errorFrequency: ${session?.contextPressure?.metrics?.errorFrequency?.raw || 0} (simple count)`);
    console.log(`  • instructionDensity: ${session?.contextPressure?.metrics?.instructionDensity?.value || 0}`);
    console.log(`\nOverall Score: ${session?.contextPressure?.score || 0}%`);

    console.log('\n=== MISSING DEGRADATION METRICS ===');
    console.log('\n1. ERROR PATTERN ANALYSIS:');
    console.log(`  ✗ Consecutive error count (current: ${maxConsecutive})`);
    console.log('  ✗ Error clustering detection');
    console.log('  ✗ Repeated failures on similar tasks');
    console.log('  ✗ Error severity weighting');

    console.log('\n2. FRAMEWORK HEALTH:');
    console.log('  ✗ Framework component fade detection');
    console.log('  ✗ MetacognitiveVerifier usage frequency');
    console.log('  ✗ Time since last successful task completion');

    console.log('\n3. CONTEXT QUALITY:');
    console.log('  ✗ Post-compaction context degradation');
    console.log('  ✗ Knowledge domain shift detection');
    console.log('  ✗ Continued session after compaction flag');

    console.log('\n4. BEHAVIORAL INDICATORS:');
    console.log('  ✗ Tool retry rate (same tool called multiple times)');
    console.log('  ✗ File read without subsequent action rate');
    console.log('  ✗ Deployment/restart frequency (thrashing)');

    console.log('\n=== RECOMMENDATION ===');
    console.log('\nAdd new pressure metric: "degradationScore" that combines:');
    console.log('  1. Consecutive errors (weight: 0.3)');
    console.log('  2. Framework fade (weight: 0.25)');
    console.log('  3. Error clustering (weight: 0.2)');
    console.log('  4. Post-compaction flag (weight: 0.15)');
    console.log('  5. Tool retry rate (weight: 0.1)');
    console.log('\nThreshold: degradationScore > 40% = WARN user');
    console.log('Threshold: degradationScore > 60% = RECOMMEND session restart');
  } finally {
    await client.close();
  }
}

analyzeDegradation().catch(console.error);
|
||||
|
|
@ -186,7 +186,11 @@ async function main() {
|
|||
// Calculate real-time pressure if tokens provided
|
||||
let realTimePressure = null;
|
||||
if (currentTokens !== null && tokenBudget !== null) {
|
||||
realTimePressure = await ContextPressureMonitor.analyzePressure(currentTokens, tokenBudget, 1);
|
||||
realTimePressure = await ContextPressureMonitor.analyzePressure({
|
||||
tokenUsage: currentTokens,
|
||||
tokenBudget: tokenBudget,
|
||||
messageCount: 1
|
||||
});
|
||||
}
|
||||
|
||||
// Build report
|
||||
|
|
@ -347,6 +351,25 @@ async function main() {
|
|||
console.log();
|
||||
}
|
||||
|
||||
// Degradation Score (NEW)
|
||||
if (realTimePressure && realTimePressure.degradation !== undefined) {
|
||||
console.log('⚠️ DEGRADATION ANALYSIS');
|
||||
console.log(` Score: ${realTimePressure.degradation}%`);
|
||||
console.log(` Level: ${realTimePressure.degradationLevel}`);
|
||||
if (realTimePressure.degradationBreakdown) {
|
||||
console.log(' Breakdown:');
|
||||
console.log(` • Error patterns: ${realTimePressure.degradationBreakdown.errorPattern}%`);
|
||||
console.log(` • Framework fade: ${realTimePressure.degradationBreakdown.frameworkFade}%${realTimePressure.degradationBreakdown.frameworkFade >= 50 ? ' ← CRITICAL' : ''}`);
|
||||
console.log(` • Context quality: ${realTimePressure.degradationBreakdown.contextQuality}%`);
|
||||
console.log(` • Behavioral: ${realTimePressure.degradationBreakdown.behavioral}%`);
|
||||
console.log(` • Task completion: ${realTimePressure.degradationBreakdown.taskCompletion}%`);
|
||||
}
|
||||
if (realTimePressure.degradationRecommendation) {
|
||||
console.log(` Recommendation: ${realTimePressure.degradationRecommendation}`);
|
||||
}
|
||||
console.log();
|
||||
}
|
||||
|
||||
// Auto-Compact Events
|
||||
if (report.autoCompacts) {
|
||||
console.log('🔄 AUTO-COMPACT EVENTS');
|
||||
|
|
|
|||
|
|
@ -210,9 +210,9 @@ class ContextPressureMonitor {
|
|||
/**
|
||||
* Calculate current pressure level
|
||||
* @param {Object} context - Current conversation/session context
|
||||
* @returns {Object} Pressure analysis
|
||||
* @returns {Promise<Object>} Pressure analysis
|
||||
*/
|
||||
analyzePressure(context) {
|
||||
async analyzePressure(context) {
|
||||
try {
|
||||
// Calculate individual metric scores
|
||||
const metricScores = {
|
||||
|
|
@ -230,6 +230,18 @@ class ContextPressureMonitor {
|
|||
const pressureName = this._determinePressureLevel(overallPressure);
|
||||
const pressureLevel = this.pressureLevels[pressureName];
|
||||
|
||||
// Calculate degradation score (async)
|
||||
let degradation = null;
|
||||
try {
|
||||
if (this.memoryProxyInitialized) {
|
||||
degradation = await this.calculateDegradationScore();
|
||||
}
|
||||
} catch (degradationError) {
|
||||
logger.warn('[ContextPressureMonitor] Degradation calculation failed, continuing without it', {
|
||||
error: degradationError.message
|
||||
});
|
||||
}
|
||||
|
||||
// Generate recommendations
|
||||
const recommendations = this._generateRecommendations(
|
||||
pressureLevel,
|
||||
|
|
@ -261,6 +273,19 @@ class ContextPressureMonitor {
|
|||
timestamp: new Date()
|
||||
};
|
||||
|
||||
// Add degradation analysis if available
|
||||
if (degradation) {
|
||||
analysis.degradation = degradation.score;
|
||||
analysis.degradationLevel = degradation.level;
|
||||
analysis.degradationBreakdown = degradation.breakdown;
|
||||
analysis.degradationRecommendation = degradation.recommendation;
|
||||
|
||||
// Add degradation warnings to main warnings if serious
|
||||
if (degradation.level === 'CRITICAL' || degradation.level === 'HIGH') {
|
||||
analysis.warnings.push(degradation.recommendation);
|
||||
}
|
||||
}
|
||||
|
||||
// Track statistics
|
||||
this.stats.total_analyses++;
|
||||
this.stats.by_level[pressureName]++;
|
||||
|
|
@ -368,8 +393,8 @@ class ContextPressureMonitor {
|
|||
/**
|
||||
* Check if action should proceed given current pressure
|
||||
*/
|
||||
shouldProceed(action, context) {
|
||||
const analysis = this.analyzePressure(context);
|
||||
async shouldProceed(action, context) {
|
||||
const analysis = await this.analyzePressure(context);
|
||||
|
||||
if (analysis.pressureLevel >= PRESSURE_LEVELS.DANGEROUS.level) {
|
||||
return {
|
||||
|
|
@ -931,6 +956,299 @@ class ContextPressureMonitor {
|
|||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate degradation score (0-100)
|
||||
* Combines behavioral and quality metrics to detect performance degradation
|
||||
* that may not be captured by standard pressure metrics.
|
||||
*
|
||||
* @returns {Promise<Object>} Degradation analysis
|
||||
*/
|
||||
async calculateDegradationScore() {
|
||||
try {
|
||||
const scores = {
|
||||
errorPattern: await this._analyzeErrorPatterns(), // 30%
|
||||
frameworkFade: await this._detectFrameworkFade(), // 25%
|
||||
contextQuality: await this._assessContextQuality(), // 20%
|
||||
behavioral: await this._analyzeBehavior(), // 15%
|
||||
taskCompletion: await this._measureTaskCompletion() // 10%
|
||||
};
|
||||
|
||||
const degradationScore =
|
||||
scores.errorPattern * 0.30 +
|
||||
scores.frameworkFade * 0.25 +
|
||||
scores.contextQuality * 0.20 +
|
||||
scores.behavioral * 0.15 +
|
||||
scores.taskCompletion * 0.10;
|
||||
|
||||
return {
|
||||
score: Math.round(degradationScore),
|
||||
level: this._getDegradationLevel(degradationScore),
|
||||
breakdown: scores,
|
||||
recommendation: this._getDegradationRecommendation(degradationScore)
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error('[ContextPressureMonitor] Failed to calculate degradation score', {
|
||||
error: error.message
|
||||
});
|
||||
return {
|
||||
score: 0,
|
||||
level: 'LOW',
|
||||
breakdown: {},
|
||||
recommendation: 'Unable to calculate degradation score'
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze error patterns (returns 0-100)
|
||||
* Detects consecutive errors, clustering, and severity weighting
|
||||
* @private
|
||||
*/
|
||||
async _analyzeErrorPatterns() {
|
||||
try {
|
||||
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||
limit: 50,
|
||||
filter: { hasError: true }
|
||||
});
|
||||
|
||||
if (!recentErrors || recentErrors.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Consecutive errors
|
||||
let maxConsecutive = 0;
|
||||
let currentStreak = 0;
|
||||
|
||||
recentErrors.forEach((e) => {
|
||||
if (e.decision?.blocked || e.decision?.errors) {
|
||||
currentStreak++;
|
||||
maxConsecutive = Math.max(maxConsecutive, currentStreak);
|
||||
} else {
|
||||
currentStreak = 0;
|
||||
}
|
||||
});
|
||||
|
||||
// Error clustering (3+ errors in 10-minute windows)
|
||||
const errorClusters = this._detectErrorClusters(recentErrors, 10 * 60 * 1000);
|
||||
|
||||
// Error severity weighting
|
||||
const severityScore = recentErrors.reduce((sum, e) => {
|
||||
if (e.decision?.blocked) return sum + 3;
|
||||
if (e.decision?.errors) return sum + 1;
|
||||
return sum;
|
||||
}, 0);
|
||||
|
||||
// Combine metrics
|
||||
const consecutiveScore = Math.min(maxConsecutive * 10, 100);
|
||||
const clusterScore = Math.min(errorClusters.length * 15, 100);
|
||||
const severityScoreNormalized = Math.min(severityScore * 2, 100);
|
||||
|
||||
return Math.round((consecutiveScore + clusterScore + severityScoreNormalized) / 3);
|
||||
} catch (error) {
|
||||
logger.error('[ContextPressureMonitor] Error pattern analysis failed', {
|
||||
error: error.message
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect error clusters in time windows
|
||||
* @private
|
||||
*/
|
||||
_detectErrorClusters(errors, windowMs) {
|
||||
const clusters = [];
|
||||
const sortedErrors = [...errors].sort((a, b) =>
|
||||
new Date(a.timestamp) - new Date(b.timestamp)
|
||||
);
|
||||
|
||||
for (let i = 0; i < sortedErrors.length; i++) {
|
||||
const windowStart = new Date(sortedErrors[i].timestamp);
|
||||
const windowEnd = new Date(windowStart.getTime() + windowMs);
|
||||
|
||||
const errorsInWindow = sortedErrors.filter(e => {
|
||||
const errorTime = new Date(e.timestamp);
|
||||
return errorTime >= windowStart && errorTime <= windowEnd;
|
||||
});
|
||||
|
||||
if (errorsInWindow.length >= 3) {
|
||||
clusters.push({
|
||||
start: windowStart,
|
||||
end: windowEnd,
|
||||
count: errorsInWindow.length
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return clusters;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect framework fade (returns 0-100)
|
||||
* Measures time since critical components were last used
|
||||
* @private
|
||||
*/
|
||||
async _detectFrameworkFade() {
|
||||
try {
|
||||
const criticalComponents = [
|
||||
'MetacognitiveVerifier',
|
||||
'BoundaryEnforcer',
|
||||
'PluralisticDeliberationOrchestrator'
|
||||
];
|
||||
|
||||
const componentActivity = await Promise.all(
|
||||
criticalComponents.map(async (service) => {
|
||||
const logs = await this.memoryProxy.getRecentAuditLogs({
|
||||
limit: 1,
|
||||
filter: { service }
|
||||
});
|
||||
|
||||
if (logs.length === 0) return { service, ageMinutes: Infinity };
|
||||
|
||||
const age = (Date.now() - new Date(logs[0].timestamp)) / 1000 / 60;
|
||||
return { service, ageMinutes: age };
|
||||
})
|
||||
);
|
||||
|
||||
// Score: minutes since last use
|
||||
// 0-30 min = 0 points
|
||||
// 30-60 min = 50 points
|
||||
// 60+ min = 100 points
|
||||
const scores = componentActivity.map(c => {
|
||||
if (c.ageMinutes === Infinity) return 100;
|
||||
if (c.ageMinutes < 30) return 0;
|
||||
if (c.ageMinutes < 60) return 50;
|
||||
return 100;
|
||||
});
|
||||
|
||||
return Math.round(scores.reduce((a, b) => a + b, 0) / scores.length);
|
||||
} catch (error) {
|
||||
logger.error('[ContextPressureMonitor] Framework fade detection failed', {
|
||||
error: error.message
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Assess context quality (returns 0-100)
|
||||
* Detects post-compaction degradation and session age issues
|
||||
* @private
|
||||
*/
|
||||
async _assessContextQuality() {
|
||||
try {
|
||||
const session = this.sessionState || await this.memoryProxy.getSessionState();
|
||||
if (!session) return 0;
|
||||
|
||||
let score = 0;
|
||||
|
||||
// Post-compaction flag (major degradation indicator)
|
||||
if (session.autoCompactions && session.autoCompactions.length > 0) {
|
||||
const lastCompaction = session.autoCompactions[session.autoCompactions.length - 1];
|
||||
const timeSinceCompaction = (Date.now() - new Date(lastCompaction.timestamp)) / 1000 / 60;
|
||||
|
||||
// Within 60 minutes of compaction = high risk
|
||||
if (timeSinceCompaction < 60) {
|
||||
score += 60;
|
||||
} else if (timeSinceCompaction < 120) {
|
||||
score += 30;
|
||||
}
|
||||
}
|
||||
|
||||
// Session age (very long sessions accumulate drift)
|
||||
const sessionAge = (Date.now() - new Date(session.startTime)) / 1000 / 60 / 60; // hours
|
||||
if (sessionAge > 6) score += 40;
|
||||
else if (sessionAge > 4) score += 20;
|
||||
|
||||
return Math.min(score, 100);
|
||||
} catch (error) {
|
||||
logger.error('[ContextPressureMonitor] Context quality assessment failed', {
|
||||
error: error.message
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze behavioral indicators (returns 0-100)
|
||||
* Detects tool retry patterns and thrashing
|
||||
* @private
|
||||
*/
|
||||
async _analyzeBehavior() {
|
||||
try {
|
||||
const recentActions = await this.memoryProxy.getRecentAuditLogs({ limit: 50 });
|
||||
if (!recentActions || recentActions.length === 0) return 0;
|
||||
|
||||
// Tool retry rate
|
||||
const toolCalls = recentActions.map(a => a.metadata?.tool).filter(Boolean);
|
||||
let retries = 0;
|
||||
|
||||
for (let i = 2; i < toolCalls.length; i++) {
|
||||
if (toolCalls[i] === toolCalls[i-1] && toolCalls[i] === toolCalls[i-2]) {
|
||||
retries++;
|
||||
}
|
||||
}
|
||||
|
||||
const retryScore = Math.min(retries * 20, 100);
|
||||
return retryScore;
|
||||
} catch (error) {
|
||||
logger.error('[ContextPressureMonitor] Behavioral analysis failed', {
|
||||
error: error.message
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Measure task completion (returns 0-100)
|
||||
* Simple error rate metric in recent actions
|
||||
* @private
|
||||
*/
|
||||
async _measureTaskCompletion() {
|
||||
try {
|
||||
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||
limit: 20,
|
||||
filter: { hasError: true }
|
||||
});
|
||||
|
||||
if (!recentErrors) return 0;
|
||||
|
||||
// Simple metric: error rate in last 20 actions
|
||||
const errorRate = (recentErrors.length / 20) * 100;
|
||||
return Math.round(errorRate);
|
||||
} catch (error) {
|
||||
logger.error('[ContextPressureMonitor] Task completion measurement failed', {
|
||||
error: error.message
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get degradation level from score
|
||||
* @private
|
||||
*/
|
||||
_getDegradationLevel(score) {
|
||||
if (score >= 60) return 'CRITICAL';
|
||||
if (score >= 40) return 'HIGH';
|
||||
if (score >= 20) return 'MODERATE';
|
||||
return 'LOW';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get degradation recommendation
|
||||
* @private
|
||||
*/
|
||||
_getDegradationRecommendation(score) {
|
||||
if (score >= 60) {
|
||||
return 'RECOMMEND SESSION RESTART - Quality severely degraded';
|
||||
}
|
||||
if (score >= 40) {
|
||||
return 'WARN USER - Performance declining, consider checkpoint review';
|
||||
}
|
||||
return 'Monitoring - No action needed';
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset monitoring state
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue