feat: Add performance degradation detection to context pressure monitoring
Implements 5-metric weighted degradation score to detect performance issues: - Error patterns (30%): Consecutive errors, clustering, severity - Framework fade (25%): Component staleness detection - Context quality (20%): Post-compaction degradation, session age - Behavioral indicators (15%): Tool retry patterns - Task completion (10%): Recent error rate Degradation levels: LOW (<20%), MODERATE (20-40%), HIGH (40-60%), CRITICAL (60%+) Displayed in 'ffs' command output with breakdown and recommendations. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
4833ee1ff9
commit
b5d17f9dbc
4 changed files with 829 additions and 5 deletions
333
docs/plans/DEGRADATION_SCORE_IMPLEMENTATION.md
Normal file
333
docs/plans/DEGRADATION_SCORE_IMPLEMENTATION.md
Normal file
|
|
@ -0,0 +1,333 @@
|
||||||
|
# Degradation Score Implementation Plan
|
||||||
|
|
||||||
|
**Problem**: Pressure gauge showed 3% but performance severely degraded
|
||||||
|
**Root Cause**: Missing behavioral/quality metrics
|
||||||
|
**Framework Audit**: 690964aa9eac658bf5f14cb4
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Missing Metrics Identified
|
||||||
|
|
||||||
|
### 1. ERROR PATTERN ANALYSIS (30% weight)
|
||||||
|
- **Consecutive errors**: Track errors in sequence
|
||||||
|
- **Error clustering**: Detect error bursts (3+ in 10-minute window)
|
||||||
|
- **Error severity**: Weight by impact (blocked=3, warning=1)
|
||||||
|
- **Repeated failures**: Same tool/operation failing multiple times
|
||||||
|
|
||||||
|
### 2. FRAMEWORK FADE (25% weight)
|
||||||
|
- **Component staleness**: Time since MetacognitiveVerifier last used
|
||||||
|
- **BoundaryEnforcer usage**: Should be invoked for values decisions
|
||||||
|
- **Framework invocation rate**: Declining usage = fade
|
||||||
|
|
||||||
|
### 3. CONTEXT QUALITY (20% weight)
|
||||||
|
- **Post-compaction flag**: Session continued after compaction = quality loss
|
||||||
|
- **Knowledge domain shift**: Sudden change in task types
|
||||||
|
- **Session age**: Very long sessions = accumulated drift
|
||||||
|
|
||||||
|
### 4. BEHAVIORAL INDICATORS (15% weight)
|
||||||
|
- **Tool retry rate**: Same tool called 3+ times consecutively
|
||||||
|
- **Read without action**: Files read but not edited/used
|
||||||
|
- **Deployment thrashing**: Multiple restarts in short period
|
||||||
|
|
||||||
|
### 5. TASK COMPLETION (10% weight)
|
||||||
|
- **Time since last success**: How long since error-free completion
|
||||||
|
- **Success rate trend**: Declining completion rate
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation
|
||||||
|
|
||||||
|
### File: `scripts/framework-components/ContextPressureMonitor.js`
|
||||||
|
|
||||||
|
Add new method:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
/**
|
||||||
|
* Calculate degradation score (0-100)
|
||||||
|
* Combines behavioral and quality metrics
|
||||||
|
*/
|
||||||
|
async calculateDegradationScore() {
|
||||||
|
const scores = {
|
||||||
|
errorPattern: await this._analyzeErrorPatterns(), // 30%
|
||||||
|
frameworkFade: await this._detectFrameworkFade(), // 25%
|
||||||
|
contextQuality: await this._assessContextQuality(), // 20%
|
||||||
|
behavioral: await this._analyzeBehavior(), // 15%
|
||||||
|
taskCompletion: await this._measureTaskCompletion() // 10%
|
||||||
|
};
|
||||||
|
|
||||||
|
const degradationScore =
|
||||||
|
scores.errorPattern * 0.30 +
|
||||||
|
scores.frameworkFade * 0.25 +
|
||||||
|
scores.contextQuality * 0.20 +
|
||||||
|
scores.behavioral * 0.15 +
|
||||||
|
scores.taskCompletion * 0.10;
|
||||||
|
|
||||||
|
return {
|
||||||
|
score: Math.round(degradationScore),
|
||||||
|
level: this._getDegradationLevel(degradationScore),
|
||||||
|
breakdown: scores,
|
||||||
|
recommendation: this._getRecommendation(degradationScore)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze error patterns (returns 0-100)
|
||||||
|
*/
|
||||||
|
async _analyzeErrorPatterns() {
|
||||||
|
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||||
|
limit: 50,
|
||||||
|
filter: { hasError: true }
|
||||||
|
});
|
||||||
|
|
||||||
|
// Consecutive errors
|
||||||
|
let consecutive = 0;
|
||||||
|
let maxConsecutive = 0;
|
||||||
|
let currentStreak = 0;
|
||||||
|
|
||||||
|
recentErrors.forEach((e, i) => {
|
||||||
|
if (e.decision?.blocked || e.decision?.errors) {
|
||||||
|
currentStreak++;
|
||||||
|
maxConsecutive = Math.max(maxConsecutive, currentStreak);
|
||||||
|
} else {
|
||||||
|
currentStreak = 0;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Error clustering (3+ errors in 10-minute windows)
|
||||||
|
const errorClusters = this._detectErrorClusters(recentErrors, 10 * 60 * 1000);
|
||||||
|
|
||||||
|
// Error severity weighting
|
||||||
|
const severityScore = recentErrors.reduce((sum, e) => {
|
||||||
|
if (e.decision?.blocked) return sum + 3;
|
||||||
|
if (e.decision?.errors) return sum + 1;
|
||||||
|
return sum;
|
||||||
|
}, 0);
|
||||||
|
|
||||||
|
// Combine metrics
|
||||||
|
const consecutiveScore = Math.min(maxConsecutive * 10, 100);
|
||||||
|
const clusterScore = Math.min(errorClusters.length * 15, 100);
|
||||||
|
const severityScoreNormalized = Math.min(severityScore * 2, 100);
|
||||||
|
|
||||||
|
return Math.round((consecutiveScore + clusterScore + severityScoreNormalized) / 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect framework fade (returns 0-100)
|
||||||
|
*/
|
||||||
|
async _detectFrameworkFade() {
|
||||||
|
const criticalComponents = [
|
||||||
|
'MetacognitiveVerifier',
|
||||||
|
'BoundaryEnforcer',
|
||||||
|
'PluralisticDeliberationOrchestrator'
|
||||||
|
];
|
||||||
|
|
||||||
|
const componentActivity = await Promise.all(
|
||||||
|
criticalComponents.map(async (service) => {
|
||||||
|
const logs = await this.memoryProxy.getRecentAuditLogs({
|
||||||
|
limit: 1,
|
||||||
|
filter: { service }
|
||||||
|
});
|
||||||
|
|
||||||
|
if (logs.length === 0) return { service, ageMinutes: Infinity };
|
||||||
|
|
||||||
|
const age = (Date.now() - logs[0].timestamp) / 1000 / 60;
|
||||||
|
return { service, ageMinutes: age };
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
// Score: minutes since last use
|
||||||
|
// 0-30 min = 0 points
|
||||||
|
// 30-60 min = 50 points
|
||||||
|
// 60+ min = 100 points
|
||||||
|
const scores = componentActivity.map(c => {
|
||||||
|
if (c.ageMinutes === Infinity) return 100;
|
||||||
|
if (c.ageMinutes < 30) return 0;
|
||||||
|
if (c.ageMinutes < 60) return 50;
|
||||||
|
return 100;
|
||||||
|
});
|
||||||
|
|
||||||
|
return Math.round(scores.reduce((a, b) => a + b, 0) / scores.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assess context quality (returns 0-100)
|
||||||
|
*/
|
||||||
|
async _assessContextQuality() {
|
||||||
|
const session = await this.memoryProxy.getSessionState();
|
||||||
|
|
||||||
|
let score = 0;
|
||||||
|
|
||||||
|
// Post-compaction flag (major degradation indicator)
|
||||||
|
if (session.autoCompactions && session.autoCompactions.length > 0) {
|
||||||
|
const lastCompaction = session.autoCompactions[session.autoCompactions.length - 1];
|
||||||
|
const timeSinceCompaction = (Date.now() - lastCompaction.timestamp) / 1000 / 60;
|
||||||
|
|
||||||
|
// Within 60 minutes of compaction = high risk
|
||||||
|
if (timeSinceCompaction < 60) {
|
||||||
|
score += 60;
|
||||||
|
} else if (timeSinceCompaction < 120) {
|
||||||
|
score += 30;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Session age (very long sessions accumulate drift)
|
||||||
|
const sessionAge = (Date.now() - session.startTime) / 1000 / 60 / 60; // hours
|
||||||
|
if (sessionAge > 6) score += 40;
|
||||||
|
else if (sessionAge > 4) score += 20;
|
||||||
|
|
||||||
|
return Math.min(score, 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze behavioral indicators (returns 0-100)
|
||||||
|
*/
|
||||||
|
async _analyzeBehavior() {
|
||||||
|
const recentActions = await this.memoryProxy.getRecentAuditLogs({ limit: 50 });
|
||||||
|
|
||||||
|
// Tool retry rate
|
||||||
|
const toolCalls = recentActions.map(a => a.metadata?.tool);
|
||||||
|
let retries = 0;
|
||||||
|
for (let i = 2; i < toolCalls.length; i++) {
|
||||||
|
if (toolCalls[i] === toolCalls[i-1] && toolCalls[i] === toolCalls[i-2]) {
|
||||||
|
retries++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const retryScore = Math.min(retries * 20, 100);
|
||||||
|
return retryScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measure task completion (returns 0-100)
|
||||||
|
*/
|
||||||
|
async _measureTaskCompletion() {
|
||||||
|
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||||
|
limit: 20,
|
||||||
|
filter: { hasError: true }
|
||||||
|
});
|
||||||
|
|
||||||
|
// Simple metric: error rate in last 20 actions
|
||||||
|
const errorRate = (recentErrors.length / 20) * 100;
|
||||||
|
return Math.round(errorRate);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get degradation level
|
||||||
|
*/
|
||||||
|
_getDegradationLevel(score) {
|
||||||
|
if (score >= 60) return 'CRITICAL';
|
||||||
|
if (score >= 40) return 'HIGH';
|
||||||
|
if (score >= 20) return 'MODERATE';
|
||||||
|
return 'LOW';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get recommendation
|
||||||
|
*/
|
||||||
|
_getRecommendation(score) {
|
||||||
|
if (score >= 60) {
|
||||||
|
return 'RECOMMEND SESSION RESTART - Quality severely degraded';
|
||||||
|
}
|
||||||
|
if (score >= 40) {
|
||||||
|
return 'WARN USER - Performance declining, consider checkpoint review';
|
||||||
|
}
|
||||||
|
return 'Monitoring - No action needed';
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Integration Points
|
||||||
|
|
||||||
|
### 1. Add to Pressure Analysis
|
||||||
|
|
||||||
|
Modify `analyzeContextPressure()` to include degradationScore:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
async analyzeContextPressure(tokenCount = null, tokenBudget = 200000) {
|
||||||
|
// ... existing metrics ...
|
||||||
|
|
||||||
|
const degradation = await this.calculateDegradationScore();
|
||||||
|
|
||||||
|
return {
|
||||||
|
level: this._determineLevel(overallScore),
|
||||||
|
score: overallScore,
|
||||||
|
degradation: degradation.score,
|
||||||
|
degradationLevel: degradation.level,
|
||||||
|
degradationBreakdown: degradation.breakdown,
|
||||||
|
recommendation: degradation.recommendation,
|
||||||
|
// ... rest of response
|
||||||
|
};
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Token Checkpoint Reporting
|
||||||
|
|
||||||
|
Update checkpoint messages to include degradation:
|
||||||
|
|
||||||
|
```
|
||||||
|
📊 Context Pressure: NORMAL (4%) | Degradation: HIGH (45%) | Tokens: 50000/200000
|
||||||
|
⚠️ WARNING: Framework fade detected - MetacognitiveVerifier unused for 45 minutes
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Framework Stats (ffs)
|
||||||
|
|
||||||
|
Add degradation section to `scripts/framework-stats.js`:
|
||||||
|
|
||||||
|
```
|
||||||
|
⚠️ DEGRADATION ANALYSIS
|
||||||
|
Score: 45%
|
||||||
|
Level: HIGH
|
||||||
|
Breakdown:
|
||||||
|
• Error patterns: 30%
|
||||||
|
• Framework fade: 60% ← CRITICAL
|
||||||
|
• Context quality: 40%
|
||||||
|
• Behavioral: 20%
|
||||||
|
• Task completion: 15%
|
||||||
|
Recommendation: Consider checkpoint review
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
### Test Case 1: Framework Fade Detection
|
||||||
|
- Session runs for 2 hours without MetacognitiveVerifier
|
||||||
|
- Degradation score should be HIGH (40%+)
|
||||||
|
|
||||||
|
### Test Case 2: Post-Compaction
|
||||||
|
- Session continues after compaction
|
||||||
|
- Context quality score should be 60+
|
||||||
|
- Overall degradation should be HIGH
|
||||||
|
|
||||||
|
### Test Case 3: Error Clustering
|
||||||
|
- 5 consecutive errors occur
|
||||||
|
- Error pattern score should be 50+
|
||||||
|
- User should see warning
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
1. **Add degradation methods** to ContextPressureMonitor.js
|
||||||
|
2. **Update analyzeContextPressure()** to calculate degradation
|
||||||
|
3. **Modify checkpoint reporting** to show degradation
|
||||||
|
4. **Update framework-stats.js** to display breakdown
|
||||||
|
5. **Test with real session data**
|
||||||
|
6. **Document in CLAUDE_Tractatus_Maintenance_Guide.md**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Success Criteria
|
||||||
|
|
||||||
|
- ✅ Degradation score catches "random" performance drops
|
||||||
|
- ✅ Framework fade detected within 30 minutes
|
||||||
|
- ✅ Post-compaction quality loss flagged immediately
|
||||||
|
- ✅ User warned before performance becomes unacceptable
|
||||||
|
- ✅ False positive rate < 5%
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Estimated Implementation Time**: 4-6 hours
|
||||||
|
**Priority**: HIGH (governance integrity issue)
|
||||||
|
**Framework Audit ID**: 690964aa9eac658bf5f14cb4
|
||||||
150
scripts/analyze-degradation.js
Normal file
150
scripts/analyze-degradation.js
Normal file
|
|
@ -0,0 +1,150 @@
|
||||||
|
/**
 * Analyze Performance Degradation Patterns
 * Identifies missing metrics in pressure gauge
 *
 * Standalone diagnostic: connects to the local MongoDB instance, scans the
 * framework_audit collection for one day, and prints a console report of
 * error patterns, framework component activity, and the metrics the current
 * pressure gauge does not track.
 */

const { MongoClient } = require('mongodb');

async function analyzeDegradation() {
  const client = new MongoClient('mongodb://localhost:27017');
  await client.connect();

  try {
    const db = client.db('tractatus_dev');

    // NOTE(review): the analysis window is pinned to a fixed date even though
    // the report labels say "Today" — confirm whether this should be derived
    // from Date.now() instead.
    const today = new Date('2025-11-04T00:00:00Z');

    console.log('\n=== SESSION DEGRADATION ANALYSIS ===\n');

    // 1. Error accumulation pattern (today)
    const errors = await db.collection('framework_audit').find({
      timestamp: { $gte: today },
      $or: [
        { 'decision.blocked': true },
        { 'decision.errors': { $exists: true } }
      ]
    }).toArray();

    console.log(`Total Errors Today: ${errors.length}`);

    // Group by hour (local time; the query filter above is in UTC)
    const errorsByHour = {};
    errors.forEach(e => {
      const hour = new Date(e.timestamp).getHours();
      errorsByHour[hour] = (errorsByHour[hour] || 0) + 1;
    });

    console.log('\nErrors by Hour:');
    Object.entries(errorsByHour).forEach(([hour, count]) => {
      console.log(`  ${hour}:00 - ${count} errors`);
    });

    // 2. Consecutive error detection
    const recentAudits = await db.collection('framework_audit')
      .find({ timestamp: { $gte: today } })
      .sort({ timestamp: 1 })
      .limit(100)
      .toArray();

    let consecutiveErrors = 0;
    let maxConsecutive = 0;
    const errorClusters = [];
    let currentCluster = [];

    recentAudits.forEach(a => {
      const hasError = a.decision?.blocked || a.decision?.errors;
      if (hasError) {
        consecutiveErrors++;
        currentCluster.push({ service: a.service, time: a.timestamp });
        maxConsecutive = Math.max(maxConsecutive, consecutiveErrors);
      } else {
        if (currentCluster.length > 2) {
          errorClusters.push([...currentCluster]);
        }
        currentCluster = [];
        consecutiveErrors = 0;
      }
    });
    // BUGFIX: flush a trailing cluster — previously a run of errors at the
    // very end of the audit list was silently dropped from errorClusters.
    if (currentCluster.length > 2) {
      errorClusters.push([...currentCluster]);
    }

    console.log('\nConsecutive Error Patterns:');
    console.log(`  Max consecutive errors: ${maxConsecutive}`);
    console.log(`  Error clusters (3+ errors): ${errorClusters.length}`);

    // 3. Framework component activity
    const componentActivity = await db.collection('framework_audit').aggregate([
      { $match: { timestamp: { $gte: today } } },
      {
        $group: {
          _id: '$service',
          count: { $sum: 1 },
          lastActivity: { $max: '$timestamp' }
        }
      }
    ]).toArray();

    console.log('\nFramework Component Activity:');
    const allComponents = [
      'ContextPressureMonitor',
      'BoundaryEnforcer',
      'MetacognitiveVerifier',
      'CrossReferenceValidator',
      'InstructionPersistenceClassifier',
      'PluralisticDeliberationOrchestrator'
    ];

    allComponents.forEach(comp => {
      const activity = componentActivity.find(c => c._id === comp);
      if (activity) {
        const age = (new Date() - activity.lastActivity) / 1000 / 60;
        console.log(`  ✓ ${comp}: ${activity.count} calls, ${age.toFixed(0)}m ago`);
      } else {
        console.log(`  ✗ ${comp}: NEVER USED (framework fade!)`);
      }
    });

    // 4. Session state analysis
    // NOTE(review): sessionId is hard-coded — confirm this should not be the
    // currently active session instead.
    const session = await db.collection('session_state').findOne({ sessionId: '2025-10-07-001' });

    console.log('\n=== CURRENT PRESSURE GAUGE ===');
    console.log('Metrics tracked:');
    console.log(`  • tokenUsage: ${session?.contextPressure?.metrics?.tokenUsage?.percentage || 'N/A'}%`);
    console.log(`  • conversationLength: ${session?.contextPressure?.metrics?.conversationLength?.value || 0}`);
    console.log(`  • taskComplexity: ${session?.contextPressure?.metrics?.taskComplexity?.value || 0}`);
    console.log(`  • errorFrequency: ${session?.contextPressure?.metrics?.errorFrequency?.raw || 0} (simple count)`);
    console.log(`  • instructionDensity: ${session?.contextPressure?.metrics?.instructionDensity?.value || 0}`);
    console.log(`\nOverall Score: ${session?.contextPressure?.score || 0}%`);

    console.log('\n=== MISSING DEGRADATION METRICS ===');
    console.log('\n1. ERROR PATTERN ANALYSIS:');
    console.log(`  ✗ Consecutive error count (current: ${maxConsecutive})`);
    console.log('  ✗ Error clustering detection');
    console.log('  ✗ Repeated failures on similar tasks');
    console.log('  ✗ Error severity weighting');

    console.log('\n2. FRAMEWORK HEALTH:');
    console.log('  ✗ Framework component fade detection');
    console.log('  ✗ MetacognitiveVerifier usage frequency');
    console.log('  ✗ Time since last successful task completion');

    console.log('\n3. CONTEXT QUALITY:');
    console.log('  ✗ Post-compaction context degradation');
    console.log('  ✗ Knowledge domain shift detection');
    console.log('  ✗ Continued session after compaction flag');

    console.log('\n4. BEHAVIORAL INDICATORS:');
    console.log('  ✗ Tool retry rate (same tool called multiple times)');
    console.log('  ✗ File read without subsequent action rate');
    console.log('  ✗ Deployment/restart frequency (thrashing)');

    console.log('\n=== RECOMMENDATION ===');
    console.log('\nAdd new pressure metric: "degradationScore" that combines:');
    console.log('  1. Consecutive errors (weight: 0.3)');
    console.log('  2. Framework fade (weight: 0.25)');
    console.log('  3. Error clustering (weight: 0.2)');
    console.log('  4. Post-compaction flag (weight: 0.15)');
    console.log('  5. Tool retry rate (weight: 0.1)');
    console.log('\nThreshold: degradationScore > 40% = WARN user');
    console.log('Threshold: degradationScore > 60% = RECOMMEND session restart');
  } finally {
    // BUGFIX: close the client even when a query throws; previously any error
    // above skipped close(), leaking the connection and hanging the process.
    await client.close();
  }
}

analyzeDegradation().catch(console.error);
|
||||||
|
|
@ -186,7 +186,11 @@ async function main() {
|
||||||
// Calculate real-time pressure if tokens provided
|
// Calculate real-time pressure if tokens provided
|
||||||
let realTimePressure = null;
|
let realTimePressure = null;
|
||||||
if (currentTokens !== null && tokenBudget !== null) {
|
if (currentTokens !== null && tokenBudget !== null) {
|
||||||
realTimePressure = await ContextPressureMonitor.analyzePressure(currentTokens, tokenBudget, 1);
|
realTimePressure = await ContextPressureMonitor.analyzePressure({
|
||||||
|
tokenUsage: currentTokens,
|
||||||
|
tokenBudget: tokenBudget,
|
||||||
|
messageCount: 1
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build report
|
// Build report
|
||||||
|
|
@ -347,6 +351,25 @@ async function main() {
|
||||||
console.log();
|
console.log();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Degradation Score (NEW)
|
||||||
|
if (realTimePressure && realTimePressure.degradation !== undefined) {
|
||||||
|
console.log('⚠️ DEGRADATION ANALYSIS');
|
||||||
|
console.log(` Score: ${realTimePressure.degradation}%`);
|
||||||
|
console.log(` Level: ${realTimePressure.degradationLevel}`);
|
||||||
|
if (realTimePressure.degradationBreakdown) {
|
||||||
|
console.log(' Breakdown:');
|
||||||
|
console.log(` • Error patterns: ${realTimePressure.degradationBreakdown.errorPattern}%`);
|
||||||
|
console.log(` • Framework fade: ${realTimePressure.degradationBreakdown.frameworkFade}%${realTimePressure.degradationBreakdown.frameworkFade >= 50 ? ' ← CRITICAL' : ''}`);
|
||||||
|
console.log(` • Context quality: ${realTimePressure.degradationBreakdown.contextQuality}%`);
|
||||||
|
console.log(` • Behavioral: ${realTimePressure.degradationBreakdown.behavioral}%`);
|
||||||
|
console.log(` • Task completion: ${realTimePressure.degradationBreakdown.taskCompletion}%`);
|
||||||
|
}
|
||||||
|
if (realTimePressure.degradationRecommendation) {
|
||||||
|
console.log(` Recommendation: ${realTimePressure.degradationRecommendation}`);
|
||||||
|
}
|
||||||
|
console.log();
|
||||||
|
}
|
||||||
|
|
||||||
// Auto-Compact Events
|
// Auto-Compact Events
|
||||||
if (report.autoCompacts) {
|
if (report.autoCompacts) {
|
||||||
console.log('🔄 AUTO-COMPACT EVENTS');
|
console.log('🔄 AUTO-COMPACT EVENTS');
|
||||||
|
|
|
||||||
|
|
@ -210,9 +210,9 @@ class ContextPressureMonitor {
|
||||||
/**
|
/**
|
||||||
* Calculate current pressure level
|
* Calculate current pressure level
|
||||||
* @param {Object} context - Current conversation/session context
|
* @param {Object} context - Current conversation/session context
|
||||||
* @returns {Object} Pressure analysis
|
* @returns {Promise<Object>} Pressure analysis
|
||||||
*/
|
*/
|
||||||
analyzePressure(context) {
|
async analyzePressure(context) {
|
||||||
try {
|
try {
|
||||||
// Calculate individual metric scores
|
// Calculate individual metric scores
|
||||||
const metricScores = {
|
const metricScores = {
|
||||||
|
|
@ -230,6 +230,18 @@ class ContextPressureMonitor {
|
||||||
const pressureName = this._determinePressureLevel(overallPressure);
|
const pressureName = this._determinePressureLevel(overallPressure);
|
||||||
const pressureLevel = this.pressureLevels[pressureName];
|
const pressureLevel = this.pressureLevels[pressureName];
|
||||||
|
|
||||||
|
// Calculate degradation score (async)
|
||||||
|
let degradation = null;
|
||||||
|
try {
|
||||||
|
if (this.memoryProxyInitialized) {
|
||||||
|
degradation = await this.calculateDegradationScore();
|
||||||
|
}
|
||||||
|
} catch (degradationError) {
|
||||||
|
logger.warn('[ContextPressureMonitor] Degradation calculation failed, continuing without it', {
|
||||||
|
error: degradationError.message
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Generate recommendations
|
// Generate recommendations
|
||||||
const recommendations = this._generateRecommendations(
|
const recommendations = this._generateRecommendations(
|
||||||
pressureLevel,
|
pressureLevel,
|
||||||
|
|
@ -261,6 +273,19 @@ class ContextPressureMonitor {
|
||||||
timestamp: new Date()
|
timestamp: new Date()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Add degradation analysis if available
|
||||||
|
if (degradation) {
|
||||||
|
analysis.degradation = degradation.score;
|
||||||
|
analysis.degradationLevel = degradation.level;
|
||||||
|
analysis.degradationBreakdown = degradation.breakdown;
|
||||||
|
analysis.degradationRecommendation = degradation.recommendation;
|
||||||
|
|
||||||
|
// Add degradation warnings to main warnings if serious
|
||||||
|
if (degradation.level === 'CRITICAL' || degradation.level === 'HIGH') {
|
||||||
|
analysis.warnings.push(degradation.recommendation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Track statistics
|
// Track statistics
|
||||||
this.stats.total_analyses++;
|
this.stats.total_analyses++;
|
||||||
this.stats.by_level[pressureName]++;
|
this.stats.by_level[pressureName]++;
|
||||||
|
|
@ -368,8 +393,8 @@ class ContextPressureMonitor {
|
||||||
/**
|
/**
|
||||||
* Check if action should proceed given current pressure
|
* Check if action should proceed given current pressure
|
||||||
*/
|
*/
|
||||||
shouldProceed(action, context) {
|
async shouldProceed(action, context) {
|
||||||
const analysis = this.analyzePressure(context);
|
const analysis = await this.analyzePressure(context);
|
||||||
|
|
||||||
if (analysis.pressureLevel >= PRESSURE_LEVELS.DANGEROUS.level) {
|
if (analysis.pressureLevel >= PRESSURE_LEVELS.DANGEROUS.level) {
|
||||||
return {
|
return {
|
||||||
|
|
@ -931,6 +956,299 @@ class ContextPressureMonitor {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate degradation score (0-100)
|
||||||
|
* Combines behavioral and quality metrics to detect performance degradation
|
||||||
|
* that may not be captured by standard pressure metrics.
|
||||||
|
*
|
||||||
|
* @returns {Promise<Object>} Degradation analysis
|
||||||
|
*/
|
||||||
|
async calculateDegradationScore() {
|
||||||
|
try {
|
||||||
|
const scores = {
|
||||||
|
errorPattern: await this._analyzeErrorPatterns(), // 30%
|
||||||
|
frameworkFade: await this._detectFrameworkFade(), // 25%
|
||||||
|
contextQuality: await this._assessContextQuality(), // 20%
|
||||||
|
behavioral: await this._analyzeBehavior(), // 15%
|
||||||
|
taskCompletion: await this._measureTaskCompletion() // 10%
|
||||||
|
};
|
||||||
|
|
||||||
|
const degradationScore =
|
||||||
|
scores.errorPattern * 0.30 +
|
||||||
|
scores.frameworkFade * 0.25 +
|
||||||
|
scores.contextQuality * 0.20 +
|
||||||
|
scores.behavioral * 0.15 +
|
||||||
|
scores.taskCompletion * 0.10;
|
||||||
|
|
||||||
|
return {
|
||||||
|
score: Math.round(degradationScore),
|
||||||
|
level: this._getDegradationLevel(degradationScore),
|
||||||
|
breakdown: scores,
|
||||||
|
recommendation: this._getDegradationRecommendation(degradationScore)
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('[ContextPressureMonitor] Failed to calculate degradation score', {
|
||||||
|
error: error.message
|
||||||
|
});
|
||||||
|
return {
|
||||||
|
score: 0,
|
||||||
|
level: 'LOW',
|
||||||
|
breakdown: {},
|
||||||
|
recommendation: 'Unable to calculate degradation score'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze error patterns (returns 0-100)
|
||||||
|
* Detects consecutive errors, clustering, and severity weighting
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
async _analyzeErrorPatterns() {
|
||||||
|
try {
|
||||||
|
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||||
|
limit: 50,
|
||||||
|
filter: { hasError: true }
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!recentErrors || recentErrors.length === 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consecutive errors
|
||||||
|
let maxConsecutive = 0;
|
||||||
|
let currentStreak = 0;
|
||||||
|
|
||||||
|
recentErrors.forEach((e) => {
|
||||||
|
if (e.decision?.blocked || e.decision?.errors) {
|
||||||
|
currentStreak++;
|
||||||
|
maxConsecutive = Math.max(maxConsecutive, currentStreak);
|
||||||
|
} else {
|
||||||
|
currentStreak = 0;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Error clustering (3+ errors in 10-minute windows)
|
||||||
|
const errorClusters = this._detectErrorClusters(recentErrors, 10 * 60 * 1000);
|
||||||
|
|
||||||
|
// Error severity weighting
|
||||||
|
const severityScore = recentErrors.reduce((sum, e) => {
|
||||||
|
if (e.decision?.blocked) return sum + 3;
|
||||||
|
if (e.decision?.errors) return sum + 1;
|
||||||
|
return sum;
|
||||||
|
}, 0);
|
||||||
|
|
||||||
|
// Combine metrics
|
||||||
|
const consecutiveScore = Math.min(maxConsecutive * 10, 100);
|
||||||
|
const clusterScore = Math.min(errorClusters.length * 15, 100);
|
||||||
|
const severityScoreNormalized = Math.min(severityScore * 2, 100);
|
||||||
|
|
||||||
|
return Math.round((consecutiveScore + clusterScore + severityScoreNormalized) / 3);
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('[ContextPressureMonitor] Error pattern analysis failed', {
|
||||||
|
error: error.message
|
||||||
|
});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect error clusters in time windows
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
_detectErrorClusters(errors, windowMs) {
|
||||||
|
const clusters = [];
|
||||||
|
const sortedErrors = [...errors].sort((a, b) =>
|
||||||
|
new Date(a.timestamp) - new Date(b.timestamp)
|
||||||
|
);
|
||||||
|
|
||||||
|
for (let i = 0; i < sortedErrors.length; i++) {
|
||||||
|
const windowStart = new Date(sortedErrors[i].timestamp);
|
||||||
|
const windowEnd = new Date(windowStart.getTime() + windowMs);
|
||||||
|
|
||||||
|
const errorsInWindow = sortedErrors.filter(e => {
|
||||||
|
const errorTime = new Date(e.timestamp);
|
||||||
|
return errorTime >= windowStart && errorTime <= windowEnd;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (errorsInWindow.length >= 3) {
|
||||||
|
clusters.push({
|
||||||
|
start: windowStart,
|
||||||
|
end: windowEnd,
|
||||||
|
count: errorsInWindow.length
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect framework fade (returns 0-100)
|
||||||
|
* Measures time since critical components were last used
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
async _detectFrameworkFade() {
|
||||||
|
try {
|
||||||
|
const criticalComponents = [
|
||||||
|
'MetacognitiveVerifier',
|
||||||
|
'BoundaryEnforcer',
|
||||||
|
'PluralisticDeliberationOrchestrator'
|
||||||
|
];
|
||||||
|
|
||||||
|
const componentActivity = await Promise.all(
|
||||||
|
criticalComponents.map(async (service) => {
|
||||||
|
const logs = await this.memoryProxy.getRecentAuditLogs({
|
||||||
|
limit: 1,
|
||||||
|
filter: { service }
|
||||||
|
});
|
||||||
|
|
||||||
|
if (logs.length === 0) return { service, ageMinutes: Infinity };
|
||||||
|
|
||||||
|
const age = (Date.now() - new Date(logs[0].timestamp)) / 1000 / 60;
|
||||||
|
return { service, ageMinutes: age };
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
// Score: minutes since last use
|
||||||
|
// 0-30 min = 0 points
|
||||||
|
// 30-60 min = 50 points
|
||||||
|
// 60+ min = 100 points
|
||||||
|
const scores = componentActivity.map(c => {
|
||||||
|
if (c.ageMinutes === Infinity) return 100;
|
||||||
|
if (c.ageMinutes < 30) return 0;
|
||||||
|
if (c.ageMinutes < 60) return 50;
|
||||||
|
return 100;
|
||||||
|
});
|
||||||
|
|
||||||
|
return Math.round(scores.reduce((a, b) => a + b, 0) / scores.length);
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('[ContextPressureMonitor] Framework fade detection failed', {
|
||||||
|
error: error.message
|
||||||
|
});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assess context quality (returns 0-100)
|
||||||
|
* Detects post-compaction degradation and session age issues
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
async _assessContextQuality() {
|
||||||
|
try {
|
||||||
|
const session = this.sessionState || await this.memoryProxy.getSessionState();
|
||||||
|
if (!session) return 0;
|
||||||
|
|
||||||
|
let score = 0;
|
||||||
|
|
||||||
|
// Post-compaction flag (major degradation indicator)
|
||||||
|
if (session.autoCompactions && session.autoCompactions.length > 0) {
|
||||||
|
const lastCompaction = session.autoCompactions[session.autoCompactions.length - 1];
|
||||||
|
const timeSinceCompaction = (Date.now() - new Date(lastCompaction.timestamp)) / 1000 / 60;
|
||||||
|
|
||||||
|
// Within 60 minutes of compaction = high risk
|
||||||
|
if (timeSinceCompaction < 60) {
|
||||||
|
score += 60;
|
||||||
|
} else if (timeSinceCompaction < 120) {
|
||||||
|
score += 30;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Session age (very long sessions accumulate drift)
|
||||||
|
const sessionAge = (Date.now() - new Date(session.startTime)) / 1000 / 60 / 60; // hours
|
||||||
|
if (sessionAge > 6) score += 40;
|
||||||
|
else if (sessionAge > 4) score += 20;
|
||||||
|
|
||||||
|
return Math.min(score, 100);
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('[ContextPressureMonitor] Context quality assessment failed', {
|
||||||
|
error: error.message
|
||||||
|
});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze behavioral indicators (returns 0-100)
|
||||||
|
* Detects tool retry patterns and thrashing
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
async _analyzeBehavior() {
|
||||||
|
try {
|
||||||
|
const recentActions = await this.memoryProxy.getRecentAuditLogs({ limit: 50 });
|
||||||
|
if (!recentActions || recentActions.length === 0) return 0;
|
||||||
|
|
||||||
|
// Tool retry rate
|
||||||
|
const toolCalls = recentActions.map(a => a.metadata?.tool).filter(Boolean);
|
||||||
|
let retries = 0;
|
||||||
|
|
||||||
|
for (let i = 2; i < toolCalls.length; i++) {
|
||||||
|
if (toolCalls[i] === toolCalls[i-1] && toolCalls[i] === toolCalls[i-2]) {
|
||||||
|
retries++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const retryScore = Math.min(retries * 20, 100);
|
||||||
|
return retryScore;
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('[ContextPressureMonitor] Behavioral analysis failed', {
|
||||||
|
error: error.message
|
||||||
|
});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Measure task completion (returns 0-100)
|
||||||
|
* Simple error rate metric in recent actions
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
async _measureTaskCompletion() {
|
||||||
|
try {
|
||||||
|
const recentErrors = await this.memoryProxy.getRecentAuditLogs({
|
||||||
|
limit: 20,
|
||||||
|
filter: { hasError: true }
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!recentErrors) return 0;
|
||||||
|
|
||||||
|
// Simple metric: error rate in last 20 actions
|
||||||
|
const errorRate = (recentErrors.length / 20) * 100;
|
||||||
|
return Math.round(errorRate);
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('[ContextPressureMonitor] Task completion measurement failed', {
|
||||||
|
error: error.message
|
||||||
|
});
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get degradation level from score
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
_getDegradationLevel(score) {
|
||||||
|
if (score >= 60) return 'CRITICAL';
|
||||||
|
if (score >= 40) return 'HIGH';
|
||||||
|
if (score >= 20) return 'MODERATE';
|
||||||
|
return 'LOW';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get degradation recommendation
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
_getDegradationRecommendation(score) {
|
||||||
|
if (score >= 60) {
|
||||||
|
return 'RECOMMEND SESSION RESTART - Quality severely degraded';
|
||||||
|
}
|
||||||
|
if (score >= 40) {
|
||||||
|
return 'WARN USER - Performance declining, consider checkpoint review';
|
||||||
|
}
|
||||||
|
return 'Monitoring - No action needed';
|
||||||
|
}
|
||||||
|
|
||||||
  /**
   * Reset monitoring state
   */
|
||||||
Loading…
Add table
Reference in a new issue