- Make analyzeSession() async in check-session-pressure.js
- Add await before monitor.analyzePressure() call
- Wrap main execution in async IIFE with error handling
- Update all ContextPressureMonitor tests to use async/await
- Fix MetacognitiveVerifier edge case assertion (toBeLessThanOrEqual)

Fixes TypeError: Cannot read properties of undefined (reading 'tokenUsage')
that was blocking session initialization.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
673 lines
20 KiB
JavaScript
673 lines
20 KiB
JavaScript
/**
 * Unit Tests for MetacognitiveVerifier
 * Tests metacognitive self-verification before action execution
 */
|
|
|
|
const verifier = require('../../src/services/MetacognitiveVerifier.service');
|
|
|
|
describe('MetacognitiveVerifier', () => {
|
|
// The verifier under test is a shared singleton instance, so this hook has
// nothing to construct; it is kept as an explicit (empty) setup point.
beforeEach(function noPerTestSetup() {
});
|
|
|
|
describe('Alignment Verification', () => {
  // Both scenarios state the same reasoning (the user asked for port 27027)
  // and differ only in the port the action actually targets.
  const statedReasoning = () => ({
    explanation: 'User explicitly requested port 27027',
    evidence: ['user instruction: use port 27027']
  });

  // Builds a database_connect action for the given port.
  const connectOn = (port) => ({
    type: 'database_connect',
    parameters: { port }
  });

  test('should verify action aligns with stated reasoning', () => {
    const { checks } = verifier.verify(connectOn(27027), statedReasoning(), {});

    expect(checks.alignment.passed).toBe(true);
    expect(checks.alignment.score).toBeGreaterThan(0.7);
  });

  test('should detect misalignment between action and reasoning', () => {
    const { checks } = verifier.verify(connectOn(27017), statedReasoning(), {});

    expect(checks.alignment.passed).toBe(false);
    expect(checks.alignment.issues).toContain('action parameters conflict with reasoning');
  });
});
|
|
|
|
describe('Coherence Verification', () => {
  // Verifies against an empty context and hands back only the coherence check.
  function coherenceCheck(action, reasoning) {
    return verifier.verify(action, reasoning, {}).checks.coherence;
  }

  test('should verify reasoning is internally consistent', () => {
    const installReact = {
      type: 'install_package',
      parameters: { package: 'react' }
    };
    const consistentReasoning = {
      explanation: 'Installing React as requested',
      steps: [
        'User asked for React installation',
        'React is the appropriate package',
        'Install React via npm'
      ]
    };

    const coherence = coherenceCheck(installReact, consistentReasoning);

    expect(coherence.passed).toBe(true);
    expect(coherence.score).toBeGreaterThan(0.6);
  });

  test('should detect logical contradictions in reasoning', () => {
    const installVue = {
      type: 'install_package',
      parameters: { package: 'vue' }
    };
    // The explanation names React while the steps switch over to Vue.
    const contradictoryReasoning = {
      explanation: 'Installing React framework',
      steps: [
        'Install React',
        'Actually using Vue',
        'Run Vue installation'
      ]
    };

    const coherence = coherenceCheck(installVue, contradictoryReasoning);

    expect(coherence.passed).toBe(false);
    expect(coherence.issues).toContain('reasoning contains contradictions');
  });
});
|
|
|
|
describe('Completeness Verification', () => {
  // Both scenarios deploy to production; only the listed steps change.
  const productionDeploy = () => ({
    type: 'deploy',
    parameters: { environment: 'production' }
  });

  // Wraps a list of steps in the reasoning shape shared by both scenarios.
  const deployReasoning = (steps) => ({
    explanation: 'Deploy to production',
    steps
  });

  test('should verify all necessary steps are included', () => {
    const fullPlan = deployReasoning([
      'Run tests',
      'Build production bundle',
      'Backup current version',
      'Deploy new version',
      'Verify deployment'
    ]);

    const { completeness } = verifier.verify(productionDeploy(), fullPlan, {}).checks;

    expect(completeness.passed).toBe(true);
    expect(completeness.score).toBeGreaterThan(0.7);
  });

  test('should detect missing critical steps', () => {
    // A plan that jumps straight to the deploy step.
    const barePlan = deployReasoning([
      'Deploy new version'
    ]);

    const { completeness } = verifier.verify(productionDeploy(), barePlan, {}).checks;

    expect(completeness.passed).toBe(false);
    expect(completeness.missing_considerations).toContain('testing');
  });
});
|
|
|
|
describe('Safety Verification', () => {
  // Each scenario supplies only an explanation as reasoning and an empty context.
  const verifyExplained = (action, explanation) =>
    verifier.verify(action, { explanation }, {});

  test('should verify safe operations pass safety check', () => {
    const readConfig = {
      type: 'read_file',
      parameters: { file: 'config.json' }
    };

    const { safety } = verifyExplained(
      readConfig,
      'Reading configuration file to check settings'
    ).checks;

    expect(safety.passed).toBe(true);
    expect(safety.risk_level).toBe('LOW');
  });

  test('should flag dangerous operations', () => {
    const wipeUsers = {
      type: 'delete_all',
      parameters: { table: 'users' }
    };

    const { safety } = verifyExplained(wipeUsers, 'Cleaning up user table').checks;

    expect(safety.passed).toBe(false);
    expect(safety.risk_level).toBe('HIGH');
    expect(safety.concerns).toContain('destructive operation');
  });

  test('should require explicit confirmation for risky actions', () => {
    const alterSchema = {
      type: 'modify_schema',
      parameters: { table: 'users' }
    };

    const outcome = verifyExplained(alterSchema, 'Update database schema');

    expect(outcome.requires_confirmation).toBe(true);
    expect(outcome.checks.safety.risk_level).toMatch(/MEDIUM|HIGH/);
  });
});
|
|
|
|
describe('Alternative Consideration', () => {
  // Both scenarios pick approach A; only the recorded alternatives differ.
  const chooseApproachA = () => ({
    type: 'implementation',
    parameters: { approach: 'A' }
  });

  // Runs the verifier with the given alternatives list and an empty context,
  // returning only the alternatives check.
  function alternativesCheck(alternativesConsidered) {
    const reasoning = {
      explanation: 'Using approach A',
      alternatives_considered: alternativesConsidered
    };
    return verifier.verify(chooseApproachA(), reasoning, {}).checks.alternatives;
  }

  test('should verify alternatives were considered', () => {
    const alternatives = alternativesCheck([
      'Approach A: Fast but uses more memory',
      'Approach B: Slower but memory efficient',
      'Selected A for performance priority'
    ]);

    expect(alternatives.passed).toBe(true);
    expect(alternatives.score).toBeGreaterThan(0.7);
  });

  test('should flag lack of alternative consideration', () => {
    const alternatives = alternativesCheck([]);

    expect(alternatives.passed).toBe(false);
    expect(alternatives.issues).toContain('no alternatives considered');
  });
});
|
|
|
|
describe('Overall Confidence Calculation', () => {
  test('should calculate high confidence when all checks pass', () => {
    // Reasoning that fills in evidence, steps, alternatives and edge cases.
    const thoroughReasoning = {
      explanation: 'Safe file read operation',
      evidence: ['user requested', 'file exists', 'read-only'],
      steps: ['locate file', 'read contents', 'return data'],
      alternatives_considered: ['direct read', 'streamed read'],
      edgeCases: ['file not found', 'permission denied']
    };
    const safeRead = {
      type: 'safe_operation',
      parameters: { file: 'test.txt' }
    };

    const { confidence, decision } = verifier.verify(safeRead, thoroughReasoning, {});

    expect(confidence).toBeGreaterThan(0.7);
    expect(decision).toBe('PROCEED');
  });

  test('should calculate low confidence when checks fail', () => {
    // Hedged explanation, no evidence and a single vague step.
    const thinReasoning = {
      explanation: 'Maybe do this',
      evidence: [],
      steps: ['do it']
    };
    const riskyAction = {
      type: 'risky_operation',
      parameters: { destructive: true }
    };

    const { confidence, decision } = verifier.verify(riskyAction, thinReasoning, {});

    expect(confidence).toBeLessThan(0.5);
    expect(decision).toMatch(/BLOCK|REQUEST_CLARIFICATION|REQUIRE_REVIEW/);
  });

  test('should weight checks appropriately', () => {
    // Safety failures should heavily impact confidence, even when the
    // reasoning itself lists evidence, steps and an alternative.
    const dropDatabase = {
      type: 'delete_database'
    };
    const documentedReasoning = {
      explanation: 'Delete database',
      evidence: ['complete analysis'],
      steps: ['backup', 'delete', 'verify'],
      alternatives_considered: ['archive instead']
    };

    const outcome = verifier.verify(dropDatabase, documentedReasoning, {});

    expect(outcome.confidence).toBeLessThan(0.6);
    expect(outcome.checks.safety.passed).toBe(false);
  });
});
|
|
|
|
describe('Pressure-Adjusted Verification', () => {
  test('should increase verification strictness under high pressure', () => {
    const updateUsers = {
      type: 'database_update',
      parameters: { table: 'users' }
    };
    const updateReasoning = {
      explanation: 'Update users table'
    };

    // The same action/reasoning pair is verified under two pressure contexts.
    const underNormal = verifier.verify(updateUsers, updateReasoning, {
      pressure_level: 'NORMAL'
    });
    const underCritical = verifier.verify(updateUsers, updateReasoning, {
      pressure_level: 'CRITICAL',
      token_usage: 0.9
    });

    // High pressure should reduce confidence (or keep it equal in edge cases)
    expect(underCritical.confidence).toBeLessThanOrEqual(underNormal.confidence);
    expect(underCritical.pressure_adjustment).toBeLessThanOrEqual(1.0);
  });

  test('should require higher confidence threshold under pressure', () => {
    const moderateRisk = {
      type: 'moderate_risk',
      parameters: {}
    };
    const moderateReasoning = {
      explanation: 'Moderate risk operation',
      evidence: ['some evidence']
    };
    const criticalContext = {
      pressure_level: 'CRITICAL',
      errors_recent: 10
    };

    const outcome = verifier.verify(moderateRisk, moderateReasoning, criticalContext);

    expect(outcome.threshold_adjusted).toBe(true);
    expect(outcome.required_confidence).toBeGreaterThan(0.7);
  });

  test('should block operations at DANGEROUS pressure', () => {
    const dangerousContext = {
      pressure_level: 'DANGEROUS',
      token_usage: 0.95
    };

    const { decision, reason } = verifier.verify(
      { type: 'any_operation' },
      { explanation: 'Well-reasoned action' },
      dangerousContext
    );

    expect(decision).toBe('BLOCK');
    expect(reason).toContain('pressure too high');
  });
});
|
|
|
|
describe('Verification Decisions', () => {
  // One row per confidence band fed to _makeDecision with an empty context.
  // requiresConfirmation is only asserted for the rows that specify it.
  const bands = [
    {
      title: 'should return PROCEED for high confidence actions',
      confidence: 0.85,
      decision: 'PROCEED',
      requiresConfirmation: false
    },
    {
      title: 'should return REQUEST_CONFIRMATION for medium confidence',
      confidence: 0.65,
      decision: 'REQUEST_CONFIRMATION',
      requiresConfirmation: true
    },
    {
      title: 'should return REQUEST_CLARIFICATION for low confidence',
      confidence: 0.45,
      decision: 'REQUEST_CLARIFICATION'
    },
    {
      title: 'should return BLOCK for very low confidence',
      confidence: 0.2,
      decision: 'BLOCK'
    }
  ];

  bands.forEach(({ title, confidence, decision, requiresConfirmation }) => {
    test(title, () => {
      const outcome = verifier._makeDecision(confidence, {});

      expect(outcome.decision).toBe(decision);
      if (requiresConfirmation !== undefined) {
        expect(outcome.requires_confirmation).toBe(requiresConfirmation);
      }
    });
  });
});
|
|
|
|
describe('27027 Failure Mode Prevention', () => {
  // Context carrying the explicit instruction both scenarios are judged against.
  const instructedContext = () => ({
    explicit_instructions: [
      { text: 'use port 27027', timestamp: new Date() }
    ]
  });

  // Builds a database_connect action for the given port.
  const connectOn = (port) => ({
    type: 'database_connect',
    parameters: { port }
  });

  test('should detect when action conflicts with explicit instruction', () => {
    // The action uses port 27017 while the instruction says 27027.
    const defaultPortReasoning = {
      explanation: 'Connecting to MongoDB on default port',
      evidence: ['MongoDB default is 27017']
    };

    const outcome = verifier.verify(connectOn(27017), defaultPortReasoning, instructedContext());

    expect(outcome.checks.alignment.passed).toBe(false);
    expect(outcome.decision).toMatch(/BLOCK|REQUEST_CLARIFICATION/);
  });

  test('should approve when action matches explicit instruction', () => {
    const instructedReasoning = {
      explanation: 'Connecting to MongoDB on port 27027 as instructed',
      evidence: ['User explicitly said port 27027']
    };

    const outcome = verifier.verify(connectOn(27027), instructedReasoning, instructedContext());

    expect(outcome.checks.alignment.passed).toBe(true);
    expect(outcome.confidence).toBeGreaterThan(0.7);
  });
});
|
|
|
|
describe('Evidence Quality Assessment', () => {
  // Scores a reasoning object whose explanation is fixed and whose evidence
  // list is supplied by the scenario.
  function qualityFor(evidence) {
    return verifier._assessEvidenceQuality({
      explanation: 'Action is needed',
      evidence
    });
  }

  test('should assess evidence quality', () => {
    const score = qualityFor([
      'User explicitly requested this',
      'Documentation confirms approach',
      'Tests validate correctness'
    ]);

    expect(score).toBeGreaterThan(0.7);
  });

  test('should penalize weak evidence', () => {
    const score = qualityFor([
      'I think this is right',
      'Maybe this works'
    ]);

    expect(score).toBeLessThan(0.5);
  });

  test('should penalize missing evidence', () => {
    const score = qualityFor([]);

    expect(score).toBeLessThan(0.3);
  });
});
|
|
|
|
describe('Edge Cases', () => {
  test('should handle null action gracefully', () => {
    // Invoked twice: once to prove it does not throw, once to inspect the result.
    const verifyNullAction = () => verifier.verify(null, { explanation: 'test' }, {});

    expect(verifyNullAction).not.toThrow();

    const { decision } = verifyNullAction();
    expect(decision).toBe('BLOCK');
  });

  test('should handle null reasoning gracefully', () => {
    // Invoked twice: once to prove it does not throw, once to inspect the result.
    const verifyNullReasoning = () => verifier.verify({ type: 'test' }, null, {});

    expect(verifyNullReasoning).not.toThrow();

    const { decision } = verifyNullReasoning();
    expect(decision).toBe('BLOCK');
  });

  test('should handle empty context gracefully', () => {
    const outcome = verifier.verify({ type: 'test' }, { explanation: 'test' }, {});

    expect(outcome).toBeDefined();
    expect(outcome.decision).toBeDefined();
  });
});
|
|
|
|
describe('Detailed Failure Analysis', () => {
  test('should provide detailed analysis for failed verifications', () => {
    const action = {
      type: 'risky_operation'
    };
    const reasoning = {
      explanation: 'unclear reasoning'
    };

    const result = verifier.verify(action, reasoning, {});

    expect(result.analysis).toBeDefined();
    expect(result.analysis.failed_checks).toBeDefined();
    expect(result.analysis.recommendations).toBeDefined();
  });

  test('should suggest improvements for low-confidence actions', () => {
    const action = {
      type: 'moderate_operation'
    };
    const reasoning = {
      explanation: 'Basic explanation',
      evidence: ['one piece of evidence']
    };

    const result = verifier.verify(action, reasoning, {});

    // Guard against a vacuous pass: a missing or NaN confidence makes the
    // comparison below false, which would silently skip every assertion.
    expect(typeof result.confidence).toBe('number');
    expect(Number.isNaN(result.confidence)).toBe(false);

    // Suggestions are only required when the confidence falls below 0.7.
    if (result.confidence < 0.7) {
      expect(result.suggestions).toBeDefined();
      expect(result.suggestions.length).toBeGreaterThan(0);
    }
  });
});
|
|
|
|
describe('Singleton Pattern', () => {
  test('should export singleton instance with required methods', () => {
    expect(typeof verifier.verify).toBe('function');
    expect(typeof verifier.getStats).toBe('function');
  });

  test('should maintain verification history across calls', () => {
    // Compare the counter before and after a call: checking only that the
    // field is defined would pass even if no history were retained.
    const before = verifier.getStats().total_verifications;

    verifier.verify({ type: 'test' }, { explanation: 'test' }, {});

    const after = verifier.getStats().total_verifications;

    expect(after).toBeDefined();
    expect(after).toBeGreaterThan(before);
  });
});
|
|
|
|
describe('Statistics Tracking', () => {
  test('should track verification statistics', () => {
    const stats = verifier.getStats();

    expect(stats).toHaveProperty('total_verifications');
    expect(stats).toHaveProperty('by_decision');
    expect(stats).toHaveProperty('average_confidence');
  });

  test('should increment verification count after verify()', () => {
    const before = verifier.getStats().total_verifications;

    verifier.verify(
      { type: 'test' },
      { explanation: 'test' },
      {}
    );

    const after = verifier.getStats().total_verifications;

    expect(after).toBe(before + 1);
  });

  test('should track decision distribution', () => {
    verifier.verify(
      { type: 'safe', parameters: {} },
      { explanation: 'safe', evidence: ['good evidence'], steps: ['step 1'], alternatives_considered: ['alt'] },
      {}
    );

    verifier.verify(
      { type: 'unsafe' },
      { explanation: 'unclear' },
      {}
    );

    const stats = verifier.getStats();

    // Sum every recorded decision count rather than a fixed list of keys:
    // adding a missing key would yield NaN, and a fixed list leaves out any
    // decision it does not name (e.g. REQUIRE_REVIEW).
    const recordedDecisions = Object.values(stats.by_decision).reduce(
      (sum, count) => sum + (typeof count === 'number' ? count : 0),
      0
    );

    expect(recordedDecisions).toBeGreaterThan(0);
  });

  test('should calculate average confidence over time', () => {
    verifier.verify({ type: 'test1' }, { explanation: 'good', evidence: ['a', 'b'], steps: ['1'], alternatives_considered: ['x'] }, {});
    verifier.verify({ type: 'test2' }, { explanation: 'poor' }, {});

    const stats = verifier.getStats();

    expect(stats.average_confidence).toBeGreaterThan(0);
    expect(stats.average_confidence).toBeLessThan(1);
  });
});
|
|
|
|
describe('Reasoning Quality Metrics', () => {
  test('should score high-quality reasoning highly', () => {
    // Long explanation plus populated evidence, steps and alternatives.
    const evidence = [
      'User explicitly requested this action',
      'Documentation supports this approach',
      'Previous similar actions succeeded'
    ];
    const steps = [
      'Validate preconditions',
      'Execute action',
      'Verify results',
      'Report completion'
    ];
    const alternativesConsidered = [
      'Alternative A: rejected because X',
      'Alternative B: rejected because Y',
      'Chosen approach: best because Z'
    ];

    const quality = verifier._assessReasoningQuality({
      explanation: 'Detailed explanation with clear reasoning about why this action is needed and how it aligns with user intent',
      evidence,
      steps,
      alternatives_considered: alternativesConsidered
    });

    expect(quality).toBeGreaterThan(0.8);
  });

  test('should score low-quality reasoning poorly', () => {
    // Terse explanation with empty evidence and steps.
    const quality = verifier._assessReasoningQuality({
      explanation: 'Do it',
      evidence: [],
      steps: []
    });

    expect(quality).toBeLessThan(0.3);
  });
});
|
|
|
|
describe('Context-Aware Verification', () => {
  test('should consider recent errors in verification', () => {
    const contextWithErrors = {
      errors_recent: 5,
      last_error_type: 'database_connection'
    };

    const { confidence_adjustment: adjustment } = verifier.verify(
      { type: 'database_operation' },
      { explanation: 'database op' },
      contextWithErrors
    );

    // Should be more cautious after errors
    expect(adjustment).toBeLessThan(1.0);
  });

  test('should consider conversation length in verification', () => {
    const contextWithLongConversation = {
      conversation_length: 100
    };

    const { confidence_adjustment: adjustment } = verifier.verify(
      { type: 'operation' },
      { explanation: 'do operation' },
      contextWithLongConversation
    );

    // Long conversations should increase scrutiny
    expect(adjustment).toBeLessThan(1.0);
  });
});
|
|
});
|