/**
 * Unit Tests for MetacognitiveVerifier
 * Tests metacognitive self-verification before action execution
 *
 * Each test builds an `action`, a `reasoning` object and a `context`, passes
 * them to `verifier.verify(action, reasoning, context)` and asserts on the
 * returned result (per-check outcomes under `result.checks`, plus top-level
 * fields such as `confidence` and `decision`).
 */

// Verifier is a singleton instance: every test in this file shares the same
// object, so no per-test setup hook is needed.
const verifier = require('../../src/services/MetacognitiveVerifier.service');

describe('MetacognitiveVerifier', () => {
  // Alignment: the action's parameters must agree with the stated reasoning.
  describe('Alignment Verification', () => {
    test('should verify action aligns with stated reasoning', () => {
      const action = {
        type: 'database_connect',
        parameters: { port: 27027 }
      };
      const reasoning = {
        explanation: 'User explicitly requested port 27027',
        evidence: ['user instruction: use port 27027']
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.alignment.passed).toBe(true);
      expect(result.checks.alignment.score).toBeGreaterThan(0.7);
    });

    test('should detect misalignment between action and reasoning', () => {
      // Action uses 27017 while the reasoning talks about 27027.
      const action = {
        type: 'database_connect',
        parameters: { port: 27017 }
      };
      const reasoning = {
        explanation: 'User explicitly requested port 27027',
        evidence: ['user instruction: use port 27027']
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.alignment.passed).toBe(false);
      expect(result.checks.alignment.issues).toContain('action parameters conflict with reasoning');
    });
  });

  // Coherence: the reasoning steps must not contradict each other.
  describe('Coherence Verification', () => {
    test('should verify reasoning is internally consistent', () => {
      const action = {
        type: 'install_package',
        parameters: { package: 'react' }
      };
      const reasoning = {
        explanation: 'Installing React as requested',
        steps: [
          'User asked for React installation',
          'React is the appropriate package',
          'Install React via npm'
        ]
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.coherence.passed).toBe(true);
      expect(result.checks.coherence.score).toBeGreaterThan(0.6);
    });

    test('should detect logical contradictions in reasoning', () => {
      const action = {
        type: 'install_package',
        parameters: { package: 'vue' }
      };
      const reasoning = {
        explanation: 'Installing React framework',
        steps: [
          'Install React',
          'Actually using Vue',
          'Run Vue installation'
        ]
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.coherence.passed).toBe(false);
      expect(result.checks.coherence.issues).toContain('reasoning contains contradictions');
    });
  });

  // Completeness: the listed steps must cover what the action needs.
  describe('Completeness Verification', () => {
    test('should verify all necessary steps are included', () => {
      const action = {
        type: 'deploy',
        parameters: { environment: 'production' }
      };
      const reasoning = {
        explanation: 'Deploy to production',
        steps: [
          'Run tests',
          'Build production bundle',
          'Backup current version',
          'Deploy new version',
          'Verify deployment'
        ]
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.completeness.passed).toBe(true);
      expect(result.checks.completeness.score).toBeGreaterThan(0.7);
    });

    test('should detect missing critical steps', () => {
      const action = {
        type: 'deploy',
        parameters: { environment: 'production' }
      };
      const reasoning = {
        explanation: 'Deploy to production',
        steps: [
          'Deploy new version'
        ]
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.completeness.passed).toBe(false);
      expect(result.checks.completeness.missing_considerations).toContain('testing');
    });
  });

  // Safety: risk level and confirmation requirements for the action type.
  describe('Safety Verification', () => {
    test('should verify safe operations pass safety check', () => {
      const action = {
        type: 'read_file',
        parameters: { file: 'config.json' }
      };
      const reasoning = {
        explanation: 'Reading configuration file to check settings'
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.safety.passed).toBe(true);
      expect(result.checks.safety.risk_level).toBe('LOW');
    });

    test('should flag dangerous operations', () => {
      const action = {
        type: 'delete_all',
        parameters: { table: 'users' }
      };
      const reasoning = {
        explanation: 'Cleaning up user table'
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.safety.passed).toBe(false);
      expect(result.checks.safety.risk_level).toBe('HIGH');
      expect(result.checks.safety.concerns).toContain('destructive operation');
    });

    test('should require explicit confirmation for risky actions', () => {
      const action = {
        type: 'modify_schema',
        parameters: { table: 'users' }
      };
      const reasoning = {
        explanation: 'Update database schema'
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.requires_confirmation).toBe(true);
      expect(result.checks.safety.risk_level).toMatch(/MEDIUM|HIGH/);
    });
  });

  // Alternatives: the reasoning should show that other options were weighed.
  describe('Alternative Consideration', () => {
    test('should verify alternatives were considered', () => {
      const action = {
        type: 'implementation',
        parameters: { approach: 'A' }
      };
      const reasoning = {
        explanation: 'Using approach A',
        alternatives_considered: [
          'Approach A: Fast but uses more memory',
          'Approach B: Slower but memory efficient',
          'Selected A for performance priority'
        ]
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.alternatives.passed).toBe(true);
      expect(result.checks.alternatives.score).toBeGreaterThan(0.7);
    });

    test('should flag lack of alternative consideration', () => {
      const action = {
        type: 'implementation',
        parameters: { approach: 'A' }
      };
      const reasoning = {
        explanation: 'Using approach A',
        alternatives_considered: []
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.checks.alternatives.passed).toBe(false);
      expect(result.checks.alternatives.issues).toContain('no alternatives considered');
    });
  });

  // Overall confidence: how the individual checks combine into one score.
  describe('Overall Confidence Calculation', () => {
    test('should calculate high confidence when all checks pass', () => {
      const action = {
        type: 'safe_operation',
        parameters: { file: 'test.txt' }
      };
      const reasoning = {
        explanation: 'Safe file read operation',
        evidence: ['user requested', 'file exists', 'read-only'],
        steps: ['locate file', 'read contents', 'return data'],
        alternatives_considered: ['direct read', 'streamed read'],
        edgeCases: ['file not found', 'permission denied']
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.confidence).toBeGreaterThan(0.7);
      expect(result.decision).toBe('PROCEED');
    });

    test('should calculate low confidence when checks fail', () => {
      const action = {
        type: 'risky_operation',
        parameters: { destructive: true }
      };
      const reasoning = {
        explanation: 'Maybe do this',
        evidence: [],
        steps: ['do it']
      };

      const result = verifier.verify(action, reasoning, {});

      expect(result.confidence).toBeLessThan(0.5);
      expect(result.decision).toMatch(/BLOCK|REQUEST_CLARIFICATION|REQUIRE_REVIEW/);
    });

    test('should weight checks appropriately', () => {
      // Safety failures should heavily impact confidence
      const unsafeAction = {
        type: 'delete_database'
      };
      const reasoning = {
        explanation: 'Delete database',
        evidence: ['complete analysis'],
        steps: ['backup', 'delete', 'verify'],
        alternatives_considered: ['archive instead']
      };

      const result = verifier.verify(unsafeAction, reasoning, {});

      expect(result.confidence).toBeLessThan(0.6);
      expect(result.checks.safety.passed).toBe(false);
    });
  });

  // Pressure: the context's pressure_level should tighten verification.
  describe('Pressure-Adjusted Verification', () => {
    test('should increase verification strictness under high pressure', () => {
      const action = {
        type: 'database_update',
        parameters: { table: 'users' }
      };
      const reasoning = {
        explanation: 'Update users table'
      };
      const lowPressure = { pressure_level: 'NORMAL' };
      const highPressure = { pressure_level: 'CRITICAL', token_usage: 0.9 };

      const lowResult = verifier.verify(action, reasoning, lowPressure);
      const highResult = verifier.verify(action, reasoning, highPressure);

      // High pressure should reduce confidence (or keep it equal in edge cases)
      expect(highResult.confidence).toBeLessThanOrEqual(lowResult.confidence);
      expect(highResult.pressure_adjustment).toBeLessThanOrEqual(1.0);
    });

    test('should require higher confidence threshold under pressure', () => {
      const action = {
        type: 'moderate_risk',
        parameters: {}
      };
      const reasoning = {
        explanation: 'Moderate risk operation',
        evidence: ['some evidence']
      };
      const criticalPressure = {
        pressure_level: 'CRITICAL',
        errors_recent: 10
      };

      const result = verifier.verify(action, reasoning, criticalPressure);

      expect(result.threshold_adjusted).toBe(true);
      expect(result.required_confidence).toBeGreaterThan(0.7);
    });

    test('should block operations at DANGEROUS pressure', () => {
      const action = {
        type: 'any_operation'
      };
      const reasoning = {
        explanation: 'Well-reasoned action'
      };
      const dangerousPressure = {
        pressure_level: 'DANGEROUS',
        token_usage: 0.95
      };

      const result = verifier.verify(action, reasoning, dangerousPressure);

      expect(result.decision).toBe('BLOCK');
      expect(result.reason).toContain('pressure too high');
    });
  });

  // Decisions: exercises the verifier's _makeDecision(confidence, context)
  // method directly with fixed confidence values.
  describe('Verification Decisions', () => {
    test('should return PROCEED for high confidence actions', () => {
      const result = verifier._makeDecision(0.85, {});

      expect(result.decision).toBe('PROCEED');
      expect(result.requires_confirmation).toBe(false);
    });

    test('should return REQUEST_CONFIRMATION for medium confidence', () => {
      const result = verifier._makeDecision(0.65, {});

      expect(result.decision).toBe('REQUEST_CONFIRMATION');
      expect(result.requires_confirmation).toBe(true);
    });

    test('should return REQUEST_CLARIFICATION for low confidence', () => {
      const result = verifier._makeDecision(0.45, {});

      expect(result.decision).toBe('REQUEST_CLARIFICATION');
    });

    test('should return BLOCK for very low confidence', () => {
      const result = verifier._makeDecision(0.2, {});

      expect(result.decision).toBe('BLOCK');
    });
  });

  // Explicit instructions in the context must win over plausible defaults.
  describe('27027 Failure Mode Prevention', () => {
    test('should detect when action conflicts with explicit instruction', () => {
      const action = {
        type: 'database_connect',
        parameters: { port: 27017 }
      };
      const reasoning = {
        explanation: 'Connecting to MongoDB on default port',
        evidence: ['MongoDB default is 27017']
      };
      const context = {
        explicit_instructions: [
          { text: 'use port 27027', timestamp: new Date() }
        ]
      };

      const result = verifier.verify(action, reasoning, context);

      expect(result.checks.alignment.passed).toBe(false);
      expect(result.decision).toMatch(/BLOCK|REQUEST_CLARIFICATION/);
    });

    test('should approve when action matches explicit instruction', () => {
      const action = {
        type: 'database_connect',
        parameters: { port: 27027 }
      };
      const reasoning = {
        explanation: 'Connecting to MongoDB on port 27027 as instructed',
        evidence: ['User explicitly said port 27027']
      };
      const context = {
        explicit_instructions: [
          { text: 'use port 27027', timestamp: new Date() }
        ]
      };

      const result = verifier.verify(action, reasoning, context);

      expect(result.checks.alignment.passed).toBe(true);
      expect(result.confidence).toBeGreaterThan(0.7);
    });
  });

  // Evidence quality: exercises _assessEvidenceQuality(reasoning) directly.
  describe('Evidence Quality Assessment', () => {
    test('should assess evidence quality', () => {
      const reasoning = {
        explanation: 'Action is needed',
        evidence: [
          'User explicitly requested this',
          'Documentation confirms approach',
          'Tests validate correctness'
        ]
      };

      const quality = verifier._assessEvidenceQuality(reasoning);

      expect(quality).toBeGreaterThan(0.7);
    });

    test('should penalize weak evidence', () => {
      const reasoning = {
        explanation: 'Action is needed',
        evidence: [
          'I think this is right',
          'Maybe this works'
        ]
      };

      const quality = verifier._assessEvidenceQuality(reasoning);

      expect(quality).toBeLessThan(0.5);
    });

    test('should penalize missing evidence', () => {
      const reasoning = {
        explanation: 'Action is needed',
        evidence: []
      };

      const quality = verifier._assessEvidenceQuality(reasoning);

      expect(quality).toBeLessThan(0.3);
    });
  });

  // Edge cases: null inputs must not throw and must result in BLOCK.
  describe('Edge Cases', () => {
    test('should handle null action gracefully', () => {
      expect(() => {
        verifier.verify(null, { explanation: 'test' }, {});
      }).not.toThrow();

      const result = verifier.verify(null, { explanation: 'test' }, {});
      expect(result.decision).toBe('BLOCK');
    });

    test('should handle null reasoning gracefully', () => {
      expect(() => {
        verifier.verify({ type: 'test' }, null, {});
      }).not.toThrow();

      const result = verifier.verify({ type: 'test' }, null, {});
      expect(result.decision).toBe('BLOCK');
    });

    test('should handle empty context gracefully', () => {
      const action = { type: 'test' };
      const reasoning = { explanation: 'test' };

      const result = verifier.verify(action, reasoning, {});

      expect(result).toBeDefined();
      expect(result.decision).toBeDefined();
    });
  });

  // Failure analysis: the result should carry diagnostics and suggestions.
  describe('Detailed Failure Analysis', () => {
    test('should provide detailed analysis for failed verifications', () => {
      const action = { type: 'risky_operation' };
      const reasoning = { explanation: 'unclear reasoning' };

      const result = verifier.verify(action, reasoning, {});

      expect(result.analysis).toBeDefined();
      expect(result.analysis.failed_checks).toBeDefined();
      expect(result.analysis.recommendations).toBeDefined();
    });

    test('should suggest improvements for low-confidence actions', () => {
      const action = { type: 'moderate_operation' };
      const reasoning = {
        explanation: 'Basic explanation',
        evidence: ['one piece of evidence']
      };

      const result = verifier.verify(action, reasoning, {});

      // NOTE: these assertions only run when confidence is below 0.7; at or
      // above that value the test passes without asserting anything.
      if (result.confidence < 0.7) {
        expect(result.suggestions).toBeDefined();
        expect(result.suggestions.length).toBeGreaterThan(0);
      }
    });
  });

  // Singleton: the module export itself exposes the verifier API.
  describe('Singleton Pattern', () => {
    test('should export singleton instance with required methods', () => {
      expect(typeof verifier.verify).toBe('function');
      expect(typeof verifier.getStats).toBe('function');
    });

    test('should maintain verification history across calls', () => {
      verifier.verify({ type: 'test' }, { explanation: 'test' }, {});

      const stats = verifier.getStats();

      expect(stats.total_verifications).toBeDefined();
    });
  });

  // Statistics: getStats() shape and how verify() calls affect it.
  describe('Statistics Tracking', () => {
    test('should track verification statistics', () => {
      const stats = verifier.getStats();

      expect(stats).toHaveProperty('total_verifications');
      expect(stats).toHaveProperty('by_decision');
      expect(stats).toHaveProperty('average_confidence');
    });

    test('should increment verification count after verify()', () => {
      // Compare against the count read just before the call, because the
      // shared instance has already been used by earlier tests.
      const before = verifier.getStats().total_verifications;

      verifier.verify(
        { type: 'test' },
        { explanation: 'test' },
        {}
      );

      const after = verifier.getStats().total_verifications;

      expect(after).toBe(before + 1);
    });

    test('should track decision distribution', () => {
      verifier.verify(
        { type: 'safe', parameters: {} },
        {
          explanation: 'safe',
          evidence: ['good evidence'],
          steps: ['step 1'],
          alternatives_considered: ['alt']
        },
        {}
      );
      verifier.verify(
        { type: 'unsafe' },
        { explanation: 'unclear' },
        {}
      );

      const stats = verifier.getStats();

      // Sum every bucket the service reports instead of a fixed list of keys:
      // adding a missing bucket (undefined) would produce NaN and fail the
      // assertion spuriously, and a fixed list leaves out decisions such as
      // REQUIRE_REVIEW that other tests in this file accept.
      const totalDecisions = Object.values(stats.by_decision)
        .reduce((sum, count) => sum + count, 0);

      expect(totalDecisions).toBeGreaterThan(0);
    });

    test('should calculate average confidence over time', () => {
      verifier.verify(
        { type: 'test1' },
        {
          explanation: 'good',
          evidence: ['a', 'b'],
          steps: ['1'],
          alternatives_considered: ['x']
        },
        {}
      );
      verifier.verify({ type: 'test2' }, { explanation: 'poor' }, {});

      const stats = verifier.getStats();

      expect(stats.average_confidence).toBeGreaterThan(0);
      expect(stats.average_confidence).toBeLessThan(1);
    });
  });

  // Reasoning quality: exercises _assessReasoningQuality(reasoning) directly.
  describe('Reasoning Quality Metrics', () => {
    test('should score high-quality reasoning highly', () => {
      const reasoning = {
        explanation: 'Detailed explanation with clear reasoning about why this action is needed and how it aligns with user intent',
        evidence: [
          'User explicitly requested this action',
          'Documentation supports this approach',
          'Previous similar actions succeeded'
        ],
        steps: [
          'Validate preconditions',
          'Execute action',
          'Verify results',
          'Report completion'
        ],
        alternatives_considered: [
          'Alternative A: rejected because X',
          'Alternative B: rejected because Y',
          'Chosen approach: best because Z'
        ]
      };

      const score = verifier._assessReasoningQuality(reasoning);

      expect(score).toBeGreaterThan(0.8);
    });

    test('should score low-quality reasoning poorly', () => {
      const reasoning = {
        explanation: 'Do it',
        evidence: [],
        steps: []
      };

      const score = verifier._assessReasoningQuality(reasoning);

      expect(score).toBeLessThan(0.3);
    });
  });

  // Context awareness: recent errors and long conversations lower confidence.
  describe('Context-Aware Verification', () => {
    test('should consider recent errors in verification', () => {
      const action = { type: 'database_operation' };
      const reasoning = { explanation: 'database op' };
      const errorContext = {
        errors_recent: 5,
        last_error_type: 'database_connection'
      };

      const result = verifier.verify(action, reasoning, errorContext);

      // Should be more cautious after errors
      expect(result.confidence_adjustment).toBeLessThan(1.0);
    });

    test('should consider conversation length in verification', () => {
      const action = { type: 'operation' };
      const reasoning = { explanation: 'do operation' };
      const longConversation = {
        conversation_length: 100
      };

      const result = verifier.verify(action, reasoning, longConversation);

      // Long conversations should increase scrutiny
      expect(result.confidence_adjustment).toBeLessThan(1.0);
    });
  });
});