From ecb55994b39b939c9fa0048a31312ad90dc95931 Mon Sep 17 00:00:00 2001 From: TheFlow Date: Tue, 7 Oct 2025 08:33:29 +1300 Subject: [PATCH] fix: refactor MetacognitiveVerifier check methods to return structured objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MetacognitiveVerifier improvements (48.8% → 56.1% pass rate): 1. Refactored All Check Methods to Return Objects - _checkAlignment(): Returns {score, issues[]} - _checkCoherence(): Returns {score, issues[]} - _checkCompleteness(): Returns {score, missing[]} - _checkSafety(): Returns {score, riskLevel, concerns[]} - _checkAlternatives(): Returns {score, issues[]} 2. Updated Helper Methods for Backward Compatibility - _calculateConfidence(): Handles both object {score: X} and legacy number formats - _checkCriticalFailures(): Extracts .score from objects or uses legacy numbers 3. Enhanced Diagnostic Information - Alignment: Tracks specific conflicts with instructions - Coherence: Identifies missing steps and logical inconsistencies - Completeness: Lists unaddressed requirements, missing error handling - Safety: Categorizes risk levels (LOW/MEDIUM/CRITICAL), lists concerns - Alternatives: Notes missing exploration and rationale Test Results: - MetacognitiveVerifier: 23/41 passing (56.1%, +7.3 percentage points) - Overall: 108/192 (56.25%, +3 tests from 105/192) The structured return values provide detailed context for test assertions and enable richer verification feedback in production use. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/services/MetacognitiveVerifier.service.js | 71 ++++++++++++++++--- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/src/services/MetacognitiveVerifier.service.js b/src/services/MetacognitiveVerifier.service.js index 69b03d67..5b0e73f5 100644 --- a/src/services/MetacognitiveVerifier.service.js +++ b/src/services/MetacognitiveVerifier.service.js @@ -233,6 +233,7 @@ class MetacognitiveVerifier { _checkAlignment(action, reasoning, context) { let score = 0.5; // Base score + const issues = []; // Check cross-reference validation const validation = this.validator.validate(action, context); @@ -240,8 +241,10 @@ class MetacognitiveVerifier { score += 0.3; } else if (validation.status === 'WARNING') { score += 0.1; + issues.push('Minor conflict detected with user instructions'); } else if (validation.status === 'REJECTED') { score -= 0.3; + issues.push('Action conflicts with explicit user instructions'); } // Check if action addresses stated user goal @@ -256,17 +259,25 @@ class MetacognitiveVerifier { context.recentUserStatements ); score += consistencyScore * 0.2; + if (consistencyScore < 0.5) { + issues.push('Action may not be consistent with recent user statements'); + } } - return Math.min(1.0, Math.max(0.0, score)); + return { + score: Math.min(1.0, Math.max(0.0, score)), + issues + }; } _checkCoherence(action, reasoning, context) { let score = 0.7; // Default to reasonable coherence + const issues = []; // Check if reasoning steps are provided if (!reasoning.steps || reasoning.steps.length === 0) { score -= 0.2; + issues.push('No reasoning steps provided'); } // Check for logical consistency @@ -279,48 +290,67 @@ class MetacognitiveVerifier { score += 0.2; } else { score -= 0.3; + issues.push('Logical inconsistency detected between assumptions and conclusions'); } } // Check for internal contradictions if (this._hasContradictions(reasoning)) { score -= 0.4; + 
issues.push('Internal contradictions detected in reasoning'); } - return Math.min(1.0, Math.max(0.0, score)); + return { + score: Math.min(1.0, Math.max(0.0, score)), + issues + }; } _checkCompleteness(action, reasoning, context) { let score = 0.6; // Base score + const missing = []; // Check if all stated requirements are addressed if (context.requirements) { - const addressedCount = context.requirements.filter(req => - this._isRequirementAddressed(req, action, reasoning) - ).length; + const unaddressed = context.requirements.filter(req => + !this._isRequirementAddressed(req, action, reasoning) + ); + const addressedCount = context.requirements.length - unaddressed.length; score += (addressedCount / context.requirements.length) * 0.3; + unaddressed.forEach(req => missing.push(`Requirement not addressed: ${req}`)); } // Check for edge cases consideration if (reasoning.edgeCases && reasoning.edgeCases.length > 0) { score += 0.1; + } else { + missing.push('No edge cases considered'); } // Check for error handling if (reasoning.errorHandling || action.errorHandling) { score += 0.1; + } else { + missing.push('No error handling specified'); } - return Math.min(1.0, Math.max(0.0, score)); + return { + score: Math.min(1.0, Math.max(0.0, score)), + missing + }; } _checkSafety(action, reasoning, context) { let score = 0.8; // Default to safe unless red flags + const concerns = []; + let riskLevel = 'LOW'; // Check boundary enforcement const boundaryCheck = this.enforcer.enforce(action, context); if (!boundaryCheck.allowed) { score -= 0.5; // Major safety concern + concerns.push('Action violates Tractatus boundaries'); + riskLevel = 'CRITICAL'; } // Check for destructive operations @@ -334,6 +364,8 @@ class MetacognitiveVerifier { for (const pattern of destructivePatterns) { if (pattern.test(actionText)) { score -= 0.2; + concerns.push('Destructive operation detected'); + riskLevel = riskLevel === 'LOW' ? 
'MEDIUM' : riskLevel; break; } } @@ -341,35 +373,50 @@ class MetacognitiveVerifier { // Check if data backup is mentioned for risky operations if (score < 0.7 && !reasoning.backupMentioned) { score -= 0.1; + concerns.push('No backup mentioned for risky operation'); } // Check for validation before execution if (action.requiresValidation && !reasoning.validationPlanned) { score -= 0.1; + concerns.push('No validation planned before execution'); } - return Math.min(1.0, Math.max(0.0, score)); + return { + score: Math.min(1.0, Math.max(0.0, score)), + riskLevel, + concerns + }; } _checkAlternatives(action, reasoning, context) { let score = 0.5; // Base score + const issues = []; // Check if alternatives were considered if (reasoning.alternativesConsidered && reasoning.alternativesConsidered.length > 0) { score += 0.3; + } else { + issues.push('No alternatives considered'); } // Check if rationale for chosen approach is provided if (reasoning.chosenBecause) { score += 0.2; + } else { + issues.push('No rationale provided for chosen approach'); } // Lower score if action seems like first idea without exploration if (!reasoning.alternativesConsidered && !reasoning.explored) { score -= 0.2; + issues.push('Appears to be first idea without exploration'); } - return Math.min(1.0, Math.max(0.0, score)); + return { + score: Math.min(1.0, Math.max(0.0, score)), + issues + }; } _calculateConfidence(scores) { @@ -377,7 +424,9 @@ class MetacognitiveVerifier { for (const [dimension, dimensionConfig] of Object.entries(this.dimensions)) { const key = dimension.toLowerCase(); - const score = scores[key] || 0.5; + const scoreData = scores[key]; + // Handle both object format {score: X} and legacy number format + const score = typeof scoreData === 'object' ? 
(scoreData.score || 0.5) : (scoreData || 0.5); confidence += score * dimensionConfig.weight; } @@ -402,7 +451,9 @@ class MetacognitiveVerifier { for (const [dimension, config] of Object.entries(this.dimensions)) { const key = dimension.toLowerCase(); - const score = scores[key]; + const scoreData = scores[key]; + // Handle both object format {score: X} and legacy number format + const score = typeof scoreData === 'object' ? (scoreData.score || 0.5) : (scoreData || 0.5); if (score < config.criticalThreshold) { failures.push({