/**
 * Metacognitive Verifier Service
 * Implements AI self-verification before proposing actions
 *
 * Core Tractatus Service: Provides structured "pause and verify" mechanism
 * where AI checks its own reasoning before execution.
 *
 * Verification Checks:
 * 1. Alignment: Does action align with stated user goals?
 * 2. Coherence: Is reasoning internally consistent?
 * 3. Completeness: Are all requirements addressed?
 * 4. Safety: Could this action cause harm or confusion?
 * 5. Alternatives: Have better approaches been considered?
 */

const classifier = require('./InstructionPersistenceClassifier.service');
const validator = require('./CrossReferenceValidator.service');
const enforcer = require('./BoundaryEnforcer.service');
const monitor = require('./ContextPressureMonitor.service');
const logger = require('../utils/logger.util');

/**
 * Verification dimensions: each carries a weight (for the combined confidence
 * score) and a criticalThreshold (below which the dimension is flagged as a
 * critical failure).
 */
const VERIFICATION_DIMENSIONS = {
  ALIGNMENT: {
    name: 'Alignment',
    description: 'Action aligns with user goals and explicit instructions',
    weight: 0.3,
    criticalThreshold: 0.7
  },
  COHERENCE: {
    name: 'Coherence',
    description: 'Reasoning is internally consistent and logical',
    weight: 0.2,
    criticalThreshold: 0.7
  },
  COMPLETENESS: {
    name: 'Completeness',
    description: 'All requirements and constraints addressed',
    weight: 0.2,
    criticalThreshold: 0.8
  },
  SAFETY: {
    name: 'Safety',
    description: 'Action will not cause harm, confusion, or data loss',
    weight: 0.2,
    criticalThreshold: 0.9
  },
  ALTERNATIVES: {
    name: 'Alternatives',
    description: 'Better alternative approaches have been considered',
    weight: 0.1,
    criticalThreshold: 0.6
  }
};

/**
 * Confidence levels, ordered from strongest to weakest. `min` is the inclusive
 * lower bound of the confidence band; `action` is the recommended response.
 */
const CONFIDENCE_LEVELS = {
  HIGH: {
    min: 0.8,
    action: 'PROCEED',
    description: 'High confidence, proceed'
  },
  MEDIUM: {
    min: 0.6,
    action: 'PROCEED_WITH_CAUTION',
    description: 'Medium confidence, proceed with notification'
  },
  LOW: {
    min: 0.4,
    action: 'REQUEST_CONFIRMATION',
    description: 'Low confidence, request user confirmation'
  },
  VERY_LOW: {
    min: 0.0,
    action: 'REQUIRE_REVIEW',
    description: 'Very low confidence, require human review'
  }
};

/**
 * Escape a string so it can be embedded literally inside a RegExp.
 * Prevents parameter keys like "config.port" or "args[0]" from throwing or
 * silently mis-matching when used in _checkParameterConflicts.
 * @param {string} str - Raw string to escape
 * @returns {string} Regex-safe string
 */
function escapeRegExp(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

class MetacognitiveVerifier {
  constructor() {
    this.dimensions = VERIFICATION_DIMENSIONS;
    this.confidenceLevels = CONFIDENCE_LEVELS;
    this.classifier = classifier;
    this.validator = validator;
    this.enforcer = enforcer;
    this.monitor = monitor;

    // Statistics tracking. by_decision covers every decision value that
    // verify()/_makeVerificationDecision can produce (previously
    // PROCEED_WITH_CAUTION and REQUIRE_REVIEW were missing and silently
    // dropped by the guarded increment in verify()).
    this.stats = {
      total_verifications: 0,
      by_decision: {
        PROCEED: 0,
        PROCEED_WITH_CAUTION: 0,
        REQUEST_CONFIRMATION: 0,
        REQUEST_CLARIFICATION: 0,
        REQUIRE_REVIEW: 0,
        BLOCK: 0
      },
      average_confidence: 0,
      total_confidence_sum: 0
    };

    logger.info('MetacognitiveVerifier initialized');
  }

  /**
   * Verify a proposed action before execution
   * @param {Object} action - The proposed action
   * @param {Object} reasoning - The reasoning behind the action
   * @param {Object} context - Conversation/session context
   * @returns {Object} Verification result
   */
  verify(action, reasoning, context) {
    try {
      // Run all verification checks
      const alignmentScore = this._checkAlignment(action, reasoning, context);
      const coherenceScore = this._checkCoherence(action, reasoning, context);
      const completenessScore = this._checkCompleteness(action, reasoning, context);
      const safetyScore = this._checkSafety(action, reasoning, context);
      const alternativesScore = this._checkAlternatives(action, reasoning, context);

      // Calculate weighted confidence score
      const scores = {
        alignment: alignmentScore,
        coherence: coherenceScore,
        completeness: completenessScore,
        safety: safetyScore,
        alternatives: alternativesScore
      };

      const confidence = this._calculateConfidence(scores);

      // Determine confidence level
      const confidenceLevel = this._determineConfidenceLevel(confidence);

      // Check for critical failures
      const criticalFailures = this._checkCriticalFailures(scores);

      // Get pressure analysis
      const pressureAnalysis = this.monitor.analyzePressure(context);

      // Adjust confidence based on pressure
      const adjustedConfidence = this._adjustForPressure(
        confidence,
        pressureAnalysis
      );

      // Generate verification result
      const decision = this._makeVerificationDecision(
        adjustedConfidence,
        criticalFailures,
        pressureAnalysis
      );

      // Computed once and reused below (verification.recommendations and
      // verification.analysis.recommendations previously each triggered a
      // redundant call with identical arguments).
      const recommendations = this._generateRecommendations(
        scores,
        criticalFailures,
        pressureAnalysis
      );

      const verification = {
        confidence: adjustedConfidence,
        originalConfidence: confidence,
        level: confidenceLevel.action,
        description: confidenceLevel.description,
        checks: {
          alignment: {
            passed: alignmentScore.score >= 0.7,
            score: alignmentScore.score,
            issues: alignmentScore.issues || []
          },
          coherence: {
            passed: coherenceScore.score >= 0.7,
            score: coherenceScore.score,
            issues: coherenceScore.issues || []
          },
          completeness: {
            passed: completenessScore.score >= 0.8,
            score: completenessScore.score,
            missing_considerations: completenessScore.missing || []
          },
          safety: {
            passed: safetyScore.score >= 0.9,
            score: safetyScore.score,
            risk_level: safetyScore.riskLevel || 'UNKNOWN',
            concerns: safetyScore.concerns || []
          },
          alternatives: {
            passed: alternativesScore.score >= 0.6,
            score: alternativesScore.score,
            issues: alternativesScore.issues || []
          }
        },
        scores,
        criticalFailures,
        pressureLevel: pressureAnalysis.pressureName,
        // All three aliases kept for backward compatibility with consumers
        // that read any one of the historic names.
        pressure_adjustment: adjustedConfidence - confidence,
        confidence_adjustment: adjustedConfidence - confidence,
        pressureAdjustment: adjustedConfidence - confidence,
        threshold_adjusted: pressureAnalysis.pressureName !== 'NORMAL',
        required_confidence: pressureAnalysis.pressureName === 'CRITICAL' ? 0.8 : 0.6,
        requires_confirmation: decision === 'REQUEST_CONFIRMATION',
        recommendations,
        decision,
        reason: decision !== 'PROCEED' ?
          this._getDecisionReason(decision, scores, criticalFailures) :
          undefined,
        analysis: {
          failed_checks: criticalFailures.map(cf => cf.dimension),
          recommendations
        },
        suggestions: decision !== 'PROCEED' ?
          this._generateSuggestions(scores, criticalFailures) :
          undefined,
        timestamp: new Date()
      };

      // Track statistics
      this.stats.total_verifications++;
      this.stats.total_confidence_sum += adjustedConfidence;
      this.stats.average_confidence =
        this.stats.total_confidence_sum / this.stats.total_verifications;
      if (this.stats.by_decision[decision] !== undefined) {
        this.stats.by_decision[decision]++;
      }

      // Log verification
      if (verification.decision !== 'PROCEED') {
        logger.warn('Action verification flagged', {
          action: action.description?.substring(0, 50),
          decision: verification.decision,
          confidence: adjustedConfidence
        });
      }

      return verification;
    } catch (error) {
      logger.error('Verification error:', error);
      return this._failSafeVerification(action);
    }
  }

  /**
   * Quick verification for low-risk actions.
   * Only runs boundary enforcement and pressure checks; skips the five
   * dimension scores.
   * @param {Object} action - The proposed action
   * @param {Object} context - Conversation/session context
   * @returns {Object} Simplified verification result
   */
  quickVerify(action, context) {
    // Simplified verification for routine actions
    const boundaryCheck = this.enforcer.enforce(action, context);
    const pressureCheck = this.monitor.shouldProceed(action, context);

    if (!boundaryCheck.allowed || !pressureCheck.proceed) {
      return {
        confidence: 0.3,
        level: 'REQUIRE_REVIEW',
        decision: 'BLOCK',
        reason: 'Failed boundary or pressure check',
        timestamp: new Date()
      };
    }

    return {
      confidence: 0.7,
      level: 'PROCEED',
      decision: 'PROCEED',
      quickCheck: true,
      timestamp: new Date()
    };
  }

  /**
   * Private verification methods
   */

  /**
   * Score how well the action aligns with user goals and instructions.
   * @returns {{score: number, issues: string[]}} Clamped to [0, 1]
   */
  _checkAlignment(action, reasoning, context) {
    let score = 0.5; // Base score
    const issues = [];

    // Check if action parameters conflict with reasoning
    if (action.parameters && reasoning.explanation) {
      const paramConflict = this._checkParameterConflicts(action.parameters, reasoning);
      if (paramConflict) {
        score -= 0.4;
        issues.push('action parameters conflict with reasoning');
      }
    }

    // Check cross-reference validation
    const validation = this.validator.validate(action, context);
    if (validation.status === 'APPROVED') {
      score += 0.3;
    } else if (validation.status === 'WARNING') {
      score += 0.1;
      issues.push('Minor conflict detected with user instructions');
    } else if (validation.status === 'REJECTED') {
      score -= 0.3;
      issues.push('Action conflicts with explicit user instructions');
    }

    // Check if action addresses stated user goal
    if (reasoning.userGoal && reasoning.addresses) {
      score += 0.2;
    }

    // Check consistency with recent user statements
    if (context.recentUserStatements) {
      const consistencyScore = this._checkConsistencyWithStatements(
        action,
        context.recentUserStatements
      );
      score += consistencyScore * 0.2;
      if (consistencyScore < 0.5) {
        issues.push('Action may not be consistent with recent user statements');
      }
    }

    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      issues
    };
  }

  /**
   * Score internal consistency of the supplied reasoning.
   * @returns {{score: number, issues: string[]}} Clamped to [0, 1]
   */
  _checkCoherence(action, reasoning, context) {
    let score = 0.7; // Default to reasonable coherence
    const issues = [];

    // Check if reasoning steps are provided
    if (!reasoning.steps || reasoning.steps.length === 0) {
      score -= 0.2;
      issues.push('No reasoning steps provided');
    }

    // Check for logical consistency
    if (reasoning.assumptions && reasoning.conclusions) {
      const logicallySound = this._checkLogicalFlow(
        reasoning.assumptions,
        reasoning.conclusions
      );
      if (logicallySound) {
        score += 0.2;
      } else {
        score -= 0.3;
        issues.push('Logical inconsistency detected between assumptions and conclusions');
      }
    }

    // Check for internal contradictions
    if (this._hasContradictions(reasoning)) {
      score -= 0.4;
      issues.push('Internal contradictions detected in reasoning');
    }

    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      issues
    };
  }

  /**
   * Score whether all requirements, steps, and edge cases are covered.
   * @returns {{score: number, missing: string[]}} Clamped to [0, 1]
   */
  _checkCompleteness(action, reasoning, context) {
    let score = 0.5; // Base score
    const missing = [];

    // Check if reasoning has steps
    if (reasoning.steps && reasoning.steps.length > 0) {
      score += 0.2;

      // Check for quality of steps (comprehensive coverage)
      const stepCount = reasoning.steps.length;
      if (stepCount >= 4) {
        score += 0.2; // Comprehensive steps
      } else if (stepCount < 2) {
        score -= 0.1; // Too few steps
        missing.push('insufficient steps provided');
      }

      // For deployment actions, check for critical steps
      if (action.type === 'deploy'
        || action.parameters?.environment === 'production') {
        const stepsText = reasoning.steps.join(' ').toLowerCase();
        if (!stepsText.includes('test')) {
          missing.push('testing');
          score -= 0.2;
        }
        if (!stepsText.includes('backup')) {
          missing.push('backup');
          score -= 0.1;
        }
      }
    } else {
      missing.push('No reasoning steps provided');
      score -= 0.2;
    }

    // Check if all stated requirements are addressed
    if (context.requirements) {
      const unaddressed = context.requirements.filter(req =>
        !this._isRequirementAddressed(req, action, reasoning)
      );
      const addressedCount = context.requirements.length - unaddressed.length;
      score += (addressedCount / context.requirements.length) * 0.2;
      unaddressed.forEach(req => missing.push(`Requirement not addressed: ${req}`));
    }

    // Check for edge cases consideration
    if (reasoning.edgeCases && reasoning.edgeCases.length > 0) {
      score += 0.1;
    }

    // Check for error handling
    if (reasoning.errorHandling || action.errorHandling) {
      score += 0.1;
    }

    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      missing
    };
  }

  /**
   * Score the safety of the action (boundaries, destructive operations,
   * backups, pre-execution validation).
   * @returns {{score: number, riskLevel: string, concerns: string[]}}
   */
  _checkSafety(action, reasoning, context) {
    let score = 0.9; // Start with safe assumption
    const concerns = [];
    let riskLevel = 'LOW';

    // Check boundary enforcement
    const boundaryCheck = this.enforcer.enforce(action, context);
    if (!boundaryCheck.allowed) {
      score -= 0.5; // Major safety concern
      concerns.push('Action violates Tractatus boundaries');
      riskLevel = 'CRITICAL';
    }

    // Check for destructive operations in action type or description
    const destructivePatterns = [
      /delete|remove|drop|truncate/i,
      /force|--force|-f\s/i,
      /rm\s+-rf/i
    ];
    const actionText = (action.type || '') + ' ' +
      (action.description || '') + ' ' +
      (action.command || '');
    const isDestructive = destructivePatterns.some(pattern => pattern.test(actionText));
    if (isDestructive) {
      score -= 0.3;
      concerns.push('destructive operation');
      riskLevel = 'HIGH';
    }

    // Check if data backup is mentioned for risky operations
    if (score < 0.7 && !reasoning.backupMentioned) {
      score -= 0.1;
      concerns.push('No backup mentioned for risky operation');
    }

    // Check for validation before execution
    if (action.requiresValidation && !reasoning.validationPlanned) {
      score -= 0.1;
      concerns.push('No validation planned before execution');
    }

    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      riskLevel,
      concerns
    };
  }

  /**
   * Score whether alternative approaches were considered and a rationale
   * given for the chosen one.
   * @returns {{score: number, issues: string[]}} Clamped to [0, 1]
   */
  _checkAlternatives(action, reasoning, context) {
    let score = 0.5; // Base score
    const issues = [];

    // Support both camelCase and snake_case for alternatives
    const alternatives = reasoning.alternativesConsidered || reasoning.alternatives_considered;
    const explored = reasoning.explored;

    // Check if alternatives were considered
    if (alternatives && alternatives.length > 0) {
      score += 0.3;
    } else {
      issues.push('No alternatives considered');
    }

    // Check if rationale for chosen approach is provided
    if (reasoning.chosenBecause || reasoning.chosen_because) {
      score += 0.2;
    } else {
      issues.push('No rationale provided for chosen approach');
    }

    // Lower score if action seems like first idea without exploration
    if (!alternatives && !explored) {
      score -= 0.2;
      issues.push('Appears to be first idea without exploration');
    }

    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      issues
    };
  }

  /**
   * Combine per-dimension scores into a weighted confidence value in [0, 1].
   * Accepts both the object format {score: X, ...} and legacy plain numbers.
   */
  _calculateConfidence(scores) {
    let confidence = 0;
    for (const [dimension, dimensionConfig] of Object.entries(this.dimensions)) {
      const key = dimension.toLowerCase();
      const scoreData = scores[key];
      // ?? (not ||) so a legitimate score of 0 is not replaced by 0.5
      const score = typeof scoreData === 'object' ?
        (scoreData.score ?? 0.5) : (scoreData ?? 0.5);
      confidence += score * dimensionConfig.weight;
    }
    return Math.min(1.0, Math.max(0.0, confidence));
  }

  /**
   * Map a numeric confidence to its CONFIDENCE_LEVELS band.
   */
  _determineConfidenceLevel(confidence) {
    if (confidence >= CONFIDENCE_LEVELS.HIGH.min) {
      return CONFIDENCE_LEVELS.HIGH;
    }
    if (confidence >= CONFIDENCE_LEVELS.MEDIUM.min) {
      return CONFIDENCE_LEVELS.MEDIUM;
    }
    if (confidence >= CONFIDENCE_LEVELS.LOW.min) {
      return CONFIDENCE_LEVELS.LOW;
    }
    return CONFIDENCE_LEVELS.VERY_LOW;
  }

  /**
   * List dimensions whose score falls below their critical threshold.
   * @returns {Array<{dimension: string, score: number, threshold: number, severity: string}>}
   */
  _checkCriticalFailures(scores) {
    const failures = [];
    for (const [dimension, config] of Object.entries(this.dimensions)) {
      const key = dimension.toLowerCase();
      const scoreData = scores[key];
      // ?? (not ||) so a legitimate score of 0 is not replaced by 0.5
      const score = typeof scoreData === 'object' ?
        (scoreData.score ?? 0.5) : (scoreData ?? 0.5);
      if (score < config.criticalThreshold) {
        failures.push({
          dimension: config.name,
          score,
          threshold: config.criticalThreshold,
          severity: score < 0.3 ? 'CRITICAL' : 'WARNING'
        });
      }
    }
    return failures;
  }

  /**
   * Reduce confidence proportionally to the current context pressure level.
   */
  _adjustForPressure(confidence, pressureAnalysis) {
    // Reduce confidence based on pressure level
    const pressureReduction = {
      NORMAL: 0,
      ELEVATED: 0.05,
      HIGH: 0.10,
      CRITICAL: 0.15,
      DANGEROUS: 0.25
    };
    const reduction = pressureReduction[pressureAnalysis.pressureName] || 0;
    return Math.max(0.0, confidence - reduction);
  }

  /**
   * Build actionable recommendations from low scores, critical failures,
   * and the pressure analysis.
   */
  _generateRecommendations(scores, criticalFailures, pressureAnalysis) {
    const recommendations = [];

    // Recommendations based on low scores.
    // scores holds {score, issues} objects (or legacy numbers); the numeric
    // value must be unwrapped before comparison — comparing the object itself
    // with < 0.5 was always false, so these were never emitted.
    for (const [key, scoreData] of Object.entries(scores)) {
      const score = typeof scoreData === 'object' ?
        (scoreData.score ?? 0.5) : (scoreData ?? 0.5);
      if (score < 0.5) {
        const dimension = this.dimensions[key.toUpperCase()];
        if (!dimension) continue; // skip unknown score keys defensively
        recommendations.push({
          type: 'LOW_SCORE',
          dimension: dimension.name,
          score,
          message: `Low ${dimension.name.toLowerCase()} score - ${dimension.description}`,
          action: `Improve ${dimension.name.toLowerCase()} before proceeding`
        });
      }
    }

    // Recommendations based on critical failures
    for (const failure of criticalFailures) {
      recommendations.push({
        type: 'CRITICAL_FAILURE',
        dimension: failure.dimension,
        severity: failure.severity,
        message: `${failure.dimension} below critical threshold`,
        action: 'Address this issue before proceeding'
      });
    }

    // Include pressure recommendations
    if (pressureAnalysis.recommendations) {
      recommendations.push(...pressureAnalysis.recommendations);
    }

    return recommendations;
  }

  /**
   * Translate confidence + failures + pressure into a final decision string.
   */
  _makeVerificationDecision(confidence, criticalFailures, pressureAnalysis) {
    // Block if critical failures
    if (criticalFailures.some(f => f.severity === 'CRITICAL')) {
      return 'BLOCK';
    }
    // Block if dangerous pressure
    if (pressureAnalysis.pressureLevel >= 4) {
      return 'BLOCK';
    }
    // Require review if very low confidence
    if (confidence < 0.4) {
      return 'REQUIRE_REVIEW';
    }
    // Request confirmation if low confidence
    if (confidence < 0.6) {
      return 'REQUEST_CONFIRMATION';
    }
    // Proceed with caution if medium confidence
    if (confidence < 0.8) {
      return 'PROCEED_WITH_CAUTION';
    }
    // Proceed if high confidence
    return 'PROCEED';
  }

  /**
   * Helper methods
   */

  _checkConsistencyWithStatements(action, statements) {
    // Simplified consistency check — placeholder, always neutral
    return 0.5; // Default to neutral
  }

  _checkLogicalFlow(assumptions, conclusions) {
    // Simplified logical flow check — placeholder, always sound
    return true; // Assume logical unless obviously not
  }

  /**
   * Heuristic contradiction detector over the reasoning text.
   * Returns true when both halves of any contradiction pattern pair match.
   */
  _hasContradictions(reasoning) {
    // Check for contradictory statements in reasoning
    if (!reasoning.explanation && !reasoning.steps) {
      return false;
    }
    const text = (reasoning.explanation || '') + ' ' + (reasoning.steps || []).join(' ');

    // Simple contradiction patterns
    const contradictionPatterns = [
      [/should use/i, /should not use/i],
      [/will use/i, /will not use/i],
      [/must.*true/i, /must.*false/i],
      [/enable/i, /disable/i]
    ];

    for (const [pattern1, pattern2] of contradictionPatterns) {
      if (pattern1.test(text) && pattern2.test(text)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Detect parameter values that contradict explicit assignments in the
   * reasoning text ("key: value" / "key = value" mentions).
   * @returns {boolean} true if any explicit assignment differs from the action
   */
  _checkParameterConflicts(parameters, reasoning) {
    // Check if parameter values in action conflict with reasoning
    // Only flag conflicts for explicit parameter assignments, not casual mentions
    const reasoningText = (reasoning.explanation || '') + ' ' +
      (reasoning.evidence || []).join(' ');

    for (const [key, value] of Object.entries(parameters)) {
      const valueStr = String(value);
      // Check for explicit parameter assignments only (key: value or key = value)
      // Pattern matches "port: 27017" or "port = 27017" but not "port read".
      // The key is regex-escaped so keys containing metacharacters (e.g.
      // "config.port") cannot throw or match unintended text.
      const keyPattern = new RegExp(`\\b${escapeRegExp(key)}\\s*[:=]\\s*([\\w.-]+)`, 'i');
      const match = reasoningText.match(keyPattern);
      if (match && match[1] !== valueStr) {
        return true; // Conflict: reasoning explicitly assigns different value
      }
    }
    return false;
  }

  _isRequirementAddressed(requirement, action, reasoning) {
    // Simplified requirement matching: substring check on the description
    const actionText = (action.description || '').toLowerCase();
    const requirementText = requirement.toLowerCase();
    return actionText.includes(requirementText);
  }

  /**
   * Fail-safe result returned when verification itself throws: every check
   * fails and the action is blocked pending human review.
   */
  _failSafeVerification(action) {
    return {
      confidence: 0.3,
      originalConfidence: 0.3,
      level: 'REQUIRE_REVIEW',
      description: 'Verification failed, requiring human review',
      decision: 'BLOCK',
      checks: {
        alignment: { passed: false, score: 0, issues: ['verification error'] },
        coherence: { passed: false, score: 0, issues: ['verification error'] },
        completeness: { passed: false, score: 0, missing_considerations: ['verification error'] },
        safety: { passed: false, score: 0, risk_level: 'HIGH', concerns: ['verification error'] },
        alternatives: { passed: false, score: 0, issues: ['verification error'] }
      },
      scores: {},
      criticalFailures: [{
        dimension: 'ERROR',
        score: 0,
        threshold: 1,
        severity: 'CRITICAL'
      }],
      pressureLevel: 'ELEVATED',
      pressureAdjustment: 0,
      recommendations: [{
        type: 'ERROR',
        severity: 'CRITICAL',
        message: 'Verification process encountered error',
        action: 'Require human review before proceeding'
      }],
      timestamp: new Date()
    };
  }

  /**
   * Get decision reason (exposed for tests)
   */
  _getDecisionReason(decision, scores, criticalFailures) {
    if (decision === 'BLOCK') {
      return 'Critical failures detected: ' +
        criticalFailures.map(cf => cf.dimension).join(', ');
    }
    if (decision === 'REQUEST_CLARIFICATION') {
      return 'Low confidence in alignment or completeness';
    }
    if (decision === 'REQUEST_CONFIRMATION') {
      return 'Moderate confidence, user confirmation recommended';
    }
    return 'Proceeding with high confidence';
  }

  /**
   * Generate suggestions for improvement (exposed for tests)
   */
  _generateSuggestions(scores, criticalFailures) {
    const suggestions = [];
    if (scores.alignment && scores.alignment.score < 0.7) {
      suggestions.push('Clarify how this action aligns with user goals');
    }
    if (scores.coherence && scores.coherence.score < 0.7) {
      suggestions.push('Review reasoning for logical consistency');
    }
    if (scores.completeness && scores.completeness.score < 0.8) {
      suggestions.push('Ensure all requirements are addressed');
    }
    if (scores.safety && scores.safety.score < 0.9) {
      suggestions.push('Verify safety implications of this action');
    }
    if (scores.alternatives && scores.alternatives.score < 0.6) {
      suggestions.push('Consider alternative approaches');
    }
    return suggestions;
  }

  /**
   * Assess evidence quality (exposed for tests)
   * @returns {number} Quality score in [0, 1]
   */
  _assessEvidenceQuality(reasoning) {
    if (!reasoning || !reasoning.evidence) return 0.0;
    const evidence = reasoning.evidence;
    if (!Array.isArray(evidence) || evidence.length === 0) return 0.0;

    let qualityScore = 0;

    // Check for explicit user instructions
    const hasExplicit = evidence.some(e =>
      typeof e === 'string' &&
      /user\s+(explicitly|specifically|said|requested|instructed)/i.test(e)
    );
    if (hasExplicit) qualityScore += 0.4;

    // Check for documentation references
    const hasDocs = evidence.some(e =>
      typeof e === 'string' &&
      /documentation|docs|spec|standard/i.test(e)
    );
    if (hasDocs) qualityScore += 0.3;

    // Check for testing/validation
    const hasValidation = evidence.some(e =>
      typeof e === 'string' &&
      /test|validate|verify|confirm/i.test(e)
    );
    if (hasValidation) qualityScore += 0.3;

    // Penalize weak evidence
    const hasWeak = evidence.some(e =>
      typeof e === 'string' &&
      /think|maybe|probably|assume/i.test(e)
    );
    if (hasWeak) qualityScore -= 0.3;

    return Math.max(0, Math.min(1, qualityScore));
  }

  /**
   * Assess reasoning quality (exposed for tests)
   * @returns {number} Quality score in [0, 1]
   */
  _assessReasoningQuality(reasoning) {
    if (!reasoning) return 0.0;
    let score = 0;

    // Check explanation quality (length as a rough proxy)
    if (reasoning.explanation) {
      const length = reasoning.explanation.length;
      if (length > 100) score += 0.3;
      else if (length > 50) score += 0.1;
    }

    // Check evidence
    const evidenceScore = this._assessEvidenceQuality(reasoning);
    score += evidenceScore * 0.4;

    // Check steps
    if (reasoning.steps && Array.isArray(reasoning.steps) && reasoning.steps.length > 0) {
      score += Math.min(0.3, reasoning.steps.length * 0.1);
    }

    // Check alternatives
    if (reasoning.alternatives_considered && reasoning.alternatives_considered.length > 0) {
      score += 0.2;
    }

    return Math.min(1.0, score);
  }

  /**
   * Make verification decision (exposed for tests)
   * @returns {{decision: string, requires_confirmation: boolean}}
   */
  _makeDecision(confidence, context) {
    const pressureLevel = context.pressure_level || 'NORMAL';

    // Block at dangerous pressure regardless of confidence
    if (pressureLevel === 'DANGEROUS') {
      return { decision: 'BLOCK', requires_confirmation: true };
    }

    // Adjust thresholds based on pressure
    const proceedThreshold = pressureLevel === 'CRITICAL' ? 0.8 : 0.7;
    const confirmThreshold = pressureLevel === 'HIGH' ? 0.6 : 0.5;

    if (confidence >= proceedThreshold) {
      return { decision: 'PROCEED', requires_confirmation: false };
    } else if (confidence >= confirmThreshold) {
      return { decision: 'REQUEST_CONFIRMATION', requires_confirmation: true };
    } else if (confidence >= 0.3) {
      return { decision: 'REQUEST_CLARIFICATION', requires_confirmation: true };
    } else {
      return { decision: 'BLOCK', requires_confirmation: true };
    }
  }

  /**
   * Get verification statistics
   * @returns {Object} Statistics object
   */
  getStats() {
    return {
      ...this.stats,
      timestamp: new Date()
    };
  }
}

// Singleton instance
const verifier = new MetacognitiveVerifier();
module.exports = verifier;