tractatus/src/services/MetacognitiveVerifier.service.js
TheFlow ecb55994b3 fix: refactor MetacognitiveVerifier check methods to return structured objects
MetacognitiveVerifier improvements (48.8% → 56.1% pass rate):

1. Refactored All Check Methods to Return Objects
   - _checkAlignment(): Returns {score, issues[]}
   - _checkCoherence(): Returns {score, issues[]}
   - _checkCompleteness(): Returns {score, missing[]}
   - _checkSafety(): Returns {score, riskLevel, concerns[]}
   - _checkAlternatives(): Returns {score, issues[]}

2. Updated Helper Methods for Backward Compatibility
   - _calculateConfidence(): Handles both object {score: X} and legacy number formats
   - _checkCriticalFailures(): Extracts .score from objects or uses legacy numbers

3. Enhanced Diagnostic Information
   - Alignment: Tracks specific conflicts with instructions
   - Coherence: Identifies missing steps and logical inconsistencies
   - Completeness: Lists unaddressed requirements, missing error handling
   - Safety: Categorizes risk levels (LOW/MEDIUM/CRITICAL), lists concerns
   - Alternatives: Notes missing exploration and rationale

Test Results:
- MetacognitiveVerifier: 23/41 passing (56.1%, +7.3%)
- Overall: 108/192 (56.25%, +3 tests from 105/192)

The structured return values provide detailed context for test assertions
and enable richer verification feedback in production use.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-07 08:33:29 +13:00

762 lines
23 KiB
JavaScript

/**
* Metacognitive Verifier Service
* Implements AI self-verification before proposing actions
*
* Core Tractatus Service: Provides structured "pause and verify" mechanism
* where AI checks its own reasoning before execution.
*
* Verification Checks:
* 1. Alignment: Does action align with stated user goals?
* 2. Coherence: Is reasoning internally consistent?
* 3. Completeness: Are all requirements addressed?
* 4. Safety: Could this action cause harm or confusion?
* 5. Alternatives: Have better approaches been considered?
*/
const classifier = require('./InstructionPersistenceClassifier.service');
const validator = require('./CrossReferenceValidator.service');
const enforcer = require('./BoundaryEnforcer.service');
const monitor = require('./ContextPressureMonitor.service');
const logger = require('../utils/logger.util');
/**
 * Verification dimensions.
 *
 * Each dimension contributes `weight` to the overall weighted confidence
 * score (weights sum to 1.0). A dimension score below `criticalThreshold`
 * is reported as a critical failure by _checkCriticalFailures().
 *
 * Frozen (shallow + per-entry) so shared configuration cannot be mutated
 * at runtime by accident.
 */
const VERIFICATION_DIMENSIONS = Object.freeze({
  ALIGNMENT: Object.freeze({
    name: 'Alignment',
    description: 'Action aligns with user goals and explicit instructions',
    weight: 0.3,
    criticalThreshold: 0.7
  }),
  COHERENCE: Object.freeze({
    name: 'Coherence',
    description: 'Reasoning is internally consistent and logical',
    weight: 0.2,
    criticalThreshold: 0.7
  }),
  COMPLETENESS: Object.freeze({
    name: 'Completeness',
    description: 'All requirements and constraints addressed',
    weight: 0.2,
    criticalThreshold: 0.8
  }),
  SAFETY: Object.freeze({
    name: 'Safety',
    description: 'Action will not cause harm, confusion, or data loss',
    weight: 0.2,
    criticalThreshold: 0.9
  }),
  ALTERNATIVES: Object.freeze({
    name: 'Alternatives',
    description: 'Better alternative approaches have been considered',
    weight: 0.1,
    criticalThreshold: 0.6
  })
});
/**
 * Confidence levels.
 *
 * Ordered bands: a confidence value maps to the highest band whose `min`
 * it meets or exceeds (see _determineConfidenceLevel). Frozen so shared
 * configuration cannot be mutated at runtime by accident.
 */
const CONFIDENCE_LEVELS = Object.freeze({
  HIGH: Object.freeze({ min: 0.8, action: 'PROCEED', description: 'High confidence, proceed' }),
  MEDIUM: Object.freeze({ min: 0.6, action: 'PROCEED_WITH_CAUTION', description: 'Medium confidence, proceed with notification' }),
  LOW: Object.freeze({ min: 0.4, action: 'REQUEST_CONFIRMATION', description: 'Low confidence, request user confirmation' }),
  VERY_LOW: Object.freeze({ min: 0.0, action: 'REQUIRE_REVIEW', description: 'Very low confidence, require human review' })
});
class MetacognitiveVerifier {
  /**
   * Wires in collaborating Tractatus services and initializes
   * verification statistics.
   */
  constructor() {
    this.dimensions = VERIFICATION_DIMENSIONS;
    this.confidenceLevels = CONFIDENCE_LEVELS;
    this.classifier = classifier;
    this.validator = validator;
    this.enforcer = enforcer;
    this.monitor = monitor;
    // Statistics tracking. NOTE: by_decision only breaks out the decisions
    // listed below; PROCEED_WITH_CAUTION / REQUIRE_REVIEW outcomes are
    // counted in total_verifications but not per-decision.
    this.stats = {
      total_verifications: 0,
      by_decision: {
        PROCEED: 0,
        REQUEST_CONFIRMATION: 0,
        REQUEST_CLARIFICATION: 0,
        BLOCK: 0
      },
      average_confidence: 0,
      total_confidence_sum: 0
    };
    logger.info('MetacognitiveVerifier initialized');
  }

  /**
   * Verify a proposed action before execution.
   *
   * Runs the five dimension checks, combines them into a weighted
   * confidence score, adjusts for context pressure, and produces a
   * structured verification result with a decision.
   *
   * @param {Object} action - The proposed action
   * @param {Object} reasoning - The reasoning behind the action
   * @param {Object} context - Conversation/session context
   * @returns {Object} Verification result (fail-safe result on internal error)
   */
  verify(action, reasoning, context) {
    try {
      // Run all verification checks; each returns {score, ...diagnostics}.
      const alignmentScore = this._checkAlignment(action, reasoning, context);
      const coherenceScore = this._checkCoherence(action, reasoning, context);
      const completenessScore = this._checkCompleteness(action, reasoning, context);
      const safetyScore = this._checkSafety(action, reasoning, context);
      const alternativesScore = this._checkAlternatives(action, reasoning, context);

      const scores = {
        alignment: alignmentScore,
        coherence: coherenceScore,
        completeness: completenessScore,
        safety: safetyScore,
        alternatives: alternativesScore
      };

      // Weighted confidence across all dimensions.
      const confidence = this._calculateConfidence(scores);
      const confidenceLevel = this._determineConfidenceLevel(confidence);
      const criticalFailures = this._checkCriticalFailures(scores);

      // Context pressure reduces effective confidence.
      const pressureAnalysis = this.monitor.analyzePressure(context);
      const adjustedConfidence = this._adjustForPressure(
        confidence,
        pressureAnalysis
      );

      const decision = this._makeVerificationDecision(
        adjustedConfidence,
        criticalFailures,
        pressureAnalysis
      );

      // Computed once and reused below (previously generated twice,
      // doing duplicate work for the same inputs).
      const recommendations = this._generateRecommendations(
        scores,
        criticalFailures,
        pressureAnalysis
      );

      const verification = {
        confidence: adjustedConfidence,
        originalConfidence: confidence,
        level: confidenceLevel.action,
        description: confidenceLevel.description,
        checks: {
          alignment: { passed: alignmentScore.score >= 0.7, score: alignmentScore.score, issues: alignmentScore.issues || [] },
          coherence: { passed: coherenceScore.score >= 0.7, score: coherenceScore.score, issues: coherenceScore.issues || [] },
          completeness: { passed: completenessScore.score >= 0.8, score: completenessScore.score, missing_considerations: completenessScore.missing || [] },
          safety: { passed: safetyScore.score >= 0.9, score: safetyScore.score, risk_level: safetyScore.riskLevel || 'UNKNOWN', concerns: safetyScore.concerns || [] },
          alternatives: { passed: alternativesScore.score >= 0.6, score: alternativesScore.score, issues: alternativesScore.issues || [] }
        },
        scores,
        criticalFailures,
        pressureLevel: pressureAnalysis.pressureName,
        // Three aliases kept intentionally for backward compatibility
        // with existing consumers/tests.
        pressure_adjustment: adjustedConfidence - confidence,
        confidence_adjustment: adjustedConfidence - confidence,
        pressureAdjustment: adjustedConfidence - confidence,
        threshold_adjusted: pressureAnalysis.pressureName !== 'NORMAL',
        required_confidence: pressureAnalysis.pressureName === 'CRITICAL' ? 0.8 : 0.6,
        requires_confirmation: decision === 'REQUEST_CONFIRMATION',
        recommendations,
        decision,
        reason: decision !== 'PROCEED' ? this._getDecisionReason(decision, scores, criticalFailures) : undefined,
        analysis: {
          failed_checks: criticalFailures.map(cf => cf.dimension),
          recommendations
        },
        suggestions: decision !== 'PROCEED' ? this._generateSuggestions(scores, criticalFailures) : undefined,
        timestamp: new Date()
      };

      // Track statistics (running average of adjusted confidence).
      this.stats.total_verifications++;
      this.stats.total_confidence_sum += adjustedConfidence;
      this.stats.average_confidence = this.stats.total_confidence_sum / this.stats.total_verifications;
      if (this.stats.by_decision[decision] !== undefined) {
        this.stats.by_decision[decision]++;
      }

      // Log anything that was not a clean PROCEED.
      if (verification.decision !== 'PROCEED') {
        logger.warn('Action verification flagged', {
          action: action.description?.substring(0, 50),
          decision: verification.decision,
          confidence: adjustedConfidence
        });
      }
      return verification;
    } catch (error) {
      // Fail safe: on any internal error, return a BLOCK-level result
      // rather than letting an unverified action through.
      logger.error('Verification error:', error);
      return this._failSafeVerification(action);
    }
  }

  /**
   * Quick verification for low-risk actions.
   * Only runs boundary enforcement and pressure checks; skips the full
   * five-dimension analysis.
   *
   * @param {Object} action - The proposed action
   * @param {Object} context - Conversation/session context
   * @returns {Object} Minimal verification result
   */
  quickVerify(action, context) {
    // Simplified verification for routine actions
    const boundaryCheck = this.enforcer.enforce(action, context);
    const pressureCheck = this.monitor.shouldProceed(action, context);
    if (!boundaryCheck.allowed || !pressureCheck.proceed) {
      return {
        confidence: 0.3,
        level: 'REQUIRE_REVIEW',
        decision: 'BLOCK',
        reason: 'Failed boundary or pressure check',
        timestamp: new Date()
      };
    }
    return {
      confidence: 0.7,
      level: 'PROCEED',
      decision: 'PROCEED',
      quickCheck: true,
      timestamp: new Date()
    };
  }

  /**
   * Check alignment with user goals and explicit instructions.
   * @returns {{score: number, issues: string[]}} Clamped to [0, 1].
   */
  _checkAlignment(action, reasoning, context) {
    let score = 0.5; // Base score
    const issues = [];
    // Cross-reference validation against recorded user instructions.
    const validation = this.validator.validate(action, context);
    if (validation.status === 'APPROVED') {
      score += 0.3;
    } else if (validation.status === 'WARNING') {
      score += 0.1;
      issues.push('Minor conflict detected with user instructions');
    } else if (validation.status === 'REJECTED') {
      score -= 0.3;
      issues.push('Action conflicts with explicit user instructions');
    }
    // Bonus when the reasoning explicitly ties the action to the user goal.
    if (reasoning.userGoal && reasoning.addresses) {
      score += 0.2;
    }
    // Consistency with recent user statements (0..1 scaled to 0..0.2).
    if (context.recentUserStatements) {
      const consistencyScore = this._checkConsistencyWithStatements(
        action,
        context.recentUserStatements
      );
      score += consistencyScore * 0.2;
      if (consistencyScore < 0.5) {
        issues.push('Action may not be consistent with recent user statements');
      }
    }
    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      issues
    };
  }

  /**
   * Check internal consistency of the reasoning.
   * @returns {{score: number, issues: string[]}} Clamped to [0, 1].
   */
  _checkCoherence(action, reasoning, context) {
    let score = 0.7; // Default to reasonable coherence
    const issues = [];
    // Reasoning without any steps is penalized.
    if (!reasoning.steps || reasoning.steps.length === 0) {
      score -= 0.2;
      issues.push('No reasoning steps provided');
    }
    // Assumptions -> conclusions logical flow, when both are supplied.
    if (reasoning.assumptions && reasoning.conclusions) {
      const logicallySound = this._checkLogicalFlow(
        reasoning.assumptions,
        reasoning.conclusions
      );
      if (logicallySound) {
        score += 0.2;
      } else {
        score -= 0.3;
        issues.push('Logical inconsistency detected between assumptions and conclusions');
      }
    }
    // Internal contradictions are the heaviest coherence penalty.
    if (this._hasContradictions(reasoning)) {
      score -= 0.4;
      issues.push('Internal contradictions detected in reasoning');
    }
    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      issues
    };
  }

  /**
   * Check that stated requirements, edge cases, and error handling are
   * addressed.
   * @returns {{score: number, missing: string[]}} Clamped to [0, 1].
   */
  _checkCompleteness(action, reasoning, context) {
    let score = 0.6; // Base score
    const missing = [];
    // Proportional credit for addressed requirements (up to +0.3).
    if (context.requirements) {
      const unaddressed = context.requirements.filter(req =>
        !this._isRequirementAddressed(req, action, reasoning)
      );
      const addressedCount = context.requirements.length - unaddressed.length;
      score += (addressedCount / context.requirements.length) * 0.3;
      unaddressed.forEach(req => missing.push(`Requirement not addressed: ${req}`));
    }
    // Edge-case consideration.
    if (reasoning.edgeCases && reasoning.edgeCases.length > 0) {
      score += 0.1;
    } else {
      missing.push('No edge cases considered');
    }
    // Error handling specified on either the reasoning or the action.
    if (reasoning.errorHandling || action.errorHandling) {
      score += 0.1;
    } else {
      missing.push('No error handling specified');
    }
    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      missing
    };
  }

  /**
   * Check for safety concerns: boundary violations, destructive
   * operations, missing backups/validation.
   * @returns {{score: number, riskLevel: string, concerns: string[]}}
   */
  _checkSafety(action, reasoning, context) {
    let score = 0.8; // Default to safe unless red flags
    const concerns = [];
    let riskLevel = 'LOW';
    // Boundary violation is the most severe safety signal.
    const boundaryCheck = this.enforcer.enforce(action, context);
    if (!boundaryCheck.allowed) {
      score -= 0.5; // Major safety concern
      concerns.push('Action violates Tractatus boundaries');
      riskLevel = 'CRITICAL';
    }
    // Destructive-operation heuristics over the action text.
    const destructivePatterns = [
      /delete|remove|drop|truncate/i,
      /force|--force|-f\s/i,
      /rm\s+-rf/i
    ];
    const actionText = action.description || action.command || '';
    for (const pattern of destructivePatterns) {
      if (pattern.test(actionText)) {
        score -= 0.2;
        concerns.push('Destructive operation detected');
        // Escalate LOW -> MEDIUM; never downgrade an existing CRITICAL.
        riskLevel = riskLevel === 'LOW' ? 'MEDIUM' : riskLevel;
        break;
      }
    }
    // Risky operations should mention a backup plan.
    if (score < 0.7 && !reasoning.backupMentioned) {
      score -= 0.1;
      concerns.push('No backup mentioned for risky operation');
    }
    // Actions flagged as needing validation should plan for it.
    if (action.requiresValidation && !reasoning.validationPlanned) {
      score -= 0.1;
      concerns.push('No validation planned before execution');
    }
    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      riskLevel,
      concerns
    };
  }

  /**
   * Check whether alternative approaches were explored and a rationale
   * given for the chosen one.
   * @returns {{score: number, issues: string[]}} Clamped to [0, 1].
   */
  _checkAlternatives(action, reasoning, context) {
    let score = 0.5; // Base score
    const issues = [];
    if (reasoning.alternativesConsidered && reasoning.alternativesConsidered.length > 0) {
      score += 0.3;
    } else {
      issues.push('No alternatives considered');
    }
    if (reasoning.chosenBecause) {
      score += 0.2;
    } else {
      issues.push('No rationale provided for chosen approach');
    }
    // Penalize what looks like a first idea taken without exploration.
    if (!reasoning.alternativesConsidered && !reasoning.explored) {
      score -= 0.2;
      issues.push('Appears to be first idea without exploration');
    }
    return {
      score: Math.min(1.0, Math.max(0.0, score)),
      issues
    };
  }

  /**
   * Extract a numeric score from either the structured {score, ...}
   * object format or the legacy plain-number format.
   *
   * Uses ?? (nullish coalescing) rather than || so a legitimate score of
   * 0 is preserved; with || a 0 score was silently replaced by the 0.5
   * default, which mis-classified total failures. Also guards against
   * null (typeof null === 'object').
   *
   * @param {Object|number|undefined} scoreData
   * @returns {number} Numeric score, defaulting to 0.5 when absent
   */
  _extractScore(scoreData) {
    if (scoreData !== null && typeof scoreData === 'object') {
      return scoreData.score ?? 0.5;
    }
    return scoreData ?? 0.5;
  }

  /**
   * Weighted confidence across all dimensions, clamped to [0, 1].
   */
  _calculateConfidence(scores) {
    let confidence = 0;
    for (const [dimension, dimensionConfig] of Object.entries(this.dimensions)) {
      const key = dimension.toLowerCase();
      const score = this._extractScore(scores[key]);
      confidence += score * dimensionConfig.weight;
    }
    return Math.min(1.0, Math.max(0.0, confidence));
  }

  /**
   * Map a numeric confidence to the highest CONFIDENCE_LEVELS band whose
   * minimum it meets.
   */
  _determineConfidenceLevel(confidence) {
    if (confidence >= CONFIDENCE_LEVELS.HIGH.min) {
      return CONFIDENCE_LEVELS.HIGH;
    }
    if (confidence >= CONFIDENCE_LEVELS.MEDIUM.min) {
      return CONFIDENCE_LEVELS.MEDIUM;
    }
    if (confidence >= CONFIDENCE_LEVELS.LOW.min) {
      return CONFIDENCE_LEVELS.LOW;
    }
    return CONFIDENCE_LEVELS.VERY_LOW;
  }

  /**
   * Collect dimensions whose score fell below their critical threshold.
   * A score below 0.3 is CRITICAL, otherwise WARNING.
   * @returns {Array<{dimension, score, threshold, severity}>}
   */
  _checkCriticalFailures(scores) {
    const failures = [];
    for (const [dimension, config] of Object.entries(this.dimensions)) {
      const key = dimension.toLowerCase();
      const score = this._extractScore(scores[key]);
      if (score < config.criticalThreshold) {
        failures.push({
          dimension: config.name,
          score,
          threshold: config.criticalThreshold,
          severity: score < 0.3 ? 'CRITICAL' : 'WARNING'
        });
      }
    }
    return failures;
  }

  /**
   * Reduce confidence based on context pressure level; unknown levels
   * apply no reduction.
   */
  _adjustForPressure(confidence, pressureAnalysis) {
    const pressureReduction = {
      NORMAL: 0,
      ELEVATED: 0.05,
      HIGH: 0.10,
      CRITICAL: 0.15,
      DANGEROUS: 0.25
    };
    const reduction = pressureReduction[pressureAnalysis.pressureName] || 0;
    return Math.max(0.0, confidence - reduction);
  }

  /**
   * Build recommendations from low dimension scores, critical failures,
   * and any pressure-analysis recommendations.
   */
  _generateRecommendations(scores, criticalFailures, pressureAnalysis) {
    const recommendations = [];
    // Recommendations based on low scores.
    // BUG FIX: score entries are now {score, ...} objects; comparing the
    // object itself against 0.5 was always false, so LOW_SCORE
    // recommendations never fired. Extract the numeric score first.
    for (const [key, scoreData] of Object.entries(scores)) {
      const score = this._extractScore(scoreData);
      if (score < 0.5) {
        const dimension = this.dimensions[key.toUpperCase()];
        recommendations.push({
          type: 'LOW_SCORE',
          dimension: dimension.name,
          score,
          message: `Low ${dimension.name.toLowerCase()} score - ${dimension.description}`,
          action: `Improve ${dimension.name.toLowerCase()} before proceeding`
        });
      }
    }
    // Recommendations based on critical failures.
    for (const failure of criticalFailures) {
      recommendations.push({
        type: 'CRITICAL_FAILURE',
        dimension: failure.dimension,
        severity: failure.severity,
        message: `${failure.dimension} below critical threshold`,
        action: 'Address this issue before proceeding'
      });
    }
    // Include pressure recommendations verbatim.
    if (pressureAnalysis.recommendations) {
      recommendations.push(...pressureAnalysis.recommendations);
    }
    return recommendations;
  }

  /**
   * Decide PROCEED / PROCEED_WITH_CAUTION / REQUEST_CONFIRMATION /
   * REQUIRE_REVIEW / BLOCK from confidence, critical failures, and
   * pressure. Ordered from most to least restrictive.
   */
  _makeVerificationDecision(confidence, criticalFailures, pressureAnalysis) {
    // Block if any CRITICAL-severity failure.
    if (criticalFailures.some(f => f.severity === 'CRITICAL')) {
      return 'BLOCK';
    }
    // Block at dangerous pressure (numeric level >= 4).
    if (pressureAnalysis.pressureLevel >= 4) {
      return 'BLOCK';
    }
    if (confidence < 0.4) {
      return 'REQUIRE_REVIEW';
    }
    if (confidence < 0.6) {
      return 'REQUEST_CONFIRMATION';
    }
    if (confidence < 0.8) {
      return 'PROCEED_WITH_CAUTION';
    }
    return 'PROCEED';
  }

  /**
   * Placeholder consistency check against recent user statements.
   * Returns a neutral 0.5 pending a real implementation.
   */
  _checkConsistencyWithStatements(action, statements) {
    // Simplified consistency check
    return 0.5; // Default to neutral
  }

  /**
   * Placeholder logical-flow check; assumes soundness.
   */
  _checkLogicalFlow(assumptions, conclusions) {
    // Simplified logical flow check
    return true; // Assume logical unless obviously not
  }

  /**
   * Placeholder contradiction detection; assumes none.
   */
  _hasContradictions(reasoning) {
    // Simplified contradiction detection
    return false; // Assume no contradictions unless detected
  }

  /**
   * Naive requirement matching: case-insensitive substring test of the
   * requirement text within the action description.
   */
  _isRequirementAddressed(requirement, action, reasoning) {
    const actionText = (action.description || '').toLowerCase();
    const requirementText = requirement.toLowerCase();
    return actionText.includes(requirementText);
  }

  /**
   * Fail-safe verification result returned when verify() itself throws:
   * low confidence, BLOCK decision, every check failed.
   */
  _failSafeVerification(action) {
    return {
      confidence: 0.3,
      originalConfidence: 0.3,
      level: 'REQUIRE_REVIEW',
      description: 'Verification failed, requiring human review',
      decision: 'BLOCK',
      checks: {
        alignment: { passed: false, score: 0, issues: ['verification error'] },
        coherence: { passed: false, score: 0, issues: ['verification error'] },
        completeness: { passed: false, score: 0, missing_considerations: ['verification error'] },
        safety: { passed: false, score: 0, risk_level: 'HIGH', concerns: ['verification error'] },
        alternatives: { passed: false, score: 0, issues: ['verification error'] }
      },
      scores: {},
      criticalFailures: [{
        dimension: 'ERROR',
        score: 0,
        threshold: 1,
        severity: 'CRITICAL'
      }],
      pressureLevel: 'ELEVATED',
      pressureAdjustment: 0,
      recommendations: [{
        type: 'ERROR',
        severity: 'CRITICAL',
        message: 'Verification process encountered error',
        action: 'Require human review before proceeding'
      }],
      timestamp: new Date()
    };
  }

  /**
   * Get decision reason (exposed for tests)
   */
  _getDecisionReason(decision, scores, criticalFailures) {
    if (decision === 'BLOCK') {
      return 'Critical failures detected: ' + criticalFailures.map(cf => cf.dimension).join(', ');
    }
    if (decision === 'REQUEST_CLARIFICATION') {
      return 'Low confidence in alignment or completeness';
    }
    if (decision === 'REQUEST_CONFIRMATION') {
      return 'Moderate confidence, user confirmation recommended';
    }
    return 'Proceeding with high confidence';
  }

  /**
   * Generate suggestions for improvement (exposed for tests).
   * One suggestion per dimension whose score is below its passing
   * threshold (same thresholds as verify()'s checks block).
   */
  _generateSuggestions(scores, criticalFailures) {
    const suggestions = [];
    if (scores.alignment && scores.alignment.score < 0.7) {
      suggestions.push('Clarify how this action aligns with user goals');
    }
    if (scores.coherence && scores.coherence.score < 0.7) {
      suggestions.push('Review reasoning for logical consistency');
    }
    if (scores.completeness && scores.completeness.score < 0.8) {
      suggestions.push('Ensure all requirements are addressed');
    }
    if (scores.safety && scores.safety.score < 0.9) {
      suggestions.push('Verify safety implications of this action');
    }
    if (scores.alternatives && scores.alternatives.score < 0.6) {
      suggestions.push('Consider alternative approaches');
    }
    return suggestions;
  }

  /**
   * Assess evidence quality (exposed for tests).
   * Scores string evidence entries by keyword heuristics; clamped [0, 1].
   */
  _assessEvidenceQuality(reasoning) {
    if (!reasoning || !reasoning.evidence) return 0.0;
    const evidence = reasoning.evidence;
    if (!Array.isArray(evidence) || evidence.length === 0) return 0.0;
    let qualityScore = 0;
    // Explicit user instructions are the strongest evidence.
    const hasExplicit = evidence.some(e =>
      typeof e === 'string' && /user\s+(explicitly|specifically|said|requested|instructed)/i.test(e)
    );
    if (hasExplicit) qualityScore += 0.4;
    // Documentation references.
    const hasDocs = evidence.some(e =>
      typeof e === 'string' && /documentation|docs|spec|standard/i.test(e)
    );
    if (hasDocs) qualityScore += 0.3;
    // Testing/validation evidence.
    const hasValidation = evidence.some(e =>
      typeof e === 'string' && /test|validate|verify|confirm/i.test(e)
    );
    if (hasValidation) qualityScore += 0.3;
    // Hedged language ("think", "maybe", ...) weakens the evidence.
    const hasWeak = evidence.some(e =>
      typeof e === 'string' && /think|maybe|probably|assume/i.test(e)
    );
    if (hasWeak) qualityScore -= 0.3;
    return Math.max(0, Math.min(1, qualityScore));
  }

  /**
   * Assess reasoning quality (exposed for tests).
   * Combines explanation length, evidence quality, step count, and
   * alternatives considered; capped at 1.0.
   */
  _assessReasoningQuality(reasoning) {
    if (!reasoning) return 0.0;
    let score = 0;
    // Explanation length as a quality proxy.
    if (reasoning.explanation) {
      const length = reasoning.explanation.length;
      if (length > 100) score += 0.3;
      else if (length > 50) score += 0.1;
    }
    // Evidence contributes up to 0.4.
    const evidenceScore = this._assessEvidenceQuality(reasoning);
    score += evidenceScore * 0.4;
    // Steps contribute 0.1 each, capped at 0.3.
    if (reasoning.steps && Array.isArray(reasoning.steps) && reasoning.steps.length > 0) {
      score += Math.min(0.3, reasoning.steps.length * 0.1);
    }
    // Alternatives considered (snake_case key used by this API).
    if (reasoning.alternatives_considered && reasoning.alternatives_considered.length > 0) {
      score += 0.2;
    }
    return Math.min(1.0, score);
  }

  /**
   * Make verification decision (exposed for tests).
   * Thresholds tighten under CRITICAL/HIGH pressure; DANGEROUS pressure
   * blocks regardless of confidence.
   */
  _makeDecision(confidence, context) {
    const pressureLevel = context.pressure_level || 'NORMAL';
    if (pressureLevel === 'DANGEROUS') {
      return { decision: 'BLOCK', requires_confirmation: true };
    }
    const proceedThreshold = pressureLevel === 'CRITICAL' ? 0.8 : 0.7;
    const confirmThreshold = pressureLevel === 'HIGH' ? 0.6 : 0.5;
    if (confidence >= proceedThreshold) {
      return { decision: 'PROCEED', requires_confirmation: false };
    } else if (confidence >= confirmThreshold) {
      return { decision: 'REQUEST_CONFIRMATION', requires_confirmation: true };
    } else if (confidence >= 0.3) {
      return { decision: 'REQUEST_CLARIFICATION', requires_confirmation: true };
    } else {
      return { decision: 'BLOCK', requires_confirmation: true };
    }
  }

  /**
   * Get verification statistics
   * @returns {Object} Statistics object (snapshot plus timestamp)
   */
  getStats() {
    return {
      ...this.stats,
      timestamp: new Date()
    };
  }
}
// Singleton instance: every importer shares this one verifier, so the
// statistics accumulated in this.stats are process-wide. Constructing it
// here also emits the "MetacognitiveVerifier initialized" log line at
// module load time.
const verifier = new MetacognitiveVerifier();
module.exports = verifier;