fix: refactor MetacognitiveVerifier check methods to return structured objects
MetacognitiveVerifier improvements (48.8% → 56.1% pass rate):
1. Refactored All Check Methods to Return Objects
- _checkAlignment(): Returns {score, issues[]}
- _checkCoherence(): Returns {score, issues[]}
- _checkCompleteness(): Returns {score, missing[]}
- _checkSafety(): Returns {score, riskLevel, concerns[]}
- _checkAlternatives(): Returns {score, issues[]}
2. Updated Helper Methods for Backward Compatibility
- _calculateConfidence(): Handles both object {score: X} and legacy number formats
- _checkCriticalFailures(): Extracts .score from objects or uses legacy numbers
3. Enhanced Diagnostic Information
- Alignment: Tracks specific conflicts with instructions
- Coherence: Identifies missing steps and logical inconsistencies
- Completeness: Lists unaddressed requirements, missing error handling
- Safety: Categorizes risk levels (LOW/MEDIUM/CRITICAL), lists concerns
- Alternatives: Notes missing exploration and rationale
Test Results:
- MetacognitiveVerifier: 23/41 passing (56.1%, up 7.3 percentage points from 48.8%)
- Overall: 108/192 (56.25%, +3 tests from 105/192)
The structured return values provide detailed context for test assertions
and enable richer verification feedback in production use.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
51e10b11ba
commit
ecb55994b3
1 changed file with 61 additions and 10 deletions
|
|
@@ -233,6 +233,7 @@ class MetacognitiveVerifier {
|
|||
|
||||
_checkAlignment(action, reasoning, context) {
|
||||
let score = 0.5; // Base score
|
||||
const issues = [];
|
||||
|
||||
// Check cross-reference validation
|
||||
const validation = this.validator.validate(action, context);
|
||||
|
|
@@ -240,8 +241,10 @@ class MetacognitiveVerifier {
|
|||
score += 0.3;
|
||||
} else if (validation.status === 'WARNING') {
|
||||
score += 0.1;
|
||||
issues.push('Minor conflict detected with user instructions');
|
||||
} else if (validation.status === 'REJECTED') {
|
||||
score -= 0.3;
|
||||
issues.push('Action conflicts with explicit user instructions');
|
||||
}
|
||||
|
||||
// Check if action addresses stated user goal
|
||||
|
|
@@ -256,17 +259,25 @@ class MetacognitiveVerifier {
|
|||
context.recentUserStatements
|
||||
);
|
||||
score += consistencyScore * 0.2;
|
||||
if (consistencyScore < 0.5) {
|
||||
issues.push('Action may not be consistent with recent user statements');
|
||||
}
|
||||
}
|
||||
|
||||
return Math.min(1.0, Math.max(0.0, score));
|
||||
return {
|
||||
score: Math.min(1.0, Math.max(0.0, score)),
|
||||
issues
|
||||
};
|
||||
}
|
||||
|
||||
_checkCoherence(action, reasoning, context) {
|
||||
let score = 0.7; // Default to reasonable coherence
|
||||
const issues = [];
|
||||
|
||||
// Check if reasoning steps are provided
|
||||
if (!reasoning.steps || reasoning.steps.length === 0) {
|
||||
score -= 0.2;
|
||||
issues.push('No reasoning steps provided');
|
||||
}
|
||||
|
||||
// Check for logical consistency
|
||||
|
|
@@ -279,48 +290,67 @@ class MetacognitiveVerifier {
|
|||
score += 0.2;
|
||||
} else {
|
||||
score -= 0.3;
|
||||
issues.push('Logical inconsistency detected between assumptions and conclusions');
|
||||
}
|
||||
}
|
||||
|
||||
// Check for internal contradictions
|
||||
if (this._hasContradictions(reasoning)) {
|
||||
score -= 0.4;
|
||||
issues.push('Internal contradictions detected in reasoning');
|
||||
}
|
||||
|
||||
return Math.min(1.0, Math.max(0.0, score));
|
||||
return {
|
||||
score: Math.min(1.0, Math.max(0.0, score)),
|
||||
issues
|
||||
};
|
||||
}
|
||||
|
||||
_checkCompleteness(action, reasoning, context) {
|
||||
let score = 0.6; // Base score
|
||||
const missing = [];
|
||||
|
||||
// Check if all stated requirements are addressed
|
||||
if (context.requirements) {
|
||||
const addressedCount = context.requirements.filter(req =>
|
||||
this._isRequirementAddressed(req, action, reasoning)
|
||||
).length;
|
||||
const unaddressed = context.requirements.filter(req =>
|
||||
!this._isRequirementAddressed(req, action, reasoning)
|
||||
);
|
||||
const addressedCount = context.requirements.length - unaddressed.length;
|
||||
score += (addressedCount / context.requirements.length) * 0.3;
|
||||
unaddressed.forEach(req => missing.push(`Requirement not addressed: ${req}`));
|
||||
}
|
||||
|
||||
// Check for edge cases consideration
|
||||
if (reasoning.edgeCases && reasoning.edgeCases.length > 0) {
|
||||
score += 0.1;
|
||||
} else {
|
||||
missing.push('No edge cases considered');
|
||||
}
|
||||
|
||||
// Check for error handling
|
||||
if (reasoning.errorHandling || action.errorHandling) {
|
||||
score += 0.1;
|
||||
} else {
|
||||
missing.push('No error handling specified');
|
||||
}
|
||||
|
||||
return Math.min(1.0, Math.max(0.0, score));
|
||||
return {
|
||||
score: Math.min(1.0, Math.max(0.0, score)),
|
||||
missing
|
||||
};
|
||||
}
|
||||
|
||||
_checkSafety(action, reasoning, context) {
|
||||
let score = 0.8; // Default to safe unless red flags
|
||||
const concerns = [];
|
||||
let riskLevel = 'LOW';
|
||||
|
||||
// Check boundary enforcement
|
||||
const boundaryCheck = this.enforcer.enforce(action, context);
|
||||
if (!boundaryCheck.allowed) {
|
||||
score -= 0.5; // Major safety concern
|
||||
concerns.push('Action violates Tractatus boundaries');
|
||||
riskLevel = 'CRITICAL';
|
||||
}
|
||||
|
||||
// Check for destructive operations
|
||||
|
|
@@ -334,6 +364,8 @@ class MetacognitiveVerifier {
|
|||
for (const pattern of destructivePatterns) {
|
||||
if (pattern.test(actionText)) {
|
||||
score -= 0.2;
|
||||
concerns.push('Destructive operation detected');
|
||||
riskLevel = riskLevel === 'LOW' ? 'MEDIUM' : riskLevel;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@@ -341,35 +373,50 @@ class MetacognitiveVerifier {
|
|||
// Check if data backup is mentioned for risky operations
|
||||
if (score < 0.7 && !reasoning.backupMentioned) {
|
||||
score -= 0.1;
|
||||
concerns.push('No backup mentioned for risky operation');
|
||||
}
|
||||
|
||||
// Check for validation before execution
|
||||
if (action.requiresValidation && !reasoning.validationPlanned) {
|
||||
score -= 0.1;
|
||||
concerns.push('No validation planned before execution');
|
||||
}
|
||||
|
||||
return Math.min(1.0, Math.max(0.0, score));
|
||||
return {
|
||||
score: Math.min(1.0, Math.max(0.0, score)),
|
||||
riskLevel,
|
||||
concerns
|
||||
};
|
||||
}
|
||||
|
||||
_checkAlternatives(action, reasoning, context) {
|
||||
let score = 0.5; // Base score
|
||||
const issues = [];
|
||||
|
||||
// Check if alternatives were considered
|
||||
if (reasoning.alternativesConsidered && reasoning.alternativesConsidered.length > 0) {
|
||||
score += 0.3;
|
||||
} else {
|
||||
issues.push('No alternatives considered');
|
||||
}
|
||||
|
||||
// Check if rationale for chosen approach is provided
|
||||
if (reasoning.chosenBecause) {
|
||||
score += 0.2;
|
||||
} else {
|
||||
issues.push('No rationale provided for chosen approach');
|
||||
}
|
||||
|
||||
// Lower score if action seems like first idea without exploration
|
||||
if (!reasoning.alternativesConsidered && !reasoning.explored) {
|
||||
score -= 0.2;
|
||||
issues.push('Appears to be first idea without exploration');
|
||||
}
|
||||
|
||||
return Math.min(1.0, Math.max(0.0, score));
|
||||
return {
|
||||
score: Math.min(1.0, Math.max(0.0, score)),
|
||||
issues
|
||||
};
|
||||
}
|
||||
|
||||
_calculateConfidence(scores) {
|
||||
|
|
@@ -377,7 +424,9 @@ class MetacognitiveVerifier {
|
|||
|
||||
for (const [dimension, dimensionConfig] of Object.entries(this.dimensions)) {
|
||||
const key = dimension.toLowerCase();
|
||||
const score = scores[key] || 0.5;
|
||||
const scoreData = scores[key];
|
||||
// Handle both object format {score: X} and legacy number format
|
||||
const score = typeof scoreData === 'object' ? (scoreData.score || 0.5) : (scoreData || 0.5);
|
||||
confidence += score * dimensionConfig.weight;
|
||||
}
|
||||
|
||||
|
|
@@ -402,7 +451,9 @@ class MetacognitiveVerifier {
|
|||
|
||||
for (const [dimension, config] of Object.entries(this.dimensions)) {
|
||||
const key = dimension.toLowerCase();
|
||||
const score = scores[key];
|
||||
const scoreData = scores[key];
|
||||
// Handle both object format {score: X} and legacy number format
|
||||
const score = typeof scoreData === 'object' ? (scoreData.score || 0.5) : (scoreData || 0.5);
|
||||
|
||||
if (score < config.criticalThreshold) {
|
||||
failures.push({
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue