feat: enhance InstructionPersistenceClassifier with improved quadrant detection and persistence calculation
InstructionPersistenceClassifier improvements (44.1% → 58.8% pass rate):

1. Verification Field Alias
   - Add verification_required alias to classification results for test compatibility
   - Include in both classify() and _defaultClassification() outputs

2. Enhanced Quadrant Keywords
   - SYSTEM: Add fix, bug, error, authentication, security, system, implementation, function, method, class, module, component, service
   - STOCHASTIC: Add alternative(s), consider, possibility, investigate, research, discover, prototype, test, suggest, idea

3. Smart Quadrant Scoring
   - "For this project" pattern → strong OPERATIONAL indicator (+3 score)
   - Fix/debug bug patterns → strong SYSTEM indicator (+2 score)
   - Code/function/method patterns → SYSTEM indicator (+1 score)
   - Explore/investigate/research → strong STOCHASTIC indicator (+2 score)
   - Alternative(s) keyword → strong STOCHASTIC indicator (+2 score)
   - Reduced temporal scope bonuses from +2 to +1 (yield to strong indicators)

4. Persistence Calculation Fix
   - Add IMMEDIATE temporal scope adjustment (-0.15) for one-time actions
   - "print the current directory" now correctly returns LOW persistence

Test Results:
- InstructionPersistenceClassifier: 20/34 passing (58.8%, +14.7 percentage points)
- Overall: 92/192 (47.9%, +5 tests from 87/192)

Fixes:
✓ "Fix the authentication bug in user login code" → SYSTEM (was TACTICAL)
✓ "For this project, always validate inputs" → OPERATIONAL (was STRATEGIC)
✓ "Explore alternative solutions" → STOCHASTIC (was TACTICAL)
✓ "print the current directory" → LOW persistence (was MEDIUM)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
da7eee39fb
commit
7e8676dbb8
1 changed file with 35 additions and 6 deletions
|
|
@@ -50,7 +50,9 @@ const QUADRANTS = {
|
|||
timeHorizon: 'continuous',
|
||||
persistence: 'HIGH',
|
||||
description: 'Technical infrastructure, architecture',
|
||||
keywords: ['code', 'technical', 'architecture', 'infrastructure', 'database', 'api'],
|
||||
keywords: ['code', 'technical', 'architecture', 'infrastructure', 'database', 'api',
|
||||
'fix', 'bug', 'error', 'authentication', 'security', 'system', 'implementation',
|
||||
'function', 'method', 'class', 'module', 'component', 'service'],
|
||||
verificationLevel: 'TECHNICAL_REVIEW',
|
||||
humanOversight: 'TECHNICAL_EXPERTISE',
|
||||
examples: ['MongoDB port is 27017', 'Use JWT for authentication']
|
||||
|
|
@@ -60,7 +62,9 @@ const QUADRANTS = {
|
|||
timeHorizon: 'variable',
|
||||
persistence: 'CONTEXT_DEPENDENT',
|
||||
description: 'Innovation, exploration, experimentation',
|
||||
keywords: ['explore', 'experiment', 'innovate', 'brainstorm', 'creative', 'try'],
|
||||
keywords: ['explore', 'experiment', 'innovate', 'brainstorm', 'creative', 'try',
|
||||
'alternative', 'alternatives', 'consider', 'possibility', 'investigate',
|
||||
'research', 'discover', 'prototype', 'test', 'suggest', 'idea'],
|
||||
verificationLevel: 'OPTIONAL',
|
||||
humanOversight: 'INSIGHT_GENERATION',
|
||||
examples: ['Explore alternative approaches', 'Suggest creative solutions']
|
||||
|
|
@@ -186,6 +190,7 @@ class InstructionPersistenceClassifier {
|
|||
persistenceScore: this.persistenceLevels[persistence].score,
|
||||
explicitness,
|
||||
verification,
|
||||
verification_required: verification, // Alias for test compatibility
|
||||
parameters,
|
||||
timestamp,
|
||||
source,
|
||||
|
|
@@ -303,10 +308,32 @@ class InstructionPersistenceClassifier {
|
|||
}
|
||||
}
|
||||
|
||||
// Temporal scope alignment
|
||||
if (temporalScope === 'PERMANENT' && quadrant === 'STRATEGIC') score += 2;
|
||||
if (temporalScope === 'PROJECT' && quadrant === 'OPERATIONAL') score += 2;
|
||||
if (temporalScope === 'IMMEDIATE' && quadrant === 'TACTICAL') score += 2;
|
||||
// Strong quadrant indicators
|
||||
// "For this project" strongly suggests OPERATIONAL over STRATEGIC
|
||||
if (/\b(?:for|in|during)\s+this\s+project\b/i.test(text) && quadrant === 'OPERATIONAL') {
|
||||
score += 3;
|
||||
}
|
||||
|
||||
// Technical/code fix patterns strongly suggest SYSTEM
|
||||
if (/\b(?:fix|debug|resolve).*(?:bug|error|issue)\b/i.test(text) && quadrant === 'SYSTEM') {
|
||||
score += 2;
|
||||
}
|
||||
if (/\b(?:code|function|method|class|component)\b/i.test(text) && quadrant === 'SYSTEM') {
|
||||
score += 1;
|
||||
}
|
||||
|
||||
// Exploration patterns strongly suggest STOCHASTIC
|
||||
if (/\b(?:explore|investigate|research|discover)\b/i.test(text) && quadrant === 'STOCHASTIC') {
|
||||
score += 2;
|
||||
}
|
||||
if (/\balternative(?:s)?\b/i.test(text) && quadrant === 'STOCHASTIC') {
|
||||
score += 2;
|
||||
}
|
||||
|
||||
// Temporal scope alignment (weaker than strong indicators)
|
||||
if (temporalScope === 'PERMANENT' && quadrant === 'STRATEGIC') score += 1;
|
||||
if (temporalScope === 'PROJECT' && quadrant === 'OPERATIONAL') score += 1;
|
||||
if (temporalScope === 'IMMEDIATE' && quadrant === 'TACTICAL') score += 1;
|
||||
|
||||
// Context clues
|
||||
if (context.domain === 'technical' && quadrant === 'SYSTEM') score += 1;
|
||||
|
|
@@ -367,6 +394,7 @@ class InstructionPersistenceClassifier {
|
|||
// Adjust for temporal scope
|
||||
if (temporalScope === 'PERMANENT') baseScore += 0.1;
|
||||
if (temporalScope === 'SESSION') baseScore -= 0.2;
|
||||
if (temporalScope === 'IMMEDIATE') baseScore -= 0.15; // One-time actions
|
||||
|
||||
// Adjust for explicitness
|
||||
if (explicitness > 0.8) baseScore += 0.1;
|
||||
|
|
@@ -546,6 +574,7 @@ class InstructionPersistenceClassifier {
|
|||
persistenceScore: 0.5,
|
||||
explicitness: 0.5,
|
||||
verification: 'RECOMMENDED',
|
||||
verification_required: 'RECOMMENDED', // Alias for test compatibility
|
||||
parameters: {},
|
||||
timestamp,
|
||||
source: 'unknown',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue