diff --git a/src/services/ContextPressureMonitor.service.js b/src/services/ContextPressureMonitor.service.js index 31292dd1..32d94972 100644 --- a/src/services/ContextPressureMonitor.service.js +++ b/src/services/ContextPressureMonitor.service.js @@ -290,19 +290,29 @@ class ContextPressureMonitor { let tokenUsage = context.tokenUsage || context.token_usage || 0; const tokenBudget = context.tokenBudget || context.token_limit || 200000; - // If tokenUsage is a ratio (0-1), convert to absolute value - if (tokenUsage > 0 && tokenUsage <= 1) { - tokenUsage = tokenUsage * tokenBudget; + // Handle negative values + if (tokenUsage < 0) { + tokenUsage = 0; } - const ratio = tokenUsage / tokenBudget; - const normalized = Math.min(1.0, ratio / this.metrics.TOKEN_USAGE.criticalThreshold); + // Determine if tokenUsage is a ratio or absolute count + let ratio; + if (tokenUsage <= 2.0) { + // Values <= 2.0 are treated as ratios (allows for over-budget like 1.5 = 150%) + ratio = tokenUsage; + } else { + // Values > 2.0 are treated as absolute token counts + ratio = tokenUsage / tokenBudget; + } + + // Use ratio directly as normalized score (don't divide by criticalThreshold) + const normalized = Math.min(1.0, Math.max(0.0, ratio)); return { value: ratio, score: normalized, // Alias for test compatibility normalized, - raw: tokenUsage, + raw: tokenUsage <= 2.0 ? tokenUsage * tokenBudget : tokenUsage, budget: tokenBudget, percentage: (ratio * 100).toFixed(1) }; @@ -440,13 +450,27 @@ class ContextPressureMonitor { } _calculateOverallPressure(metricScores) { - let pressure = 0; + // Calculate weighted average + let weightedPressure = 0; + weightedPressure += metricScores.tokenUsage.normalized * this.metrics.TOKEN_USAGE.weight; + weightedPressure += metricScores.conversationLength.normalized * this.metrics.CONVERSATION_LENGTH.weight; + weightedPressure += metricScores.taskComplexity.normalized * this.metrics.TASK_COMPLEXITY.weight; + weightedPressure += metricScores.errorFrequency.normalized * this.metrics.ERROR_FREQUENCY.weight; + weightedPressure += metricScores.instructionDensity.normalized * this.metrics.INSTRUCTION_DENSITY.weight; - pressure += metricScores.tokenUsage.normalized * this.metrics.TOKEN_USAGE.weight; - pressure += metricScores.conversationLength.normalized * this.metrics.CONVERSATION_LENGTH.weight; - pressure += metricScores.taskComplexity.normalized * this.metrics.TASK_COMPLEXITY.weight; - pressure += metricScores.errorFrequency.normalized * this.metrics.ERROR_FREQUENCY.weight; - pressure += metricScores.instructionDensity.normalized * this.metrics.INSTRUCTION_DENSITY.weight; + // Also check maximum of any single metric (safety-first approach) + // If ANY metric is critically high, overall pressure should reflect that + const maxMetric = Math.max( + metricScores.tokenUsage.normalized, + metricScores.conversationLength.normalized, + metricScores.taskComplexity.normalized, + metricScores.errorFrequency.normalized, + metricScores.instructionDensity.normalized + ); + + // Use the higher of weighted average or max single metric + // This ensures a single critical metric triggers appropriate pressure level + const pressure = Math.max(weightedPressure, maxMetric); return Math.min(1.0, Math.max(0.0, pressure)); } diff --git a/src/services/InstructionPersistenceClassifier.service.js b/src/services/InstructionPersistenceClassifier.service.js index 34370a6b..3f2f45c7 100644 --- a/src/services/InstructionPersistenceClassifier.service.js +++ b/src/services/InstructionPersistenceClassifier.service.js @@ -173,7 +173,8 @@ class InstructionPersistenceClassifier { quadrant, persistence, explicitness, - source + source, + context }); // Extract parameters @@ -283,11 +284,16 @@ class InstructionPersistenceClassifier { } _extractTemporalScope(text) { + // Check for multi-word phrases first (more specific) + if (/\b(?:for|during|in)\s+(?:the\s+)?(?:rest\s+of\s+)?(?:this|current)\s+(?:session|conversation)\b/i.test(text)) { + return 'SESSION'; + } + const scopes = { PERMANENT: ['always', 'never', 'all', 'every', 'forever'], PROJECT: ['project', 'this phase', 'going forward', 'from now on'], - IMMEDIATE: ['now', 'today', 'currently', 'right now', 'this'], - SESSION: ['session', 'conversation', 'while'] + SESSION: ['session', 'conversation', 'while'], + IMMEDIATE: ['now', 'today', 'currently', 'right now', 'this'] }; for (const [scope, keywords] of Object.entries(scopes)) { @@ -300,6 +306,11 @@ class InstructionPersistenceClassifier { } _determineQuadrant(text, context, temporalScope) { + // Handle empty text explicitly + if (!text || text.trim().length === 0) { + return 'STOCHASTIC'; + } + // Score each quadrant const scores = {}; @@ -406,6 +417,16 @@ class InstructionPersistenceClassifier { return 'HIGH'; } + // Special case: Exploratory STOCHASTIC with exploration keywords should be MEDIUM + if (quadrant === 'STOCHASTIC' && /\b(?:explore|investigate|research|discover)\b/i.test(text)) { + return 'MEDIUM'; + } + + // Special case: Guideline language ("try to", "aim to") should be MEDIUM + if (/\b(?:try|aim|strive)\s+to\b/i.test(text)) { + return 'MEDIUM'; + } + // Base persistence from quadrant let baseScore = { STRATEGIC: 0.9, @@ -439,19 +460,27 @@ class InstructionPersistenceClassifier { return 'LOW'; } - _determineVerification({ quadrant, persistence, explicitness, source }) { + _determineVerification({ quadrant, persistence, explicitness, source, context = {} }) { + // Check context pressure - high pressure increases verification requirements + const highPressure = context.token_usage > 0.7 || + context.errors_recent > 3 || + context.conversation_length > 80; + // MANDATORY verification conditions if (persistence === 'HIGH') return 'MANDATORY'; if (quadrant === 'STRATEGIC') return 'MANDATORY'; if (explicitness > 0.8 && source === 'user') return 'MANDATORY'; + if (highPressure && quadrant === 'SYSTEM') return 'MANDATORY'; // High pressure + system changes // REQUIRED verification conditions if (persistence === 'MEDIUM') return 'REQUIRED'; if (quadrant === 'OPERATIONAL') return 'REQUIRED'; + if (highPressure && persistence === 'VARIABLE') return 'REQUIRED'; // Upgrade from RECOMMENDED // RECOMMENDED verification conditions if (persistence === 'VARIABLE') return 'RECOMMENDED'; if (quadrant === 'TACTICAL' && explicitness > 0.5) return 'RECOMMENDED'; + if (highPressure) return 'RECOMMENDED'; // High pressure requires at least RECOMMENDED // OPTIONAL for low-persistence stochastic return 'OPTIONAL';