tractatus/src/services/InstructionPersistenceClassifier.service.js
TheFlow e94cf6ff84 legal: add Apache 2.0 copyright headers and NOTICE file
- Add copyright headers to 5 core service files:
  - BoundaryEnforcer.service.js
  - ContextPressureMonitor.service.js
  - CrossReferenceValidator.service.js
  - InstructionPersistenceClassifier.service.js
  - MetacognitiveVerifier.service.js

- Create NOTICE file per Apache License 2.0 requirements

This strengthens copyright protection and makes enforcement easier.
Git history provides proof of authorship. No registration required
for copyright protection, but headers make ownership explicit.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-08 00:03:12 +13:00

684 lines
22 KiB
JavaScript

/*
* Copyright 2025 John G Stroh
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Instruction Persistence Classifier Service
* Classifies actions and instructions by quadrant and persistence level
*
* Core Tractatus Service: Implements time-persistence metadata tagging
* to ensure AI actions are verified according to instruction permanence.
*
* Prevents the "27027 failure mode" where explicit instructions are
* overridden by cached patterns.
*/
const logger = require('../utils/logger.util');
/**
 * Tractatus quadrant table, keyed by quadrant id.
 *
 * Each entry carries:
 *   - name / description / examples: human-readable labels
 *   - timeHorizon / persistence:     descriptive horizon and persistence tags
 *   - keywords:                      words compiled into whole-word patterns
 *                                    for quadrant scoring
 *   - verificationLevel / humanOversight: descriptive oversight tags
 *
 * Key order matters: quadrant scoring iterates this table in insertion order
 * and an equal score is resolved in favour of the earlier entry.
 */
const QUADRANTS = {
  // Long-lived values and mission statements
  STRATEGIC: {
    name: 'Strategic',
    timeHorizon: 'years',
    persistence: 'HIGH',
    description: 'Values, mission, long-term direction',
    keywords: [
      'always',
      'never',
      'core',
      'values',
      'mission',
      'principle',
      'philosophy'
    ],
    verificationLevel: 'MANDATORY',
    humanOversight: 'VALUES_STEWARDSHIP',
    examples: [
      'Always prioritize privacy',
      'Never compromise user sovereignty'
    ]
  },

  // Project-level processes and policies
  OPERATIONAL: {
    name: 'Operational',
    timeHorizon: 'months',
    persistence: 'MEDIUM-HIGH',
    description: 'Processes, policies, project-level decisions',
    keywords: [
      'project',
      'process',
      'policy',
      'workflow',
      'standard',
      'convention'
    ],
    verificationLevel: 'REQUIRED',
    humanOversight: 'PROCESS_STEWARDSHIP',
    examples: [
      'For this project, use React',
      'All blog posts must cite sources'
    ]
  },

  // Immediate implementation decisions
  TACTICAL: {
    name: 'Tactical',
    timeHorizon: 'weeks',
    persistence: 'VARIABLE',
    description: 'Implementation decisions, immediate actions',
    keywords: [
      'now',
      'today',
      'this',
      'current',
      'immediate',
      'check',
      'verify'
    ],
    verificationLevel: 'CONTEXT_DEPENDENT',
    humanOversight: 'IMPLEMENTATION_EXPERTISE',
    examples: [
      'Check port 27027',
      'Use this API key for testing'
    ]
  },

  // Technical infrastructure and architecture
  SYSTEM: {
    name: 'System',
    timeHorizon: 'continuous',
    persistence: 'HIGH',
    description: 'Technical infrastructure, architecture',
    keywords: [
      'code',
      'technical',
      'architecture',
      'infrastructure',
      'database',
      'api',
      'fix',
      'bug',
      'error',
      'authentication',
      'security',
      'system',
      'implementation',
      'function',
      'method',
      'class',
      'module',
      'component',
      'service'
    ],
    verificationLevel: 'TECHNICAL_REVIEW',
    humanOversight: 'TECHNICAL_EXPERTISE',
    examples: [
      'MongoDB port is 27017',
      'Use JWT for authentication'
    ]
  },

  // Exploration and experimentation
  STOCHASTIC: {
    name: 'Stochastic',
    timeHorizon: 'variable',
    persistence: 'CONTEXT_DEPENDENT',
    description: 'Innovation, exploration, experimentation',
    keywords: [
      'explore',
      'experiment',
      'innovate',
      'brainstorm',
      'creative',
      'try',
      'alternative',
      'alternatives',
      'consider',
      'possibility',
      'investigate',
      'research',
      'discover',
      'prototype',
      'test',
      'suggest',
      'idea'
    ],
    verificationLevel: 'OPTIONAL',
    humanOversight: 'INSIGHT_GENERATION',
    examples: [
      'Explore alternative approaches',
      'Suggest creative solutions'
    ]
  }
};
/**
 * Persistence level table, keyed by level id.
 *
 * For every level:
 *   - score:                numeric weight attached to a classification
 *   - verificationRequired: flag stored alongside the level
 *   - description:          human-readable meaning
 *   - conflictSeverity:     severity tag copied into classification metadata
 */
const PERSISTENCE_LEVELS = {
  HIGH: {
    score: 0.9,
    verificationRequired: true,
    description: 'Must be followed exactly',
    conflictSeverity: 'CRITICAL'
  },

  MEDIUM: {
    score: 0.6,
    verificationRequired: true,
    description: 'Should be followed with flexibility',
    conflictSeverity: 'WARNING'
  },

  LOW: {
    score: 0.3,
    verificationRequired: false,
    description: 'Guidance only, context-dependent',
    conflictSeverity: 'MINOR'
  },

  // The flag is set to true here although the level itself is context-dependent
  VARIABLE: {
    score: 0.5,
    verificationRequired: true,
    description: 'Depends on explicitness and recency',
    conflictSeverity: 'CONTEXT_DEPENDENT'
  }
};
/**
 * Classifies instruction text by quadrant, persistence level and verification
 * requirement, extracts concrete parameters (ports, protocols, frameworks, ...)
 * and keeps running counters of the classifications it has produced.
 *
 * Reads the module-level QUADRANTS and PERSISTENCE_LEVELS tables and logs
 * through the module-level `logger`.
 */
class InstructionPersistenceClassifier {
  constructor() {
    this.quadrants = QUADRANTS;
    this.persistenceLevels = PERSISTENCE_LEVELS;

    // Compile keyword patterns once so classify() does not rebuild them
    this.keywordPatterns = this._compileKeywordPatterns();

    // Running counters, incremented on every successful classify()
    this.stats = {
      total_classifications: 0,
      by_quadrant: {
        STRATEGIC: 0,
        OPERATIONAL: 0,
        TACTICAL: 0,
        SYSTEM: 0,
        STOCHASTIC: 0
      },
      by_persistence: {
        HIGH: 0,
        MEDIUM: 0,
        LOW: 0,
        VARIABLE: 0
      },
      by_verification: {
        MANDATORY: 0,
        REQUIRED: 0,
        RECOMMENDED: 0,
        OPTIONAL: 0
      }
    };

    logger.info('InstructionPersistenceClassifier initialized');
  }

  /**
   * Classify an instruction or action.
   *
   * Any error raised while classifying (for example a non-string `text`) is
   * logged and answered with the safe default classification instead of
   * being thrown.
   *
   * @param {Object} params
   * @param {string} params.text - The instruction text
   * @param {Object} [params.context={}] - Conversation context
   * @param {Date} [params.timestamp] - When the instruction was given (defaults to now)
   * @param {string} [params.source='user'] - Source of instruction (user/system/inferred)
   * @returns {Object} Classification metadata
   */
  classify({ text, context = {}, timestamp = new Date(), source = 'user' } = {}) {
    try {
      // All matching below runs on a lower-cased, trimmed copy
      const normalizedText = text.toLowerCase().trim();

      const temporalScope = this._extractTemporalScope(normalizedText);
      const quadrant = this._determineQuadrant(normalizedText, context, temporalScope);
      const explicitness = this._measureExplicitness(normalizedText, source);

      const persistence = this._calculatePersistence({
        quadrant,
        temporalScope,
        explicitness,
        source,
        text: normalizedText
      });

      const verification = this._determineVerification({
        quadrant,
        persistence,
        explicitness,
        source,
        context
      });

      const parameters = this._extractParameters(normalizedText);

      // Recency weight decays with the age of the instruction
      const recencyWeight = this._calculateRecencyWeight(timestamp);

      const classification = {
        text,
        quadrant,
        quadrantInfo: this.quadrants[quadrant],
        persistence,
        persistenceScore: this.persistenceLevels[persistence].score,
        explicitness,
        verification,
        verification_required: verification, // Alias for test compatibility
        parameters,
        timestamp,
        source,
        recencyWeight,
        metadata: {
          temporal_scope: temporalScope, // snake_case for test compatibility
          temporalScope, // camelCase for consistency
          extracted_parameters: parameters, // snake_case alias
          extractedParameters: parameters, // camelCase alias
          context_snapshot: context, // snake_case alias
          contextSnapshot: context, // camelCase alias
          humanOversight: this.quadrants[quadrant].humanOversight,
          conflictSeverity: this.persistenceLevels[persistence].conflictSeverity
        }
      };

      // Track statistics
      this.stats.total_classifications++;
      this.stats.by_quadrant[quadrant]++;
      this.stats.by_persistence[persistence]++;
      this.stats.by_verification[verification]++;

      logger.debug('Instruction classified', {
        text: text.substring(0, 50),
        quadrant,
        persistence,
        verification
      });

      return classification;
    } catch (error) {
      logger.error('Classification error:', error);
      // Return safe default classification
      return this._defaultClassification(text, timestamp);
    }
  }

  /**
   * Classify multiple instructions in batch.
   * @param {Object[]} instructions - Parameter objects accepted by classify()
   * @returns {Object[]} One classification per input, in the same order
   */
  classifyBatch(instructions) {
    return instructions.map(inst => this.classify(inst));
  }

  /**
   * Calculate relevance of an instruction to an action as a weighted sum:
   * 40% keyword overlap, 30% recency, 20% persistence, 10% explicitness.
   * The file header notes this is used by CrossReferenceValidator.
   *
   * A weight of 0 on the instruction is honoured as 0; only a missing or NaN
   * weight falls back to the neutral 0.5.
   *
   * @param {Object} instruction - Classification as returned by classify()
   * @param {Object} action - Object whose `description` is compared to the instruction text
   * @returns {number} Relevance in [0, 1]; 0.3 if the calculation throws
   */
  calculateRelevance(instruction, action) {
    try {
      // Semantic similarity (simple keyword overlap for now)
      const semantic = this._semanticSimilarity(instruction.text, action.description);
      const temporal = this._weightOrDefault(instruction.recencyWeight);
      const persistence = this._weightOrDefault(instruction.persistenceScore);
      const explicitness = this._weightOrDefault(instruction.explicitness);

      const relevance = (
        semantic * 0.4 +
        temporal * 0.3 +
        persistence * 0.2 +
        explicitness * 0.1
      );

      return Math.min(1.0, Math.max(0.0, relevance));
    } catch (error) {
      logger.error('Relevance calculation error:', error);
      return 0.3; // Safe default
    }
  }

  /**
   * Private methods
   */

  /**
   * Return `value` when it is a usable number (including 0), otherwise the fallback.
   * Non-number inputs keep the truthiness-based fallback.
   * @param {*} value - Candidate weight
   * @param {number} [fallback=0.5] - Neutral weight used when value is missing
   * @returns {*} The weight to use
   */
  _weightOrDefault(value, fallback = 0.5) {
    if (typeof value === 'number') {
      return Number.isNaN(value) ? fallback : value;
    }
    return value || fallback;
  }

  /**
   * Build one case-insensitive whole-word RegExp per keyword of each quadrant.
   * @returns {Object<string, RegExp[]>} Patterns keyed by quadrant id
   */
  _compileKeywordPatterns() {
    const patterns = {};
    for (const [quadrant, config] of Object.entries(this.quadrants)) {
      patterns[quadrant] = config.keywords.map(kw => new RegExp(`\\b${kw}\\b`, 'i'));
    }
    return patterns;
  }

  /**
   * Derive the temporal scope of an instruction from its wording.
   *
   * Keywords are matched as whole words so that, for example, "install" does
   * not count as "all" and "know" does not count as "now".
   *
   * @param {string} text - Instruction text
   * @returns {string} 'PERMANENT', 'PROJECT', 'SESSION' or 'IMMEDIATE' (the default)
   */
  _extractTemporalScope(text) {
    // Check for multi-word session phrases first (more specific)
    if (/\b(?:for|during|in)\s+(?:the\s+)?(?:rest\s+of\s+)?(?:this|current)\s+(?:session|conversation)\b/i.test(text)) {
      return 'SESSION';
    }

    // Checked in this order; the first scope with a matching keyword wins
    const scopes = {
      PERMANENT: ['always', 'never', 'all', 'every', 'forever'],
      PROJECT: ['project', 'this phase', 'going forward', 'from now on'],
      SESSION: ['session', 'conversation', 'while'],
      IMMEDIATE: ['now', 'today', 'currently', 'right now', 'this']
    };

    // Keywords contain only letters and spaces, so no regex escaping is needed
    const containsPhrase = phrase =>
      new RegExp(`\\b${phrase.replace(/ /g, '\\s+')}\\b`, 'i').test(text);

    for (const [scope, keywords] of Object.entries(scopes)) {
      if (keywords.some(kw => containsPhrase(kw))) {
        return scope;
      }
    }

    return 'IMMEDIATE'; // Default
  }

  /**
   * Score every quadrant against the text and return the best one.
   *
   * Each matching keyword pattern is worth 1 point; strong phrase indicators,
   * temporal scope alignment and the context domain add bonuses. On equal
   * scores the quadrant listed first in the keyword table wins. When nothing
   * scores, the temporal scope decides.
   *
   * @param {string} text - Normalized instruction text
   * @param {Object} context - Conversation context (only `domain` is read)
   * @param {string} temporalScope - Result of _extractTemporalScope
   * @returns {string} Quadrant id
   */
  _determineQuadrant(text, context, temporalScope) {
    // Handle empty text explicitly
    if (!text || text.trim().length === 0) {
      return 'STOCHASTIC';
    }

    // Phrase indicators do not depend on the quadrant, so evaluate them once
    const mentionsThisProject = /\b(?:for|in|during)\s+this\s+project\b/i.test(text);
    const mentionsBugFix = /\b(?:fix|debug|resolve).*(?:bug|error|issue)\b/i.test(text);
    const mentionsCode = /\b(?:code|function|method|class|component)\b/i.test(text);
    const mentionsExploration = /\b(?:explore|investigate|research|discover)\b/i.test(text);
    const mentionsAlternatives = /\balternative(?:s)?\b/i.test(text);

    const scores = {};
    for (const [quadrant, patterns] of Object.entries(this.keywordPatterns)) {
      // Keyword matching: one point per matching pattern
      let score = 0;
      for (const pattern of patterns) {
        if (pattern.test(text)) {
          score += 1;
        }
      }

      // "For this project" strongly suggests OPERATIONAL over STRATEGIC
      if (mentionsThisProject && quadrant === 'OPERATIONAL') score += 3;

      // Technical/code fix patterns strongly suggest SYSTEM
      if (mentionsBugFix && quadrant === 'SYSTEM') score += 2;
      if (mentionsCode && quadrant === 'SYSTEM') score += 1;

      // Exploration patterns strongly suggest STOCHASTIC
      if (mentionsExploration && quadrant === 'STOCHASTIC') score += 2;
      if (mentionsAlternatives && quadrant === 'STOCHASTIC') score += 2;

      // Temporal scope alignment (weaker than strong indicators)
      if (temporalScope === 'PERMANENT' && quadrant === 'STRATEGIC') score += 1;
      if (temporalScope === 'PROJECT' && quadrant === 'OPERATIONAL') score += 1;
      if (temporalScope === 'IMMEDIATE' && quadrant === 'TACTICAL') score += 1;

      // Context clues
      if (context.domain === 'technical' && quadrant === 'SYSTEM') score += 1;
      if (context.domain === 'innovation' && quadrant === 'STOCHASTIC') score += 1;

      scores[quadrant] = score;
    }

    // Highest score first; the sort is stable, so ties keep table order
    const sorted = Object.entries(scores).sort((a, b) => b[1] - a[1]);

    // If nothing scored, default based on temporal scope
    if (sorted[0][1] === 0) {
      if (temporalScope === 'PERMANENT') return 'STRATEGIC';
      if (temporalScope === 'PROJECT') return 'OPERATIONAL';
      return 'TACTICAL';
    }

    return sorted[0][0];
  }

  /**
   * Estimate how explicit an instruction is.
   *
   * Starts at 0.3, subtracts 0.15 per hedging marker, adjusts for the source
   * (+0.15 user, -0.2 inferred), adds 0.15 per explicit marker, +0.25 for a
   * run of four or more digits and +0.1 for a quoted token. Markers are
   * matched as substrings of the text.
   *
   * @param {string} text - Normalized instruction text
   * @param {string} source - Source of instruction
   * @returns {number} Explicitness clamped to [0, 1]
   */
  _measureExplicitness(text, source) {
    let score = 0.3; // Base score (lower baseline)

    // Implicit/hedging language reduces explicitness
    const implicitMarkers = [
      'could', 'would', 'might', 'maybe', 'perhaps', 'consider',
      'possibly', 'potentially', 'suggestion', 'recommend'
    ];
    const implicitCount = implicitMarkers.filter(marker =>
      text.includes(marker)
    ).length;
    if (implicitCount > 0) {
      score -= implicitCount * 0.15; // Reduce for hedge words
    }

    // Source factor (applied after implicit check)
    if (source === 'user') score += 0.15;
    if (source === 'inferred') score -= 0.2;

    // Explicit markers
    const explicitMarkers = [
      'specifically', 'exactly', 'must', 'should', 'explicitly',
      'clearly', 'definitely', 'always', 'never', 'require'
    ];
    const markerCount = explicitMarkers.filter(marker =>
      text.includes(marker)
    ).length;
    score += markerCount * 0.15;

    // Parameter specification (numbers, specific values)
    if (/\d{4,}/.test(text)) score += 0.25; // Port numbers, dates, etc.
    if (/["'][\w-]+["']/.test(text)) score += 0.1; // Quoted strings

    return Math.min(1.0, Math.max(0.0, score));
  }

  /**
   * Map quadrant, temporal scope, explicitness and source to a persistence level.
   *
   * Four wording-based special cases short-circuit the numeric scoring, in
   * this order: prohibitions (HIGH), explicit port specifications (HIGH),
   * exploratory STOCHASTIC text (MEDIUM), preference language (MEDIUM).
   *
   * @param {Object} params
   * @param {string} params.quadrant - Quadrant id
   * @param {string} params.temporalScope - Temporal scope id
   * @param {number} params.explicitness - Explicitness score
   * @param {string} params.source - Source of instruction
   * @param {string} params.text - Normalized instruction text
   * @returns {string} 'HIGH', 'MEDIUM', 'VARIABLE' or 'LOW'
   */
  _calculatePersistence({ quadrant, temporalScope, explicitness, source, text }) {
    // Special case: Explicit prohibitions are HIGH persistence
    // "not X", "never X", "don't use X", "avoid X" indicate strong requirements
    if (/\b(?:not|never|don't\s+use|avoid)\s+\w+/i.test(text)) {
      return 'HIGH';
    }

    // Special case: Explicit port/configuration specifications are HIGH persistence
    if (/\bport\s+\d{4,5}\b/i.test(text) && explicitness > 0.6) {
      return 'HIGH';
    }

    // Special case: Exploratory STOCHASTIC with exploration keywords should be MEDIUM
    if (quadrant === 'STOCHASTIC' && /\b(?:explore|investigate|research|discover)\b/i.test(text)) {
      return 'MEDIUM';
    }

    // Special case: Preference language ("prefer", "try to", "aim to") should be MEDIUM
    // Captures "prefer using", "prefer to", "try to", "aim to"
    if (/\b(?:try|aim|strive)\s+to\b/i.test(text) || /\bprefer(?:s|red)?\s+(?:to|using)\b/i.test(text)) {
      return 'MEDIUM';
    }

    // Base persistence from quadrant
    let baseScore = {
      STRATEGIC: 0.9,
      OPERATIONAL: 0.7,
      TACTICAL: 0.5,
      SYSTEM: 0.7, // Increased from 0.6 for better SYSTEM persistence
      STOCHASTIC: 0.4
    }[quadrant];

    // Adjust for temporal scope
    if (temporalScope === 'PERMANENT') baseScore += 0.15;
    if (temporalScope === 'PROJECT') baseScore += 0.05;
    if (temporalScope === 'SESSION') baseScore -= 0.2;
    if (temporalScope === 'IMMEDIATE') baseScore -= 0.25; // One-time actions

    // Adjust for explicitness
    if (explicitness > 0.8) baseScore += 0.15;
    else if (explicitness > 0.6) baseScore += 0.05;

    // Adjust for source
    if (source === 'user') baseScore += 0.05;
    if (source === 'inferred') baseScore -= 0.15;

    // Normalize
    const score = Math.min(1.0, Math.max(0.0, baseScore));

    // Map to categorical levels
    if (score >= 0.75) return 'HIGH';
    if (score >= 0.45) return 'MEDIUM';
    if (quadrant === 'TACTICAL' && explicitness > 0.7 && score >= 0.4) return 'VARIABLE'; // Explicit tactical
    return 'LOW';
  }

  /**
   * Decide how strongly an instruction must be verified.
   *
   * Rules are evaluated from strictest to weakest and the first match wins.
   * "High pressure" means token_usage > 0.7, errors_recent > 3 or
   * conversation_length > 80 in the supplied context.
   *
   * @param {Object} params
   * @param {string} params.quadrant - Quadrant id
   * @param {string} params.persistence - Persistence level id
   * @param {number} params.explicitness - Explicitness score
   * @param {string} params.source - Source of instruction
   * @param {Object} [params.context={}] - Conversation context
   * @returns {string} 'MANDATORY', 'REQUIRED', 'RECOMMENDED' or 'OPTIONAL'
   */
  _determineVerification({ quadrant, persistence, explicitness, source, context = {} }) {
    // Check context pressure - high pressure increases verification requirements
    const highPressure = context.token_usage > 0.7 ||
      context.errors_recent > 3 ||
      context.conversation_length > 80;

    // MANDATORY verification conditions
    if (persistence === 'HIGH') return 'MANDATORY';
    if (quadrant === 'STRATEGIC') return 'MANDATORY';
    if (explicitness > 0.8 && source === 'user') return 'MANDATORY';
    if (highPressure && quadrant === 'SYSTEM') return 'MANDATORY'; // High pressure + system changes

    // REQUIRED verification conditions
    if (persistence === 'MEDIUM') return 'REQUIRED';
    if (quadrant === 'OPERATIONAL') return 'REQUIRED';
    if (highPressure && persistence === 'VARIABLE') return 'REQUIRED'; // Upgrade from RECOMMENDED

    // RECOMMENDED verification conditions
    if (persistence === 'VARIABLE') return 'RECOMMENDED';
    if (quadrant === 'TACTICAL' && explicitness > 0.5) return 'RECOMMENDED';
    if (highPressure) return 'RECOMMENDED'; // High pressure requires at least RECOMMENDED

    // OPTIONAL for everything else
    return 'OPTIONAL';
  }

  /**
   * Pull concrete parameters out of instruction text.
   *
   * Only keys whose pattern matched are present on the result. Possible keys:
   * port, url, protocol, host, path, hasApiKey, database, collection,
   * framework, module_type, package, confirmed, pattern.
   *
   * @param {string} text - Instruction text
   * @returns {Object} Extracted parameters
   */
  _extractParameters(text) {
    const params = {};

    // Port numbers
    const portMatch = text.match(/\bport\s+(\d{4,5})/i);
    if (portMatch) params.port = portMatch[1];

    // URLs
    const urlMatch = text.match(/https?:\/\/[\w.-]+(?::\d+)?/);
    if (urlMatch) params.url = urlMatch[0];

    // Protocols (http, https, ftp, etc.)
    // Prefer protocols in positive contexts (use, always, prefer) over negative (never, not, avoid)
    const protocols = Array.from(text.matchAll(/\b(https?|ftp|ssh|ws|wss)\b/gi));
    if (protocols.length > 0) {
      let bestProtocol = null;
      let bestScore = -1; // Negative-context matches score -10 and can never win

      for (const match of protocols) {
        // Check immediate context (15 chars before) for modifiers
        const immediateContext = text.substring(Math.max(0, match.index - 15), match.index);
        let score = 0;

        if (/\b(never|not|don't|avoid|no)\s+use\b/i.test(immediateContext)) {
          // Negative context in immediate vicinity: skip
          score = -10;
        } else if (/\b(always|prefer|require|must|should)\s+use\b/i.test(immediateContext)) {
          // Positive context: reward
          score = 10;
        } else if (/\buse\b/i.test(immediateContext)) {
          // Just "use" without modifiers: slight reward
          score = 5;
        } else {
          // Default: if no context, still consider it
          score = 1;
        }

        if (score > bestScore) {
          bestScore = score;
          bestProtocol = match[1].toLowerCase();
        }
      }

      if (bestProtocol) {
        params.protocol = bestProtocol;
      }
    }

    // Host/hostname
    const hostMatch = text.match(/(?:host|server|hostname)[:\s]+([\w.-]+)/i);
    if (hostMatch) params.host = hostMatch[1];

    // File paths
    const pathMatch = text.match(/(?:\/[\w.-]+)+/);
    if (pathMatch) params.path = pathMatch[0];

    // API keys: only the mention is flagged, no value is captured
    if (/api[_-]?key/i.test(text)) params.hasApiKey = true;

    // Database names
    const dbMatch = text.match(/\b(?:database|db)[:\s]+([\w-]+)/i);
    if (dbMatch) params.database = dbMatch[1];

    // Collection names
    const collectionMatch = text.match(/\bcollection[:\s]+([\w-]+)/i);
    if (collectionMatch) params.collection = collectionMatch[1];

    // Frameworks (react, vue, angular, etc.) - first listed framework found wins
    const frameworks = ['react', 'vue', 'angular', 'svelte', 'ember', 'backbone'];
    for (const framework of frameworks) {
      if (new RegExp(`\\b${framework}\\b`, 'i').test(text)) {
        params.framework = framework.toLowerCase();
        break;
      }
    }

    // Module systems (a commonjs mention overrides an esm mention)
    if (/\b(?:esm|es6|es modules?)\b/i.test(text)) params.module_type = 'esm';
    if (/\b(?:commonjs|cjs|require)\b/i.test(text)) params.module_type = 'commonjs';

    // Package/library names (generic)
    const packageMatch = text.match(/(?:package|library|module)[:\s]+([\w-]+)/i);
    if (packageMatch) params.package = packageMatch[1];

    // Confirmation/approval flags
    // Handle negations: "never X without confirmation" means confirmation IS required
    if (/\b(?:never|don't|do not).*without\s+confirmation\b/i.test(text)) {
      params.confirmed = true; // Double negative = positive requirement
    } else if (/\b(?:with confirmation|require confirmation|must confirm|need confirmation)\b/i.test(text)) {
      params.confirmed = true;
    } else if (/\b(?:without confirmation|no confirmation|skip confirmation)\b/i.test(text)) {
      params.confirmed = false;
    }

    // Patterns (callback, promise, async/await) - the last matching one wins
    if (/\b(?:callback|callbacks)\b/i.test(text)) params.pattern = 'callback';
    if (/\b(?:promise|promises)\b/i.test(text)) params.pattern = 'promise';
    if (/\b(?:async\/await|async-await)\b/i.test(text)) params.pattern = 'async/await';

    return params;
  }

  /**
   * Weight an instruction by its age using exponential decay.
   *
   * weight = e^(-ageSeconds / 3600). 3600 s is the e-folding time constant,
   * not a half-life: after one hour the weight is about 0.37. Timestamps in
   * the future clamp to 1. An unparseable timestamp yields the neutral 0.5
   * instead of NaN.
   *
   * @param {Date|string|number} timestamp - When the instruction was given
   * @returns {number} Weight in [0, 1]
   */
  _calculateRecencyWeight(timestamp) {
    const ageSeconds = (new Date() - new Date(timestamp)) / 1000;
    if (Number.isNaN(ageSeconds)) {
      return 0.5;
    }

    const decayTimeConstant = 3600; // seconds (1 hour)
    const weight = Math.exp(-ageSeconds / decayTimeConstant);
    return Math.min(1.0, Math.max(0.0, weight));
  }

  /**
   * Keyword-overlap similarity between two texts: the Jaccard index of their
   * lower-cased, whitespace-separated words longer than three characters.
   *
   * @param {string} text1
   * @param {string} text2
   * @returns {number} Similarity in [0, 1]; 0 when either text is empty or missing
   */
  _semanticSimilarity(text1, text2) {
    // Handle null/undefined inputs
    if (!text1 || !text2) return 0;

    const words1 = new Set(text1.toLowerCase().split(/\s+/).filter(w => w.length > 3));
    const words2 = new Set(text2.toLowerCase().split(/\s+/).filter(w => w.length > 3));
    const intersection = new Set([...words1].filter(w => words2.has(w)));
    const union = new Set([...words1, ...words2]);

    return union.size > 0 ? intersection.size / union.size : 0;
  }

  /**
   * Fallback classification returned when classify() fails.
   * @param {*} text - The original (possibly invalid) instruction text
   * @param {Date} timestamp - Timestamp passed to classify()
   * @returns {Object} TACTICAL / MEDIUM / RECOMMENDED classification with an error note
   */
  _defaultClassification(text, timestamp) {
    return {
      text,
      quadrant: 'TACTICAL',
      quadrantInfo: this.quadrants.TACTICAL,
      persistence: 'MEDIUM',
      persistenceScore: 0.5,
      explicitness: 0.5,
      verification: 'RECOMMENDED',
      verification_required: 'RECOMMENDED', // Alias for test compatibility
      parameters: {},
      timestamp,
      source: 'unknown',
      recencyWeight: 0.5,
      metadata: {
        temporalScope: 'IMMEDIATE',
        humanOversight: 'IMPLEMENTATION_EXPERTISE',
        conflictSeverity: 'WARNING',
        error: 'Failed to classify, using safe defaults'
      }
    };
  }

  /**
   * Get a snapshot of the classification statistics.
   *
   * The nested counter objects are copied, so changing the returned value
   * does not alter the classifier's own counters.
   *
   * @returns {Object} Statistics plus a `timestamp` of when the snapshot was taken
   */
  getStats() {
    return {
      ...this.stats,
      by_quadrant: { ...this.stats.by_quadrant },
      by_persistence: { ...this.stats.by_persistence },
      by_verification: { ...this.stats.by_verification },
      timestamp: new Date()
    };
  }
}
// Build one classifier when the module is first loaded and export that
// instance, so every consumer of this module receives the same object.
const sharedClassifier = new InstructionPersistenceClassifier();

module.exports = sharedClassifier;