tractatus/tests/poc/memory-tool/week2-full-rules-test.js
TheFlow 2298d36bed fix(submissions): restructure Economist package and fix article display
- Create Economist SubmissionTracking package correctly:
  * mainArticle = full blog post content
  * coverLetter = 216-word SIR— letter
  * Links to blog post via blogPostId
- Archive 'Letter to The Economist' from blog posts (it's the cover letter)
- Fix date display on article cards (use published_at)
- Target publication already displaying via blue badge

Database changes:
- Make blogPostId optional in SubmissionTracking model
- Economist package ID: 68fa85ae49d4900e7f2ecd83
- Le Monde package ID: 68fa2abd2e6acd5691932150

Next: Enhanced modal with tabs, validation, export

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-24 08:47:42 +13:00

308 lines
11 KiB
JavaScript

/**
* Phase 5 PoC - Week 2: Full Tractatus Rules Integration
*
* Goal: Load all 18 governance rules into memory tool and validate persistence
*
* Success Criteria:
* - All 18 rules stored successfully
* - All 18 rules retrieved with 100% fidelity
* - API latency measured and acceptable (<1000ms per operation)
* - Data integrity maintained across storage/retrieval
*/
const Anthropic = require('@anthropic-ai/sdk');
const { FilesystemMemoryBackend } = require('./basic-persistence-test');
const path = require('path');
const fs = require('fs').promises;
require('dotenv').config();
// Configuration
// Claude model identifier string.
const MODEL = 'claude-sonnet-4-5';
// Directory passed to FilesystemMemoryBackend; resolved three levels above this file.
const MEMORY_BASE_PATH = path.join(__dirname, '..', '..', '..', '.memory-poc-week2');
// JSON file whose `instructions` property holds the governance rules.
const INSTRUCTION_HISTORY_PATH = path.join(
  __dirname,
  '..',
  '..',
  '..',
  '.claude',
  'instruction-history.json'
);
/**
 * Load the Tractatus governance rules from the instruction-history file.
 *
 * Reads INSTRUCTION_HISTORY_PATH as UTF-8, parses it as JSON and returns the
 * value of its `instructions` property.
 *
 * @returns {Promise<Array<Object>>} The `instructions` array from the file.
 * @throws {Error} If the file cannot be read, is not valid JSON, or does not
 *   contain an `instructions` array.
 */
async function loadTractatusRules() {
  const data = await fs.readFile(INSTRUCTION_HISTORY_PATH, 'utf8');
  const parsed = JSON.parse(data);
  const instructions =
    parsed !== null && typeof parsed === 'object' ? parsed.instructions : undefined;
  // Fail here with a clear message instead of letting callers hit an opaque
  // "cannot read properties of undefined (reading 'length')" later on.
  if (!Array.isArray(instructions)) {
    throw new Error(
      `Invalid instruction history at ${INSTRUCTION_HISTORY_PATH}: expected an "instructions" array`
    );
  }
  return instructions;
}
/**
 * Build an Anthropic SDK client from the CLAUDE_API_KEY environment variable.
 *
 * @returns {Anthropic} A client constructed with `{ apiKey }`.
 * @throws {Error} If CLAUDE_API_KEY is unset or empty.
 */
function createClient() {
  const { CLAUDE_API_KEY: apiKey } = process.env;
  if (apiKey) {
    return new Anthropic({ apiKey });
  }
  throw new Error('CLAUDE_API_KEY environment variable not set');
}
/**
 * Simulate client-side handling of a memory `tool_use` block.
 *
 * Dispatches on `toolUse.input.command`:
 *  - `view`:   calls `backend.view(input.path)` and returns the result
 *              pretty-printed as JSON.
 *  - `create`: calls `backend.create(input.path, data)`, where `data` is
 *              `JSON.parse(input.content)` when `content` is truthy and
 *              `input.data` otherwise.
 *
 * An unknown command, a missing `input` object, and any error thrown by the
 * backend or by JSON parsing all resolve to a `tool_result` with
 * `is_error: true`; the function does not reject for those cases.
 *
 * @param {{id: string, input: {command: string, path: string, content?: string, data?: *}}} toolUse
 *   The tool-use block to handle; `id` is echoed back as `tool_use_id`.
 * @param {{view: Function, create: Function}} backend
 *   Storage backend providing async `view(path)` and `create(path, data)`.
 * @returns {Promise<{type: string, tool_use_id: string, content: string, is_error?: boolean}>}
 *   The `tool_result` describing success or failure.
 */
async function handleMemoryToolUse(toolUse, backend) {
  // Result builders; key order matches the shapes used throughout this handler.
  const succeed = (content) => ({
    type: 'tool_result',
    tool_use_id: toolUse.id,
    content
  });
  const fail = (content) => ({
    type: 'tool_result',
    tool_use_id: toolUse.id,
    is_error: true,
    content
  });

  const { input } = toolUse;
  // A tool-use block without an input object is reported like every other
  // failure instead of escaping as a TypeError on `input.command`.
  if (input === null || typeof input !== 'object') {
    return fail('Invalid tool use: missing input');
  }

  switch (input.command) {
    case 'view':
      try {
        const data = await backend.view(input.path);
        return succeed(JSON.stringify(data, null, 2));
      } catch (error) {
        return fail(`Error reading file: ${error.message}`);
      }
    case 'create':
      try {
        // `content` carries serialized JSON; `data` is an already-parsed fallback.
        const data = input.content ? JSON.parse(input.content) : input.data;
        await backend.create(input.path, data);
        return succeed('File created successfully');
      } catch (error) {
        return fail(`Error creating file: ${error.message}`);
      }
    default:
      return fail(`Unsupported command: ${input.command}`);
  }
}
/**
 * Main test execution: round-trip the governance rules through the
 * filesystem memory backend and report the outcome on the console.
 *
 * Steps performed:
 *  1. Load the rules via loadTractatusRules() and print their distribution
 *     by `quadrant` and `persistence`.
 *  2. Initialize the backend created for MEMORY_BASE_PATH.
 *  3. Store all rules in `governance/tractatus-rules-complete.json`.
 *  4. Read that file back.
 *  5. Compare `id`, `text`, `quadrant` and `persistence` of every rule.
 *  6. Store and re-read the rules with ids inst_016, inst_017 and inst_018
 *     individually, comparing their JSON serializations.
 *  7. Print timings against a 1000ms target (a miss only prints WARN).
 *
 * Failures in any step are caught, logged and recorded in `errors`.
 * `backend.cleanup()` is always attempted; a cleanup failure is recorded in
 * `errors` and marks the run as failed instead of rejecting, so the summary
 * is always printed and the results object is always returned.
 *
 * @returns {Promise<{success: boolean, rulesLoaded: number, rulesStored: number,
 *   rulesRetrieved: number, integrityChecks: {passed: number, failed: number},
 *   apiCalls: number, memoryOperations: number, timings: Object,
 *   errors: string[], totalLatency?: number}>} Collected results;
 *   `totalLatency` is only set when all steps passed.
 */
async function runFullRulesTest() {
  const divider = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';
  console.log(divider);
  console.log(' Phase 5 PoC Week 2: Full Tractatus Rules Test');
  console.log(`${divider}\n`);

  const backend = new FilesystemMemoryBackend(MEMORY_BASE_PATH);
  const results = {
    success: false,
    rulesLoaded: 0,
    rulesStored: 0,
    rulesRetrieved: 0,
    integrityChecks: { passed: 0, failed: 0 },
    apiCalls: 0,
    memoryOperations: 0,
    timings: {},
    errors: []
  };

  try {
    // Step 1: Load Tractatus rules
    console.log('[Step 1] Loading Tractatus governance rules...');
    const loadStart = Date.now();
    const rules = await loadTractatusRules();
    results.timings.load = Date.now() - loadStart;
    // An empty rule set would otherwise "pass" with NaN averages and rates.
    if (!Array.isArray(rules) || rules.length === 0) {
      throw new Error('No governance rules loaded');
    }
    results.rulesLoaded = rules.length;
    console.log(` ✓ Loaded ${rules.length} governance rules`);
    console.log(` Time: ${results.timings.load}ms`);

    // Show rule breakdown
    const quadrantCounts = {};
    const persistenceCounts = {};
    rules.forEach(rule => {
      quadrantCounts[rule.quadrant] = (quadrantCounts[rule.quadrant] || 0) + 1;
      persistenceCounts[rule.persistence] = (persistenceCounts[rule.persistence] || 0) + 1;
    });
    console.log('\n Rule Distribution:');
    Object.entries(quadrantCounts).forEach(([quadrant, count]) => {
      console.log(` ${quadrant}: ${count}`);
    });
    console.log('\n Persistence Levels:');
    Object.entries(persistenceCounts).forEach(([level, count]) => {
      console.log(` ${level}: ${count}`);
    });

    // Step 2: Initialize backend
    console.log('\n[Step 2] Initializing memory backend...');
    await backend.initialize();

    // Step 3: Store rules in filesystem first (baseline)
    console.log('\n[Step 3] Storing rules to filesystem backend...');
    const storeStart = Date.now();
    const rulesData = {
      version: '1.0',
      updated_at: new Date().toISOString(),
      total_rules: rules.length,
      rules: rules
    };
    await backend.create('governance/tractatus-rules-complete.json', rulesData);
    results.timings.store = Date.now() - storeStart;
    results.rulesStored = rules.length;
    console.log(` ✓ Stored ${rules.length} rules`);
    console.log(` Time: ${results.timings.store}ms`);
    console.log(` Latency per rule: ${(results.timings.store / rules.length).toFixed(2)}ms`);

    // Step 4: Retrieve and validate
    console.log('\n[Step 4] Retrieving rules from backend...');
    const retrieveStart = Date.now();
    const retrieved = await backend.view('governance/tractatus-rules-complete.json');
    results.timings.retrieve = Date.now() - retrieveStart;
    results.rulesRetrieved = retrieved.rules.length;
    console.log(` ✓ Retrieved ${retrieved.rules.length} rules`);
    console.log(` Time: ${results.timings.retrieve}ms`);

    // Step 5: Data integrity validation
    console.log('\n[Step 5] Validating data integrity...');
    if (retrieved.rules.length !== rules.length) {
      throw new Error(`Rule count mismatch: stored ${rules.length}, retrieved ${retrieved.rules.length}`);
    }
    // Check each rule field-by-field so a mismatch can name the offending field.
    for (let i = 0; i < rules.length; i++) {
      const original = rules[i];
      const retrieved_rule = retrieved.rules[i];
      const checks = [
        { field: 'id', match: original.id === retrieved_rule.id },
        { field: 'text', match: original.text === retrieved_rule.text },
        { field: 'quadrant', match: original.quadrant === retrieved_rule.quadrant },
        { field: 'persistence', match: original.persistence === retrieved_rule.persistence }
      ];
      const allMatch = checks.every(c => c.match);
      if (allMatch) {
        results.integrityChecks.passed++;
      } else {
        results.integrityChecks.failed++;
        console.log(` ✗ Rule ${original.id} failed integrity check`);
        checks.forEach(check => {
          if (!check.match) {
            console.log(` ${check.field}: mismatch`);
          }
        });
      }
    }
    const integrityRate = (results.integrityChecks.passed / rules.length) * 100;
    console.log(`\n Integrity: ${results.integrityChecks.passed}/${rules.length} rules (${integrityRate.toFixed(1)}%)`);
    if (results.integrityChecks.failed > 0) {
      throw new Error(`Data integrity validation failed: ${results.integrityChecks.failed} rules corrupted`);
    }

    // Step 6: Test critical rules individually
    console.log('\n[Step 6] Testing critical enforcement rules...');
    const criticalRules = rules.filter(r =>
      ['inst_016', 'inst_017', 'inst_018'].includes(r.id)
    );
    console.log(` Testing ${criticalRules.length} critical rules:`);
    for (const rule of criticalRules) {
      await backend.create(`governance/${rule.id}.json`, rule);
      const retrieved_single = await backend.view(`governance/${rule.id}.json`);
      // Whole-object comparison via serialization, unlike the four-field check above.
      const match = JSON.stringify(rule) === JSON.stringify(retrieved_single);
      const status = match ? '✓' : '✗';
      console.log(` ${status} ${rule.id}: ${match ? 'PASS' : 'FAIL'}`);
      if (!match) {
        throw new Error(`Critical rule ${rule.id} failed validation`);
      }
    }

    // Step 7: Performance summary
    console.log('\n[Step 7] Performance Assessment...');
    const totalLatency = results.timings.store + results.timings.retrieve;
    const avgPerRule = totalLatency / rules.length;
    console.log(` Store: ${results.timings.store}ms (${(results.timings.store / rules.length).toFixed(2)}ms/rule)`);
    console.log(` Retrieve: ${results.timings.retrieve}ms`);
    console.log(` Total: ${totalLatency}ms`);
    console.log(` Average per rule: ${avgPerRule.toFixed(2)}ms`);
    const target = 1000; // 1 second per batch operation
    // Missing the target is informational only; it does not fail the run.
    const status = totalLatency < target ? 'PASS' : 'WARN';
    console.log(` Target: <${target}ms - ${status}`);

    results.success = true;
    results.totalLatency = totalLatency;
  } catch (error) {
    console.error('\n✗ TEST FAILED:', error.message);
    if (error.stack) {
      console.error('\nStack trace:', error.stack);
    }
    results.errors.push(error.message);
    results.success = false;
  } finally {
    // Cleanup
    console.log('\n[Cleanup] Removing test data...');
    try {
      await backend.cleanup();
    } catch (cleanupError) {
      // Record the failure rather than letting it replace the test outcome
      // and suppress the summary below.
      console.error(' ✗ Cleanup failed:', cleanupError.message);
      results.errors.push(`Cleanup failed: ${cleanupError.message}`);
      results.success = false;
    }
  }

  // Results summary
  console.log(`\n${divider}`);
  console.log(' TEST RESULTS');
  console.log(`${divider}\n`);
  if (results.success) {
    // Report the number of rules actually validated, not a fixed count.
    console.log(`✅ SUCCESS: All ${results.rulesLoaded} Tractatus rules validated`);
    console.log('\nKey Findings:');
    console.log(` • Rules loaded: ${results.rulesLoaded}`);
    console.log(` • Rules stored: ${results.rulesStored}`);
    console.log(` • Rules retrieved: ${results.rulesRetrieved}`);
    console.log(` • Data integrity: ${results.integrityChecks.passed}/${results.rulesLoaded} (${((results.integrityChecks.passed / results.rulesLoaded) * 100).toFixed(1)}%)`);
    console.log(` • Performance: ${results.totalLatency}ms total`);
    console.log(` • Average per rule: ${(results.totalLatency / results.rulesLoaded).toFixed(2)}ms`);
    console.log('\nNext Steps:');
    console.log(' 1. Test with real Claude API (memory tool operations)');
    console.log(' 2. Measure API latency overhead');
    console.log(' 3. Test context editing with 50+ turn conversation');
  } else {
    console.log('❌ FAILURE: Test did not pass');
    console.log('\nErrors:');
    results.errors.forEach(err => console.log(`${err}`));
  }
  console.log(`\n${divider}\n`);
  return results;
}
// Entry point: only runs when this file is executed directly, not when required.
if (require.main === module) {
  (async () => {
    try {
      const { success } = await runFullRulesTest();
      // Exit code mirrors the test outcome: 0 on success, 1 otherwise.
      process.exit(success ? 0 : 1);
    } catch (error) {
      console.error('Fatal error:', error);
      process.exit(1);
    }
  })();
}
module.exports = { runFullRulesTest };