{ "breadcrumb": { "home": "Home", "current": "Architecture" }, "hero": { "badge": "🔬 EARLY-STAGE RESEARCH • PROMISING APPROACH", "title": "Exploring Structural AI Safety", "subtitle": "Tractatus explores external governance—architectural boundaries operating outside the AI runtime that may be more resistant to adversarial manipulation than behavioral training alone.", "challenge_label": "The Challenge:", "challenge_text": "Behavioral training (Constitutional AI, RLHF) shows promise but can degrade under adversarial prompting, context pressure, or distribution shift.", "approach_label": "Our Approach:", "approach_text": "External architectural enforcement that operates independently of the AI's internal reasoning—making it structurally more difficult (though not impossible) to bypass through prompting.", "cta_architecture": "View Architecture", "cta_docs": "Read Documentation" }, "comparison": { "heading": "Why External Enforcement May Help", "behavioral_title": "Behavioral Training (Constitutional AI)", "structural_title": "Structural Enforcement (Tractatus)", "hypothesis_title": "The Core Hypothesis", "hypothesis_text": "Jailbreaks often work by manipulating the AI's internal reasoning. Tractatus boundaries operate external to that reasoning—the AI doesn't directly evaluate governance rules. 
While not foolproof, this architectural separation makes manipulation significantly harder.", "behavioral_item1": "Lives inside the AI model—accessible to adversarial prompts", "behavioral_item2": "Degrades under context pressure and long conversations", "behavioral_item3": "Can be manipulated by jailbreak techniques (DAN, roleplaying, hypotheticals)", "behavioral_item4": "Depends on AI's willingness to follow guidance", "behavioral_item5": "No verifiable audit trail independent of AI", "structural_item1": "Lives outside the AI model—not directly accessible to prompts", "structural_item2": "External services aim for consistent enforcement regardless of context", "structural_item3": "More difficult to bypass—AI actions pass through governance layer first", "structural_item4": "Architecturally resistant to manipulation via AI's internal state", "structural_item5": "Immutable audit trail stored independently of AI runtime" }, "architecture_diagram": { "title": "Runtime-Agnostic Architecture", "subtitle": "Tractatus works with any agentic AI system—Claude Code, LangChain, AutoGPT, CrewAI, or custom agents. The governance layer sits between your agent and its actions.", "layer1_title": "1. Agent Runtime Layer", "layer1_desc": "Your AI agent (any platform). Handles planning, reasoning, tool use. Tractatus is agnostic to implementation.", "layer2_title": "2. Governance Layer", "layer2_desc": "Six external services enforce boundaries, validate actions, monitor pressure. Architecturally more difficult for AI to bypass.", "layer3_title": "3. Persistent Storage", "layer3_desc": "Immutable audit logs, governance rules, instruction history. Independent of AI runtime—can't be altered by prompts." }, "services": { "heading": "Six Governance Services", "boundary": { "name": "Boundary­Enforcer", "description": "Blocks AI from making values decisions (privacy, ethics, strategic direction). 
Requires human approval.", "promise": "Early Promise: Values boundaries enforced externally—harder to manipulate through prompting." }, "instruction": { "name": "Instruction­Persistence­Classifier", "description": "Stores instructions externally with persistence levels (HIGH/MEDIUM/LOW). Aims to reduce directive fade.", "promise": "Early Promise: Instructions stored outside AI—more resistant to context manipulation." }, "validator": { "name": "Cross­Reference­Validator", "description": "Validates AI actions against instruction history. Aims to prevent pattern bias overriding explicit directives.", "promise": "Early Promise: Independent verification—AI claims checked against external source." }, "pressure": { "name": "Context­Pressure­Monitor", "description": "Monitors AI performance degradation. Escalates when context pressure threatens quality.", "promise": "Early Promise: Objective metrics may detect manipulation attempts early." }, "metacognitive": { "name": "Metacognitive­Verifier", "description": "Requires AI to pause and verify complex operations before execution. Structural safety check.", "promise": "Early Promise: Architectural gates aim to enforce verification steps." }, "deliberation": { "name": "Pluralistic­Deliberation­Orchestrator", "description": "Facilitates multi-stakeholder deliberation for values conflicts. AI provides facilitation, not authority.", "promise": "Early Promise: Human judgment required—architecturally enforced escalation for values." } }, "interactive": { "title": "Explore the Architecture Interactively", "subtitle": "Click any service node or the central core to see detailed information about how governance works.", "tip_label": "Tip:", "tip_text": "Click the central \"T\" to see how all services work together", "panel_default_title": "Explore the Governance Services", "panel_default_text": "Click any service node in the diagram (colored circles) or the central \"T\" to learn more about how Tractatus enforces AI safety." 
}, "data_viz": { "heading": "Framework in Action", "subtitle": "Interactive visualizations demonstrating how Tractatus governance services monitor and coordinate AI operations." }, "production": { "heading": "Production Reference Implementation", "subtitle": "Tractatus is deployed in production using Claude Code as the agent runtime. This demonstrates the framework's real-world viability.", "implementation_title": "Claude Code + Tractatus", "implementation_intro": "Our production deployment uses Claude Code as the agent runtime with Tractatus governance middleware. This combination provides:", "implementation_results_intro": "Results from 6-month production deployment:", "result1": "95% instruction persistence across session boundaries", "result2": "Zero values boundary violations in 127 test scenarios", "result3": "100% detection rate for pattern bias failures", "result4": "<10ms performance overhead for governance layer", "disclaimer": "*Single-agent deployment. Independent validation and multi-organization replication needed.", "testing_title": "Real-World Testing", "testing_text1": "This isn't just theory. Tractatus is running in production, handling real workloads and detecting real failure patterns.", "testing_text2": "Early results are promising—with documented incident prevention—but this needs independent validation and much wider testing.", "diagram_link": "View Claude Code Implementation Diagram →" }, "limitations": { "heading": "Limitations and Reality Check", "intro": "This is early-stage work. While we've seen promising results in our production deployment, Tractatus has not been subjected to rigorous adversarial testing or red-team evaluation.", "quote": "We have real promise but this is still in early development stage. This sounds like we have the complete issue resolved, we do not. We have a long way to go and it will require a mammoth effort by developers in every part of the industry to tame AI effectively. 
This is just a start.", "quote_attribution": "— Project Lead, Tractatus Framework", "known_heading": "Known Limitations:", "limitation1": "No dedicated red-team testing: We don't know how well these boundaries hold up against determined adversarial attacks.", "limitation2": "Small-scale validation: Six months of production use on a single project. Needs multi-organization replication.", "limitation3": "Integration challenges: Retrofitting governance into existing systems requires significant engineering effort.", "limitation4": "Performance at scale unknown: Testing limited to single-agent deployments. Multi-agent coordination untested.", "limitation5": "Evolving threat landscape: As AI capabilities grow, new failure modes will emerge that the current architecture may not address.", "needs_heading": "What We Need:", "need1": "Independent researchers to validate (or refute) our findings", "need2": "Red-team evaluation to find weaknesses and bypass techniques", "need3": "Multi-organization pilot deployments across different domains", "need4": "Industry-wide collaboration on governance standards and patterns", "need5": "Quantitative studies measuring incident reduction and cost-benefit analysis", "conclusion": "This framework is a starting point for exploration, not a finished solution. Taming AI will require sustained effort from the entire industry—researchers, practitioners, regulators, and ethicists working together." }, "cta": { "heading": "Explore a Promising Approach to AI Safety", "subtitle": "Tractatus demonstrates how structural enforcement may complement behavioral training. 
We invite researchers and practitioners to evaluate, critique, and build upon this work.", "btn_docs": "Read Documentation", "btn_research": "View Research", "btn_implementation": "Implementation Guide" }, "diagram_services": { "overview": { "name": "Tractatus Governance Layer", "shortName": "Overview", "description": "Six external governance services working together to enforce AI safety boundaries outside the AI runtime.", "detail1": "All services operate externally to the AI—making manipulation harder", "detail2": "Instruction storage and validation work together to prevent directive fade", "detail3": "Boundary enforcement and deliberation coordinate on values decisions", "detail4": "Pressure monitoring adjusts verification requirements dynamically", "detail5": "Metacognitive gates ensure AI pauses before high-risk operations", "detail6": "Each service addresses a different failure mode in AI safety", "promise": "External architectural enforcement that is structurally more difficult to bypass than behavioral training alone." }, "boundary": { "name": "BoundaryEnforcer", "shortName": "Boundary", "description": "Blocks AI from making values decisions (privacy, ethics, strategic direction). Requires human approval.", "detail1": "Enforces Tractatus 12.1-12.7 boundaries", "detail2": "Values decisions architecturally require humans", "detail3": "Prevents autonomous AI decision-making on ethical questions", "detail4": "External enforcement—harder to bypass via prompting", "promise": "Values boundaries enforced externally—harder to manipulate through prompting." }, "instruction": { "name": "InstructionPersistenceClassifier", "shortName": "Instruction", "description": "Stores instructions externally with persistence levels (HIGH/MEDIUM/LOW). 
Aims to reduce directive fade.", "detail1": "Quadrant-based classification (STR/OPS/TAC/SYS/STO)", "detail2": "Time-persistence metadata tagging", "detail3": "Temporal horizon modeling (STRATEGIC, OPERATIONAL, TACTICAL)", "detail4": "External storage independent of AI runtime", "promise": "Instructions stored outside AI—more resistant to context manipulation." }, "validator": { "name": "CrossReferenceValidator", "shortName": "Validator", "description": "Validates AI actions against instruction history. Aims to prevent pattern bias overriding explicit directives.", "detail1": "Cross-references AI claims with external instruction history", "detail2": "Detects pattern-based overrides of explicit user directives", "detail3": "Independent verification layer", "detail4": "Helps prevent instruction drift", "promise": "Independent verification—AI claims checked against external source." }, "pressure": { "name": "ContextPressureMonitor", "shortName": "Pressure", "description": "Monitors AI performance degradation. Escalates when context pressure threatens quality.", "detail1": "Tracks token usage, complexity, error rates", "detail2": "Detects degraded operating conditions", "detail3": "Adjusts verification requirements under pressure", "detail4": "Objective metrics for quality monitoring", "promise": "Objective metrics may detect manipulation attempts early." }, "metacognitive": { "name": "MetacognitiveVerifier", "shortName": "Metacognitive", "description": "Requires AI to pause and verify complex operations before execution. Structural safety check.", "detail1": "AI self-checks alignment, coherence, safety before execution", "detail2": "Structural pause-and-verify gates", "detail3": "Selective verification (not constant)", "detail4": "Architectural enforcement of reflection steps", "promise": "Architectural gates aim to enforce verification steps." 
}, "deliberation": { "name": "PluralisticDeliberationOrchestrator", "shortName": "Deliberation", "description": "Facilitates multi-stakeholder deliberation for values conflicts where no single \"correct\" answer exists.", "detail1": "Non-hierarchical coordination for values conflicts", "detail2": "Stakeholder perspective representation", "detail3": "Consensus-building for ethical trade-offs", "detail4": "Addresses values pluralism in AI safety", "promise": "Facilitates deliberation across stakeholder perspectives for values conflicts." } } }