{
  "breadcrumb": {
    "home": "Home",
    "current": "Architecture"
  },
  "hero": {
    "badge": "🏛️ BUILT ON LIVING SYSTEMS PRINCIPLES",
    "title": "Five Architectural Principles for AI Safety",
    "subtitle": "Tractatus governance is <strong>woven into deployment architecture</strong>, not bolted on. Five principles guide how the framework evolves, maintains coherence, and resists bypass—making it structurally more difficult (though not impossible) to circumvent through prompting.",
    "challenge_label": "The Problem:",
    "challenge_text": "Behavioral training can be manipulated through cleverly crafted prompts. AI governance based solely on internal reasoning is vulnerable to jailbreaks.",
    "approach_label": "Our Approach:",
    "approach_text": "Architectural enforcement operating in the critical execution path—governance services validate every action before it executes, independent of the AI's internal reasoning.",
    "cta_principles": "See the Five Principles",
    "cta_docs": "Read Documentation"
  },
  "comparison": {
    "heading": "Why External Enforcement May Help",
    "behavioral_title": "Behavioral Training (Constitutional AI)",
    "structural_title": "Structural Enforcement (Tractatus)",
    "hypothesis_title": "The Core Hypothesis",
    "hypothesis_text": "<strong>Jailbreaks often work by manipulating the AI's internal reasoning.</strong> Tractatus boundaries operate <em>external</em> to that reasoning—the AI doesn't directly evaluate governance rules. While not foolproof, this architectural separation makes manipulation significantly harder.",
    "behavioral_item1": "Lives <strong>inside</strong> the AI model—accessible to adversarial prompts",
    "behavioral_item2": "Degrades under context pressure and long conversations",
    "behavioral_item3": "Can be manipulated by jailbreak techniques (DAN, roleplaying, hypotheticals)",
    "behavioral_item4": "Depends on AI's willingness to follow guidance",
    "behavioral_item5": "No verifiable audit trail independent of AI",
    "structural_item1": "Lives <strong>outside</strong> the AI model—not directly accessible to prompts",
    "structural_item2": "External services aim for consistent enforcement regardless of context",
    "structural_item3": "<em>More difficult</em> to bypass—AI actions pass through governance layer first",
    "structural_item4": "Architecturally resistant to manipulation via AI's internal state",
    "structural_item5": "Immutable audit trail stored independently of AI runtime"
  },
  "principles": {
    "heading": "Five Architectural Principles",
    "subtitle": "These principles, adapted from Christopher Alexander's work on living systems, guide how Tractatus evolves and maintains coherence. They're not documentation—they're <strong>design criteria</strong> enforced architecturally.",
    "not_separateness": {
      "title": "Not-Separateness: Governance in the Critical Path",
      "principle": "Governance woven into deployment architecture, not bolted on",
      "description": "Tractatus governance services operate <strong>in the critical execution path</strong>—every action passes through validation before executing. This isn't monitoring after-the-fact, it's architectural enforcement that cannot be bypassed.",
      "example_label": "Example: PreToolUse Hook",
      "example": "When the AI attempts to edit a file, the PreToolUse hook intercepts <em>before execution</em>. BoundaryEnforcer, CrossReferenceValidator, and other services validate the action. If any service blocks, the edit never happens—architecturally impossible to bypass.",
      "contrast": "<strong>Contrast:</strong> Bolt-on compliance systems monitor actions after they occur, creating separation between governance and execution. An AI agent could theoretically disable monitoring or exploit gaps. Tractatus eliminates that separation."
    },
    "deep_interlock": {
      "title": "Deep Interlock: Services Reinforce Each Other",
      "principle": "Coordinated governance, not isolated checks",
      "description": "The six governance services don't operate in silos—they coordinate through mutual validation. High context pressure intensifies boundary checking. Instruction persistence affects cross-reference validation. Service outputs feed into each other, creating resilience through redundancy.",
      "example_label": "Example: The 27027 Incident",
      "example": "AI attempted to use default database port despite HIGH persistence instruction specifying port 27027. InstructionPersistenceClassifier flagged the instruction. ContextPressureMonitor detected 53.5% pressure. CrossReferenceValidator caught the conflict. BoundaryEnforcer blocked the action. Four services working together prevented the error.",
      "benefit": "<strong>Why it matters:</strong> Single service bypass doesn't compromise governance. An attacker would need to circumvent multiple coordinated services simultaneously—exponentially harder than defeating isolated checks."
    },
    "gradients": {
      "title": "Gradients Not Binary: Nuanced Responses",
      "principle": "Intensity levels, not yes/no switches",
      "description": "Governance operates on gradients: NORMAL → ELEVATED → HIGH → CRITICAL. Context pressure, security impact, and validation rigor all scale with intensity. This mirrors how living systems adapt—gradual responses, not mechanical on/off.",
      "example_label": "Example: Context Pressure Monitoring",
      "example": "At NORMAL pressure (0-25%), routine operations proceed smoothly. At ELEVATED (25-50%), validation becomes more thorough. At HIGH (50-75%), human review triggers more frequently. At CRITICAL (>75%), framework recommends session closedown. Graduated response prevents both alert fatigue and catastrophic failures.",
      "contrast": "<strong>Contrast:</strong> Binary \"allowed/blocked\" systems create brittleness—either everything passes or nothing does. Gradients enable natural adaptation to varying risk levels."
    },
    "structure_preserving": {
      "title": "Structure-Preserving: Audit Continuity",
      "principle": "Changes enhance without breaking",
      "description": "Framework changes must preserve wholeness—audit logs remain interpretable, decisions remain valid, institutional memory survives evolution. Version 4.2 logs are readable in version 4.4. Six-month-old audit decisions still make sense. Structure-preserving transformations maintain coherence across time.",
      "example_label": "Example: Adding Framework Fade Detection",
      "example": "When inst_064 (framework fade detection) was added, it monitored all six services without changing their core definitions. Pre-existing audit logs remained valid. Service behavior evolved, but historical decisions stayed interpretable. Enhancement without fracture.",
      "regulatory": "<strong>Regulatory advantage:</strong> Regulators need stable audit trails. Structure-preserving evolution lets the framework adapt while maintaining compliance continuity—no need to re-interpret old decisions every version."
    },
    "living_process": {
      "title": "Living Process: Evidence-Based Evolution",
      "principle": "Grows from real failures, not theory",
      "description": "Framework changes emerge from observed reality, not predetermined plans. When services went unused, we added fade detection. When selective verification reduced noise, we evolved triggering criteria. Real operational experience drives evolution—no building solutions to theoretical problems.",
      "example_label": "Example: MetacognitiveVerifier Selective Mode",
      "example": "Audit logs showed MetacognitiveVerifier activating on trivial operations, creating noise. Rather than theorize about thresholds, we analyzed real trigger patterns. Selective mode emerged from data—verify only complex operations (3+ file modifications, 5+ sequential steps). Performance improved based on evidence, not guesswork.",
      "contrast": "<strong>Contrast:</strong> Over-engineered systems solve imagined problems. Living process builds only what reality proves necessary—lean, effective, grounded in operational truth."
    },
    "together": {
      "title": "How the Five Principles Work Together",
      "description": "These principles aren't independent—they form an interlocking pattern. <strong>Not-separateness</strong> requires <strong>deep interlock</strong> between services. <strong>Gradients</strong> enable natural adaptation. <strong>Living process</strong> drives changes that must be <strong>structure-preserving</strong> to maintain wholeness.",
      "flow_1": "Not-Separateness (governance in critical path)",
      "flow_2": "Deep Interlock (services coordinate)",
      "flow_3": "Gradients (nuanced responses)",
      "flow_4": "Living Process (evidence-based evolution)",
      "flow_5": "Structure-Preserving (audit continuity)",
      "result": "System Wholeness"
    }
  },
  "architecture_diagram": {
    "title": "Runtime-Agnostic Architecture",
    "subtitle": "Tractatus works with <strong>any agentic AI system</strong>—Claude Code, LangChain, AutoGPT, CrewAI, or custom agents. The governance layer sits between your agent and its actions.",
    "layer1_title": "1. Agent Runtime Layer",
    "layer1_desc": "Your AI agent (any platform). Handles planning, reasoning, tool use. Tractatus is agnostic to implementation.",
    "layer2_title": "2. Governance Layer",
    "layer2_desc": "Six external services enforce boundaries, validate actions, monitor pressure. <strong>Architecturally more difficult for AI to bypass.</strong>",
    "layer3_title": "3. Persistent Storage",
    "layer3_desc": "Immutable audit logs, governance rules, instruction history. Independent of AI runtime—can't be altered by prompts."
  },
  "services": {
    "heading": "Six Governance Services",
    "subtitle": "These services implement the five principles in practice. Each service embodies <strong>not-separateness</strong> (operating in the critical path), <strong>deep interlock</strong> (coordinating with others), and <strong>gradients</strong> (intensity-based responses).",
    "boundary": {
      "name": "Boundary­Enforcer",
      "description": "Blocks AI from making values decisions (privacy, ethics, strategic direction). Requires human approval.",
      "promise": "<strong>Early Promise:</strong> Values boundaries enforced externally—harder to manipulate through prompting."
    },
    "instruction": {
      "name": "Instruction­Persistence­Classifier",
      "description": "Stores instructions externally with persistence levels (HIGH/MEDIUM/LOW). Aims to reduce directive fade.",
      "promise": "<strong>Early Promise:</strong> Instructions stored outside AI—more resistant to context manipulation."
    },
    "validator": {
      "name": "Cross­Reference­Validator",
      "description": "Validates AI actions against instruction history. Aims to prevent pattern bias overriding explicit directives.",
      "promise": "<strong>Early Promise:</strong> Independent verification—AI claims checked against external source."
    },
    "pressure": {
      "name": "Context­Pressure­Monitor",
      "description": "Monitors AI performance degradation. Escalates when context pressure threatens quality.",
      "promise": "<strong>Early Promise:</strong> Objective metrics may detect manipulation attempts early."
    },
    "metacognitive": {
      "name": "Metacognitive­Verifier",
      "description": "Requires AI to pause and verify complex operations before execution. Structural safety check.",
      "promise": "<strong>Early Promise:</strong> Architectural gates aim to enforce verification steps."
    },
    "deliberation": {
      "name": "Pluralistic­Deliberation­Orchestrator",
      "description": "Facilitates multi-stakeholder deliberation for values conflicts. AI provides facilitation, not authority.",
      "promise": "<strong>Early Promise:</strong> Human judgment required—architecturally enforced escalation for values."
    }
  },
  "interactive": {
    "title": "Explore the Architecture Interactively",
    "subtitle": "Click any service node or the central core to see detailed information about how governance works.",
    "tip_label": "Tip:",
    "tip_text": "Click the central <span class=\"font-semibold text-cyan-600\">\"T\"</span> to see how all services work together",
    "panel_default_title": "Explore the Governance Services",
    "panel_default_text": "Click any service node in the diagram (colored circles) or the central \"T\" to learn more about how Tractatus enforces AI safety."
  },
  "data_viz": {
    "heading": "Framework in Action",
    "subtitle": "Interactive visualizations demonstrating how Tractatus governance services monitor and coordinate AI operations."
  },
  "production": {
    "heading": "Production Reference Implementation",
    "subtitle": "Tractatus is deployed in production using <strong>Claude Code</strong> as the agent runtime. This demonstrates the framework's real-world viability.",
    "implementation_title": "Claude Code + Tractatus",
    "implementation_intro": "Our production deployment uses Claude Code as the agent runtime with Tractatus governance middleware. This combination provides:",
    "implementation_results_intro": "Results from 6-month production deployment:",
    "result1": "<strong>95% instruction persistence</strong> across session boundaries",
    "result2": "<strong>Zero values boundary violations</strong> in 127 test scenarios",
    "result3": "<strong>100% detection rate</strong> for pattern bias failures",
    "result4": "<strong><10ms performance overhead</strong> for governance layer",
    "disclaimer": "*Single-agent deployment. Independent validation and multi-organization replication needed.",
    "testing_title": "Real-World Testing",
    "testing_text1": "<strong>This isn't just theory.</strong> Tractatus is running in production, handling real workloads and detecting real failure patterns.",
    "testing_text2": "Early results are <strong>promising</strong>—with documented incident prevention—but this needs independent validation and much wider testing.",
    "diagram_link": "View Claude Code Implementation Diagram →"
  },
  "limitations": {
    "heading": "Limitations and Reality Check",
    "intro": "<strong>This is early-stage work.</strong> While we've seen promising results in our production deployment, Tractatus has not been subjected to rigorous adversarial testing or red-team evaluation.",
    "quote": "We have real promise but this is still in early development stage. This sounds like we have the complete issue resolved, we do not. We have a long way to go and it will require a mammoth effort by developers in every part of the industry to tame AI effectively. This is just a start.",
    "quote_attribution": "— Project Lead, Tractatus Framework",
    "known_heading": "Known Limitations:",
    "limitation1": "<strong>No dedicated red-team testing:</strong> We don't know how well these boundaries hold up against determined adversarial attacks.",
    "limitation2": "<strong>Small-scale validation:</strong> Six months of production use on a single project. Needs multi-organization replication.",
    "limitation3": "<strong>Integration challenges:</strong> Retrofitting governance into existing systems requires significant engineering effort.",
    "limitation4": "<strong>Performance at scale unknown:</strong> Testing limited to single-agent deployments. Multi-agent coordination untested.",
    "limitation5": "<strong>Evolving threat landscape:</strong> As AI capabilities grow, new failure modes will emerge that current architecture may not address.",
    "needs_heading": "What We Need:",
    "need1": "Independent researchers to validate (or refute) our findings",
    "need2": "Red-team evaluation to find weaknesses and bypass techniques",
    "need3": "Multi-organization pilot deployments across different domains",
    "need4": "Industry-wide collaboration on governance standards and patterns",
    "need5": "Quantitative studies measuring incident reduction and cost-benefit analysis",
    "conclusion": "This framework is a starting point for exploration, not a finished solution. Taming AI will require sustained effort from the entire industry—researchers, practitioners, regulators, and ethicists working together."
  },
  "cta": {
    "heading": "Explore a Promising Approach to AI Safety",
    "subtitle": "Tractatus demonstrates how structural enforcement may complement behavioral training. We invite researchers and practitioners to evaluate, critique, and build upon this work.",
    "btn_docs": "Read Documentation",
    "btn_research": "View Research",
    "btn_implementation": "Implementation Guide"
  },
  "diagram_services": {
    "overview": {
      "name": "Tractatus Governance Layer",
      "shortName": "Overview",
      "description": "Six external governance services working together to enforce AI safety boundaries outside the AI runtime.",
      "detail1": "All services operate externally to the AI—making manipulation harder",
      "detail2": "Instruction storage and validation work together to prevent directive fade",
      "detail3": "Boundary enforcement and deliberation coordinate on values decisions",
      "detail4": "Pressure monitoring adjusts verification requirements dynamically",
      "detail5": "Metacognitive gates ensure AI pauses before high-risk operations",
      "detail6": "Each service addresses a different failure mode in AI safety",
      "promise": "External architectural enforcement that is structurally more difficult to bypass than behavioral training alone."
    },
    "boundary": {
      "name": "BoundaryEnforcer",
      "shortName": "Boundary",
      "description": "Blocks AI from making values decisions (privacy, ethics, strategic direction). Requires human approval.",
      "detail1": "Enforces Tractatus 12.1-12.7 boundaries",
      "detail2": "Values decisions architecturally require humans",
      "detail3": "Prevents AI autonomous decision-making on ethical questions",
      "detail4": "External enforcement - harder to bypass via prompting",
      "promise": "Values boundaries enforced externally—harder to manipulate through prompting."
    },
    "instruction": {
      "name": "InstructionPersistenceClassifier",
      "shortName": "Instruction",
      "description": "Stores instructions externally with persistence levels (HIGH/MEDIUM/LOW). Aims to reduce directive fade.",
      "detail1": "Quadrant-based classification (STR/OPS/TAC/SYS/STO)",
      "detail2": "Time-persistence metadata tagging",
      "detail3": "Temporal horizon modeling (STRATEGIC, OPERATIONAL, TACTICAL)",
      "detail4": "External storage independent of AI runtime",
      "promise": "Instructions stored outside AI—more resistant to context manipulation."
    },
    "validator": {
      "name": "CrossReferenceValidator",
      "shortName": "Validator",
      "description": "Validates AI actions against instruction history. Aims to prevent pattern bias overriding explicit directives.",
      "detail1": "Cross-references AI claims with external instruction history",
      "detail2": "Detects pattern-based overrides of explicit user directives",
      "detail3": "Independent verification layer",
      "detail4": "Helps prevent instruction drift",
      "promise": "Independent verification—AI claims checked against external source."
    },
    "pressure": {
      "name": "ContextPressureMonitor",
      "shortName": "Pressure",
      "description": "Monitors AI performance degradation. Escalates when context pressure threatens quality.",
      "detail1": "Tracks token usage, complexity, error rates",
      "detail2": "Detects degraded operating conditions",
      "detail3": "Adjusts verification requirements under pressure",
      "detail4": "Objective metrics for quality monitoring",
      "promise": "Objective metrics may detect manipulation attempts early."
    },
    "metacognitive": {
      "name": "MetacognitiveVerifier",
      "shortName": "Metacognitive",
      "description": "Requires AI to pause and verify complex operations before execution. Structural safety check.",
      "detail1": "AI self-checks alignment, coherence, safety before execution",
      "detail2": "Structural pause-and-verify gates",
      "detail3": "Selective verification (not constant)",
      "detail4": "Architectural enforcement of reflection steps",
      "promise": "Architectural gates aim to enforce verification steps."
    },
    "deliberation": {
      "name": "PluralisticDeliberationOrchestrator",
      "shortName": "Deliberation",
      "description": "Facilitates multi-stakeholder deliberation for values conflicts where no single \"correct\" answer exists.",
      "detail1": "Non-hierarchical coordination for values conflicts",
      "detail2": "Stakeholder perspective representation",
      "detail3": "Consensus-building for ethical trade-offs",
      "detail4": "Addresses values pluralism in AI safety",
      "promise": "Facilitates deliberation across stakeholder perspectives for values conflicts."
    }
  },
  "interactive_exploration": {
    "badge": "🔍 INTERACTIVE EXPLORATION",
    "heading": "See the Framework in Action",
    "intro": "Explore <strong>3,942 real governance decisions</strong> from production deployment. Filter by service, pressure level, and coordination patterns to understand how Deep Interlock operates in practice.",
    "hf_cta": "🤗 Launch Interactive Explorer on Hugging Face",
    "researcher_cta": "For Researchers →",
    "footer_note": "Apache 2.0 licensed • All data anonymized • No sign-up required"
  }
}