diff --git a/public/locales/en/village-ai.json b/public/locales/en/village-ai.json index 9c49ba36..b32a2a1b 100644 --- a/public/locales/en/village-ai.json +++ b/public/locales/en/village-ai.json @@ -231,7 +231,9 @@ "story_title": "Story Assistance", "story_desc": "Writing prompts, structural advice, narrative enhancement. Cultural context decisions deferred to the storyteller, not resolved by the AI.", "memory_title": "AI Memory Transparency", - "memory_desc": "Members view and control what the AI remembers. Independent consent for triage memory, OCR memory, and summarisation memory." + "memory_desc": "Members view and control what the AI remembers. Independent consent for triage memory, OCR memory, and summarisation memory.", + "guardian_title": "Guardian Agents", + "guardian_desc": "Four-phase verification system using mathematical similarity rather than generative checking. Confidence badges, claim-level source analysis, and security transparency — all tenant-scoped." }, "limitations": { "heading": "Limitations and Open Questions", @@ -253,6 +255,33 @@ "paper_title": "Architectural Alignment Paper", "paper_desc": "Academic paper on governance during training", "researcher_title": "For Researchers", - "researcher_desc": "Open questions, collaboration opportunities, and data access" + "researcher_desc": "Open questions, collaboration opportunities, and data access", + "guardian_title": "Guardian Agents Philosophy", + "guardian_desc": "How Wittgenstein, Berlin, Ostrom, and Te Ao Māori converge in a production governance architecture" + }, + "guardian": { + "heading": "Guardian Agents: Verification Without Common-Mode Failure", + "intro": "The standard approach to AI safety verification — using additional AI models to check AI output — shares a structural flaw with the systems it checks. When both layers are probabilistic, both hallucinate, and both reward confident outputs, they share failure modes. This is common-mode failure: the checker confirms the error because it reasons the same way as the system it checks.", + "approach": "Guardian Agents resolve this by operating in a fundamentally different epistemic domain from the generation layer. The verification mechanism is embedding cosine similarity — a mathematical measurement of how closely an AI response aligns with source material. This is measurement, not interpretation. The watcher is not another speaker. The watcher is a measuring instrument.", + "phases_heading": "Four Verification Phases", + "phase1_title": "Response Verification", + "phase1_desc": "Every AI response measured against source material via embedding similarity. Score-derived confidence tiers (verified, partially verified, unverified) presented to the member — not binary safe/unsafe labels.", + "phase2_title": "Claim-Level Analysis", + "phase2_desc": "Individual claims mapped to sources or marked as unmatched. The system does not say \"this claim is wrong\" — it says \"we could not find this in your community's records.\" Absence of evidence is not evidence of absence.", + "phase3_title": "Anomaly Detection", + "phase3_desc": "Tenant-scoped baselines detect deviations in AI behaviour. What counts as anomalous in a parish archive differs from a neighbourhood coordination group — these are different values, not different calibrations.", + "phase4_title": "Adaptive Learning", + "phase4_desc": "Moderator decisions feed back into threshold tuning. Evidence burden is deliberately asymmetric: loosening safety thresholds requires 85% confidence, tightening requires 60%. A regression monitor watches every approved change.", + "foundations_heading": "Philosophical Foundations", + "foundations_intro": "The architectural choices in Guardian Agents are not engineering decisions that happen to align with philosophical positions — they are philosophical commitments that demanded specific engineering responses.", + "wittgenstein_title": "Wittgenstein", + "wittgenstein_desc": "Verification and generation must operate in different epistemic domains. The sayable (measurement) verifies what inevitably touches the unsayable (generation).", + "berlin_title": "Berlin", + "berlin_desc": "No objective function resolves value conflicts. Tenant-scoped governance prevents hidden value hierarchies. Asymmetric evidence burdens make trade-offs visible.", + "ostrom_title": "Ostrom", + "ostrom_desc": "Polycentric governance with genuinely independent verification centres. Moderators, regression monitors, and audit trails — no single authority is root.", + "teaomaori_title": "Te Ao Māori", + "teaomaori_desc": "Sovereign processing implements rangatiratanga — the community governs what happens to its own data. The platform exercises kaitiakitanga (guardianship), not ownership.", + "read_more": "Full analysis: Guardian Agents and the Philosophy of AI Accountability traces the complete philosophical genealogy — from early twentieth-century Vienna to contemporary Aotearoa New Zealand — and examines why these traditions converge on the same architectural requirements." } } diff --git a/public/village-ai.html b/public/village-ai.html index ead7e349..332cc1e1 100644 --- a/public/village-ai.html +++ b/public/village-ai.html @@ -611,6 +611,76 @@

AI Memory Transparency

Members view and control what the AI remembers. Independent consent for triage memory, OCR memory, and summarisation memory.

+
+

Guardian Agents

+

Four-phase verification system using mathematical similarity rather than generative checking. Confidence badges, claim-level source analysis, and security transparency — all tenant-scoped.

+
+ + + + +
+

Guardian Agents: Verification Without Common-Mode Failure

+
+

+ The standard approach to AI safety verification — using additional AI models to check AI output — shares a structural flaw with the systems it checks. When both layers are probabilistic, both hallucinate, and both reward confident outputs, they share failure modes. This is common-mode failure: the checker confirms the error because it reasons the same way as the system it checks. +

+

+ Guardian Agents resolve this by operating in a fundamentally different epistemic domain from the generation layer. The verification mechanism is embedding cosine similarity — a mathematical measurement of how closely an AI response aligns with source material. This is measurement, not interpretation. The watcher is not another speaker. The watcher is a measuring instrument. +

+
+ +

Four Verification Phases

+
+
+
Phase 1
+

Response Verification

+

Every AI response measured against source material via embedding similarity. Score-derived confidence tiers (verified, partially verified, unverified) presented to the member — not binary safe/unsafe labels.

+
+
+
Phase 2
+

Claim-Level Analysis

+

Individual claims mapped to sources or marked as unmatched. The system does not say “this claim is wrong” — it says “we could not find this in your community’s records.” Absence of evidence is not evidence of absence.

+
+
+
Phase 3
+

Anomaly Detection

+

Tenant-scoped baselines detect deviations in AI behaviour. What counts as anomalous in a parish archive differs from a neighbourhood coordination group — these are different values, not different calibrations.

+
+
+
Phase 4
+

Adaptive Learning

+

Moderator decisions feed back into threshold tuning. Evidence burden is deliberately asymmetric: loosening safety thresholds requires 85% confidence, tightening requires 60%. A regression monitor watches every approved change.

+
+
+ +

Philosophical Foundations

+

+ The architectural choices in Guardian Agents are not engineering decisions that happen to align with philosophical positions — they are philosophical commitments that demanded specific engineering responses. +

+
+
+

Wittgenstein

+

Verification and generation must operate in different epistemic domains. The sayable (measurement) verifies what inevitably touches the unsayable (generation).

+
+
+

Berlin

+

No objective function resolves value conflicts. Tenant-scoped governance prevents hidden value hierarchies. Asymmetric evidence burdens make trade-offs visible.

+
+
+

Ostrom

+

Polycentric governance with genuinely independent verification centres. Moderators, regression monitors, and audit trails — no single authority is root.

+
+
+

Te Ao Māori

+

Sovereign processing implements rangatiratanga — the community governs what happens to its own data. The platform exercises kaitiakitanga (guardianship), not ownership.

+
+
+ +
+

+ Full analysis: Guardian Agents and the Philosophy of AI Accountability traces the complete philosophical genealogy — from early twentieth-century Vienna to contemporary Aotearoa New Zealand — and examines why these traditions converge on the same architectural requirements. +

@@ -675,6 +745,10 @@

For Researchers

Open questions, collaboration opportunities, and data access

+ +

Guardian Agents Philosophy

+

How Wittgenstein, Berlin, Ostrom, and Te Ao Māori converge in a production governance architecture

+