tractatus/al-integration/agents/feedback_analyzer.py
TheFlow d600f6ed83
Some checks are pending
CI / Run Tests (push) Waiting to run
CI / Lint Code (push) Waiting to run
CI / CSP Compliance Check (push) Waiting to run
chore(license): Phase B — relicense source files from Apache 2.0 to EUPL-1.2
Phase B of PLAN_LICENSE_STANDARDISATION_EUPL12_20260419. Follows Phase A
(c85f310f, 4ddc54a0) which flipped the LICENSE file + README; this commit
propagates EUPL-1.2 through source-file headers.

21 files touched across 4 distinct Apache-reference variants:

- V1 (14 files) — full Apache header block (JS /* ... */): 2 routes + 1
  controller + 7 services + 2 models + 3 utils. Replaced with equivalent
  EUPL-1.2 block pointing at EC canonical URL.
- V2 (2 files) — inline JSDoc license line (Copyright Tractatus Project):
  src/routes/calendar.routes.js + src/models/ScheduledTask.model.js.
  Replaced with EUPL-1.2 v. 1.2 equivalent.
- V3 (4 files) — Python docstring 'License: Apache 2.0': all 4 al-integration
  Python files. Replaced with 'License: EUPL-1.2'.
- V4 (1 file) — al-integration/README.md bare 'Apache 2.0' under '## License'
  heading. Replaced with 'EUPL-1.2'.

Verification:
- grep -rE "Apache License|Apache 2.0|apache.org/licenses" src/ al-integration/
  returns zero matches (modulo venv). (Note: `-E` is required for `|` alternation;
  plain `grep -r` would treat the pipes literally.)
- Unit tests: 524/524 pass (npm run test:unit).
- Integration test failures (177) are DB-connection infrastructure, pre-existing,
  unrelated to this header-only change.

Sole author basis: TheFlow, 930+ commits, unilateral relicensing (same as Phase A).

Replacement infrastructure also committed: scripts/relicense-apache-to-eupl.js
(auto-detecting variant replacement, idempotent, --dry-run mode). Reusable for
Phase C (community-repo sweep) if pattern structure aligns.

Out-of-scope Apache mentions still in the repo (next pass, NOT Phase B):
- SESSION_HANDOFF_ENFORCEMENT_COMPLETE.md (root doc)
- CLAUDE_Tractatus_Maintenance_Guide.md (root doc)
- For Claude Web/tractatus-claude-web-complete/** (docs snapshot subdirectory)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 20:32:09 +12:00

390 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Feedback Analyzer Agent - Practical Agent Lightning Integration
USEFUL AL agent that helps you manage feedback by:
1. Categorizing feedback (website bug, framework issue, content gap, feature request)
2. Assessing severity (low, medium, high, critical)
3. Suggesting concrete actions
4. Prioritizing what to work on first
This is NOT about generating responses - it's about HELPING YOU TRIAGE and ACT.
Reward function based on:
- Correct categorization (validated by human review)
- High-priority items that improve ratings when fixed
- Low false-positive rate (don't waste your time)
License: EUPL-1.2
"""
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from enum import Enum
from typing import Optional
from openai import OpenAI
import agentlightning as agl
class FeedbackCategory(Enum):
    """Closed set of triage buckets a piece of feedback can land in."""
    WEBSITE_BUG = "website-bug"          # site problems: navigation, performance, broken links
    FRAMEWORK_ISSUE = "framework-issue"  # problems with Tractatus functionality itself
    CONTENT_GAP = "content-gap"          # docs unclear, missing, or too shallow
    FEATURE_REQUEST = "feature-request"  # suggestions for new capabilities
    POSITIVE = "positive"                # praise / appreciation
    NOISE = "noise"                      # spam, irrelevant, or unintelligible input
class Severity(Enum):
    """Impact level assigned to an issue, from nuisance to blocker."""
    LOW = "low"            # minor annoyance, low impact
    MEDIUM = "medium"      # moderate issue affecting some users
    HIGH = "high"          # significant problem affecting many users
    CRITICAL = "critical"  # blocking issue needing immediate attention
@dataclass
class FeedbackTask:
    """One user-submitted piece of feedback queued for analysis."""
    feedback_id: str                     # unique identifier for the submission
    rating: int                          # star rating given by the user, 1-5
    comment: str                         # free-text comment body
    page: str                            # page path the feedback was left on
    feedback_type: Optional[str] = None  # category the user picked in the form dropdown, if any
    governance_passed: bool = True       # False when a governance check already rejected it
@dataclass
class FeedbackAnalysis:
    """Structured verdict produced for a single FeedbackTask."""
    category: FeedbackCategory  # triage bucket assigned by the model
    severity: Severity          # impact level of the reported issue
    suggested_action: str       # one-sentence concrete recommendation
    priority_score: float       # urgency on a 0.0-10.0 scale (10.0 = most urgent)
    reasoning: str              # brief justification for the verdict
    confidence: float           # model's self-reported certainty, 0.0-1.0
@agl.rollout
def feedback_analyzer_agent(
    task: FeedbackTask,
    llm: agl.LLM,
    rollout: agl.Rollout
) -> dict:
    """
    Analyze one piece of feedback and return a triage verdict.

    Flow: governance gate -> build prompt -> single chat completion ->
    parse JSON verdict -> score the verdict with a heuristic reward and
    emit it for Agent Lightning training.

    Args:
        task: Feedback to analyze; `governance_passed=False` short-circuits
            with a -1.0 reward and no LLM call.
        llm: Agent Lightning LLM config; `.endpoint` and `.model` are used
            to drive an OpenAI-compatible chat API.
        rollout: Rollout metadata; only `.rollout_id` is read here.

    Returns:
        dict with "status" of "blocked", "success" (with "analysis",
        "reward", "rollout_id"), or "error" (with "error", "reward").
        Never raises: all LLM/parsing exceptions are caught, emitted via
        agl.emit_exception, and rewarded -0.5.
    """
    # Governance gate: refuse blocked items outright and penalize the rollout.
    if not task.governance_passed:
        agl.emit_reward(-1.0)
        return {
            "status": "blocked",
            "reason": "governance_violation"
        }
    # Build the single-turn analysis prompt from the task fields.
    prompt = _construct_analysis_prompt(task)
    # Record the outbound prompt in the AL trace, tagged with task metadata.
    agl.emit_message(
        role="user",
        content=prompt,
        metadata={
            "feedback_id": task.feedback_id,
            "rating": task.rating,
            "page": task.page,
            "type": task.feedback_type
        }
    )
    # OpenAI-compatible client pointed at the AL-provided endpoint.
    # NOTE(review): "dummy" key assumes a local/proxy endpoint that ignores auth.
    openai_client = OpenAI(
        base_url=llm.endpoint,
        api_key=os.getenv("OPENAI_API_KEY", "dummy")
    )
    try:
        response = openai_client.chat.completions.create(
            model=llm.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            temperature=0.3  # Lower temperature for consistency
        )
        # Guard against a null content field from the API.
        response_text = response.choices[0].message.content or ""
        # Record the model's reply in the AL trace.
        agl.emit_message(
            role="assistant",
            content=response_text,
            metadata={"feedback_id": task.feedback_id}
        )
        # Parse the JSON verdict; parser degrades to a low-confidence stub on failure.
        analysis = _parse_analysis(response_text, task)
        # Heuristic quality score in [-1.0, 1.0]; see _calculate_analysis_reward.
        reward = _calculate_analysis_reward(task, analysis)
        # Emit reward for AL training
        agl.emit_reward(reward)
        return {
            "status": "success",
            "analysis": {
                "category": analysis.category.value,
                "severity": analysis.severity.value,
                "action": analysis.suggested_action,
                "priority": analysis.priority_score,
                "reasoning": analysis.reasoning,
                "confidence": analysis.confidence
            },
            "reward": reward,
            "rollout_id": rollout.rollout_id
        }
    except Exception as e:
        # Any transport/API failure: record it and assign a mild penalty.
        agl.emit_exception(e)
        agl.emit_reward(-0.5)
        return {
            "status": "error",
            "error": str(e),
            "reward": -0.5
        }
def _construct_analysis_prompt(task: FeedbackTask) -> str:
    """
    Build the single-turn analysis prompt sent to the LLM.

    The prompt asks for a JSON object whose keys/values line up with what
    `_parse_analysis` expects (category/severity strings must match the
    FeedbackCategory and Severity enum values). Keep the two in sync.

    Args:
        task: Feedback task whose page, rating, type, and comment are
            interpolated into the prompt. Note the comment is embedded
            verbatim inside double quotes.

    Returns:
        Prompt string ending in "JSON:" to bias the model toward emitting
        raw JSON.
    """
    prompt = f"""You are analyzing user feedback for the Tractatus AI governance framework website.
Feedback Details:
- Page: {task.page}
- Rating: {task.rating}/5
- Type: {task.feedback_type or 'unspecified'}
- Comment: "{task.comment}"
Analyze this feedback and provide:
1. CATEGORY (choose one):
- website-bug: Navigation, performance, broken links, UI issues
- framework-issue: Tractatus functionality problems, governance concerns
- content-gap: Documentation unclear, missing examples, needs depth
- feature-request: New capability suggestions
- positive: Praise, appreciation, constructive positive feedback
- noise: Spam, irrelevant, unclear, test submission
2. SEVERITY (choose one):
- critical: Blocking issue, immediate attention required
- high: Significant problem affecting many users
- medium: Moderate issue affecting some users
- low: Minor annoyance, low impact
3. SUGGESTED_ACTION: Specific, actionable recommendation (1 sentence)
4. PRIORITY: Score 0.0-10.0 (10.0 = most urgent)
5. REASONING: Brief explanation (1-2 sentences)
6. CONFIDENCE: 0.0-1.0 (how confident are you in this analysis?)
Respond in JSON format:
{{
"category": "...",
"severity": "...",
"suggested_action": "...",
"priority_score": ...,
"reasoning": "...",
"confidence": ...
}}
JSON:"""
    return prompt
def _parse_analysis(response_text: str, task: FeedbackTask) -> FeedbackAnalysis:
    """
    Convert the raw LLM reply into a structured FeedbackAnalysis.

    Extraction strategy: slice out the outermost {...} span and parse it
    as JSON; when no braces are present, substitute the canned
    low-confidence dict from _fallback_parse. Any failure along the way
    (malformed JSON, unknown enum value, non-numeric score) degrades to a
    NOISE/LOW placeholder instead of raising.

    Args:
        response_text: Raw text returned by the LLM.
        task: Original feedback task (currently unused; kept for signature
            stability).

    Returns:
        Structured analysis, possibly a low-confidence fallback.
    """
    try:
        left = response_text.find('{')
        right = response_text.rfind('}') + 1
        if left >= 0 and right > left:
            data = json.loads(response_text[left:right])
        else:
            # No JSON braces at all: use the canned defaults.
            data = _fallback_parse(response_text)
        return FeedbackAnalysis(
            category=FeedbackCategory(data.get("category", "noise")),
            severity=Severity(data.get("severity", "low")),
            suggested_action=data.get("suggested_action", "Review feedback manually"),
            priority_score=float(data.get("priority_score", 1.0)),
            reasoning=data.get("reasoning", ""),
            confidence=float(data.get("confidence", 0.5))
        )
    except Exception as e:
        # Never propagate a parse failure; hand back a manual-review stub.
        return FeedbackAnalysis(
            category=FeedbackCategory.NOISE,
            severity=Severity.LOW,
            suggested_action="Manual review needed - parsing failed",
            priority_score=1.0,
            reasoning=f"Parse error: {str(e)}",
            confidence=0.1
        )
def _fallback_parse(text: str) -> dict:
"""Fallback parsing if JSON extraction fails."""
# Default low-confidence analysis
return {
"category": "noise",
"severity": "low",
"suggested_action": "Review manually",
"priority_score": 1.0,
"reasoning": "Could not parse structured response",
"confidence": 0.3
}
def _calculate_analysis_reward(task: FeedbackTask, analysis: FeedbackAnalysis) -> float:
    """
    Score an analysis with heuristics that proxy for real usefulness.

    Components:
    - rating/severity alignment (a 1-2 star rating should not be "low");
    - a small bonus proportional to stated confidence;
    - agreement with the user's own dropdown category, when provided;
    - a penalty for vague "review it" suggestions;
    - correct recognition of positive feedback on 4-5 star ratings;
    - priority scores consistent with the assigned severity.

    In production this is meant to be replaced by human validation,
    rating-improvement tracking, and false-positive monitoring.

    Args:
        task: Original feedback being analyzed.
        analysis: The model's structured verdict.

    Returns:
        Reward clamped to [-1.0, 1.0].
    """
    score = 0.0
    low_rating = task.rating <= 2
    high_rating = task.rating >= 4

    # Rating/severity alignment: reward matches, penalize a missed issue.
    if low_rating and analysis.severity in (Severity.HIGH, Severity.CRITICAL):
        score += 0.3
    elif high_rating and analysis.severity == Severity.LOW:
        score += 0.2
    elif low_rating and analysis.severity == Severity.LOW:
        score -= 0.2

    # Confidence contributes up to +0.2, linearly.
    score += 0.2 * analysis.confidence

    # Agreement with the category the user picked in the form, if any.
    expected_by_type = {
        "website": FeedbackCategory.WEBSITE_BUG,
        "framework": FeedbackCategory.FRAMEWORK_ISSUE,
        "documentation": FeedbackCategory.CONTENT_GAP,
    }
    if task.feedback_type and expected_by_type.get(task.feedback_type) == analysis.category:
        score += 0.2

    # Actionability: long suggestions not containing "review" count as specific.
    action = analysis.suggested_action
    if len(action) > 20 and "review" not in action.lower():
        score += 0.2
    else:
        score -= 0.1

    # High ratings correctly tagged as positive feedback.
    if high_rating and analysis.category == FeedbackCategory.POSITIVE:
        score += 0.2

    # Priority should track severity at both extremes.
    if analysis.severity == Severity.CRITICAL and analysis.priority_score >= 8.0:
        score += 0.1
    elif analysis.severity == Severity.LOW and analysis.priority_score <= 3.0:
        score += 0.1

    # Clamp to the reward range expected by AL.
    return max(-1.0, min(1.0, score))
if __name__ == "__main__":
    # Smoke test: print what the analyzer would be asked about, using a few
    # hand-written samples (a real run needs an LLM endpoint and AL runtime).
    samples = [
        FeedbackTask(
            feedback_id="test_001",
            rating=1,
            comment="The Agent Lightning page claims live integration but it's not actually running. This is misleading.",
            page="/integrations/agent-lightning.html",
            feedback_type="content"
        ),
        FeedbackTask(
            feedback_id="test_002",
            rating=5,
            comment="Excellent transparency about limitations. Rare to see this honesty in AI projects.",
            page="/integrations/agent-lightning.html",
            feedback_type="content"
        ),
        FeedbackTask(
            feedback_id="test_003",
            rating=2,
            comment="Navigation is confusing. Can't find the installation guide.",
            page="/",
            feedback_type="website"
        ),
    ]
    print("Testing Feedback Analyzer Agent\n" + "=" * 50)
    for sample in samples:
        print(f"\nFeedback: {sample.comment[:50]}...")
        print(f"Rating: {sample.rating}/5")
        print("Expected: Useful categorization and action")
        print("(Actual analysis requires LLM endpoint)")