#!/usr/bin/env python3
"""
Feedback Analyzer Agent - Practical Agent Lightning Integration

USEFUL AL agent that helps you manage feedback by:
1. Categorizing feedback (website bug, framework issue, content gap, feature request)
2. Assessing severity (low, medium, high, critical)
3. Suggesting concrete actions
4. Prioritizing what to work on first

This is NOT about generating responses - it's about HELPING YOU TRIAGE and ACT.

Reward function based on:
- Correct categorization (validated by human review)
- High-priority items that improve ratings when fixed
- Low false-positive rate (don't waste your time)

License: Apache 2.0
"""

from __future__ import annotations

import json
import os
from dataclasses import dataclass
from enum import Enum
from typing import Optional

from openai import OpenAI

import agentlightning as agl


class FeedbackCategory(Enum):
    """Feedback categories"""
    WEBSITE_BUG = "website-bug"          # Navigation, performance, broken links
    FRAMEWORK_ISSUE = "framework-issue"  # Tractatus functionality problems
    CONTENT_GAP = "content-gap"          # Documentation unclear or missing
    FEATURE_REQUEST = "feature-request"  # New capability suggestions
    POSITIVE = "positive"                # Praise, appreciation
    NOISE = "noise"                      # Spam, irrelevant, unclear


class Severity(Enum):
    """Issue severity levels"""
    LOW = "low"            # Minor annoyance, low impact
    MEDIUM = "medium"      # Moderate issue, affects some users
    HIGH = "high"          # Significant problem, affects many users
    CRITICAL = "critical"  # Blocking issue, immediate attention needed


@dataclass
class FeedbackTask:
    """Feedback to be analyzed"""
    feedback_id: str
    rating: int  # 1-5
    comment: str
    page: str
    feedback_type: Optional[str] = None  # From form dropdown
    governance_passed: bool = True


@dataclass
class FeedbackAnalysis:
    """Analysis result"""
    category: FeedbackCategory
    severity: Severity
    suggested_action: str
    priority_score: float  # 0.0 - 10.0
    reasoning: str
    confidence: float  # 0.0 - 1.0
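
# A minimal sketch (not part of the agent below) of turning a raw feedback-form
# submission into a FeedbackTask. The payload keys ("id", "rating", "comment",
# "page", "type", "governance_passed") are assumptions about the form schema,
# not a documented contract.
def task_from_form_payload(payload: dict) -> FeedbackTask:
    """Build a FeedbackTask from an assumed feedback-form payload."""
    return FeedbackTask(
        feedback_id=str(payload.get("id", "unknown")),
        rating=int(payload.get("rating", 3)),
        comment=payload.get("comment", ""),
        page=payload.get("page", "/"),
        feedback_type=payload.get("type"),
        governance_passed=bool(payload.get("governance_passed", True)),
    )
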

@agl.rollout
def feedback_analyzer_agent(
    task: FeedbackTask,
    llm: agl.LLM,
    rollout: agl.Rollout
) -> dict:
    """
    Analyzes feedback and suggests actionable improvements.

    This agent HELPS YOU by:
    - Categorizing feedback accurately
    - Identifying critical issues quickly
    - Suggesting specific actions
    - Scoring priority for your attention

    Args:
        task: Feedback to analyze
        llm: LLM endpoint configuration
        rollout: Rollout metadata

    Returns:
        Analysis with category, severity, action, priority
    """
    # Skip if governance blocked
    if not task.governance_passed:
        agl.emit_reward(-1.0)
        return {
            "status": "blocked",
            "reason": "governance_violation"
        }

    # Construct analysis prompt
    prompt = _construct_analysis_prompt(task)

    # Emit prompt for AL tracing
    agl.emit_message(
        role="user",
        content=prompt,
        metadata={
            "feedback_id": task.feedback_id,
            "rating": task.rating,
            "page": task.page,
            "type": task.feedback_type
        }
    )

    # Get LLM analysis
    openai_client = OpenAI(
        base_url=llm.endpoint,
        api_key=os.getenv("OPENAI_API_KEY", "dummy")
    )

    try:
        response = openai_client.chat.completions.create(
            model=llm.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
            temperature=0.3  # Lower temperature for consistency
        )

        response_text = response.choices[0].message.content or ""

        # Emit response for AL tracing
        agl.emit_message(
            role="assistant",
            content=response_text,
            metadata={"feedback_id": task.feedback_id}
        )

        # Parse structured analysis
        analysis = _parse_analysis(response_text, task)

        # Calculate reward based on analysis quality
        reward = _calculate_analysis_reward(task, analysis)

        # Emit reward for AL training
        agl.emit_reward(reward)

        return {
            "status": "success",
            "analysis": {
                "category": analysis.category.value,
                "severity": analysis.severity.value,
                "action": analysis.suggested_action,
                "priority": analysis.priority_score,
                "reasoning": analysis.reasoning,
                "confidence": analysis.confidence
            },
            "reward": reward,
            "rollout_id": rollout.rollout_id
        }

    except Exception as e:
        agl.emit_exception(e)
        agl.emit_reward(-0.5)
        return {
            "status": "error",
            "error": str(e),
            "reward": -0.5
        }


def _construct_analysis_prompt(task: FeedbackTask) -> str:
    """
    Construct analysis prompt for LLM.

    Args:
        task: Feedback task

    Returns:
        Prompt for analysis
    """
    prompt = f"""You are analyzing user feedback for the Tractatus AI governance framework website.

Feedback Details:
- Page: {task.page}
- Rating: {task.rating}/5
- Type: {task.feedback_type or 'unspecified'}
- Comment: "{task.comment}"

Analyze this feedback and provide:

1. CATEGORY (choose one):
   - website-bug: Navigation, performance, broken links, UI issues
   - framework-issue: Tractatus functionality problems, governance concerns
   - content-gap: Documentation unclear, missing examples, needs depth
   - feature-request: New capability suggestions
   - positive: Praise, appreciation, constructive positive feedback
   - noise: Spam, irrelevant, unclear, test submission

2. SEVERITY (choose one):
   - critical: Blocking issue, immediate attention required
   - high: Significant problem affecting many users
   - medium: Moderate issue affecting some users
   - low: Minor annoyance, low impact

3. SUGGESTED_ACTION: Specific, actionable recommendation (1 sentence)

4. PRIORITY: Score 0.0-10.0 (10.0 = most urgent)

5. REASONING: Brief explanation (1-2 sentences)

6. CONFIDENCE: 0.0-1.0 (how confident are you in this analysis?)

Respond in JSON format:
{{
    "category": "...",
    "severity": "...",
    "suggested_action": "...",
    "priority_score": ...,
    "reasoning": "...",
    "confidence": ...
}}

JSON:"""

    return prompt
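
# Illustrative shape of the JSON the prompt above asks the model to return;
# the values are made up for documentation purposes, not from a real run:
# {
#   "category": "content-gap",
#   "severity": "high",
#   "suggested_action": "Add a status banner clarifying the integration is not live.",
#   "priority_score": 8.5,
#   "reasoning": "Rating of 1 with a specific complaint about misleading content.",
#   "confidence": 0.85
# }
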

def _parse_analysis(response_text: str, task: FeedbackTask) -> FeedbackAnalysis:
    """
    Parse LLM response into structured analysis.

    Args:
        response_text: LLM response
        task: Original feedback task

    Returns:
        Structured analysis
    """
    try:
        # Try to extract JSON from response
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        if json_start >= 0 and json_end > json_start:
            json_str = response_text[json_start:json_end]
            data = json.loads(json_str)
        else:
            # Fallback: parse manually
            data = _fallback_parse(response_text)

        return FeedbackAnalysis(
            category=FeedbackCategory(data.get("category", "noise")),
            severity=Severity(data.get("severity", "low")),
            suggested_action=data.get("suggested_action", "Review feedback manually"),
            priority_score=float(data.get("priority_score", 1.0)),
            reasoning=data.get("reasoning", ""),
            confidence=float(data.get("confidence", 0.5))
        )

    except Exception as e:
        # Fallback analysis if parsing fails
        return FeedbackAnalysis(
            category=FeedbackCategory.NOISE,
            severity=Severity.LOW,
            suggested_action="Manual review needed - parsing failed",
            priority_score=1.0,
            reasoning=f"Parse error: {str(e)}",
            confidence=0.1
        )


def _fallback_parse(text: str) -> dict:
    """Fallback parsing if JSON extraction fails."""
    # Default low-confidence analysis
    return {
        "category": "noise",
        "severity": "low",
        "suggested_action": "Review manually",
        "priority_score": 1.0,
        "reasoning": "Could not parse structured response",
        "confidence": 0.3
    }


def _calculate_analysis_reward(task: FeedbackTask, analysis: FeedbackAnalysis) -> float:
    """
    Calculate reward for analysis quality.

    Reward is based on heuristics that predict usefulness:
    - Rating alignment (low rating = likely real issue)
    - Confidence level
    - Actionability of suggestion
    - Appropriate severity for rating

    In production, this will be refined by:
    - Human validation of categorization
    - Whether actions taken improve ratings
    - False positive rate tracking

    Args:
        task: Original feedback
        analysis: Generated analysis

    Returns:
        Reward value -1.0 to 1.0
    """
    reward = 0.0

    # Rating-severity alignment
    if task.rating <= 2 and analysis.severity in [Severity.HIGH, Severity.CRITICAL]:
        reward += 0.3  # Good: low rating + high severity
    elif task.rating >= 4 and analysis.severity == Severity.LOW:
        reward += 0.2  # Good: high rating + low severity
    elif task.rating <= 2 and analysis.severity == Severity.LOW:
        reward -= 0.2  # Bad: low rating but low severity (missed issue)

    # Confidence reward
    reward += analysis.confidence * 0.2

    # Category-type alignment (if form provides type)
    if task.feedback_type:
        if task.feedback_type == "website" and analysis.category == FeedbackCategory.WEBSITE_BUG:
            reward += 0.2
        elif task.feedback_type == "framework" and analysis.category == FeedbackCategory.FRAMEWORK_ISSUE:
            reward += 0.2
        elif task.feedback_type == "documentation" and analysis.category == FeedbackCategory.CONTENT_GAP:
            reward += 0.2

    # Actionability check
    if len(analysis.suggested_action) > 20 and "review" not in analysis.suggested_action.lower():
        reward += 0.2  # Specific actionable suggestion
    else:
        reward -= 0.1  # Vague suggestion

    # Noise detection for high ratings (likely positive feedback)
    if task.rating >= 4 and analysis.category == FeedbackCategory.POSITIVE:
        reward += 0.2  # Correctly identified positive feedback

    # Priority score sanity check
    if analysis.severity == Severity.CRITICAL and analysis.priority_score >= 8.0:
        reward += 0.1  # Good: critical severity + high priority
    elif analysis.severity == Severity.LOW and analysis.priority_score <= 3.0:
        reward += 0.1  # Good: low severity + low priority

    # Clamp to [-1.0, 1.0]
    return max(-1.0, min(1.0, reward))
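
# Worked example of the heuristic above (illustrative numbers, not from a real
# run): a rating-1 comment analyzed as content-gap / high severity, with
# confidence 0.9, a specific non-"review" action longer than 20 characters,
# and a form type that matches none of the mapped values, scores
# 0.3 (rating-severity) + 0.18 (confidence) + 0.0 (type) + 0.2 (actionability)
# = 0.68 before clamping. The offline demo at the end of the file reproduces
# this case.
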

if __name__ == "__main__":
    # Test the analyzer with sample feedback
    test_tasks = [
        FeedbackTask(
            feedback_id="test_001",
            rating=1,
            comment="The Agent Lightning page claims live integration but it's not actually running. This is misleading.",
            page="/integrations/agent-lightning.html",
            feedback_type="content"
        ),
        FeedbackTask(
            feedback_id="test_002",
            rating=5,
            comment="Excellent transparency about limitations. Rare to see this honesty in AI projects.",
            page="/integrations/agent-lightning.html",
            feedback_type="content"
        ),
        FeedbackTask(
            feedback_id="test_003",
            rating=2,
            comment="Navigation is confusing. Can't find the installation guide.",
            page="/",
            feedback_type="website"
        ),
    ]

    print("Testing Feedback Analyzer Agent\n" + "=" * 50)

    for task in test_tasks:
        print(f"\nFeedback: {task.comment[:50]}...")
        print(f"Rating: {task.rating}/5")
        print("Expected: Useful categorization and action")
        print("(Actual analysis requires LLM endpoint)")
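
    # A minimal offline sketch of the reward heuristic: build an analysis by
    # hand (no LLM call) for the first test task and score it with
    # _calculate_analysis_reward. The analysis values are illustrative
    # assumptions, chosen to match the worked example above.
    sample_analysis = FeedbackAnalysis(
        category=FeedbackCategory.CONTENT_GAP,
        severity=Severity.HIGH,
        suggested_action="Clarify on the page that the integration is a design reference, not live.",
        priority_score=8.0,
        reasoning="Low rating with a specific complaint about misleading content.",
        confidence=0.9,
    )
    sample_reward = _calculate_analysis_reward(test_tasks[0], sample_analysis)
    print(f"\nHeuristic reward for hand-built sample analysis: {sample_reward:.2f}")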