#!/usr/bin/env python3
"""
Demo 1: Task Optimizer (Agent Lightning Standalone)

This demo shows Agent Lightning optimization WITHOUT governance.
It demonstrates pure performance optimization without values alignment.

Purpose: Baseline for comparison with Demo 2 (governed agent)
"""

import sys
import time
from typing import Dict, List, Any

# Optional dependency: attempt to import the real Agent Lightning package and
# record the outcome in AL_AVAILABLE, which TaskOptimizer uses as the default
# for its ``use_al`` argument.
try:
    from agentlightning import AgentLightningClient, RolloutConfig
    AL_AVAILABLE = True
except ImportError:
    # When the import fails, AgentLightningClient and RolloutConfig are left
    # undefined in this module; only the flag below and this notice signal it.
    print("⚠️  Agent Lightning not installed. Running in mock mode.")
    AL_AVAILABLE = False


class MockALClient:
    """Deterministic stand-in for the Agent Lightning client.

    Every call to :meth:`optimize` counts as one more training round and
    reports a higher engagement score, up to a fixed ceiling. No governance
    information is ever produced.
    """

    def __init__(self):
        # Number of optimize() calls served by this instance so far.
        self.training_rounds = 0

    def optimize(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """Pretend to run one optimization round and report its metrics.

        Args:
            task: Task description; accepted for interface parity but not
                read by the mock.

        Returns:
            A dict with the keys ``engagement``, ``training_rounds``,
            ``strategy`` and ``governance_checks`` (always ``None``).
        """
        self.training_rounds = self.training_rounds + 1
        rounds_done = self.training_rounds

        # Start from 42 and gain 12 points per completed round, never
        # reporting more than 94.
        score = 42 + 12 * rounds_done
        if score > 94:
            score = 94

        # Past 70 the simulated agent is said to have drifted to clickbait.
        if score > 70:
            learned_strategy = "clickbait"
        else:
            learned_strategy = "normal"

        outcome: Dict[str, Any] = {}
        outcome["engagement"] = score
        outcome["training_rounds"] = rounds_done
        outcome["strategy"] = learned_strategy
        outcome["governance_checks"] = None  # the mock performs no governance
        return outcome


class TaskOptimizer:
    """
    Agent Lightning Task Optimizer (Standalone)

    Optimizes content for maximum engagement using RL,
    WITHOUT any governance constraints.

    Attributes:
        use_al: Whether the real Agent Lightning client was requested.
        client: Object whose ``optimize(task)`` method is called once per round.
        history: List created empty; nothing in this class writes to it.
    """

    def __init__(self, use_al: bool = AL_AVAILABLE):
        """
        Pick the client used for optimization.

        Args:
            use_al: Use the real Agent Lightning client when True, the mock
                client otherwise. Defaults to whether the package imported.

        Raises:
            ImportError: If ``use_al`` is True but Agent Lightning could not
                be imported.
        """
        self.use_al = use_al
        if use_al:
            if not AL_AVAILABLE:
                # Without this guard the line below fails with a bare
                # NameError, because the failed import never bound the name.
                raise ImportError(
                    "Agent Lightning is not installed; construct "
                    "TaskOptimizer(use_al=False) to use the mock client"
                )
            # NOTE(review): assumes AgentLightningClient takes no constructor
            # arguments and exposes optimize(task) returning a dict with an
            # 'engagement' key — TODO confirm against the installed package.
            self.client = AgentLightningClient()
        else:
            self.client = MockALClient()

        self.history: List[Dict] = []

    def optimize_content(self, content: str, rounds: int = 5) -> Dict[str, Any]:
        """
        Optimize content for engagement without governance

        Args:
            content: The content to optimize
            rounds: Number of training rounds (must be at least 1)

        Returns:
            Dict with ``rounds``, the per-round ``results`` list,
            ``final_engagement`` (last round), ``improvement`` (last round
            minus first round) and ``governance_applied`` (always False).

        Raises:
            ValueError: If ``rounds`` is less than 1.
        """
        # The summary below reads the first and last round, so zero rounds
        # would otherwise surface as an IndexError after the banner printed.
        if rounds < 1:
            raise ValueError(f"rounds must be at least 1, got {rounds}")

        print("\nTraining agent on content optimization tasks...")
        print()

        results = []

        for round_num in range(1, rounds + 1):
            task = {
                "goal": "optimize_content_engagement",
                "content": content,
                "round": round_num
            }

            # AL optimizes for engagement (no governance checks)
            result = self.client.optimize(task)
            results.append(result)

            print(f"Round {round_num}: Engagement = {result['engagement']}%")
            time.sleep(0.2)  # Simulate training time

        first_engagement = results[0]["engagement"]
        final_engagement = results[-1]["engagement"]

        return {
            "rounds": rounds,
            "results": results,
            "final_engagement": final_engagement,
            "improvement": final_engagement - first_engagement,
            "governance_applied": False  # No governance!
        }


def run_demo():
    """Run the standalone optimization demo and print its report.

    Builds a TaskOptimizer with default settings, runs five rounds on a
    sample blog-post title, then prints the metrics, the no-governance
    warning, the learned strategy (if reported) and pointers to the next
    demos.

    Returns:
        The dict returned by ``TaskOptimizer.optimize_content``.
    """
    heavy_rule = "=" * 60
    light_rule = "─" * 60

    # Title banner and purpose statement.
    intro_lines = (
        heavy_rule,
        "Demo 1: Task Optimizer (AL Standalone)",
        heavy_rule,
        "",
        "Purpose: Show AL optimization WITHOUT governance",
        "Learning: Performance ≠ Values Alignment",
        "",
    )
    for line in intro_lines:
        print(line)

    # Time the whole optimization run on the sample content.
    sample_content = "Blog post: The Future of AI Safety"
    demo_optimizer = TaskOptimizer()
    started_at = time.time()
    outcome = demo_optimizer.optimize_content(sample_content, rounds=5)
    elapsed = time.time() - started_at

    # Headline metrics.
    for line in ("", heavy_rule, "✓ Agent optimized successfully!", heavy_rule):
        print(line)
    print(f"  Final engagement: {outcome['final_engagement']}%")
    print(f"  Training time: {elapsed:.1f} seconds")
    print(f"  Improvement: {outcome['improvement']:.0f}% increase")
    print()

    # Spell out what was NOT verified during optimization.
    warning_lines = (
        "⚠️  WARNING: No governance checks performed",
        heavy_rule,
        "   - Editorial guidelines: NOT checked",
        "   - Accuracy verification: NOT checked",
        "   - Harm assessment: NOT checked",
        "   - Values alignment: NOT checked",
        "",
        "This is a performance-only optimization.",
        "",
    )
    for line in warning_lines:
        print(line)

    # Report the strategy from the last round when the client supplies one.
    last_round = outcome['results'][-1]
    if 'strategy' in last_round:
        learned = last_round['strategy']
        print(f"Strategy learned: {learned}")
        if learned == 'clickbait':
            for line in (
                "⚠️  Agent learned to use clickbait for engagement!",
                "   Without governance, optimization can lead to",
                "   values-misaligned behavior.",
            ):
                print(line)

    # Pointers to the follow-up demos.
    closing_lines = (
        "",
        light_rule,
        "Next Steps:",
        "  → Demo 2: See how Tractatus governance prevents this",
        "  → Demo 3: See full production architecture",
        light_rule,
        "",
    )
    for line in closing_lines:
        print(line)

    return outcome


if __name__ == "__main__":
    # Script entry point: exit 0 when the demo completes, 1 when it is
    # interrupted with Ctrl-C or fails with any other exception.
    try:
        run_demo()
    except KeyboardInterrupt:
        print("\n\nDemo interrupted by user")
        sys.exit(1)
    except Exception as exc:
        print(f"\n\nError running demo: {exc}")
        # Imported lazily: only needed on the failure path.
        import traceback
        traceback.print_exc()
        sys.exit(1)
    else:
        sys.exit(0)