villsim/tools/optimize_goap.py
Снесарев Максим 308f738c37 [new] add goap agents
2026-01-19 20:45:35 +03:00

497 lines
16 KiB
Python

#!/usr/bin/env python3
"""
GOAP Economy Optimizer for Village Simulation
This script optimizes the simulation parameters specifically for the GOAP AI system.
The goal is to achieve:
- Balanced action diversity (hunting, gathering, trading)
- Active economy with trading
- Good survival rates
- Meat production through hunting
Key insight: GOAP uses action COSTS to choose actions. Lower cost = preferred.
We need to tune:
1. Action energy costs (config.json)
2. GOAP action cost functions (goap/actions.py)
3. Goal priorities (goap/goals.py)
Usage:
python tools/optimize_goap.py [--iterations 15] [--steps 300]
python tools/optimize_goap.py --analyze # Analyze current GOAP behavior
"""
import argparse
import json
import random
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
# Add parent directory for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from backend.config import get_config, reload_config
from backend.core.engine import GameEngine
from backend.domain.action import reset_action_config_cache
from backend.domain.resources import reset_resource_cache
def analyze_goap_behavior(num_steps: int = 100, num_agents: int = 10):
    """Run a short simulation and print a detailed breakdown of GOAP behavior.

    Reports the action distribution, goal usage, planned-vs-reactive split and
    resource flows, then prints heuristic diagnostics (e.g. "almost no hunting").

    Args:
        num_steps: Maximum number of simulation turns to run.
        num_agents: Number of agents to initialize the world with.

    Returns:
        Dict of raw counters: "action_counts", "goal_counts",
        "resources_produced", "resources_consumed".
    """
    print("\n" + "=" * 70)
    print("🔍 GOAP BEHAVIOR ANALYSIS")
    print("=" * 70)
    # Reset the engine singleton so each analysis starts from a fresh world.
    GameEngine._instance = None
    engine = GameEngine()
    engine.initialize(num_agents=num_agents)
    # Track statistics
    action_counts = defaultdict(int)
    goal_counts = defaultdict(int)
    reactive_count = 0
    planned_count = 0
    # Resource tracking
    resources_produced = defaultdict(int)
    resources_consumed = defaultdict(int)
    # Run simulation
    for step in range(num_steps):
        if not engine.is_running:
            print(f" Simulation ended at step {step}")
            break
        log = engine.next_step()
        for action_data in log.agent_actions:
            decision = action_data.get("decision", {})
            result = action_data.get("result", {})
            action_type = decision.get("action", "unknown")
            action_counts[action_type] += 1
            # Planned vs reactive: a decision carrying a goal name came from
            # the planner; otherwise a "Reactive" reason marks a fallback.
            goal_name = decision.get("goal_name", "")
            reason = decision.get("reason", "")
            if goal_name:
                goal_counts[goal_name] += 1
                planned_count += 1
            elif "Reactive" in reason:
                goal_counts["(reactive)"] += 1
                reactive_count += 1
            # Only successful actions move resources.
            if result and result.get("success"):
                for res in result.get("resources_gained", []):
                    resources_produced[res.get("type", "unknown")] += res.get("quantity", 0)
                for res in result.get("resources_consumed", []):
                    resources_consumed[res.get("type", "unknown")] += res.get("quantity", 0)
    # Print results
    total_actions = sum(action_counts.values())
    print(f"\n📊 Action Distribution ({num_steps} turns, {num_agents} agents)")
    print("-" * 50)
    for action, count in sorted(action_counts.items(), key=lambda x: -x[1]):
        pct = count * 100 / total_actions if total_actions > 0 else 0
        # BUG FIX: the original multiplied an empty string ("" * n), so the
        # histogram bar never rendered — almost certainly a lost "█" glyph.
        bar = "█" * int(pct / 2)
        print(f" {action:12} {count:4} ({pct:5.1f}%) {bar}")
    print(f"\n🎯 Goal Distribution")
    print("-" * 50)
    total_goals = sum(goal_counts.values())
    for goal, count in sorted(goal_counts.items(), key=lambda x: -x[1])[:15]:
        pct = count * 100 / total_goals if total_goals > 0 else 0
        print(f" {goal:20} {count:4} ({pct:5.1f}%)")
    # Guard against division by zero when no actions were recorded
    # (e.g. the simulation ended immediately).
    action_denom = total_actions if total_actions > 0 else 1
    print(f"\n Planned actions: {planned_count} ({planned_count*100/action_denom:.1f}%)")
    print(f" Reactive actions: {reactive_count} ({reactive_count*100/action_denom:.1f}%)")
    print(f"\n📦 Resources Produced")
    print("-" * 50)
    for res, qty in sorted(resources_produced.items(), key=lambda x: -x[1]):
        print(f" {res:12} {qty:4}")
    print(f"\n🔥 Resources Consumed")
    print("-" * 50)
    for res, qty in sorted(resources_consumed.items(), key=lambda x: -x[1]):
        print(f" {res:12} {qty:4}")
    # Diagnose issues: heuristic thresholds for a "healthy" GOAP economy.
    print(f"\n⚠️ ISSUES DETECTED:")
    print("-" * 50)
    hunt_pct = action_counts.get("hunt", 0) * 100 / total_actions if total_actions > 0 else 0
    gather_pct = action_counts.get("gather", 0) * 100 / total_actions if total_actions > 0 else 0
    if hunt_pct < 5:
        print(" ❌ Almost no hunting! Hunt action cost too high or meat not valued enough.")
        print(" → Reduce hunt energy cost or increase meat benefits")
    if resources_produced.get("meat", 0) == 0:
        print(" ❌ No meat produced! Agents never hunt successfully.")
    trade_pct = action_counts.get("trade", 0) * 100 / total_actions if total_actions > 0 else 0
    if trade_pct < 5:
        print(" ❌ Low trading activity. Market goals not prioritized.")
    if reactive_count > planned_count:
        print(" ⚠️ More reactive than planned actions. Goals may be too easily satisfied.")
    return {
        "action_counts": dict(action_counts),
        "goal_counts": dict(goal_counts),
        "resources_produced": dict(resources_produced),
        "resources_consumed": dict(resources_consumed),
    }
def test_config(config_overrides: dict, num_steps: int = 200, num_agents: int = 10, verbose: bool = True):
    """Run one simulation under a modified config and score the outcome.

    The overrides are merged section-by-section into the on-disk config.json,
    written to a temporary file and loaded; the original config is always
    restored afterwards, even if the simulation raises.

    Args:
        config_overrides: Partial config as {section: {key: value}}.
        num_steps: Maximum simulation turns.
        num_agents: Number of agents to spawn.
        verbose: Print a human-readable score breakdown.

    Returns:
        Dict with "score" (0-100 heuristic), "action_counts", "resources"
        produced, "survival_rate", "deaths" and "trades_completed".
    """
    # Load the on-disk baseline config.
    config_path = Path("config.json")
    with open(config_path, encoding="utf-8") as f:
        original_config = json.load(f)
    # Deep-copy via a JSON round-trip, then merge overrides per section.
    # (Renamed from `test_config` in the original, which shadowed this
    # function's own name.)
    merged_config = json.loads(json.dumps(original_config))
    for section, values in config_overrides.items():
        if section in merged_config:
            merged_config[section].update(values)
        else:
            merged_config[section] = values
    # Write the merged config to a temp file and point the engine at it.
    temp_path = Path("config_temp.json")
    with open(temp_path, 'w', encoding="utf-8") as f:
        json.dump(merged_config, f, indent=2)
    reload_config(str(temp_path))
    reset_action_config_cache()
    reset_resource_cache()
    # Fresh engine singleton for this trial.
    GameEngine._instance = None
    engine = GameEngine()
    engine.initialize(num_agents=num_agents)
    action_counts = defaultdict(int)
    resources_produced = defaultdict(int)
    deaths = 0
    trades_completed = 0
    try:
        for _ in range(num_steps):
            if not engine.is_running:
                break
            log = engine.next_step()
            deaths += len(log.deaths)
            for action_data in log.agent_actions:
                decision = action_data.get("decision", {})
                result = action_data.get("result", {})
                action_type = decision.get("action", "unknown")
                action_counts[action_type] += 1
                if result and result.get("success"):
                    for res in result.get("resources_gained", []):
                        resources_produced[res.get("type", "unknown")] += res.get("quantity", 0)
                    # A "Bought" message marks a completed purchase.
                    if action_type == "trade" and "Bought" in result.get("message", ""):
                        trades_completed += 1
        final_pop = len(engine.world.get_living_agents())
    finally:
        # Always clean up and restore the original config, even on error —
        # the original left the temp config active if the run raised.
        engine.logger.close()
        temp_path.unlink(missing_ok=True)
        reload_config(str(config_path))
        reset_action_config_cache()
        reset_resource_cache()
    # ---- Heuristic score (max ~100) ----
    total_actions = sum(action_counts.values())
    hunt_ratio = action_counts.get("hunt", 0) / total_actions if total_actions > 0 else 0
    trade_ratio = action_counts.get("trade", 0) / total_actions if total_actions > 0 else 0
    survival_rate = final_pop / num_agents
    # 1. Hunt ratio: want 10-25% (below 5% scores nothing).
    hunt_score = min(25, hunt_ratio * 100) if hunt_ratio > 0.05 else 0
    # 2. Trade activity: want 5-15%.
    trade_score = min(20, trade_ratio * 100 * 2)
    # 3. Resource diversity: 5 pts per distinct resource type produced.
    has_meat = resources_produced.get("meat", 0) > 0
    has_berries = resources_produced.get("berries", 0) > 0
    has_wood = resources_produced.get("wood", 0) > 0
    has_water = resources_produced.get("water", 0) > 0
    diversity_score = (int(has_meat) + int(has_berries) + int(has_wood) + int(has_water)) * 5
    # 4. Survival: up to 30 pts.
    survival_score = survival_rate * 30
    # 5. Meat production bonus, capped at 15 pts.
    meat_score = min(15, resources_produced.get("meat", 0) / 5)
    total_score = hunt_score + trade_score + diversity_score + survival_score + meat_score
    if verbose:
        print(f"\n Score: {total_score:.1f}/100")
        print(f" ├─ Hunt: {hunt_ratio*100:.1f}% ({hunt_score:.1f} pts)")
        print(f" ├─ Trade: {trade_ratio*100:.1f}% ({trade_score:.1f} pts)")
        print(f" ├─ Diversity: {diversity_score:.1f} pts")
        print(f" ├─ Survival: {survival_rate*100:.0f}% ({survival_score:.1f} pts)")
        print(f" └─ Meat produced: {resources_produced.get('meat', 0)} ({meat_score:.1f} pts)")
        print(f" Actions: hunt={action_counts.get('hunt',0)}, gather={action_counts.get('gather',0)}, trade={action_counts.get('trade',0)}")
    return {
        "score": total_score,
        "action_counts": dict(action_counts),
        "resources": dict(resources_produced),
        "survival_rate": survival_rate,
        "deaths": deaths,
        # New key (backward compatible): was counted but never reported.
        "trades_completed": trades_completed,
    }
def optimize_for_goap(iterations: int = 15, steps: int = 300):
    """Test a set of hand-crafted config candidates and save the best one.

    Each candidate biases GOAP toward a healthier economy (more hunting,
    trading and resource diversity). The winning candidate is merged into the
    current config.json and written to config_goap_optimized.json.

    Args:
        iterations: Accepted for CLI compatibility but currently only
            printed — the candidate list below is fixed.
            TODO: use it to drive a randomized search around the best candidate.
        steps: Simulation turns per candidate test.

    Returns:
        The best candidate's override dict, or None if no candidate scored > 0.
    """
    print("\n" + "=" * 70)
    print("🧬 GOAP ECONOMY OPTIMIZER")
    print("=" * 70)
    print(f" Iterations: {iterations}")
    print(f" Steps per test: {steps}")
    print("=" * 70)
    # Candidate configurations. Key lever: GOAP picks the cheapest action,
    # so hunting becomes attractive when its energy cost drops and/or meat
    # becomes more nourishing than berries.
    configs_to_test = [
        # Baseline
        {
            "name": "Baseline (current)",
            "config": {}
        },
        # Cheaper hunting
        {
            "name": "Cheaper Hunt (-5 energy)",
            "config": {
                "actions": {
                    "hunt_energy": -5,
                    "hunt_success": 0.8,
                }
            }
        },
        # More valuable meat
        {
            "name": "Valuable Meat (+45 hunger)",
            "config": {
                "resources": {
                    "meat_hunger": 45,
                    "meat_energy": 15,
                },
                "actions": {
                    "hunt_energy": -6,
                    "hunt_success": 0.8,
                }
            }
        },
        # Make berries less attractive
        {
            "name": "Nerfed Berries",
            "config": {
                "resources": {
                    "meat_hunger": 45,
                    "meat_energy": 15,
                    "berries_hunger": 8,
                    "berries_thirst": 2,
                },
                "actions": {
                    "hunt_energy": -5,
                    "gather_energy": -4,
                    "hunt_success": 0.85,
                    "hunt_meat_min": 2,
                    "hunt_meat_max": 4,
                }
            }
        },
        # Higher hunt output
        {
            "name": "High Hunt Output",
            "config": {
                "resources": {
                    "meat_hunger": 40,
                    "meat_energy": 12,
                },
                "actions": {
                    "hunt_energy": -6,
                    "hunt_success": 0.85,
                    "hunt_meat_min": 3,
                    "hunt_meat_max": 6,
                    "hunt_hide_min": 1,
                    "hunt_hide_max": 2,
                }
            }
        },
        # Balanced economy
        {
            "name": "Balanced Economy",
            "config": {
                "resources": {
                    "meat_hunger": 40,
                    "meat_energy": 15,
                    "berries_hunger": 8,
                },
                "actions": {
                    "hunt_energy": -5,
                    "gather_energy": -4,
                    "hunt_success": 0.8,
                    "hunt_meat_min": 2,
                    "hunt_meat_max": 5,
                },
                "economy": {
                    "buy_efficiency_threshold": 0.9,
                    "min_wealth_target": 40,
                }
            }
        },
        # Pro-hunting config
        {
            "name": "Pro-Hunting",
            "config": {
                "agent_stats": {
                    "hunger_decay": 3,  # Higher hunger decay = need more food
                },
                "resources": {
                    "meat_hunger": 50,  # Meat is very filling
                    "meat_energy": 15,
                    "berries_hunger": 6,  # Berries less filling
                },
                "actions": {
                    "hunt_energy": -4,  # Very cheap to hunt
                    "gather_energy": -4,
                    "hunt_success": 0.85,
                    "hunt_meat_min": 3,
                    "hunt_meat_max": 5,
                }
            }
        },
        # Full rebalance
        {
            "name": "Full Rebalance",
            "config": {
                "agent_stats": {
                    "start_hunger": 70,
                    "hunger_decay": 3,
                    "thirst_decay": 3,
                },
                "resources": {
                    "meat_hunger": 50,
                    "meat_energy": 15,
                    "berries_hunger": 8,
                    "berries_thirst": 3,
                    "water_thirst": 45,
                },
                "actions": {
                    "hunt_energy": -5,
                    "gather_energy": -4,
                    "chop_wood_energy": -5,
                    "get_water_energy": -3,
                    "hunt_success": 0.8,
                    "hunt_meat_min": 2,
                    "hunt_meat_max": 5,
                    "hunt_hide_min": 0,
                    "hunt_hide_max": 1,
                    "gather_min": 2,
                    "gather_max": 3,
                }
            }
        },
    ]
    best_config = None
    best_score = 0
    best_name = ""
    for cfg in configs_to_test:
        print(f"\n🧪 Testing: {cfg['name']}")
        print("-" * 50)
        result = test_config(cfg["config"], steps, verbose=True)
        if result["score"] > best_score:
            best_score = result["score"]
            best_config = cfg["config"]
            best_name = cfg["name"]
            print(" ⭐ New best!")
    print("\n" + "=" * 70)
    print("🏆 OPTIMIZATION COMPLETE")
    print("=" * 70)
    print(f"\n Best Config: {best_name}")
    print(f" Best Score: {best_score:.1f}/100")
    if best_config:
        print("\n 📝 Configuration to apply:")
        print("-" * 50)
        print(json.dumps(best_config, indent=2))
        # BUG FIX: the original printed an interactive "(y/n)" question here
        # but never read stdin. The config is saved to disk instead; use the
        # --apply CLI flag (or copy the file) to apply it.
        output_path = Path("config_goap_optimized.json")
        with open("config.json", encoding="utf-8") as f:
            full_config = json.load(f)
        # Merge the winning overrides into the full on-disk config.
        for section, values in best_config.items():
            if section in full_config:
                full_config[section].update(values)
            else:
                full_config[section] = values
        with open(output_path, 'w', encoding="utf-8") as f:
            json.dump(full_config, f, indent=2)
        print(f"\n ✅ Saved to: {output_path}")
        print(" To apply: cp config_goap_optimized.json config.json")
    return best_config
def main():
    """CLI entry point: analyze current GOAP behavior or run the optimizer."""
    parser = argparse.ArgumentParser(description="Optimize GOAP economy parameters")
    parser.add_argument("--analyze", "-a", action="store_true", help="Analyze current behavior")
    parser.add_argument("--iterations", "-i", type=int, default=15, help="Optimization iterations")
    parser.add_argument("--steps", "-s", type=int, default=200, help="Steps per simulation")
    parser.add_argument("--apply", action="store_true", help="Auto-apply best config")
    args = parser.parse_args()

    # Analysis mode is a pure report — run it and stop.
    if args.analyze:
        analyze_goap_behavior(args.steps)
        return

    best = optimize_for_goap(args.iterations, args.steps)
    if args.apply and best:
        # Overwrite the live config with the optimizer's saved output.
        import shutil
        shutil.copy("config_goap_optimized.json", "config.json")
        print("\n ✅ Configuration applied!")


if __name__ == "__main__":
    main()