"""BDI Agent AI that wraps GOAP planning with BDI reasoning.
|
|
|
|
This module provides the main BDI-based AI decision maker that:
|
|
1. Maintains persistent beliefs about the world
|
|
2. Manages desires based on personality
|
|
3. Commits to intentions (plans) and executes them
|
|
4. Uses GOAP planning to generate action sequences
|
|
|
|
Performance optimizations:
|
|
- Timeslicing: full BDI cycle only runs periodically
|
|
- Plan persistence: reuses plans across turns
|
|
- Cached belief updates: skips unchanged data
|
|
"""

from dataclasses import dataclass, field
from typing import Optional, TYPE_CHECKING

from backend.domain.action import ActionType
from backend.domain.resources import ResourceType
from backend.domain.personality import get_trade_price_modifier

from backend.core.bdi.belief import BeliefBase
from backend.core.bdi.desire import DesireManager
from backend.core.bdi.intention import IntentionManager

from backend.core.goap.planner import GOAPPlanner, ReactivePlanner
from backend.core.goap.goals import get_all_goals
from backend.core.goap.actions import get_all_actions

if TYPE_CHECKING:
    from backend.domain.agent import Agent
    from backend.core.market import OrderBook
    from backend.core.goap.goal import Goal
    from backend.core.goap.action import GOAPAction
    from backend.core.goap.planner import Plan


@dataclass
class TradeItem:
    """A single item to buy/sell in a trade."""

    order_id: str
    resource_type: ResourceType
    quantity: int
    price_per_unit: int


@dataclass
class AIDecision:
    """A decision made by the AI for an agent."""

    action: ActionType
    target_resource: Optional[ResourceType] = None
    order_id: Optional[str] = None
    quantity: int = 1
    price: int = 0
    reason: str = ""
    trade_items: list[TradeItem] = field(default_factory=list)
    adjust_order_id: Optional[str] = None
    new_price: Optional[int] = None

    # GOAP/BDI-specific fields
    goal_name: str = ""
    plan_length: int = 0
    bdi_info: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        return {
            "action": self.action.value,
            "target_resource": self.target_resource.value if self.target_resource else None,
            "order_id": self.order_id,
            "quantity": self.quantity,
            "price": self.price,
            "reason": self.reason,
            "trade_items": [
                {
                    "order_id": t.order_id,
                    "resource_type": t.resource_type.value,
                    "quantity": t.quantity,
                    "price_per_unit": t.price_per_unit,
                }
                for t in self.trade_items
            ],
            "adjust_order_id": self.adjust_order_id,
            "new_price": self.new_price,
            "goal_name": self.goal_name,
            "plan_length": self.plan_length,
            "bdi_info": self.bdi_info,
        }
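
# Illustrative example (not executed here): AIDecision(action=ActionType.REST,
# reason="tired").to_dict() yields a plain dict with "action" set to
# ActionType.REST.value, default quantity/price, an empty trade_items list, and
# an empty bdi_info dict -- a JSON-serializable snapshot of the decision.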


class BDIAgentAI:
    """BDI-based AI decision maker that wraps GOAP planning.

    The BDI cycle:

    1. Update beliefs from sensors (agent state, market)
    2. Update desires based on beliefs and personality
    3. Check if current intention should continue
    4. If needed, generate new plan via GOAP
    5. Execute next action from intention

    Performance features:

    - Timeslicing: full deliberation only every N turns
    - Plan persistence: reuse plans across turns
    - Reactive fallback: simple decisions when not deliberating
    """

    # Class-level cache for planners (shared across instances)
    _planner_cache: Optional[GOAPPlanner] = None
    _reactive_cache: Optional[ReactivePlanner] = None
    _goals_cache: Optional[list] = None
    _actions_cache: Optional[list] = None

    def __init__(
        self,
        agent: "Agent",
        market: "OrderBook",
        step_in_day: int = 1,
        day_steps: int = 10,
        current_turn: int = 0,
        is_night: bool = False,
        # Persistent BDI state (passed in for continuity)
        beliefs: Optional[BeliefBase] = None,
        desires: Optional[DesireManager] = None,
        intentions: Optional[IntentionManager] = None,
    ):
        self.agent = agent
        self.market = market
        self.step_in_day = step_in_day
        self.day_steps = day_steps
        self.current_turn = current_turn
        self.is_night = is_night

        # Initialize or use existing BDI components
        self.beliefs = beliefs or BeliefBase()
        self.desires = desires or DesireManager(agent.personality)
        self.intentions = intentions or IntentionManager.from_personality(agent.personality)

        # Update beliefs from current state
        self.beliefs.update_from_sensors(
            agent=agent,
            market=market,
            step_in_day=step_in_day,
            day_steps=day_steps,
            current_turn=current_turn,
            is_night=is_night,
        )

        # Update desires from beliefs
        self.desires.update_from_beliefs(self.beliefs)

        # Get cached planners and goals/actions
        self.planner = self._get_planner()
        self.reactive_planner = self._get_reactive_planner()
        self.goals = self._get_goals()
        self.actions = self._get_actions()

        # Personality shortcuts
        self.p = agent.personality
        self.skills = agent.skills

    @classmethod
    def _get_planner(cls) -> GOAPPlanner:
        """Get cached GOAP planner."""
        if cls._planner_cache is None:
            from backend.config import get_config
            config = get_config()
            ai_config = config.ai
            cls._planner_cache = GOAPPlanner(
                max_iterations=ai_config.goap_max_iterations,
            )
        return cls._planner_cache

    @classmethod
    def _get_reactive_planner(cls) -> ReactivePlanner:
        """Get cached reactive planner."""
        if cls._reactive_cache is None:
            cls._reactive_cache = ReactivePlanner()
        return cls._reactive_cache

    @classmethod
    def _get_goals(cls) -> list:
        """Get cached goals list."""
        if cls._goals_cache is None:
            cls._goals_cache = get_all_goals()
        return cls._goals_cache

    @classmethod
    def _get_actions(cls) -> list:
        """Get cached actions list."""
        if cls._actions_cache is None:
            cls._actions_cache = get_all_actions()
        return cls._actions_cache

    @classmethod
    def reset_caches(cls) -> None:
        """Reset all caches (call after config reload)."""
        cls._planner_cache = None
        cls._reactive_cache = None
        cls._goals_cache = None
        cls._actions_cache = None

    def should_deliberate(self) -> bool:
        """Check if this agent should run full BDI deliberation this turn.

        Timeslicing: not every agent deliberates every turn.
        Agents are staggered based on their ID hash.
        """
        from backend.config import get_config
        config = get_config()

        # Get thinking interval from config (default to 1 = every turn)
        bdi_config = getattr(config, 'bdi', None)
        thinking_interval = getattr(bdi_config, 'thinking_interval', 1) if bdi_config else 1

        if thinking_interval <= 1:
            return True  # Deliberate every turn

        # Stagger agents across turns
        agent_hash = hash(self.agent.id) % thinking_interval
        return (self.current_turn % thinking_interval) == agent_hash
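
    # Example (illustrative): with thinking_interval=4 and hash(agent.id) % 4 == 1,
    # the agent runs full deliberation on turns 1, 5, 9, ... and otherwise keeps
    # executing its current intention or falls back to reactive selection. Note
    # that Python's str hash is salted per process (PYTHONHASHSEED), so the
    # stagger offset is stable within a run but can differ between runs.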

    def decide(self) -> AIDecision:
        """Make a decision using BDI reasoning with GOAP planning.

        Decision flow:

        1. Night time: mandatory sleep
        2. Check if should deliberate (timeslicing)
        3. If deliberating: run full BDI cycle
        4. If not: continue current intention or reactive fallback
        """
        # Night time - mandatory sleep
        if self.is_night:
            return AIDecision(
                action=ActionType.SLEEP,
                reason="Night time: sleeping",
                goal_name="Sleep",
                bdi_info={"mode": "night"},
            )

        # Check if we should run full deliberation
        if self.should_deliberate():
            return self._deliberate()
        else:
            return self._continue_or_react()

    def _deliberate(self) -> AIDecision:
        """Run full BDI deliberation cycle."""
        # Filter goals by desires
        filtered_goals = self.desires.filter_goals_by_desire(self.goals, self.beliefs)

        # Check if we should reconsider current intention
        should_replan = self.intentions.should_reconsider(
            beliefs=self.beliefs,
            desire_manager=self.desires,
            available_goals=filtered_goals,
        )

        if not should_replan and self.intentions.has_intention():
            # Continue with current intention
            action = self.intentions.get_next_action()
            if action:
                return self._convert_to_decision(
                    goap_action=action,
                    goal=self.intentions.current_intention.goal,
                    plan=self.intentions.current_intention.plan,
                    mode="continue",
                )

        # Need to plan for a goal
        world_state = self.beliefs.to_world_state()

        plan = self.planner.plan_for_goals(
            initial_state=world_state,
            goals=filtered_goals,
            available_actions=self.actions,
        )

        if plan and not plan.is_empty:
            # Commit to new intention
            self.intentions.commit_to_plan(
                goal=plan.goal,
                plan=plan,
                current_turn=self.current_turn,
            )

            goap_action = plan.first_action
            return self._convert_to_decision(
                goap_action=goap_action,
                goal=plan.goal,
                plan=plan,
                mode="new_plan",
            )

        # Fallback to reactive planning
        return self._reactive_fallback()

    def _continue_or_react(self) -> AIDecision:
        """Continue current intention or use reactive fallback (no deliberation)."""
        if self.intentions.has_intention():
            action = self.intentions.get_next_action()
            if action:
                return self._convert_to_decision(
                    goap_action=action,
                    goal=self.intentions.current_intention.goal,
                    plan=self.intentions.current_intention.plan,
                    mode="timeslice_continue",
                )

        # No intention, use reactive fallback
        return self._reactive_fallback()

    def _reactive_fallback(self) -> AIDecision:
        """Use reactive planning when no intention exists."""
        world_state = self.beliefs.to_world_state()

        best_action = self.reactive_planner.select_best_action(
            state=world_state,
            goals=self.goals,
            available_actions=self.actions,
        )

        if best_action:
            return self._convert_to_decision(
                goap_action=best_action,
                goal=None,
                plan=None,
                mode="reactive",
            )

        # Ultimate fallback - rest
        return AIDecision(
            action=ActionType.REST,
            reason="No valid action found, resting",
            bdi_info={"mode": "fallback"},
        )

    def _convert_to_decision(
        self,
        goap_action: "GOAPAction",
        goal: Optional["Goal"],
        plan: Optional["Plan"],
        mode: str = "deliberate",
    ) -> AIDecision:
        """Convert a GOAP action to an AIDecision with proper parameters."""
        action_type = goap_action.action_type
        target_resource = goap_action.target_resource

        # Build reason string
        if goal:
            reason = f"{goal.name}: {goap_action.name}"
        else:
            reason = f"Reactive: {goap_action.name}"

        # BDI debug info
        bdi_info = {
            "mode": mode,
            "dominant_desire": self.desires.dominant_desire.value if self.desires.dominant_desire else None,
            "commitment": self.intentions.commitment_strategy.value,
            "has_intention": self.intentions.has_intention(),
        }

        # Handle different action types
        if action_type == ActionType.CONSUME:
            return AIDecision(
                action=action_type,
                target_resource=target_resource,
                reason=reason,
                goal_name=goal.name if goal else "",
                plan_length=len(plan.actions) if plan else 0,
                bdi_info=bdi_info,
            )

        elif action_type == ActionType.TRADE:
            return self._create_trade_decision(goap_action, goal, plan, reason, bdi_info)

        elif action_type in [ActionType.HUNT, ActionType.GATHER, ActionType.CHOP_WOOD,
                             ActionType.GET_WATER, ActionType.WEAVE]:
            return AIDecision(
                action=action_type,
                target_resource=target_resource,
                reason=reason,
                goal_name=goal.name if goal else "",
                plan_length=len(plan.actions) if plan else 0,
                bdi_info=bdi_info,
            )

        elif action_type == ActionType.BUILD_FIRE:
            return AIDecision(
                action=action_type,
                target_resource=ResourceType.WOOD,
                reason=reason,
                goal_name=goal.name if goal else "",
                plan_length=len(plan.actions) if plan else 0,
                bdi_info=bdi_info,
            )

        elif action_type in [ActionType.REST, ActionType.SLEEP]:
            return AIDecision(
                action=action_type,
                reason=reason,
                goal_name=goal.name if goal else "",
                plan_length=len(plan.actions) if plan else 0,
                bdi_info=bdi_info,
            )

        # Default case
        return AIDecision(
            action=action_type,
            target_resource=target_resource,
            reason=reason,
            goal_name=goal.name if goal else "",
            plan_length=len(plan.actions) if plan else 0,
            bdi_info=bdi_info,
        )

    def _create_trade_decision(
        self,
        goap_action: "GOAPAction",
        goal: Optional["Goal"],
        plan: Optional["Plan"],
        reason: str,
        bdi_info: dict,
    ) -> AIDecision:
        """Create a trade decision with actual market parameters."""
        target_resource = goap_action.target_resource
        action_name = goap_action.name.lower()

        if "buy" in action_name:
            # Find the best order to buy from
            order = self.market.get_cheapest_order(target_resource)

            if order and order.seller_id != self.agent.id:
                # Check trust for this seller
                trust = self.beliefs.get_trade_trust(order.seller_id)

                # Skip distrusted sellers if we're picky
                if trust < -0.5 and self.p.price_sensitivity > 1.2:
                    # Try next cheapest? For now, fall back to gathering
                    return self._create_gather_fallback(target_resource, reason, goal, plan, bdi_info)

                # Calculate quantity to buy
                # Use max(1, ...) to avoid division by zero
                can_afford = self.agent.money // max(1, order.price_per_unit)
                space = self.agent.inventory_space()
                quantity = min(2, can_afford, space, order.quantity)

                if quantity > 0:
                    return AIDecision(
                        action=ActionType.TRADE,
                        target_resource=target_resource,
                        order_id=order.id,
                        quantity=quantity,
                        price=order.price_per_unit,
                        reason=f"{reason} @ {order.price_per_unit}c",
                        goal_name=goal.name if goal else "",
                        plan_length=len(plan.actions) if plan else 0,
                        bdi_info=bdi_info,
                    )

            # Can't buy - fallback to gathering
            return self._create_gather_fallback(target_resource, reason, goal, plan, bdi_info)

        elif "sell" in action_name:
            # Create a sell order
            quantity_available = self.agent.get_resource_count(target_resource)

            # Calculate minimum to keep
            min_keep = self._get_min_keep(target_resource)
            quantity_to_sell = min(3, quantity_available - min_keep)

            if quantity_to_sell > 0:
                price = self._calculate_sell_price(target_resource)

                return AIDecision(
                    action=ActionType.TRADE,
                    target_resource=target_resource,
                    quantity=quantity_to_sell,
                    price=price,
                    reason=f"{reason} @ {price}c",
                    goal_name=goal.name if goal else "",
                    plan_length=len(plan.actions) if plan else 0,
                    bdi_info=bdi_info,
                )

        # Invalid trade action - rest
        return AIDecision(
            action=ActionType.REST,
            reason="Trade not possible",
            bdi_info=bdi_info,
        )
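
    # Worked example (illustrative): with money=700, the cheapest order priced at
    # 250 per unit with 5 units listed, and 4 free inventory slots, the buy branch
    # computes can_afford = 700 // 250 = 2 and quantity = min(2, 2, 4, 5) = 2.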

    def _create_gather_fallback(
        self,
        resource_type: ResourceType,
        reason: str,
        goal: Optional["Goal"],
        plan: Optional["Plan"],
        bdi_info: dict,
    ) -> AIDecision:
        """Create a gather action as fallback when buying isn't possible."""
        action_map = {
            ResourceType.WATER: ActionType.GET_WATER,
            ResourceType.BERRIES: ActionType.GATHER,
            ResourceType.MEAT: ActionType.HUNT,
            ResourceType.WOOD: ActionType.CHOP_WOOD,
        }

        action = action_map.get(resource_type, ActionType.GATHER)

        return AIDecision(
            action=action,
            target_resource=resource_type,
            reason=f"{reason} (gathering instead)",
            goal_name=goal.name if goal else "",
            plan_length=len(plan.actions) if plan else 0,
            bdi_info=bdi_info,
        )

    def _get_min_keep(self, resource_type: ResourceType) -> int:
        """Get minimum quantity to keep for survival."""
        # Adjusted by the personality's hoarding rate
        hoarding_mult = 0.5 + self.p.hoarding_rate

        base_min = {
            ResourceType.WATER: 2,
            ResourceType.MEAT: 1,
            ResourceType.BERRIES: 2,
            ResourceType.WOOD: 1,
            ResourceType.HIDE: 0,
        }

        return int(base_min.get(resource_type, 1) * hoarding_mult)
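
    # Worked example (illustrative): for WATER (base minimum 2), hoarding_rate=0.2
    # gives hoarding_mult=0.7 and int(2 * 0.7) = 1 unit kept, while hoarding_rate=1.0
    # gives hoarding_mult=1.5 and int(2 * 1.5) = 3 units kept.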

    def _calculate_sell_price(self, resource_type: ResourceType) -> int:
        """Calculate sell price based on fair value and market conditions."""
        from backend.core.ai import get_energy_cost
        from backend.config import get_config

        config = get_config()
        economy = getattr(config, 'economy', None)
        energy_to_money_ratio = getattr(economy, 'energy_to_money_ratio', 150) if economy else 150
        min_price = getattr(economy, 'min_price', 100) if economy else 100

        energy_cost = get_energy_cost(resource_type)
        fair_value = max(min_price, int(round(energy_cost * energy_to_money_ratio)))

        # Apply trading skill
        sell_modifier = get_trade_price_modifier(self.skills.trading, is_buying=False)

        # Get market signal
        signal = self.market.get_market_signal(resource_type)

        if signal == "sell":  # Scarcity
            price = int(round(fair_value * 1.3 * sell_modifier))
        elif signal == "hold":
            price = int(round(fair_value * sell_modifier))
        else:  # Surplus
            cheapest = self.market.get_cheapest_order(resource_type)
            if cheapest and cheapest.seller_id != self.agent.id:
                # Undercut, but respect floor (80% of fair value or min_price)
                floor_price = max(min_price, int(round(fair_value * 0.8)))
                price = max(floor_price, cheapest.price_per_unit - 1)
            else:
                price = int(round(fair_value * 0.8 * sell_modifier))

        return max(min_price, price)
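
    # Worked example (illustrative): with the fallback defaults above
    # (min_price=100, energy_to_money_ratio=150), an energy_cost of 2.0 gives
    # fair_value=300. A scarcity ("sell") signal with a neutral sell_modifier
    # of 1.0 prices at int(round(300 * 1.3)) = 390; a surplus with no competing
    # order prices at int(round(300 * 0.8)) = 240, never below min_price.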

    def record_action_result(self, success: bool, action_type: str) -> None:
        """Record the result of an action for learning and intention tracking."""
        # Update intention
        self.intentions.advance_intention(success)

        # Update beliefs/memory
        if success:
            self.beliefs.record_successful_action(action_type)
        else:
            self.beliefs.record_failed_action(action_type)


# Persistent BDI state storage for agents
_agent_bdi_state: dict[str, tuple[BeliefBase, DesireManager, IntentionManager]] = {}


def get_bdi_decision(
    agent: "Agent",
    market: "OrderBook",
    step_in_day: int = 1,
    day_steps: int = 10,
    current_turn: int = 0,
    is_night: bool = False,
) -> AIDecision:
    """Get a BDI-based AI decision for an agent.

    This is the main entry point for the BDI AI system.
    It maintains persistent BDI state for each agent.
    """
    # Get or create persistent BDI state
    if agent.id not in _agent_bdi_state:
        beliefs = BeliefBase()
        desires = DesireManager(agent.personality)
        intentions = IntentionManager.from_personality(agent.personality)
        _agent_bdi_state[agent.id] = (beliefs, desires, intentions)
    else:
        beliefs, desires, intentions = _agent_bdi_state[agent.id]

    # Create AI instance with persistent state
    ai = BDIAgentAI(
        agent=agent,
        market=market,
        step_in_day=step_in_day,
        day_steps=day_steps,
        current_turn=current_turn,
        is_night=is_night,
        beliefs=beliefs,
        desires=desires,
        intentions=intentions,
    )

    return ai.decide()
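
# Illustrative call pattern (a sketch; constructing Agent and OrderBook depends
# on the rest of the backend and is not shown here):
#
#     decision = get_bdi_decision(agent, market, step_in_day=3, day_steps=10,
#                                 current_turn=42, is_night=False)
#     payload = decision.to_dict()  # e.g. to send to the API/frontend layer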


def reset_bdi_state() -> None:
    """Reset all BDI state (call on simulation reset)."""
    global _agent_bdi_state
    _agent_bdi_state.clear()
    BDIAgentAI.reset_caches()


def remove_agent_bdi_state(agent_id: str) -> None:
    """Remove BDI state for a specific agent (call on agent death)."""
    _agent_bdi_state.pop(agent_id, None)