"""Creative fatigue detection based on sustained terminal decline patterns.

This module focuses on identifying when creative assets have genuinely exhausted their
audience and need replacement, rather than detecting temporary fluctuations or normal
volatility during audience exploration.

Key Concept:
- Fatigue = sustained, irreversible decline after audience exhaustion
- NOT fatigue = temporary dips, volatility, or recoverable declines
"""

from datetime import timedelta
from typing import Dict, Any, Optional

import numpy as np
import pandas as pd


def _no_decline_result(reason: str) -> Dict[str, Any]:
    """Build the neutral "nothing detected" result shared by the early exits."""
    return {
        "is_terminal_decline": False,
        "decline_duration_days": 0,
        "average_performance_ratio": 1.0,
        "recent_volatility": 0.0,
        "audience_exhausted": False,
        "replacement_recommended": False,
        "decline_start_date": None,
        "confidence": "Low",
        "reason": reason,
    }


def detect_terminal_decline(
    time_series: pd.DataFrame,
    metric: str,
    benchmark: float,
    min_decline_days: int = 14,
    performance_threshold: float = 0.9,
    volatility_threshold: float = 0.15,
    sustained_fraction: float = 0.7,
) -> Dict[str, Any]:
    """Detect sustained terminal decline indicating genuine creative fatigue.

    Only the last ``min_decline_days`` rows of ``time_series`` are analysed.
    Within that window three signals are combined:

    - the window mean is worse than the benchmark by more than
      ``performance_threshold`` allows,
    - at least ``sustained_fraction`` of the individual values are worse than
      the benchmark, and
    - a least-squares line through the values slopes in the "getting worse"
      direction.

    All three together mark a terminal decline. Independently, a poor window
    mean combined with a coefficient of variation below
    ``volatility_threshold`` marks the audience as exhausted. Replacement is
    recommended only when both hold.

    Args:
        time_series: Data with the metric column and, optionally, a 'day'
            column. Rows are taken in their existing order, so the frame is
            expected to be sorted by date ascending.
        metric: Column to analyse. 'cpc' is treated as lower-is-better; any
            other name (e.g. 'ctr') is treated as higher-is-better.
        benchmark: Benchmark value for the metric. When it is not positive the
            performance ratio falls back to 1.0.
        min_decline_days: Window length in rows; also the minimum number of
            rows required (default: 14).
        performance_threshold: Performance ratio threshold vs benchmark
            (default: 0.9 = 90%). For 'cpc' the limit is the reciprocal
            (ratio > 1 / threshold). A non-positive threshold never flags
            the window as below benchmark.
        volatility_threshold: CV threshold for "low volatility" detection
            (default: 0.15).
        sustained_fraction: Share of values in the window that must be worse
            than the benchmark for the poor performance to count as sustained
            (default: 0.7).

    Returns:
        Dictionary with fatigue detection results:
            - is_terminal_decline (bool): True if creative shows terminal decline
            - decline_duration_days (int): Window length when in terminal decline, else 0
            - average_performance_ratio (float): Window mean divided by benchmark
            - recent_volatility (float): Coefficient of variation in the window
            - audience_exhausted (bool): Low volatility + poor performance = exhausted
            - replacement_recommended (bool): Strong recommendation to replace creative
            - decline_start_date: First 'day' value of the window when in terminal
              decline and the column exists, otherwise None
            - confidence (str): 'High', 'Medium', 'Low' confidence in detection
            - reason (str): Human-readable explanation
    """
    if len(time_series) < min_decline_days:
        return _no_decline_result("Insufficient data for terminal decline detection")

    # The window is only read, never mutated, so no defensive copy is needed.
    recent_data = time_series.tail(min_decline_days)

    if metric not in recent_data.columns:
        return _no_decline_result("Insufficient recent data")

    metric_values = recent_data[metric].dropna()

    # Any missing value shortens the window below the required length.
    if len(metric_values) < min_decline_days:
        return _no_decline_result("Too many missing values in recent period")

    lower_is_better = metric == "cpc"

    # Performance ratio vs benchmark (same formula for both metric directions).
    mean_val = metric_values.mean()
    performance_ratio = mean_val / benchmark if benchmark > 0 else 1.0

    if lower_is_better:
        # Ratio > 1.0 means worse than benchmark. Guard the reciprocal so a zero
        # threshold behaves like the higher-is-better branch instead of raising
        # ZeroDivisionError.
        is_below_benchmark = (
            performance_threshold > 0
            and performance_ratio > (1.0 / performance_threshold)
        )
    else:
        # Ratio < 1.0 means worse than benchmark.
        is_below_benchmark = performance_ratio < performance_threshold

    # Recent volatility (coefficient of variation); 0.0 when the mean is not positive.
    std_val = metric_values.std()
    recent_volatility = (std_val / mean_val) if mean_val > 0 else 0.0

    # Sustained poor performance: enough individual values are worse than benchmark.
    if lower_is_better:
        worse_than_benchmark = metric_values > benchmark
    else:
        worse_than_benchmark = metric_values < benchmark
    sustained_poor = worse_than_benchmark.sum() >= (
        len(metric_values) * sustained_fraction
    )

    # Trend: a failed fit is treated as "no trend", but only genuine fitting
    # errors are caught so interrupts and unrelated bugs still propagate.
    try:
        slope = np.polyfit(np.arange(len(metric_values)), metric_values, 1)[0]
    except (np.linalg.LinAlgError, TypeError, ValueError):
        has_negative_trend = False
    else:
        # Rising CPC or falling CTR both mean performance is getting worse.
        has_negative_trend = slope > 0 if lower_is_better else slope < 0

    # Terminal decline: sustained poor performance + worsening trend.
    is_terminal = sustained_poor and has_negative_trend and is_below_benchmark

    # Audience exhaustion: low volatility + poor performance
    # (high volatility = still exploring audiences, low = exploration exhausted).
    audience_exhausted = is_below_benchmark and recent_volatility < volatility_threshold

    # Strong replacement recommendation only if both conditions are met.
    replacement_recommended = is_terminal and audience_exhausted

    # Confidence assessment
    if replacement_recommended:
        confidence = "High"
        reason = f"Sustained decline for {min_decline_days}+ days with low volatility - audience exhausted"
    elif is_terminal:
        confidence = "Medium"
        reason = "Sustained decline detected but volatility suggests audience search ongoing"
    elif audience_exhausted:
        confidence = "Medium"
        reason = "Low volatility with poor performance - may indicate exhaustion"
    else:
        confidence = "Low"
        if not is_below_benchmark:
            reason = "Performance within acceptable range"
        elif recent_volatility >= volatility_threshold:
            reason = "High volatility indicates active audience exploration (healthy)"
        else:
            reason = "No sustained decline pattern detected"

    # The decline is dated to the start of the analysed window. A frame without
    # a 'day' column still gets a verdict, just without a date.
    decline_start_date: Optional[Any] = None
    if is_terminal and "day" in recent_data.columns:
        decline_start_date = recent_data["day"].iloc[0]

    return {
        "is_terminal_decline": bool(is_terminal),
        "decline_duration_days": int(len(recent_data)) if is_terminal else 0,
        "average_performance_ratio": float(performance_ratio),
        "recent_volatility": float(recent_volatility),
        "audience_exhausted": bool(audience_exhausted),
        "replacement_recommended": bool(replacement_recommended),
        "decline_start_date": decline_start_date,
        "confidence": confidence,
        "reason": reason,
    }