#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Reporting and Visualization Generator for CreativeDynamics Library

Generates detailed HTML and CSV reports with visualization plots for
advertising creative performance analysis, wastage detection, and trend analysis.
"""

import logging
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple, Union

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import seaborn as sns
from creativedynamics.reporting.plot_styles import (
    apply_professional_style,
    get_colour,
    style_axis,
    create_professional_legend,
    COLOUR_PALETTES,
)
import numpy as np
import pandas as pd

# Get logger using centralized logging configuration
reporting_logger = logging.getLogger("CreativeDynamics.Reporting.Generator")


def safe_filename(filename: Union[str, int, float]) -> str:
    """Converts a string to a safe filename format.

    Replaces characters commonly disallowed or problematic in filenames across
    different operating systems with underscores or alternatives.

    Args:
        filename: The original string, int, or float (e.g., an item ID).

    Returns:
        A sanitized string suitable for use as part of a filename.
    """
    # Convert to string first to handle numeric IDs
    filename_str = str(filename)
    
    safe = (
        filename_str.replace("/", "_")
        .replace("\\", "_")
        .replace(":", "_")
        .replace("*", "_")
    )
    safe = safe.replace("?", "_").replace('"', "_").replace("<", "_").replace(">", "_")
    safe = safe.replace("|", "_").replace("&", "and")
    safe = safe.strip()
    reporting_logger.debug(f"Sanitized filename: '{filename}' -> '{safe}'")
    return safe


def plot_item_analysis(
    item_id: str,
    time_series: pd.DataFrame,
    metric: str = "CTR",
    change_points: Optional[List[Any]] = None,
    distances: Optional[List[Tuple[Any, float]]] = None,
    threshold: Optional[float] = None,
    segment_trends: Optional[List[Tuple[Any, Any, str]]] = None,
    output_dir: Optional[str] = None,
    wastage_analysis: Optional[Dict[str, Any]] = None,
    show_changepoints: bool = True,
) -> Optional[str]:
    """Generates and saves a plot visualizing the time-series analysis for an item.

    Creates a two-panel plot:

    1.  Top panel: Shows the specified metric over time, with detected change
        points marked and segment trends indicated by background shading and text.
        For CTR metrics with wastage analysis, also shows benchmark lines and
        highlights wastage periods.
    2.  Bottom panel: Shows the calculated signature distances between sliding
        windows over time, with the detection threshold line and points exceeding
        the threshold highlighted. This function was formerly `plot_creative_analysis`.

    Args:
        item_id: Identifier for the item/entity being plotted.
        time_series: Time series data for the item. Must contain
            'Day' and the specified metric column.
        metric: The metric being plotted. Defaults to 'CTR'.
        change_points: List of pd.Timestamp objects indicating
            detected change points. Defaults to None.
        distances: List of (pd.Timestamp, float) tuples for
            signature distances. Defaults to None.
        threshold: The distance threshold value. Defaults to None.
        segment_trends: List of
            (pd.Timestamp, pd.Timestamp, str) tuples indicating segment trends.
            Defaults to None.
        output_dir: Directory to save the plot PNG file. If None,
            defaults to 'output/plots' relative to the current working directory.
            The directory will be created if it doesn't exist.
        wastage_analysis: Dictionary containing wastage analysis
            results including benchmark values, periods, and wastage calculations.
            Used for enhanced CTR wastage visualization. Defaults to None.

    Returns:
        The absolute path to the saved plot file if successful, otherwise None.
    """
    reporting_logger.info(
        f"Creating analysis plot for item: {item_id}, metric: {metric}"
    )

    if time_series.empty or metric not in time_series.columns:
        warning_msg = (
            f"Cannot plot for {item_id} - metric '{metric}' missing or data empty."
        )
        reporting_logger.warning(warning_msg)
        return None

    reporting_logger.debug(
        f"Plot data shape: {time_series.shape}, columns: {time_series.columns.tolist()}"
    )

    # Apply professional styling
    apply_professional_style()
    
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=(14, 10), sharex=True, gridspec_kw={"height_ratios": [2, 1]}
    )
    
    # Set figure background
    fig.patch.set_facecolor(get_colour("background", "figure"))
    
    # Enhanced main metric plot with professional styling
    ax1.plot(
        time_series["day"],
        time_series[metric],
        color=get_colour("primary", "blue"),
        linewidth=2.5,
        marker="o",
        markersize=5,
        markeredgewidth=0.5,
        markeredgecolor="white",
        markerfacecolor=get_colour("primary", "blue"),
        alpha=0.9,
        label=metric,
        zorder=3,
    )

    # Add CTR wastage visualization if available
    if wastage_analysis and metric.lower() == "ctr":
        reporting_logger.debug("Adding CTR wastage visualization")
        # Add CTR benchmark line if available
        if "ctr_benchmark" in wastage_analysis and not pd.isna(
            wastage_analysis["ctr_benchmark"]
        ):
            ctr_benchmark = wastage_analysis["ctr_benchmark"]
            ax1.axhline(
                y=ctr_benchmark,
                color=get_colour("status", "success"),
                linestyle="--",
                alpha=0.85,
                linewidth=2.5,
                label=f"CTR Benchmark ({ctr_benchmark:.3f})",
                zorder=2,
            )

            # Highlight benchmark period if available
            if (
                "ctr_benchmark_period_start" in wastage_analysis
                and "ctr_benchmark_period_end" in wastage_analysis
                and wastage_analysis["ctr_benchmark_period_start"]
                and wastage_analysis["ctr_benchmark_period_end"]
            ):
                try:
                    benchmark_start = pd.to_datetime(
                        wastage_analysis["ctr_benchmark_period_start"]
                    )
                    benchmark_end = pd.to_datetime(
                        wastage_analysis["ctr_benchmark_period_end"]
                    )
                    ax1.axvspan(
                        benchmark_start,
                        benchmark_end,
                        facecolor=get_colour("status", "success"),
                        alpha=0.12,
                        edgecolor="none",
                        zorder=0,
                        label="Benchmark Period",
                    )
                    reporting_logger.debug(
                        f"Added benchmark period: {benchmark_start} to {benchmark_end}"
                    )
                except (ValueError, TypeError) as e:
                    reporting_logger.warning(
                        f"Could not parse benchmark period dates: {e}"
                    )

        # Highlight CTR wastage periods (declining periods after benchmark)
        if (
            "ctr_benchmark_period_end" in wastage_analysis
            and wastage_analysis["ctr_benchmark_period_end"]
            and "ctr_benchmark" in wastage_analysis
            and not pd.isna(wastage_analysis["ctr_benchmark"])
        ):
            try:
                benchmark_end = pd.to_datetime(
                    wastage_analysis["ctr_benchmark_period_end"]
                )
                ctr_benchmark = wastage_analysis["ctr_benchmark"]

                # Find periods after benchmark where CTR is below benchmark (wastage periods)
                post_benchmark_data = time_series[time_series["day"] > benchmark_end]
                if not post_benchmark_data.empty:
                    wastage_periods = post_benchmark_data[
                        post_benchmark_data[metric] < ctr_benchmark
                    ]
                    if not wastage_periods.empty:
                        reporting_logger.info(
                            f"Found {len(wastage_periods)} wastage periods for {item_id}"
                        )
                        # Group consecutive wastage days
                        wastage_dates = wastage_periods["day"].tolist()
                        if wastage_dates:
                            # Simple highlighting of wastage periods
                            for date in wastage_dates:
                                ax1.axvline(
                                    x=date,
                                    color=get_colour("status", "danger"),
                                    alpha=0.25,
                                    linewidth=0.8,
                                    zorder=0,
                                )

                            # Add wastage area fill
                            wastage_ctr = wastage_periods[metric].values
                            ax1.fill_between(
                                wastage_dates,
                                wastage_ctr,
                                ctr_benchmark,
                                where=(wastage_ctr < ctr_benchmark),
                                color=get_colour("status", "danger"),
                                alpha=0.15,
                                interpolate=True,
                                label="Engagement Gap",
                                zorder=1,
                            )
            except (ValueError, TypeError) as e:
                reporting_logger.warning(f"Could not create wastage visualization: {e}")

    if segment_trends:
        reporting_logger.debug(f"Adding segment trends: {len(segment_trends)} segments")
        y_min, y_max = ax1.get_ylim()
        if pd.isna(y_min) or pd.isna(y_max) or y_min == y_max:
            y_range = 1.0
            text_y_position = (y_min or 0) + 0.95
        else:
            y_range = y_max - y_min
            text_y_position = y_max - y_range * 0.05

        trend_color_map = {
            "improving": "green",
            "declining": "red",
            "stable": "blue",
            "short_segment": "grey",
            "error": "orange",
            "insufficient_data": "grey",
        }
        trend_alpha = 0.1
        text_alpha = 0.9

        # Render background shading for each segment (no text labels)
        for start_date, end_date, trend in segment_trends:
            color = trend_color_map.get(trend, "grey")
            ax1.axvspan(
                start_date, end_date, facecolor=color, alpha=trend_alpha, zorder=0
            )
        
        # Create a clean legend for segment trends instead of overlapping text
        from matplotlib.patches import Patch
        legend_elements = [
            Patch(facecolor='green', alpha=0.3, edgecolor='darkgreen', linewidth=1, label='Improving Trend'),
            Patch(facecolor='red', alpha=0.3, edgecolor='darkred', linewidth=1, label='Declining Trend'),
            Patch(facecolor='blue', alpha=0.3, edgecolor='darkblue', linewidth=1, label='Stable Trend'),
        ]
        ax1.legend(
            handles=legend_elements,
            loc='upper right',
            framealpha=0.95,
            edgecolor=get_colour("neutral", "border"),
            fancybox=True,
            fontsize=8,
            title="Trend Segments",
            title_fontsize=9
        )
        
        # Skip the old text rendering code
        if False:  # Disabled
            if (
                isinstance(start_date, pd.Timestamp)
                and isinstance(end_date, pd.Timestamp)
                and start_date <= end_date
            ):
                mid_date = start_date + (end_date - start_date) / 2
                ax1.text(
                    mid_date,
                    text_y_position,
                    trend.upper(),
                    horizontalalignment="center",
                    verticalalignment="top",
                    fontsize=9,
                    color=color,
                    alpha=text_alpha,
                    weight="bold",
                    bbox=dict(
                        boxstyle="round,pad=0.2",
                        facecolor="white",
                        alpha=0.7,
                        edgecolor="none",
                    ),
                )

    # Enhanced title with performance information
    title = f"{metric} Over Time for Item: {item_id}"
    if wastage_analysis and metric.lower() == "ctr":
        if "engagement_lost_clicks" in wastage_analysis and not pd.isna(
            wastage_analysis["engagement_lost_clicks"]
        ):
            engagement_gap = wastage_analysis["engagement_lost_clicks"]
            reference_value = wastage_analysis.get("reference_value_gbp", 0)
            title += f" (Gap: {engagement_gap:,} clicks, Ref: £{reference_value:.2f})"

    # Apply professional axis styling
    style_axis(ax1, title=title, ylabel=metric, title_size=15, show_grid=True)
    
    # Create professional legend
    create_professional_legend(ax1, loc="best", framealpha=0.95)

    # Only show changepoints if enabled (disabled for terminal decline focus)
    if change_points and show_changepoints:
        reporting_logger.debug(f"Adding change points: {len(change_points)} points")
        current_y_lim = ax1.get_ylim()
        text_y = current_y_lim[1] * 0.98 if not np.isnan(current_y_lim[1]) else 0
        for cp in change_points:
            ax1.axvline(
                x=cp, 
                color=get_colour("status", "danger"), 
                linestyle="--", 
                alpha=0.7, 
                linewidth=1.5,
                zorder=1
            )
            ax1.text(
                cp + timedelta(days=0.5),
                text_y,
                "CHANGE POINT",
                rotation=90,
                verticalalignment="top",
                fontsize=8,
                color=get_colour("status", "danger"),
                alpha=0.85,
                weight="semibold",
                bbox=dict(
                    boxstyle="round,pad=0.3",
                    facecolor="white",
                    edgecolor=get_colour("status", "danger"),
                    alpha=0.9,
                    linewidth=1.5
                ),
            )

    if distances:
        reporting_logger.debug(f"Adding signature distances: {len(distances)} points")
        if distances:
            dates, dist_values = zip(*distances)
            plot_dates = [
                d for d, dist in zip(dates, dist_values) if not np.isnan(dist)
            ]
            plot_dist_values = [dist for dist in dist_values if not np.isnan(dist)]
        else:
            plot_dates, plot_dist_values = [], []

        if plot_dates:
            ax2.plot(
                plot_dates,
                plot_dist_values,
                color=get_colour("status", "success"),
                linewidth=2.0,
                marker=".",
                markersize=5,
                alpha=0.85,
                label="Signature Distance",
                zorder=2,
            )
            if threshold is not None and not np.isnan(threshold):
                exceed_dates = [
                    d
                    for d, dist in zip(plot_dates, plot_dist_values)
                    if dist > threshold
                ]
                exceed_values = [dist for dist in plot_dist_values if dist > threshold]
                if exceed_dates:
                    ax2.plot(
                        exceed_dates,
                        exceed_values,
                        marker="x",
                        color=get_colour("status", "danger"),
                        markersize=8,
                        markeredgewidth=2,
                        linestyle="None",
                        label="Exceeds Threshold",
                        zorder=3,
                    )
                    reporting_logger.info(
                        f"Found {len(exceed_dates)} points exceeding threshold for {item_id}"
                    )
                ax2.axhline(
                    y=threshold,
                    color=get_colour("status", "danger"),
                    linestyle="--",
                    linewidth=2,
                    alpha=0.85,
                    label=f"Threshold ({threshold:.2f})",
                    zorder=1,
                )
            if all(d >= 0 for d in plot_dist_values):
                ax2.set_ylim(bottom=0)
        # Apply professional styling to distance plot
        style_axis(ax2, 
                  title="Signature Distance Between Consecutive Windows",
                  xlabel="Date",
                  ylabel="Distance",
                  title_size=12,
                  show_grid=True)
        create_professional_legend(ax2, loc="best", framealpha=0.95)

    ax2.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))  # type: ignore[no-untyped-call]
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Create plots subfolder within the provided output directory
    if output_dir is not None:
        save_dir = os.path.join(output_dir, "plots")
    else:
        save_dir = os.path.join(os.getcwd(), "output", "plots")

    output_file_path = None
    try:
        os.makedirs(save_dir, exist_ok=True)
        safe_id = safe_filename(item_id)
        output_file_path = os.path.join(save_dir, f"{safe_id}_{metric}_analysis.jpg")
        plt.savefig(
            output_file_path,
            dpi=200,
            bbox_inches='tight',
            facecolor='white',
            edgecolor='none',
        )
        reporting_logger.info(f"Plot saved successfully: {output_file_path}")
    except Exception as e:
        error_msg = f"Could not save plot to {save_dir}: {e}"
        reporting_logger.error(error_msg)
        output_file_path = None

    plt.close(fig)
    return output_file_path


def plot_combined_wastage_analysis(
    item_id: str,
    time_series: pd.DataFrame,
    cpc_wastage_analysis: Optional[Dict[str, Any]] = None,
    ctr_wastage_analysis: Optional[Dict[str, Any]] = None,
    output_dir: Optional[str] = None,
) -> Optional[str]:
    """Generates a combined CPC+CTR wastage breakdown plot with multi-panel visualization.

    Creates a 4-panel plot showing:
    1. CPC over time with benchmark and wastage highlighting
    2. CTR over time with benchmark and wastage highlighting
    3. Combined wastage breakdown (pie chart or bar chart)
    4. Wastage timeline showing both CPC and CTR wastage over time

    Args:
        item_id: Identifier for the item being plotted.
        time_series: Time series data containing both CPC and CTR columns.
        cpc_wastage_analysis: CPC wastage analysis results.
        ctr_wastage_analysis: CTR wastage analysis results.
        output_dir: Directory to save the plot PNG file.

    Returns:
        Path to saved plot file if successful, otherwise None.
    """
    reporting_logger.info(
        f"Creating combined wastage analysis plot for item: {item_id}"
    )

    if time_series.empty:
        warning_msg = f"Cannot create combined wastage plot for {item_id} - data empty."
        reporting_logger.warning(warning_msg)
        return None

    if not cpc_wastage_analysis and not ctr_wastage_analysis:
        warning_msg = f"No wastage analysis data available for {item_id}."
        reporting_logger.warning(warning_msg)
        return None

    reporting_logger.debug(f"Combined plot data shape: {time_series.shape}")

    # Apply professional styling
    apply_professional_style()
    
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))
    fig.patch.set_facecolor(get_colour("background", "figure"))
    
    fig.suptitle(
        f"Performance and Wastage Analysis for Item: {item_id}",
        fontsize=18,
        fontweight="bold",
        color=COLOUR_PALETTES["typography"]["title"]["colour"] if "typography" in COLOUR_PALETTES else "#2C3E50",
        y=0.995,
    )

    # Panel 1: CPC Analysis
    if "cpc" in time_series.columns and cpc_wastage_analysis:
        reporting_logger.debug("Adding CPC analysis panel")
        ax1.plot(
            time_series["day"],
            time_series["cpc"],
            color=get_colour("primary", "blue"),
            linewidth=2.0,
            marker="o",
            markersize=4,
            markeredgewidth=0.5,
            markeredgecolor="white",
            alpha=0.9,
            label="CPC",
            zorder=3,
        )

        if "benchmark_cpc" in cpc_wastage_analysis and not pd.isna(
            cpc_wastage_analysis["benchmark_cpc"]
        ):
            cpc_benchmark = cpc_wastage_analysis["benchmark_cpc"]
            ax1.axhline(
                y=cpc_benchmark,
                color=get_colour("status", "success"),
                linestyle="--",
                alpha=0.85,
                linewidth=2.5,
                label=f"Benchmark (£{cpc_benchmark:.3f})",
                zorder=2,
            )

            # Highlight benchmark period
            if (
                "benchmark_period_start" in cpc_wastage_analysis
                and "benchmark_period_end" in cpc_wastage_analysis
            ):
                try:
                    benchmark_start = pd.to_datetime(
                        cpc_wastage_analysis["benchmark_period_start"]
                    )
                    benchmark_end = pd.to_datetime(
                        cpc_wastage_analysis["benchmark_period_end"]
                    )
                    ax1.axvspan(
                        benchmark_start,
                        benchmark_end,
                        facecolor=get_colour("status", "success"),
                        alpha=0.12,
                        edgecolor="none",
                        zorder=0,
                        label="Benchmark Period",
                    )
                except (ValueError, TypeError) as e:
                    reporting_logger.warning(
                        f"Could not parse CPC benchmark period: {e}"
                    )

        actual_wastage = cpc_wastage_analysis.get("actual_wastage_gbp", 0)
        style_axis(ax1,
                  title=f"CPC Analysis (Wastage: £{actual_wastage:.2f})",
                  ylabel="CPC (£)",
                  title_size=13,
                  show_grid=True)
        create_professional_legend(ax1, loc="best", framealpha=0.95)
    else:
        ax1.text(
            0.5,
            0.5,
            "CPC Data Not Available",
            ha="center",
            va="center",
            transform=ax1.transAxes,
        )
        ax1.set_title("CPC Analysis")

    # Panel 2: CTR Analysis
    if "ctr" in time_series.columns and ctr_wastage_analysis:
        reporting_logger.debug("Adding CTR analysis panel")
        ax2.plot(
            time_series["day"],
            time_series["ctr"],
            color=get_colour("primary", "red"),
            linewidth=2.0,
            marker="o",
            markersize=4,
            markeredgewidth=0.5,
            markeredgecolor="white",
            alpha=0.9,
            label="CTR",
            zorder=3,
        )

        if "ctr_benchmark" in ctr_wastage_analysis and not pd.isna(
            ctr_wastage_analysis["ctr_benchmark"]
        ):
            ctr_benchmark = ctr_wastage_analysis["ctr_benchmark"]
            ax2.axhline(
                y=ctr_benchmark,
                color=get_colour("status", "success"),
                linestyle="--",
                alpha=0.85,
                linewidth=2.5,
                label=f"Benchmark ({ctr_benchmark:.3f})",
                zorder=2,
            )

            # Highlight benchmark period
            if (
                "ctr_benchmark_period_start" in ctr_wastage_analysis
                and "ctr_benchmark_period_end" in ctr_wastage_analysis
            ):
                try:
                    benchmark_start = pd.to_datetime(
                        ctr_wastage_analysis["ctr_benchmark_period_start"]
                    )
                    benchmark_end = pd.to_datetime(
                        ctr_wastage_analysis["ctr_benchmark_period_end"]
                    )
                    ax2.axvspan(
                        benchmark_start,
                        benchmark_end,
                        facecolor=get_colour("status", "success"),
                        alpha=0.12,
                        edgecolor="none",
                        zorder=0,
                        label="Benchmark Period",
                    )
                except (ValueError, TypeError) as e:
                    reporting_logger.warning(
                        f"Could not parse CTR benchmark period: {e}"
                    )

        engagement_gap = ctr_wastage_analysis.get("engagement_lost_clicks", 0)
        reference_value = ctr_wastage_analysis.get("reference_value_gbp", 0)
        style_axis(ax2,
                  title=f"CTR Analysis (Gap: {engagement_gap:,} clicks, Ref: £{reference_value:.2f})",
                  ylabel="CTR",
                  title_size=13,
                  show_grid=True)
        create_professional_legend(ax2, loc="best", framealpha=0.95)
    else:
        ax2.text(
            0.5,
            0.5,
            "CTR Data Not Available",
            ha="center",
            va="center",
            transform=ax2.transAxes,
        )
        ax2.set_title("CTR Analysis")

    # Panel 3: Wastage Breakdown
    ax3.axis("off")
    actual_overspend = (
        cpc_wastage_analysis.get("actual_wastage_gbp", 0) if cpc_wastage_analysis else 0
    )
    reference_value = (
        ctr_wastage_analysis.get("reference_value_gbp", 0) if ctr_wastage_analysis else 0
    )
    engagement_gap = (
        ctr_wastage_analysis.get("engagement_lost_clicks", 0) if ctr_wastage_analysis else 0
    )

    summary_text_panel3 = "Spend Efficiency (Financial)\n\n"
    summary_text_panel3 += f"Actual Overspend: £{actual_overspend:,.2f}\n"
    summary_text_panel3 += "----------------------------------\n"
    summary_text_panel3 += "Engagement Performance\n\n"
    summary_text_panel3 += f"Engagement Gap: {engagement_gap:,} clicks\n"
    summary_text_panel3 += f"Reference Value: £{reference_value:,.2f}\n"
    summary_text_panel3 += "(Comparison value only)"

    ax3.text(
        0.05,
        0.95,
        summary_text_panel3,
        transform=ax3.transAxes,
        fontsize=12,
        verticalalignment="top",
        fontfamily="monospace",
        bbox=dict(boxstyle="round,pad=0.5", facecolor="aliceblue", alpha=0.5),
    )
    ax3.set_title("Wastage & Opportunity Cost")

    # Panel 4: Risk Assessment and Summary
    ax4.axis("off")  # Turn off axis for text display

    summary_text = f"PERFORMANCE ANALYSIS SUMMARY\n\n"

    if cpc_wastage_analysis:
        actual_wastage = cpc_wastage_analysis.get('actual_wastage_gbp', 0)
        summary_text += f"Spend Efficiency: £{actual_wastage:.2f}\n"
        summary_text += (
            f"CPC Benchmark: £{cpc_wastage_analysis.get('benchmark_cpc', 'N/A'):.3f}\n"
        )
        summary_text += f"CPC Status: {cpc_wastage_analysis.get('calculation_status', 'N/A')}\n\n"

    if ctr_wastage_analysis:
        engagement_gap = ctr_wastage_analysis.get('engagement_lost_clicks', 0)
        reference_value = ctr_wastage_analysis.get('reference_value_gbp', 0)
        summary_text += f"Engagement Gap: {engagement_gap:,} clicks\n"
        summary_text += f"Reference Value: £{reference_value:.2f}\n"
        summary_text += (
            f"CTR Benchmark: {ctr_wastage_analysis.get('ctr_benchmark', 'N/A'):.3f}\n"
        )
        summary_text += f"CTR Status: {ctr_wastage_analysis.get('calculation_status', 'N/A')}\n\n"

    # Add double counting risk if available
    if cpc_wastage_analysis and "double_counting_risk" in cpc_wastage_analysis:
        risk_level = cpc_wastage_analysis["double_counting_risk"]
        summary_text += f"Correlation Risk: {risk_level}\n"

        risk_color = {"Low": "green", "Medium": "orange", "High": "red"}.get(
            risk_level, "black"
        )
        ax4.text(
            0.05,
            0.95,
            summary_text,
            transform=ax4.transAxes,
            fontsize=10,
            verticalalignment="top",
            fontfamily="monospace",
        )

        # Highlight risk level
        ax4.text(
            0.05,
            0.05,
            f"Correlation Risk: {risk_level}",
            transform=ax4.transAxes,
            fontsize=12,
            fontweight="bold",
            color=risk_color,
            verticalalignment="bottom",
        )
        reporting_logger.info(f"Correlation risk for {item_id}: {risk_level}")
    else:
        ax4.text(
            0.05,
            0.95,
            summary_text,
            transform=ax4.transAxes,
            fontsize=10,
            verticalalignment="top",
            fontfamily="monospace",
        )

    # Rotate date labels for better readability
    for ax in [ax1, ax2]:
        ax.tick_params(axis="x", rotation=45)

    plt.tight_layout()

    # Save the plot
    if output_dir is not None:
        save_dir = os.path.join(output_dir, "plots")
    else:
        save_dir = os.path.join(os.getcwd(), "output", "plots")

    output_file_path = None
    try:
        os.makedirs(save_dir, exist_ok=True)
        safe_id = safe_filename(item_id)
        output_file_path = os.path.join(
            save_dir, f"{safe_id}_combined_wastage_analysis.jpg"
        )
        plt.savefig(
            output_file_path,
            dpi=200,
            bbox_inches='tight',
            facecolor='white',
            edgecolor='none',
        )
        reporting_logger.info(f"Combined wastage plot saved: {output_file_path}")
    except Exception as e:
        error_msg = f"Could not save combined wastage plot to {save_dir}: {e}"
        reporting_logger.error(error_msg)
        output_file_path = None

    plt.close(fig)
    return output_file_path


def generate_summary_report(
    results: Dict[str, Any],
    excluded_items: Dict[str, str],
    output_file: str = "output/analysis_summary",
    min_wastage: float = 0.0,
) -> str:
    """Generates summary HTML and CSV reports from analysis results.

    Creates two report files based on the provided base output path:

    1.  An HTML report with summary statistics, interactive filtering, and
        links to plots.
    2.  A CSV report containing the detailed results table.

    Args:
        results: The nested dictionary of analysis results returned by
            `analyze_all_items`.
        excluded_items: Dictionary mapping excluded item IDs to
            their exclusion reason (from `prepare_item_time_series`).
        output_file: Base path for the output reports. '.html'
            and '.csv' extensions will be appended. Defaults to
            'output/analysis_summary'.
        min_wastage: Minimum total wastage (e.g., GBP for CPC)
            to highlight in the HTML report if wastage analysis (example) is performed. Defaults to 0.0.

    Returns:
        The path to the generated HTML report file. Returns the path even
             if saving failed (errors are printed).
    """
    reporting_logger.info(f"Generating summary report: {output_file}")
    reporting_logger.info(
        f"Processing {len(results)} analyzed items and {len(excluded_items)} excluded items"
    )

    output_dir = os.path.dirname(output_file) if os.path.dirname(output_file) else "."
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        reporting_logger.debug(f"Created output directory: {output_dir}")

    base_output_path, _ = os.path.splitext(output_file)
    html_output_file, csv_output_file = (
        base_output_path + ".html",
        base_output_path + ".csv",
    )

    report_data, all_metrics = [], set()
    total_analyzed_items, total_excluded_items = len(results), len(excluded_items)
    total_cpc_wastage, items_with_wastage_above_threshold, total_spend = 0.0, 0, 0.0

    reporting_logger.info("Processing analysis results for report generation")

    for item_id_val, item_results_val in results.items():
        # Skip internal metadata keys (e.g., _pipeline_metadata from analyzer).
        # Item IDs may be numeric (floats/ints), so only treat string keys
        # starting with '_' as metadata.
        if isinstance(item_id_val, str) and item_id_val.startswith("_"):
            reporting_logger.debug(f"Skipping metadata key: {item_id_val}")
            continue
            
        reporting_logger.debug(f"Processing report data for item: {item_id_val}")
        row: Dict[str, Any] = {"Item ID": item_id_val}

        # Set default values for new detailed statistics
        default_values: Dict[str, Any] = {
            "CPC Pattern": False,
            "CTR Pattern": False,
            "CPC Trend": "undetermined",
            "CTR Trend": "undetermined",
            "CPC Plot": "",
            "CTR Plot": "",
            "Total Wastage": 0.0,
            "Benchmark CPC": np.nan,
            "Benchmark CTR": np.nan,
            "Wastage CPC": 0.0,
            "Wastage CTR": 0.0,
            "Combined Wastage": 0.0,
            "CTR Bench": np.nan,
            "Lost Clicks": 0,
            "Correlation Risk": "Low",
            "CPC Calculation Status": "Missing Required Data",
            "CTR Calculation Status": "Missing Required Data",
        }
        row.update(default_values)

        # Get total spend from the first metric's results (it's the same for all)
        if item_results_val:
            # Get total spend from the CPC metric results, if available
            # Note: metrics are stored in lowercase ("cpc", "ctr") by the analyzer
            if "cpc" in item_results_val:
                # Use the actual spend value from the data
                spend = item_results_val["cpc"].get("total_spend", 0)
                row["Total Spend (GBP)"] = spend
                total_spend += spend
                reporting_logger.debug(f"Item {item_id_val} spend: £{spend}")
            else:
                row["Total Spend (GBP)"] = 0

        for metric, res_data in item_results_val.items():
            all_metrics.add(metric)
            reporting_logger.debug(
                f"Processing metric '{metric}' for item {item_id_val}"
            )

            if "wastage_analysis" in res_data:
                reporting_logger.debug(f"Found wastage_analysis in {metric}")

            # Store detailed statistics in standard formats for display in summary
            pattern_change = res_data.get("pattern_change_detected", False)
            trend = res_data.get("overall_trend", "undetermined")
            change_points = len(res_data.get("change_points", []))

            # Store traditional columns for backward compatibility
            row[f"{metric} Pattern Change Detected"] = pattern_change
            row[f"{metric} Overall Trend"] = trend
            row[f"{metric} Change Points"] = change_points

            # Store new simplified columns for the detailed summary table
            # Note: metrics are stored in lowercase by the analyzer
            if metric == "cpc":
                row["CPC Pattern"] = pattern_change
                row["CPC Trend"] = trend
                row["CPC Pattern Change"] = pattern_change  # Legacy column
            elif metric == "ctr":
                row["CTR Pattern"] = pattern_change
                row["CTR Trend"] = trend
                row["CTR Pattern Change"] = pattern_change  # Legacy column

            # Generate plot file paths based on standard naming convention
            safe_item_id = safe_filename(item_id_val)
            plot_filename = f"{safe_item_id}_{metric.lower()}_analysis.jpg"
            plot_path = f"plots/{plot_filename}"

            # Try to get the actual plot file path from results first
            plot_file_full_path = res_data.get("plot_file")
            if plot_file_full_path:
                try:
                    plot_path = os.path.relpath(plot_file_full_path, output_dir)
                except ValueError:
                    # If relative path fails, use the constructed path
                    plot_path = f"plots/{plot_filename}"

            row[f"{metric} Plot File"] = plot_path
            # Also store in simplified column
            if metric == "cpc":
                row["CPC Plot"] = plot_path
            elif metric == "ctr":
                row["CTR Plot"] = plot_path

            # Extract CPC financial metrics from spend_efficiency
            if metric == "cpc" and res_data.get("spend_efficiency"):
                spend_eff = res_data["spend_efficiency"]
                reporting_logger.debug(f"Processing CPC spend efficiency for {item_id_val}")

                # CPC Financial Metrics (from spend_efficiency)
                actual_wastage = spend_eff.get("actual_wastage_gbp", 0.0)
                benchmark_cpc = spend_eff.get("benchmark_cpc", np.nan)
                benchmark_start = spend_eff.get("benchmark_period_start", "")
                benchmark_end = spend_eff.get("benchmark_period_end", "")
                cpc_status = spend_eff.get("calculation_status", "N/A")
                
                # Legacy columns (backward compatibility)
                row["Benchmark CPC"] = benchmark_cpc
                row["Benchmark Period Start"] = str(benchmark_start) if benchmark_start else ""
                row["Benchmark Period End"] = str(benchmark_end) if benchmark_end else ""
                row["Wastage Calculation Status"] = cpc_status
                
                # New columns
                row["Actual Overspend (GBP)"] = actual_wastage
                row["CPC Calculation Status"] = cpc_status
                
                # Legacy column mappings for backward compatibility
                row["CPC Wastage (GBP)"] = actual_wastage  # Maps to Actual Wastage
                row["CPC Status"] = cpc_status
                
                # Get total spend from spend_efficiency if available
                total_spend_item = spend_eff.get(
                    "total_spend", row.get("Total Spend (GBP)", 0.0)
                )

                # Financial metrics
                row["Total Spend (GBP)"] = total_spend_item
                row["Actual Overspend (GBP)"] = actual_wastage

                # Calculate wastage percentage
                if total_spend_item > 0:
                    wastage_pct = round((actual_wastage / total_spend_item) * 100, 2)
                    row["Overspend %"] = wastage_pct
                    row["CPC Wastage %"] = wastage_pct  # Legacy column
                else:
                    row["Overspend %"] = 0.0
                    row["CPC Wastage %"] = 0.0

            # Extract CTR engagement metrics from engagement_performance
            if metric == "ctr" and res_data.get("engagement_performance"):
                engage_perf = res_data["engagement_performance"]
                reporting_logger.debug(f"Processing CTR engagement performance for {item_id_val}")
                
                # CTR Engagement Metrics (from engagement_performance)
                engagement_lost_clicks = engage_perf.get("engagement_lost_clicks", 0)
                ctr_benchmark = engage_perf.get("ctr_benchmark", np.nan)
                ctr_decline_pp = engage_perf.get("ctr_decline_percentage_points", 0.0)
                reference_value_gbp = engage_perf.get("reference_value_gbp", 0.0)
                ctr_actual_avg = engage_perf.get("ctr_actual_average", np.nan)
                ctr_benchmark_start = engage_perf.get("ctr_benchmark_period_start", "")
                ctr_benchmark_end = engage_perf.get("ctr_benchmark_period_end", "")
                ctr_status = engage_perf.get("calculation_status", "N/A")
                double_counting_risk = engage_perf.get("double_counting_risk", "Low")
                
                # Store CTR metrics in row
                row["Engagement Gap (Clicks)"] = engagement_lost_clicks
                row["Benchmark CTR"] = ctr_benchmark
                row["CTR Decline (pp)"] = ctr_decline_pp
                row["Reference Value (GBP)"] = reference_value_gbp
                row["CTR Benchmark (%)"] = ctr_benchmark
                row["CTR Actual Average (%)"] = ctr_actual_avg
                row["CTR Benchmark Start"] = str(ctr_benchmark_start) if ctr_benchmark_start else ""
                row["CTR Benchmark End"] = str(ctr_benchmark_end) if ctr_benchmark_end else ""
                row["CTR Calculation Status"] = ctr_status
                row["Correlation Risk"] = double_counting_risk
                row["CTR Status"] = ctr_status  # Legacy column
                
                # Calculate CTR Decline % for legacy column (as percentage, not pp)
                if not pd.isna(ctr_benchmark) and ctr_benchmark > 0 and not pd.isna(ctr_actual_avg):
                    ctr_decline_pct = ((ctr_actual_avg - ctr_benchmark) / ctr_benchmark) * 100
                    row["CTR Decline %"] = f"{ctr_decline_pct:.1f}%"
                else:
                    row["CTR Decline %"] = "N/A"

                # Get start date from the results
                if "start_date" in res_data:
                    row["Reporting Start Date"] = res_data["start_date"].strftime('%Y-%m-%d')
                else:
                    row["Reporting Start Date"] = "N/A"

                # CTR Decline % is now calculated in the CTR metric processing block above
                # No need for duplicate calculation here

                # Calculate lost clicks per day (robust to None/strings)
                # Get engagement gap from new structure
                total_lost_clicks = 0
                if "ctr" in item_results_val and "engagement_performance" in item_results_val["ctr"]:
                    raw_lost_clicks = item_results_val["ctr"]["engagement_performance"].get("engagement_lost_clicks", 0)
                    # Coerce to numeric, default to 0 for None/NaN/non-numeric
                    coerced = pd.to_numeric(raw_lost_clicks, errors="coerce")
                    total_lost_clicks = int(coerced) if pd.notna(coerced) else 0

                estimated_days = (
                    30  # Default assumption, could be calculated from actual data
                )
                if total_lost_clicks > 0:
                    lost_clicks_per_day = round(total_lost_clicks / estimated_days, 0)
                    row["Lost Clicks per Day"] = int(lost_clicks_per_day)
                else:
                    row["Lost Clicks per Day"] = 0

                # CTR Trend (simplified for now)
                pattern_change = row.get("CTR Pattern", False)
                if pattern_change:
                    row["CTR Trend"] = "Declining"
                else:
                    row["CTR Trend"] = "Stable"

                # Extract fatigue detection flags from analysis_metadata
                analysis_metadata = item_results_val.get("analysis_metadata", {})
                ctr_fatigue = analysis_metadata.get("ctr_fatigue", {})
                cpc_fatigue = analysis_metadata.get("cpc_fatigue", {})
                
                # Fatigue detection boolean flags
                row["Replacement Recommended"] = analysis_metadata.get("overall_replacement_recommended", False)
                row["CTR Terminal Decline"] = ctr_fatigue.get("is_terminal_decline", False)
                row["CPC Terminal Decline"] = cpc_fatigue.get("is_terminal_decline", False)
                row["CTR Audience Exhausted"] = ctr_fatigue.get("audience_exhausted", False)
                row["CPC Audience Exhausted"] = cpc_fatigue.get("audience_exhausted", False)
                
                # Calculate volatility indicators
                ctr_volatility = ctr_fatigue.get("recent_volatility", 0.0)
                cpc_volatility = cpc_fatigue.get("recent_volatility", 0.0)
                high_volatility = (ctr_volatility > 0.15) or (cpc_volatility > 0.15)
                
                # Human-readable Fatigue Status
                if row["Replacement Recommended"]:
                    row["Fatigue Status"] = "Creative Fatigued - Replace"
                    row["Recommended Action"] = "Replace this creative"
                elif ctr_fatigue.get("is_terminal_decline") or cpc_fatigue.get("is_terminal_decline"):
                    row["Fatigue Status"] = "Terminal Decline Detected"
                    row["Recommended Action"] = "Monitor, may recover"
                elif ctr_fatigue.get("audience_exhausted") or cpc_fatigue.get("audience_exhausted"):
                    row["Fatigue Status"] = "Audience Exhausted"
                    row["Recommended Action"] = "Consider replacing soon"
                elif high_volatility:
                    row["Fatigue Status"] = "High Volatility (Healthy)"
                    row["Recommended Action"] = "Let it run - actively optimising"
                else:
                    row["Fatigue Status"] = "Healthy Performance"
                    row["Recommended Action"] = "Continue monitoring"
                
                # Fatigue confidence and reason
                row["CTR Fatigue Confidence"] = ctr_fatigue.get("confidence", "N/A")
                row["CTR Fatigue Reason"] = ctr_fatigue.get("reason", "")
                
                # Creative Health Score (composite indicator)
                health_score = 10  # Start with perfect score
                cpc_wastage = row.get("Actual Overspend (GBP)", 0.0)
                total_spend_item = row.get("Total Spend (GBP)", 0.0)
                if total_spend_item > 0 and cpc_wastage > 0:
                    cpc_impact = (cpc_wastage / total_spend_item) * 100
                    if cpc_impact > 50:
                        health_score -= 5
                    elif cpc_impact > 20:
                        health_score -= 3
                    elif cpc_impact > 10:
                        health_score -= 2

                if (
                    isinstance(row["CTR Decline %"], str)
                    and row["CTR Decline %"] != "N/A"
                ):
                    try:
                        ctr_decline_pct = float(row["CTR Decline %"].replace("%", ""))
                        if ctr_decline_pct < -50:  # Significant CTR decline
                            health_score -= 3
                        elif ctr_decline_pct < -20:
                            health_score -= 2
                        elif ctr_decline_pct < -10:
                            health_score -= 1
                    except (ValueError, AttributeError):
                        pass

                health_score = max(1, health_score)  # Minimum score of 1
                # Convert to decimal format (0-1 scale)
                health_score_decimal = health_score / 10.0
                row["Creative Health Score"] = round(health_score_decimal, 2)

                reporting_logger.debug(
                    f"Item {item_id_val}: CPC wastage £{cpc_wastage}, Health score {health_score_decimal}"
                )

                # Accumulate financial overspend (spend_efficiency)
                current_overspend = row.get("Actual Overspend (GBP)", 0.0)
                if not pd.isna(current_overspend):
                    total_cpc_wastage += current_overspend
                    if current_overspend >= min_wastage:
                        items_with_wastage_above_threshold += 1

            elif metric == "ctr" and res_data.get("engagement_performance"):
                # Handle CTR metric engagement performance analysis
                engagement_info = res_data["engagement_performance"]
                reporting_logger.debug(f"Processing engagement performance analysis for {item_id_val}")
                
                # Populate CTR-specific benchmark values
                ctr_benchmark_val = engagement_info.get("ctr_benchmark", np.nan)
                if not np.isnan(ctr_benchmark_val):
                    row["Benchmark CTR"] = ctr_benchmark_val
                    
                # Populate engagement gap (clicks)
                row["Engagement Gap (Clicks)"] = engagement_info.get("engagement_lost_clicks", 0)
                
                # Store reference value (GBP)
                row["Reference Value (GBP)"] = engagement_info.get("reference_value_gbp", 0.0)
                
                reporting_logger.debug(
                    f"Item {item_id_val}: CTR benchmark {ctr_benchmark_val}, Engagement gap {row['Engagement Gap (Clicks)']} clicks"
                )
                
            elif metric == "CPC":
                # Default values for items without analysis
                default_wastage_values: Dict[str, Any] = {
                    "Benchmark CPC": np.nan,
                    "Benchmark Period Start": "",
                    "Benchmark Period End": "",
                    "Wastage Calculation Status": "N/A",
                    "Actual Overspend (GBP)": 0.0,
                    "Engagement Gap (Clicks)": 0,
                    "Reference Value (GBP)": 0.0,
                    "CTR Decline (pp)": 0.0,
                    "CTR Benchmark (%)": np.nan,
                    "CTR Actual Average (%)": np.nan,
                    "CTR Benchmark Start": "",
                    "CTR Benchmark End": "",
                    "Correlation Risk": "Unknown",
                    "CPC Calculation Status": "N/A",
                    "CTR Calculation Status": "N/A",
                }
                row.update(default_wastage_values)
        report_data.append(row)

    reporting_logger.info(f"Processed {len(report_data)} items for report")

    report_df = pd.DataFrame(report_data)

    # Create detailed column lists
    csv_columns = ["Item ID"]
    sorted_metrics = sorted(list(all_metrics), key=lambda x: (x != "CPC", x))

    for metric in sorted_metrics:
        csv_columns.extend(
            [
                f"{metric} Pattern Change Detected",
                f"{metric} Overall Trend",
                f"{metric} Change Points",
                f"{metric} Plot File",
            ]
        )

    # Enhanced detailed statistics columns with clear naming and detailed data
    detailed_columns = [
        "Item ID",
        "Fatigue Status",
        "Recommended Action",
        "Replacement Recommended",
        "CTR Terminal Decline",
        "CPC Terminal Decline",
        "CTR Audience Exhausted",
        "CPC Audience Exhausted",
        "CTR Fatigue Confidence",
        "CTR Fatigue Reason",
        "Total Spend (GBP)",
        "Actual Overspend (GBP)",
        "Overspend %",
        "CTR Decline %",
        "Lost Clicks per Day",
        "CTR Trend",
        "Creative Health Score",
        "Benchmark CPC",
        "Benchmark CTR",
        "Engagement Gap (Clicks)",
        "Reference Value (GBP)",
        "Reporting Start Date",
        "CPC Pattern Change",
        "CTR Pattern Change",
        "CPC Status",
        "CTR Status",
        "CPC Plot",
        "CTR Plot",
        "Correlation Risk",
    ]

    if "CPC" in all_metrics:
        # Enhanced columns with new metric structure
        csv_columns.extend(
            [
                "Benchmark CPC",
                "Benchmark Period Start",
                "Benchmark Period End",
                "Wastage Calculation Status",
            ]
        )

        # New metric columns
        csv_columns.extend(
            [
                "Total Spend (GBP)",
                "Actual Overspend (GBP)",
                "Overspend %",
                "Engagement Gap (Clicks)",
                "CTR Decline (pp)",
                "Reference Value (GBP)",
                "CTR Benchmark (%)",
                "CTR Actual Average (%)",
                "CTR Benchmark Start",
                "CTR Benchmark End",
                "Correlation Risk",
                "Double Counting Risk",
                "CPC Calculation Status",
                "CTR Calculation Status",
            ]
        )

    for col in csv_columns:
        if col not in report_df.columns:
            report_df[col] = np.nan

    # Create both traditional and detailed dataframes
    if not report_df.empty:
        # Create the detailed report with the new format
        detailed_df = pd.DataFrame(report_data)
        # Ensure all columns exist
        for col in detailed_columns:
            if col not in detailed_df.columns:
                detailed_df[col] = np.nan
        detailed_df = detailed_df[detailed_columns]

        # Sort by CPC Wastage (the main financial metric)
        if "CPC Wastage (GBP)" in detailed_df.columns:
            detailed_df = detailed_df.sort_values(
                by="CPC Wastage (GBP)", ascending=False, na_position="last"
            )

        report_df = detailed_df
        reporting_logger.info(f"Created detailed report with {len(report_df)} items")

    # Save CSV report
    try:
        report_df.to_csv(csv_output_file, index=False, float_format="%.2f")
        reporting_logger.info(f"CSV report saved to {csv_output_file}")
    except Exception as e:
        reporting_logger.error(f"Error saving CSV report to {csv_output_file}: {e}")

    # Calculate metric statistics
    metric_stats = {}
    if not report_df.empty:
        for metric in sorted_metrics:
            change_col, trend_col = (
                f"{metric} Pattern Change Detected",
                f"{metric} Overall Trend",
            )
            if change_col in report_df.columns and trend_col in report_df.columns:
                metric_stats[metric] = {
                    "pattern_change_count": report_df[change_col].sum(),
                    "improving_count": (report_df[trend_col] == "improving").sum(),
                    "stable_count": (report_df[trend_col] == "stable").sum(),
                    "declining_count": (report_df[trend_col] == "declining").sum(),
                }
                reporting_logger.debug(f"Metric {metric} stats: {metric_stats[metric]}")

    # Generate HTML content
    reporting_logger.info("Generating HTML report content")
    html_content = f"""
    <!DOCTYPE html><html><head><title>Time-Series Analysis Report</title>
    <style> /* Enhanced styles */
        body {{ font-family: sans-serif; margin: 20px; }} .container {{ max-width: 1400px; margin: auto; }}
        table {{ width: 100%; border-collapse: collapse; margin-top: 1em; }} 
        th, td {{ border: 1px solid #ccc; padding: 0.5em; text-align: left; }}
        th {{ background-color: #eee; }} 
        .pattern-change-detected {{ color: red; font-weight: bold; }} 
        .no-pattern-change {{ color: green; }}
        .declining {{ color: red; }}
        .stable {{ color: black; }}
        .improving {{ color: green; font-weight: bold; }}
        .undetermined {{ color: gray; }}
        .low {{ color: green; }}
        .medium {{ color: orange; }}
        .high {{ color: red; font-weight: bold; }}
        .calculated {{ color: black; }}
        .missing {{ color: gray; font-style: italic; }}
        .error {{ color: red; }}
    </style></head><body><div class="container">
    <h1>Time-Series Analysis Report</h1><p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
    <h2>Summary</h2><p>Analyzed Items: {total_analyzed_items}, Excluded Items: {total_excluded_items}</p>
    """

    # Enhanced performance summary
    if "CPC" in all_metrics:
        # Calculate performance statistics
        total_ctr_wastage = (
            report_df["Reference Value (GBP)"].sum()
            if "Reference Value (GBP)" in report_df.columns
            else 0.0
        )
        total_cpc_only_wastage = (
            report_df["Actual Overspend (GBP)"].sum()
            if "Actual Overspend (GBP)" in report_df.columns
            else 0.0
        )
        total_lost_clicks = (
            report_df["Engagement Gap (Clicks)"].sum() 
            if "Engagement Gap (Clicks)" in report_df.columns 
            else 0
        )
        # Prefer Correlation Risk column; fall back to legacy Double Counting Risk
        risk_col = "Correlation Risk" if "Correlation Risk" in report_df.columns else (
            "Double Counting Risk" if "Double Counting Risk" in report_df.columns else None
        )
        high_risk_items = (
            len(report_df[report_df[risk_col] == "High"]) if risk_col else 0
        )

        html_content += f"""<h3>Performance Analysis Summary</h3>
        <p><strong>Total Spend:</strong> £{total_spend:,.2f}</p>
        
        <div style="border-left: 5px solid #28a745; padding-left: 15px; margin: 20px 0;">
            <h4>Spend Efficiency (Financial)</h4>
            <p><strong>Actual Overspend:</strong> £{total_cpc_wastage:,.2f}</p>
            <p><em>Excess spending vs optimal CPC rates</em></p>
        </div>
        
        <div style="border-left: 5px solid #007bff; padding-left: 15px; margin: 20px 0;">
            <h4>Engagement Performance</h4>
            <p><strong>Engagement Gap:</strong> {total_lost_clicks:,} clicks</p>
            <p><strong>Reference Value:</strong> £{total_ctr_wastage:,.2f} <em>(at current CPC, for comparison only)</em></p>
        </div>
        
        <p><strong>Items with High Double-Counting Risk:</strong> {high_risk_items}</p>
        <p style="background-color: #fff3cd; padding: 10px; border-radius: 5px;">
            <strong>⚠ Important:</strong> Spend efficiency and engagement performance measure different aspects 
            and should not be combined. Address each with appropriate strategies.
        </p>"""

        reporting_logger.info(
            f"Performance summary: Total spend £{total_spend:,.2f}, Actual overspend £{total_cpc_wastage:,.2f}, Engagement gap {total_lost_clicks} clicks"
        )

    for metric, stats in metric_stats.items():
        html_content += f"<h3>{metric}</h3><p>Items with Pattern Change Detected: {int(stats['pattern_change_count'])}</p>"

    html_content += "<h2>Item Results</h2>"
    if not report_df.empty:
        html_content += report_df.to_html(
            index=False,
            na_rep="N/A",
            float_format="%.2f",
            border=0,
            classes="results-table",
            escape=False,
        )
    else:
        html_content += "<p>No items analyzed.</p>"

    html_content += "<h2>Excluded Items</h2>"
    if excluded_items:
        excluded_df = pd.DataFrame(
            list(excluded_items.items()), columns=["Item ID", "Reason"]
        )
        html_content += excluded_df.to_html(index=False, border=0)
        reporting_logger.info(f"Added {len(excluded_items)} excluded items to report")
    else:
        html_content += "<p>None excluded.</p>"

    html_content += "</div></body></html>"

    # Save HTML report
    try:
        with open(html_output_file, "w", encoding="utf-8") as f:
            f.write(html_content)
        reporting_logger.info(f"HTML report saved to {html_output_file}")
    except Exception as e:
        reporting_logger.error(f"Error saving HTML report: {e}")

    reporting_logger.info("Report generation completed successfully")
    return html_output_file
