# StreamLens/analyzer/tui/modern_views/statistical_analysis.py

"""
Statistical Analysis View - Timing analysis, outliers, and quality metrics
Focuses on understanding network performance and data quality
"""

import curses
import statistics
from typing import TYPE_CHECKING, List, Optional, Dict, Tuple
from ...models import FlowStats

if TYPE_CHECKING:
    from ...analysis.core import EthernetAnalyzer


class StatisticalAnalysisView:
    """
    Statistical Analysis View - F3

    Performance and quality analysis interface:
    - Timing statistics and outlier detection
    - Quality metrics and trends
    - Performance indicators
    - Network health assessment

    All screen output goes through ``_put``, which clips text to the window
    so that a small terminal cannot crash the view with ``curses.error``.
    """

    # Sub-view titles, indexed by ``analysis_mode``.
    _MODE_NAMES = ("Overview", "Outlier Analysis", "Quality Metrics", "Timing Analysis")
    # Flows whose decoder_type differs from this value are counted as enhanced.
    _STANDARD_DECODER = "Standard"

    def __init__(self, analyzer: 'EthernetAnalyzer'):
        """Store the analyzer and reset selection, mode and scroll state."""
        self.analyzer = analyzer
        self.selected_flow = 0  # Row index of the highlighted flow in the active table
        self.analysis_mode = 0  # 0=overview, 1=outliers, 2=quality, 3=timing
        self.scroll_offset = 0  # Not read by any drawing code in this class

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _put(stdscr, y: int, x: int, text: str, attr: int = curses.A_NORMAL) -> None:
        """Write ``text`` at (y, x), silently skipping anything outside the window.

        The text is clipped so it never reaches the last column: curses raises
        an error when the bottom-right cell is written, and text running past
        the right edge would otherwise wrap onto the next row.
        """
        height, width = stdscr.getmaxyx()
        if y < 0 or y >= height or x < 0 or x >= width:
            return
        clipped = text[:width - x - 1]
        if not clipped:
            return
        try:
            stdscr.addstr(y, x, clipped, attr)
        except curses.error:
            # Best effort: a failed cosmetic write must not take down the TUI.
            pass

    @staticmethod
    def _describe_flow(flow: FlowStats, column_width: int, ip_chars: int) -> str:
        """Return "src:port → dst:port", abbreviating both IPs when it is too long.

        The abbreviated form keeps the first ``ip_chars`` characters of each
        address; it is used once the full text exceeds ``column_width - 2``.
        """
        desc = f"{flow.src_ip}:{flow.src_port} → {flow.dst_ip}:{flow.dst_port}"
        if len(desc) > column_width - 2:
            desc = (f"{flow.src_ip[:ip_chars]}…:{flow.src_port} → "
                    f"{flow.dst_ip[:ip_chars]}…:{flow.dst_port}")
        return desc

    @classmethod
    def _is_enhanced(cls, flow: FlowStats) -> bool:
        """Return True when the flow's decoder type is not the standard one."""
        return flow.enhanced_analysis.decoder_type != cls._STANDARD_DECODER

    def _selectable_count(self, flows_list: List[FlowStats]) -> int:
        """Return how many rows the table of the active mode contains.

        Modes 1-3 list only a filtered subset of the flows, so the selection
        must be bounded by that subset rather than by the full list.
        """
        if self.analysis_mode == 1:
            return sum(1 for flow in flows_list if flow.outlier_frames)
        if self.analysis_mode == 2:
            return sum(1 for flow in flows_list if self._is_enhanced(flow))
        if self.analysis_mode == 3:
            return sum(1 for flow in flows_list if flow.inter_arrival_times)
        return len(flows_list)

    # ------------------------------------------------------------------
    # Drawing
    # ------------------------------------------------------------------

    def draw(self, stdscr, selected_flow_key: Optional[str]):
        """Draw the Statistical Analysis view.

        Renders the title, the mode selector and then the content of the
        active mode. ``selected_flow_key`` is accepted for interface
        compatibility but is not used by this view.
        """
        height, width = stdscr.getmaxyx()
        start_y = 3
        max_height = height - 2

        flows_list = self._get_flows_list()

        if not flows_list:
            self._put(stdscr, start_y + 2, 4, "No flows available for statistical analysis", curses.A_DIM)
            return

        # Statistical analysis header
        current_mode = self._MODE_NAMES[self.analysis_mode]
        self._put(stdscr, start_y, 4, f"STATISTICAL ANALYSIS - {current_mode}", curses.A_BOLD)

        # Mode selector: one 20-column slot per mode, active mode in brackets
        mode_line = start_y + 1
        for i, mode_name in enumerate(self._MODE_NAMES):
            x_pos = 4 + i * 20
            if i == self.analysis_mode:
                self._put(stdscr, mode_line, x_pos, f"[{mode_name}]", curses.A_REVERSE)
            else:
                self._put(stdscr, mode_line, x_pos, f" {mode_name} ", curses.A_DIM)

        # Analysis content area
        content_y = start_y + 3
        content_height = max_height - content_y

        drawers = (
            self._draw_overview,
            self._draw_outlier_analysis,
            self._draw_quality_metrics,
            self._draw_timing_analysis,
        )
        drawers[self.analysis_mode](stdscr, content_y, width, content_height, flows_list)

    def _draw_overview(self, stdscr, start_y: int, width: int, height: int, flows_list: List[FlowStats]):
        """Draw statistical overview: summary metrics plus the ranked flow table."""
        current_y = start_y

        # Overall statistics
        total_packets = sum(flow.frame_count for flow in flows_list)
        total_outliers = sum(len(flow.outlier_frames) for flow in flows_list)
        outlier_percentage = (total_outliers / total_packets * 100) if total_packets > 0 else 0

        self._put(stdscr, current_y, 4, "NETWORK PERFORMANCE SUMMARY", curses.A_UNDERLINE)
        current_y += 2

        # Key metrics
        metrics = [
            ("Total Flows", str(len(flows_list))),
            ("Total Packets", f"{total_packets:,}"),
            ("Total Outliers", f"{total_outliers:,} ({outlier_percentage:.2f}%)"),
            ("Enhanced Flows", str(sum(1 for f in flows_list if self._is_enhanced(f)))),
        ]

        for metric_name, metric_value in metrics:
            self._put(stdscr, current_y, 4, f"{metric_name:20}: {metric_value}")
            current_y += 1

        current_y += 1

        # Flow performance table
        self._put(stdscr, current_y, 4, "FLOW PERFORMANCE RANKING", curses.A_UNDERLINE)
        current_y += 2

        # Table header
        header = f"{'Rank':>4} {'Flow':30} {'Packets':>8} {'Outliers':>9} {'Avg Δt':>10} {'Jitter':>8} {'Score':>6}"
        self._put(stdscr, current_y, 4, header, curses.A_BOLD)
        current_y += 1

        # Rank flows by performance
        ranked_flows = self._rank_flows_by_performance(flows_list)

        row_limit = max(0, width - 8)
        visible_flows = max(0, min(height - (current_y - start_y) - 2, len(ranked_flows)))
        for i, (flow, score) in enumerate(ranked_flows[:visible_flows]):
            attr = curses.A_REVERSE if i == self.selected_flow else curses.A_NORMAL

            flow_desc = self._describe_flow(flow, 30, 8)

            outliers = len(flow.outlier_frames)
            outlier_pct = f"{outliers/flow.frame_count*100:.1f}%" if flow.frame_count > 0 else "0%"

            avg_timing = f"{flow.avg_inter_arrival*1000:.1f}ms" if flow.avg_inter_arrival > 0 else "N/A"
            jitter = f"{flow.std_inter_arrival*1000:.1f}ms" if flow.std_inter_arrival > 0 else "N/A"

            line = f"{i+1:>4} {flow_desc:30} {flow.frame_count:>8} {outlier_pct:>9} {avg_timing:>10} {jitter:>8} {score:>6.1f}"
            self._put(stdscr, current_y + i, 4, line[:row_limit], attr)

    def _draw_outlier_analysis(self, stdscr, start_y: int, width: int, height: int, flows_list: List[FlowStats]):
        """Draw detailed outlier analysis for every flow that has outliers.

        Flows are listed by descending outlier count; the details of the
        selected flow's outliers follow the table when space allows.
        """
        current_y = start_y

        self._put(stdscr, current_y, 4, "OUTLIER ANALYSIS", curses.A_UNDERLINE)
        current_y += 2

        # Find flows with outliers
        outlier_flows = [(flow, len(flow.outlier_frames)) for flow in flows_list if flow.outlier_frames]
        outlier_flows.sort(key=lambda x: x[1], reverse=True)

        if not outlier_flows:
            self._put(stdscr, current_y, 4, "No outliers detected in any flows", curses.A_DIM)
            self._put(stdscr, current_y + 1, 4, "All packet timing appears normal", curses.A_DIM)
            return

        # Outlier summary
        total_outliers = sum(count for _, count in outlier_flows)
        self._put(stdscr, current_y, 4, f"Flows with outliers: {len(outlier_flows)}")
        current_y += 1
        self._put(stdscr, current_y, 4, f"Total outlier packets: {total_outliers}")
        current_y += 2

        # Detailed outlier breakdown
        self._put(stdscr, current_y, 4, "OUTLIER DETAILS", curses.A_BOLD)
        current_y += 1

        header = f"{'Flow':35} {'Outliers':>9} {'Rate':>8} {'Max Σ':>8} {'Timing':>12}"
        self._put(stdscr, current_y, 4, header, curses.A_UNDERLINE)
        current_y += 1

        row_limit = max(0, width - 8)
        # Clamp at zero so a short window cannot shift the detail panel upwards.
        visible_flows = max(0, min(height - (current_y - start_y) - 2, len(outlier_flows)))
        for i, (flow, outlier_count) in enumerate(outlier_flows[:visible_flows]):
            attr = curses.A_REVERSE if i == self.selected_flow else curses.A_NORMAL

            flow_desc = self._describe_flow(flow, 35, 10)

            outlier_rate = f"{outlier_count/flow.frame_count*100:.1f}%" if flow.frame_count > 0 else "0%"
            max_sigma = self.analyzer.statistics_engine.get_max_sigma_deviation(flow)
            timing_info = f"{flow.avg_inter_arrival*1000:.1f}±{flow.std_inter_arrival*1000:.1f}ms"

            line = f"{flow_desc:35} {outlier_count:>9} {outlier_rate:>8} {max_sigma:>7.1f}σ {timing_info:>12}"
            self._put(stdscr, current_y + i, 4, line[:row_limit], attr)

        # Selected flow outlier details
        if self.selected_flow < len(outlier_flows):
            selected_flow, _ = outlier_flows[self.selected_flow]
            details_y = current_y + visible_flows + 1
            self._draw_selected_flow_outliers(stdscr, details_y, width,
                                              height - (details_y - start_y), selected_flow)

    def _draw_quality_metrics(self, stdscr, start_y: int, width: int, height: int, flows_list: List[FlowStats]):
        """Draw quality metrics: the enhanced-decoder table and health indicators."""
        current_y = start_y

        self._put(stdscr, current_y, 4, "QUALITY METRICS", curses.A_UNDERLINE)
        current_y += 2

        # Enhanced flows quality
        enhanced_flows = [f for f in flows_list if self._is_enhanced(f)]

        if enhanced_flows:
            self._put(stdscr, current_y, 4, "ENHANCED DECODER QUALITY", curses.A_BOLD)
            current_y += 1

            header = f"{'Flow':30} {'Decoder':15} {'Quality':>8} {'Drift':>10} {'Errors':>8}"
            self._put(stdscr, current_y, 4, header, curses.A_UNDERLINE)
            current_y += 1

            # Reserve 5 rows for the indicators below. Clamp at zero: a
            # negative slice bound would select "all but the last N" flows.
            rows_available = max(0, height - (current_y - start_y) - 5)
            shown_flows = enhanced_flows[:rows_available]
            row_limit = max(0, width - 8)

            for i, flow in enumerate(shown_flows):
                attr = curses.A_REVERSE if i == self.selected_flow else curses.A_NORMAL

                flow_desc = self._describe_flow(flow, 30, 8)

                enhanced = flow.enhanced_analysis
                decoder_type = enhanced.decoder_type.replace("_Enhanced", "")
                quality = f"{enhanced.avg_frame_quality:.1f}%" if enhanced.avg_frame_quality > 0 else "N/A"
                drift = f"{enhanced.avg_clock_drift_ppm:.1f}ppm" if enhanced.avg_clock_drift_ppm != 0 else "N/A"

                error_count = (enhanced.rtc_sync_errors + enhanced.format_errors +
                               enhanced.overflow_errors + enhanced.sequence_gaps)

                line = f"{flow_desc:30} {decoder_type:15} {quality:>8} {drift:>10} {error_count:>8}"
                self._put(stdscr, current_y + i, 4, line[:row_limit], attr)

            # Advance by the rows actually drawn, not by the full list length,
            # so the indicators stay inside the window.
            current_y += len(shown_flows) + 2

        # General quality indicators
        self._put(stdscr, current_y, 4, "GENERAL QUALITY INDICATORS", curses.A_BOLD)
        current_y += 1

        # Calculate network health metrics
        health_metrics = self._calculate_health_metrics(flows_list)

        for metric_name, metric_value, status in health_metrics:
            if status == "GOOD":
                status_attr = curses.A_BOLD
            elif status == "WARNING":
                status_attr = curses.A_DIM
            else:
                status_attr = curses.A_REVERSE
            self._put(stdscr, current_y, 4, f"{metric_name:25}: {metric_value:15} [{status}]", status_attr)
            current_y += 1

    def _draw_timing_analysis(self, stdscr, start_y: int, width: int, height: int, flows_list: List[FlowStats]):
        """Draw detailed timing analysis.

        Shows the distribution of all inter-arrival times pooled across flows,
        then a per-flow table sorted by descending coefficient of variation.
        """
        current_y = start_y

        self._put(stdscr, current_y, 4, "TIMING ANALYSIS", curses.A_UNDERLINE)
        current_y += 2

        # Timing distribution summary
        all_inter_arrivals = []
        for flow in flows_list:
            all_inter_arrivals.extend(flow.inter_arrival_times)

        if all_inter_arrivals:
            mean_timing = statistics.mean(all_inter_arrivals)
            median_timing = statistics.median(all_inter_arrivals)
            # stdev needs at least two samples
            std_timing = statistics.stdev(all_inter_arrivals) if len(all_inter_arrivals) > 1 else 0

            self._put(stdscr, current_y, 4, "NETWORK TIMING DISTRIBUTION", curses.A_BOLD)
            current_y += 1

            timing_stats = [
                ("Mean Inter-arrival", f"{mean_timing*1000:.3f} ms"),
                ("Median Inter-arrival", f"{median_timing*1000:.3f} ms"),
                ("Standard Deviation", f"{std_timing*1000:.3f} ms"),
                ("Coefficient of Variation", f"{std_timing/mean_timing:.3f}" if mean_timing > 0 else "N/A"),
            ]

            for stat_name, stat_value in timing_stats:
                self._put(stdscr, current_y, 4, f"{stat_name:25}: {stat_value}")
                current_y += 1

            current_y += 1

        # Per-flow timing details
        self._put(stdscr, current_y, 4, "PER-FLOW TIMING ANALYSIS", curses.A_BOLD)
        current_y += 1

        header = f"{'Flow':30} {'Mean':>10} {'Std Dev':>10} {'CV':>8} {'Range':>12}"
        self._put(stdscr, current_y, 4, header, curses.A_UNDERLINE)
        current_y += 1

        # Sort flows by timing variability
        timing_flows = [(flow, flow.std_inter_arrival / flow.avg_inter_arrival if flow.avg_inter_arrival > 0 else 0)
                        for flow in flows_list if flow.inter_arrival_times]
        timing_flows.sort(key=lambda x: x[1], reverse=True)

        row_limit = max(0, width - 8)
        visible_flows = max(0, min(height - (current_y - start_y) - 2, len(timing_flows)))
        for i, (flow, cv) in enumerate(timing_flows[:visible_flows]):
            attr = curses.A_REVERSE if i == self.selected_flow else curses.A_NORMAL

            flow_desc = self._describe_flow(flow, 30, 8)

            mean_ms = f"{flow.avg_inter_arrival*1000:.1f}ms"
            std_ms = f"{flow.std_inter_arrival*1000:.1f}ms"
            cv_str = f"{cv:.3f}"

            if flow.inter_arrival_times:
                range_ms = f"{(max(flow.inter_arrival_times) - min(flow.inter_arrival_times))*1000:.1f}ms"
            else:
                range_ms = "N/A"

            line = f"{flow_desc:30} {mean_ms:>10} {std_ms:>10} {cv_str:>8} {range_ms:>12}"
            self._put(stdscr, current_y + i, 4, line[:row_limit], attr)

    def _draw_selected_flow_outliers(self, stdscr, start_y: int, width: int, height: int, flow: FlowStats):
        """Draw outlier details for selected flow.

        Does nothing when fewer than 3 rows are available. Each entry of
        ``flow.outlier_details`` is unpacked as ``(frame_num, timing)``.
        """
        if height < 3:
            return

        title = f"OUTLIER DETAILS: {flow.src_ip}:{flow.src_port} → {flow.dst_ip}:{flow.dst_port}"
        self._put(stdscr, start_y, 4, title, curses.A_BOLD)
        current_y = start_y + 1

        if not flow.outlier_details:
            return

        header = f"{'Frame#':>8} {'Inter-arrival':>15} {'Deviation':>12}"
        self._put(stdscr, current_y, 4, header, curses.A_UNDERLINE)
        current_y += 1

        # Sigma is only defined when both mean and spread are positive.
        has_sigma = flow.avg_inter_arrival > 0 and flow.std_inter_arrival > 0

        visible_outliers = min(height - 3, len(flow.outlier_details))
        for i, (frame_num, timing) in enumerate(flow.outlier_details[:visible_outliers]):
            if has_sigma:
                sigma = abs(timing - flow.avg_inter_arrival) / flow.std_inter_arrival
                deviation = f"{sigma:.1f}σ"
            else:
                deviation = "N/A"

            outlier_line = f"{frame_num:>8} {timing*1000:>12.3f}ms {deviation:>12}"
            self._put(stdscr, current_y + i, 4, outlier_line)

    # ------------------------------------------------------------------
    # Computation
    # ------------------------------------------------------------------

    def _rank_flows_by_performance(self, flows_list: List[FlowStats]) -> List[Tuple[FlowStats, float]]:
        """Rank flows by performance score (lower is better).

        Returns ``(flow, score)`` pairs sorted by ascending score. The score
        adds the outlier percentage and 50x the coefficient of variation, and
        subtracts 10 for an enhanced decoder plus 5 more when its average
        frame quality exceeds 80.
        """
        ranked = []

        for flow in flows_list:
            score = 0.0

            # Outlier penalty (higher percentage = higher score)
            if flow.frame_count > 0:
                outlier_rate = len(flow.outlier_frames) / flow.frame_count
                score += outlier_rate * 100  # 0-100 points

            # Timing variability penalty
            if flow.avg_inter_arrival > 0:
                cv = flow.std_inter_arrival / flow.avg_inter_arrival
                score += cv * 50  # 0-50+ points

            # Enhanced decoder bonus (negative score)
            if self._is_enhanced(flow):
                score -= 10
                if flow.enhanced_analysis.avg_frame_quality > 80:
                    score -= 5  # Good quality bonus

            ranked.append((flow, score))

        ranked.sort(key=lambda x: x[1])  # Lower scores first (better performance)
        return ranked

    def _calculate_health_metrics(self, flows_list: List[FlowStats]) -> List[Tuple[str, str, str]]:
        """Calculate network health metrics.

        Returns ``(name, formatted value, status)`` tuples for the overall
        outlier rate, the enhanced-decoder coverage and, when at least one
        flow has a positive mean inter-arrival time, the timing consistency.
        """
        metrics = []

        # Overall outlier rate
        total_packets = sum(flow.frame_count for flow in flows_list)
        total_outliers = sum(len(flow.outlier_frames) for flow in flows_list)
        outlier_rate = (total_outliers / total_packets * 100) if total_packets > 0 else 0

        outlier_status = "GOOD" if outlier_rate < 1.0 else "WARNING" if outlier_rate < 5.0 else "CRITICAL"
        metrics.append(("Network Outlier Rate", f"{outlier_rate:.2f}%", outlier_status))

        # Enhanced decoder coverage
        enhanced_count = sum(1 for f in flows_list if self._is_enhanced(f))
        coverage = (enhanced_count / len(flows_list) * 100) if flows_list else 0
        coverage_status = "GOOD" if coverage > 50 else "WARNING" if coverage > 0 else "NONE"
        metrics.append(("Enhanced Coverage", f"{coverage:.1f}%", coverage_status))

        # Timing consistency: mean coefficient of variation across flows
        all_cvs = [flow.std_inter_arrival / flow.avg_inter_arrival
                   for flow in flows_list if flow.avg_inter_arrival > 0]

        if all_cvs:
            avg_cv = statistics.mean(all_cvs)
            timing_status = "GOOD" if avg_cv < 0.1 else "WARNING" if avg_cv < 0.5 else "CRITICAL"
            metrics.append(("Timing Consistency", f"CV={avg_cv:.3f}", timing_status))

        return metrics

    def _get_flows_list(self) -> List[FlowStats]:
        """Get flows sorted for statistical analysis.

        Order: most outliers first, then enhanced before standard flows, then
        highest frame count.
        """
        flows_list = list(self.analyzer.flows.values())

        # Sort by statistical interest: outliers first, then enhanced, then packet count
        flows_list.sort(key=lambda x: (
            len(x.outlier_frames),
            self._is_enhanced(x),
            x.frame_count
        ), reverse=True)

        return flows_list

    # ------------------------------------------------------------------
    # Input
    # ------------------------------------------------------------------

    def handle_input(self, key: int, flows_list: List[FlowStats]) -> str:
        """Handle input for Statistical Analysis view.

        Up/Down move the selection, Left/Right and '1'-'4' change the mode,
        'r' requests a refresh and 'o' jumps to the outlier analysis.

        Returns one of 'selection_change', 'mode_change', 'refresh_stats',
        'show_outliers' or 'none'.
        """
        last_mode = len(self._MODE_NAMES) - 1

        if key == curses.KEY_UP:
            self.selected_flow = max(0, self.selected_flow - 1)
            return 'selection_change'
        if key == curses.KEY_DOWN:
            # Bound by the rows of the active table; never drop below 0,
            # even when there is nothing to select.
            last_row = max(0, self._selectable_count(flows_list) - 1)
            self.selected_flow = min(last_row, self.selected_flow + 1)
            return 'selection_change'
        if key == curses.KEY_LEFT:
            self.analysis_mode = max(0, self.analysis_mode - 1)
            self.selected_flow = 0  # Reset selection when changing modes
            return 'mode_change'
        if key == curses.KEY_RIGHT:
            self.analysis_mode = min(last_mode, self.analysis_mode + 1)
            self.selected_flow = 0  # Reset selection when changing modes
            return 'mode_change'
        if ord('1') <= key <= ord('4'):
            self.analysis_mode = key - ord('1')
            self.selected_flow = 0
            return 'mode_change'
        if key in (ord('r'), ord('R')):
            return 'refresh_stats'
        if key in (ord('o'), ord('O')):
            if self.analysis_mode != 1:
                # The old row index refers to a different table; start at the top.
                self.selected_flow = 0
            self.analysis_mode = 1  # Switch to outlier analysis
            return 'show_outliers'

        return 'none'