""" Flow Analysis Report Generator Generates comprehensive flow analysis reports with markup formatting """ import datetime from typing import Dict, List, Optional from pathlib import Path from ..models import FlowStats, FrameTypeStats class FlowReportGenerator: """Generate comprehensive flow analysis reports""" def __init__(self, analyzer): self.analyzer = analyzer def generate_report(self, output_path: Optional[str] = None, format_type: str = "markdown") -> str: """Generate comprehensive flow analysis report""" if format_type == "markdown": return self._generate_markdown_report(output_path) elif format_type == "html": return self._generate_html_report(output_path) else: return self._generate_text_report(output_path) def _generate_markdown_report(self, output_path: Optional[str] = None) -> str: """Generate markdown-formatted report""" flows = list(self.analyzer.flows.values()) # Sort flows by importance (enhanced first, then by packet count) flows.sort(key=lambda x: ( x.enhanced_analysis.decoder_type != "Standard", len(x.outlier_frames), x.frame_count ), reverse=True) report_lines = [] # Header timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") report_lines.extend([ "# StreamLens Flow Analysis Report", f"**Generated:** {timestamp}", f"**Total Flows:** {len(flows)}", f"**Analysis Engine:** {self.analyzer.__class__.__name__}", "", "---", "" ]) # Executive Summary report_lines.extend(self._generate_executive_summary(flows)) # Detailed Flow Analysis report_lines.extend([ "## 📊 Detailed Flow Analysis", "" ]) for i, flow in enumerate(flows, 1): report_lines.extend(self._generate_flow_section(flow, i)) # Statistics Summary report_lines.extend(self._generate_statistics_summary(flows)) report_content = "\n".join(report_lines) # Save to file if path provided if output_path: output_file = Path(output_path) output_file.write_text(report_content, encoding='utf-8') return report_content def _generate_executive_summary(self, flows: List[FlowStats]) -> List[str]: """Generate executive summary section""" total_packets = sum(flow.frame_count for flow in flows) total_bytes = sum(flow.total_bytes for flow in flows) enhanced_flows = [f for f in flows if f.enhanced_analysis.decoder_type != "Standard"] high_outlier_flows = [f for f in flows if len(f.outlier_frames) > f.frame_count * 0.1] return [ "## 📋 Executive Summary", "", f"- **Total Network Flows:** {len(flows)}", f"- **Total Packets Analyzed:** {total_packets:,}", f"- **Total Data Volume:** {self._format_bytes(total_bytes)}", f"- **Enhanced Protocol Flows:** {len(enhanced_flows)} ({len(enhanced_flows)/len(flows)*100:.1f}%)", f"- **Flows with Timing Issues:** {len(high_outlier_flows)} ({len(high_outlier_flows)/len(flows)*100:.1f}%)", "", "### 🎯 Key Findings", "" ] def _generate_flow_section(self, flow: FlowStats, flow_num: int) -> List[str]: """Generate detailed section for a single flow""" lines = [] # Flow Header status_emoji = self._get_flow_status_emoji(flow) quality_score = self._get_quality_score(flow) lines.extend([ f"### {status_emoji} Flow #{flow_num}: {flow.src_ip}:{flow.src_port} → {flow.dst_ip}:{flow.dst_port}", "" ]) # Basic Information Table lines.extend([ "| Attribute | Value |", "|-----------|-------|", f"| **Protocol** | {flow.transport_protocol} |", f"| **Classification** | {flow.traffic_classification} |", f"| **Packets** | {flow.frame_count:,} |", f"| **Volume** | {self._format_bytes(flow.total_bytes)} |", f"| **Quality Score** | {quality_score}% |", f"| **Duration** | {flow.duration:.2f}s |", f"| **First Seen** | {self._format_timestamp(flow.first_seen)} |", f"| **Last Seen** | {self._format_timestamp(flow.last_seen)} |", "" ]) # Enhanced Analysis (if available) if flow.enhanced_analysis.decoder_type != "Standard": lines.extend(self._generate_enhanced_analysis_section(flow)) # Frame Type Breakdown if flow.frame_types: lines.extend(self._generate_frame_types_section(flow)) # Timing Analysis lines.extend(self._generate_timing_analysis_section(flow)) lines.append("") return lines def _generate_enhanced_analysis_section(self, flow: FlowStats) -> List[str]: """Generate enhanced analysis section""" ea = flow.enhanced_analysis lines = [ "#### 🔬 Enhanced Protocol Analysis", "", "| Metric | Value |", "|--------|-------|", f"| **Decoder Type** | {ea.decoder_type} |", f"| **Frame Quality** | {ea.avg_frame_quality:.1f}% |", f"| **Field Count** | {ea.field_count} |", f"| **Timing Accuracy** | {ea.timing_accuracy:.1f}% |", f"| **Signal Quality** | {ea.signal_quality:.1f}% |" ] if ea.decoder_type.startswith("Chapter10"): lines.extend([ f"| **Channel Count** | {ea.channel_count} |", f"| **Analog Channels** | {ea.analog_channels} |", f"| **PCM Channels** | {ea.pcm_channels} |", f"| **TMATS Frames** | {ea.tmats_frames} |", f"| **Clock Drift** | {ea.avg_clock_drift_ppm:.2f} ppm |", f"| **Timing Quality** | {ea.timing_quality} |" ]) lines.extend(["", ""]) return lines def _generate_frame_types_section(self, flow: FlowStats) -> List[str]: """Generate frame types breakdown section""" lines = [ "#### 📦 Frame Type Analysis", "", "| Frame Type | Count | % | Avg ΔT | Std σ | Outliers | Outlier Frames |", "|------------|-------|---|---------|--------|----------|----------------|" ] # Sort frame types by count sorted_types = sorted( flow.frame_types.items(), key=lambda x: x[1].count, reverse=True ) total_count = flow.frame_count for frame_type, stats in sorted_types: percentage = (stats.count / total_count * 100) if total_count > 0 else 0 # Format timing values delta_t = "" if stats.avg_inter_arrival > 0: dt_ms = stats.avg_inter_arrival * 1000 delta_t = f"{dt_ms:.1f}ms" if dt_ms < 1000 else f"{dt_ms/1000:.1f}s" sigma = "" if stats.std_inter_arrival > 0: sig_ms = stats.std_inter_arrival * 1000 sigma = f"{sig_ms:.1f}ms" if sig_ms < 1000 else f"{sig_ms/1000:.1f}s" outliers = len(stats.outlier_frames) outlier_str = f"⚠️ {outliers}" if outliers > 0 else f"{outliers}" # Format outlier frames (show first 5) outlier_frames = "" if stats.outlier_frames: frames = sorted(stats.outlier_frames[:5]) outlier_frames = ", ".join(map(str, frames)) if len(stats.outlier_frames) > 5: outlier_frames += f", +{len(stats.outlier_frames) - 5}" lines.append( f"| `{frame_type}` | {stats.count:,} | {percentage:.1f}% | {delta_t} | {sigma} | {outlier_str} | {outlier_frames} |" ) lines.extend(["", ""]) return lines def _generate_timing_analysis_section(self, flow: FlowStats) -> List[str]: """Generate timing analysis section""" lines = [ "#### ⏱️ Timing Analysis", "" ] if len(flow.inter_arrival_times) < 2: lines.extend([ "*Insufficient timing data for analysis*", "" ]) return lines # Overall timing metrics avg_ms = flow.avg_inter_arrival * 1000 std_ms = flow.std_inter_arrival * 1000 jitter_ms = flow.jitter * 1000 outlier_pct = len(flow.outlier_frames) / flow.frame_count * 100 if flow.frame_count > 0 else 0 lines.extend([ "| Timing Metric | Value |", "|---------------|-------|", f"| **Average Inter-arrival** | {avg_ms:.2f}ms |", f"| **Standard Deviation** | {std_ms:.2f}ms |", f"| **Jitter** | {jitter_ms:.2f}ms |", f"| **Outlier Percentage** | {outlier_pct:.1f}% |", f"| **Total Outliers** | {len(flow.outlier_frames)} |", "" ]) # Outlier Frame Details if flow.outlier_frames: lines.extend([ "##### 🚨 Outlier Frames", "", f"**Frame Numbers:** {', '.join(map(str, sorted(flow.outlier_frames)))}", "" ]) if flow.outlier_details: lines.extend([ "| Frame # | Inter-arrival Time | Deviation |", "|---------|-------------------|-----------|" ]) # Show up to 20 outliers in detail for frame_num, inter_time in sorted(flow.outlier_details[:20]): deviation = (inter_time - flow.avg_inter_arrival) / flow.std_inter_arrival if flow.std_inter_arrival > 0 else 0 lines.append( f"| {frame_num} | {inter_time * 1000:.3f}ms | {deviation:.1f}σ |" ) if len(flow.outlier_details) > 20: lines.append(f"| ... | +{len(flow.outlier_details) - 20} more | ... |") lines.append("") # Timing Quality Assessment if outlier_pct < 1: timing_assessment = "🟢 **Excellent** - Very stable timing" elif outlier_pct < 5: timing_assessment = "🟡 **Good** - Minor timing variations" elif outlier_pct < 10: timing_assessment = "🟠 **Fair** - Noticeable timing issues" else: timing_assessment = "🔴 **Poor** - Significant timing problems" lines.extend([ f"**Timing Quality:** {timing_assessment}", "" ]) return lines def _generate_statistics_summary(self, flows: List[FlowStats]) -> List[str]: """Generate overall statistics summary""" if not flows: return [] # Calculate aggregate statistics total_packets = sum(flow.frame_count for flow in flows) total_bytes = sum(flow.total_bytes for flow in flows) total_outliers = sum(len(flow.outlier_frames) for flow in flows) # Protocol distribution protocol_counts = {} for flow in flows: proto = flow.transport_protocol protocol_counts[proto] = protocol_counts.get(proto, 0) + 1 # Enhanced protocol distribution enhanced_types = {} for flow in flows: if flow.enhanced_analysis.decoder_type != "Standard": enhanced_types[flow.enhanced_analysis.decoder_type] = enhanced_types.get(flow.enhanced_analysis.decoder_type, 0) + 1 lines = [ "---", "", "## 📈 Statistical Summary", "", "### Protocol Distribution", "", "| Protocol | Flows | Percentage |", "|----------|-------|------------|" ] for protocol, count in sorted(protocol_counts.items(), key=lambda x: x[1], reverse=True): percentage = count / len(flows) * 100 lines.append(f"| {protocol} | {count} | {percentage:.1f}% |") if enhanced_types: lines.extend([ "", "### Enhanced Protocol Analysis", "", "| Enhanced Type | Flows | Percentage |", "|---------------|-------|------------|" ]) for enhanced_type, count in sorted(enhanced_types.items(), key=lambda x: x[1], reverse=True): percentage = count / len(flows) * 100 lines.append(f"| {enhanced_type} | {count} | {percentage:.1f}% |") lines.extend([ "", "### Overall Metrics", "", f"- **Total Analysis Duration:** {max(f.last_seen for f in flows if f.last_seen > 0) - min(f.first_seen for f in flows if f.first_seen > 0):.2f}s", f"- **Average Packets per Flow:** {total_packets / len(flows):.1f}", f"- **Average Bytes per Flow:** {self._format_bytes(total_bytes // len(flows))}", f"- **Overall Outlier Rate:** {total_outliers / total_packets * 100:.2f}%", "", "---", "", "*Report generated by StreamLens Network Analysis Tool*" ]) return lines def _get_flow_status_emoji(self, flow: FlowStats) -> str: """Get emoji for flow status""" if flow.enhanced_analysis.decoder_type != "Standard": return "🔬" # Enhanced elif len(flow.outlier_frames) > flow.frame_count * 0.1: return "⚠️" # Alert elif len(flow.outlier_frames) > 0: return "⚡" # Warning else: return "✅" # Normal def _get_quality_score(self, flow: FlowStats) -> int: """Calculate quality score for flow""" if flow.enhanced_analysis.decoder_type != "Standard": return int(flow.enhanced_analysis.avg_frame_quality) else: # Base quality on outlier percentage outlier_pct = len(flow.outlier_frames) / flow.frame_count * 100 if flow.frame_count > 0 else 0 return max(0, int(100 - outlier_pct * 10)) def _format_bytes(self, bytes_count: int) -> str: """Format byte count with units""" if bytes_count >= 1_000_000_000: return f"{bytes_count / 1_000_000_000:.2f} GB" elif bytes_count >= 1_000_000: return f"{bytes_count / 1_000_000:.2f} MB" elif bytes_count >= 1_000: return f"{bytes_count / 1_000:.2f} KB" else: return f"{bytes_count} B" def _format_timestamp(self, timestamp: float) -> str: """Format timestamp for display""" if timestamp == 0: return "N/A" dt = datetime.datetime.fromtimestamp(timestamp) return dt.strftime("%H:%M:%S.%f")[:-3]