# StreamLens/analyzer/reporting/flow_report.py

"""
Flow Analysis Report Generator
Generates comprehensive flow analysis reports with markup formatting
"""
import datetime
from typing import Dict, List, Optional
from pathlib import Path

from ..models import FlowStats, FrameTypeStats


class FlowReportGenerator:
    """Generate comprehensive flow analysis reports"""

    def __init__(self, analyzer):
        self.analyzer = analyzer

    def generate_report(self, output_path: Optional[str] = None, format_type: str = "markdown") -> str:
        """Generate comprehensive flow analysis report"""
        if format_type == "markdown":
            return self._generate_markdown_report(output_path)
        elif format_type == "html":
            return self._generate_html_report(output_path)
        else:
            return self._generate_text_report(output_path)

    def _generate_markdown_report(self, output_path: Optional[str] = None) -> str:
        """Generate markdown-formatted report"""
        flows = list(self.analyzer.flows.values())

        # Sort flows by importance: enhanced-protocol flows first, then by
        # outlier count, then by packet count
        flows.sort(key=lambda x: (
            x.enhanced_analysis.decoder_type != "Standard",
            len(x.outlier_frames),
            x.frame_count
        ), reverse=True)

        report_lines = []

        # Header
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        report_lines.extend([
            "# StreamLens Flow Analysis Report",
            f"**Generated:** {timestamp}",
            f"**Total Flows:** {len(flows)}",
            f"**Analysis Engine:** {self.analyzer.__class__.__name__}",
            "",
            "---",
            ""
        ])

        # Executive summary
        report_lines.extend(self._generate_executive_summary(flows))

        # Detailed flow analysis
        report_lines.extend([
            "## 📊 Detailed Flow Analysis",
            ""
        ])
        for i, flow in enumerate(flows, 1):
            report_lines.extend(self._generate_flow_section(flow, i))

        # Statistics summary
        report_lines.extend(self._generate_statistics_summary(flows))

        report_content = "\n".join(report_lines)

        # Save to file if a path was provided
        if output_path:
            output_file = Path(output_path)
            output_file.write_text(report_content, encoding='utf-8')

        return report_content

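    # Illustrative shape of the rendered header (values are examples only; the
    # engine name is the class name of whatever analyzer was passed in, so the
    # `FlowAnalyzer` shown here is a placeholder):
    #
    #   # StreamLens Flow Analysis Report
    #   **Generated:** 2024-01-01 12:00:00
    #   **Total Flows:** 3
    #   **Analysis Engine:** FlowAnalyzer
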
    def _generate_executive_summary(self, flows: List[FlowStats]) -> List[str]:
        """Generate executive summary section"""
        total_packets = sum(flow.frame_count for flow in flows)
        total_bytes = sum(flow.total_bytes for flow in flows)
        enhanced_flows = [f for f in flows if f.enhanced_analysis.decoder_type != "Standard"]
        high_outlier_flows = [f for f in flows if len(f.outlier_frames) > f.frame_count * 0.1]
        flow_count = max(len(flows), 1)  # guard against division by zero on empty captures
        return [
            "## 📋 Executive Summary",
            "",
            f"- **Total Network Flows:** {len(flows)}",
            f"- **Total Packets Analyzed:** {total_packets:,}",
            f"- **Total Data Volume:** {self._format_bytes(total_bytes)}",
            f"- **Enhanced Protocol Flows:** {len(enhanced_flows)} ({len(enhanced_flows) / flow_count * 100:.1f}%)",
            f"- **Flows with Timing Issues:** {len(high_outlier_flows)} ({len(high_outlier_flows) / flow_count * 100:.1f}%)",
            "",
            "### 🎯 Key Findings",
            ""
        ]

    def _generate_flow_section(self, flow: FlowStats, flow_num: int) -> List[str]:
        """Generate detailed section for a single flow"""
        lines = []

        # Flow header
        status_emoji = self._get_flow_status_emoji(flow)
        quality_score = self._get_quality_score(flow)
        lines.extend([
            f"### {status_emoji} Flow #{flow_num}: {flow.src_ip}:{flow.src_port} → {flow.dst_ip}:{flow.dst_port}",
            ""
        ])

        # Basic information table
        lines.extend([
            "| Attribute | Value |",
            "|-----------|-------|",
            f"| **Protocol** | {flow.transport_protocol} |",
            f"| **Classification** | {flow.traffic_classification} |",
            f"| **Packets** | {flow.frame_count:,} |",
            f"| **Volume** | {self._format_bytes(flow.total_bytes)} |",
            f"| **Quality Score** | {quality_score}% |",
            f"| **Duration** | {flow.duration:.2f}s |",
            f"| **First Seen** | {self._format_timestamp(flow.first_seen)} |",
            f"| **Last Seen** | {self._format_timestamp(flow.last_seen)} |",
            ""
        ])

        # Enhanced analysis (if available)
        if flow.enhanced_analysis.decoder_type != "Standard":
            lines.extend(self._generate_enhanced_analysis_section(flow))

        # Frame type breakdown
        if flow.frame_types:
            lines.extend(self._generate_frame_types_section(flow))

        # Timing analysis
        lines.extend(self._generate_timing_analysis_section(flow))

        lines.append("")
        return lines

    def _generate_enhanced_analysis_section(self, flow: FlowStats) -> List[str]:
        """Generate enhanced analysis section"""
        ea = flow.enhanced_analysis
        lines = [
            "#### 🔬 Enhanced Protocol Analysis",
            "",
            "| Metric | Value |",
            "|--------|-------|",
            f"| **Decoder Type** | {ea.decoder_type} |",
            f"| **Frame Quality** | {ea.avg_frame_quality:.1f}% |",
            f"| **Field Count** | {ea.field_count} |",
            f"| **Timing Accuracy** | {ea.timing_accuracy:.1f}% |",
            f"| **Signal Quality** | {ea.signal_quality:.1f}% |"
        ]
        if ea.decoder_type.startswith("Chapter10"):
            lines.extend([
                f"| **Channel Count** | {ea.channel_count} |",
                f"| **Analog Channels** | {ea.analog_channels} |",
                f"| **PCM Channels** | {ea.pcm_channels} |",
                f"| **TMATS Frames** | {ea.tmats_frames} |",
                f"| **Clock Drift** | {ea.avg_clock_drift_ppm:.2f} ppm |",
                f"| **Timing Quality** | {ea.timing_quality} |"
            ])
        lines.extend(["", ""])
        return lines

    def _generate_frame_types_section(self, flow: FlowStats) -> List[str]:
        """Generate frame types breakdown section"""
        lines = [
            "#### 📦 Frame Type Analysis",
            "",
            "| Frame Type | Count | % | Avg ΔT | Std σ | Outliers | Outlier Frames |",
            "|------------|-------|---|---------|--------|----------|----------------|"
        ]

        # Sort frame types by count, most frequent first
        sorted_types = sorted(
            flow.frame_types.items(),
            key=lambda x: x[1].count,
            reverse=True
        )
        total_count = flow.frame_count
        for frame_type, stats in sorted_types:
            percentage = (stats.count / total_count * 100) if total_count > 0 else 0

            # Format timing values
            delta_t = ""
            if stats.avg_inter_arrival > 0:
                dt_ms = stats.avg_inter_arrival * 1000
                delta_t = f"{dt_ms:.1f}ms" if dt_ms < 1000 else f"{dt_ms / 1000:.1f}s"
            sigma = ""
            if stats.std_inter_arrival > 0:
                sig_ms = stats.std_inter_arrival * 1000
                sigma = f"{sig_ms:.1f}ms" if sig_ms < 1000 else f"{sig_ms / 1000:.1f}s"

            outliers = len(stats.outlier_frames)
            outlier_str = f"⚠️ {outliers}" if outliers > 0 else f"{outliers}"

            # Format outlier frames (sort before slicing so the first 5 by
            # frame number are shown)
            outlier_frames = ""
            if stats.outlier_frames:
                frames = sorted(stats.outlier_frames)[:5]
                outlier_frames = ", ".join(map(str, frames))
                if len(stats.outlier_frames) > 5:
                    outlier_frames += f", +{len(stats.outlier_frames) - 5}"

            lines.append(
                f"| `{frame_type}` | {stats.count:,} | {percentage:.1f}% | {delta_t} | {sigma} | {outlier_str} | {outlier_frames} |"
            )

        lines.extend(["", ""])
        return lines

    def _generate_timing_analysis_section(self, flow: FlowStats) -> List[str]:
        """Generate timing analysis section"""
        lines = [
            "#### ⏱️ Timing Analysis",
            ""
        ]
        if len(flow.inter_arrival_times) < 2:
            lines.extend([
                "*Insufficient timing data for analysis*",
                ""
            ])
            return lines

        # Overall timing metrics
        avg_ms = flow.avg_inter_arrival * 1000
        std_ms = flow.std_inter_arrival * 1000
        jitter_ms = flow.jitter * 1000
        outlier_pct = len(flow.outlier_frames) / flow.frame_count * 100 if flow.frame_count > 0 else 0
        lines.extend([
            "| Timing Metric | Value |",
            "|---------------|-------|",
            f"| **Average Inter-arrival** | {avg_ms:.2f}ms |",
            f"| **Standard Deviation** | {std_ms:.2f}ms |",
            f"| **Jitter** | {jitter_ms:.2f}ms |",
            f"| **Outlier Percentage** | {outlier_pct:.1f}% |",
            f"| **Total Outliers** | {len(flow.outlier_frames)} |",
            ""
        ])

        # Outlier frame details
        if flow.outlier_frames:
            lines.extend([
                "##### 🚨 Outlier Frames",
                "",
                f"**Frame Numbers:** {', '.join(map(str, sorted(flow.outlier_frames)))}",
                ""
            ])
            if flow.outlier_details:
                lines.extend([
                    "| Frame # | Inter-arrival Time | Deviation |",
                    "|---------|-------------------|-----------|"
                ])
                # Show up to 20 outliers in detail (sort before slicing so the
                # lowest frame numbers are shown)
                for frame_num, inter_time in sorted(flow.outlier_details)[:20]:
                    deviation = (inter_time - flow.avg_inter_arrival) / flow.std_inter_arrival if flow.std_inter_arrival > 0 else 0
                    lines.append(
                        f"| {frame_num} | {inter_time * 1000:.3f}ms | {deviation:.1f}σ |"
                    )
                if len(flow.outlier_details) > 20:
                    lines.append(f"| ... | +{len(flow.outlier_details) - 20} more | ... |")
                lines.append("")

        # Timing quality assessment
        if outlier_pct < 1:
            timing_assessment = "🟢 **Excellent** - Very stable timing"
        elif outlier_pct < 5:
            timing_assessment = "🟡 **Good** - Minor timing variations"
        elif outlier_pct < 10:
            timing_assessment = "🟠 **Fair** - Noticeable timing issues"
        else:
            timing_assessment = "🔴 **Poor** - Significant timing problems"
        lines.extend([
            f"**Timing Quality:** {timing_assessment}",
            ""
        ])
        return lines

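    # The "Deviation" column rendered above is a z-score: for an outlier with
    # inter-arrival time t, deviation = (t - avg_inter_arrival) / std_inter_arrival,
    # so a value of 3.0σ means the gap was three standard deviations above the
    # flow's mean inter-arrival time.
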
    def _generate_statistics_summary(self, flows: List[FlowStats]) -> List[str]:
        """Generate overall statistics summary"""
        if not flows:
            return []

        # Aggregate statistics
        total_packets = sum(flow.frame_count for flow in flows)
        total_bytes = sum(flow.total_bytes for flow in flows)
        total_outliers = sum(len(flow.outlier_frames) for flow in flows)
        outlier_rate = total_outliers / total_packets * 100 if total_packets > 0 else 0

        # Protocol distribution
        protocol_counts = {}
        for flow in flows:
            proto = flow.transport_protocol
            protocol_counts[proto] = protocol_counts.get(proto, 0) + 1

        # Enhanced protocol distribution
        enhanced_types = {}
        for flow in flows:
            decoder_type = flow.enhanced_analysis.decoder_type
            if decoder_type != "Standard":
                enhanced_types[decoder_type] = enhanced_types.get(decoder_type, 0) + 1

        lines = [
            "---",
            "",
            "## 📈 Statistical Summary",
            "",
            "### Protocol Distribution",
            "",
            "| Protocol | Flows | Percentage |",
            "|----------|-------|------------|"
        ]
        for protocol, count in sorted(protocol_counts.items(), key=lambda x: x[1], reverse=True):
            percentage = count / len(flows) * 100
            lines.append(f"| {protocol} | {count} | {percentage:.1f}% |")

        if enhanced_types:
            lines.extend([
                "",
                "### Enhanced Protocol Analysis",
                "",
                "| Enhanced Type | Flows | Percentage |",
                "|---------------|-------|------------|"
            ])
            for enhanced_type, count in sorted(enhanced_types.items(), key=lambda x: x[1], reverse=True):
                percentage = count / len(flows) * 100
                lines.append(f"| {enhanced_type} | {count} | {percentage:.1f}% |")

        # Guard against flows that never recorded valid timestamps
        timed_flows = [f for f in flows if f.first_seen > 0 and f.last_seen > 0]
        if timed_flows:
            analysis_duration = max(f.last_seen for f in timed_flows) - min(f.first_seen for f in timed_flows)
        else:
            analysis_duration = 0.0

        lines.extend([
            "",
            "### Overall Metrics",
            "",
            f"- **Total Analysis Duration:** {analysis_duration:.2f}s",
            f"- **Average Packets per Flow:** {total_packets / len(flows):.1f}",
            f"- **Average Bytes per Flow:** {self._format_bytes(total_bytes // len(flows))}",
            f"- **Overall Outlier Rate:** {outlier_rate:.2f}%",
            "",
            "---",
            "",
            "*Report generated by StreamLens Network Analysis Tool*"
        ])
        return lines

    def _get_flow_status_emoji(self, flow: FlowStats) -> str:
        """Get emoji for flow status"""
        if flow.enhanced_analysis.decoder_type != "Standard":
            return "🔬"  # Enhanced
        elif len(flow.outlier_frames) > flow.frame_count * 0.1:
            return "⚠️"  # Alert
        elif len(flow.outlier_frames) > 0:
            return "🟡"  # Warning
        else:
            return "🟢"  # Normal

    def _get_quality_score(self, flow: FlowStats) -> int:
        """Calculate quality score for flow"""
        if flow.enhanced_analysis.decoder_type != "Standard":
            return int(flow.enhanced_analysis.avg_frame_quality)
        else:
            # Base quality on outlier percentage
            outlier_pct = len(flow.outlier_frames) / flow.frame_count * 100 if flow.frame_count > 0 else 0
            return max(0, int(100 - outlier_pct * 10))

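    # Worked example of the fallback heuristic above (illustrative numbers):
    # a flow with 1,000 frames and 30 outliers has a 3.0% outlier rate, so it
    # scores max(0, int(100 - 3.0 * 10)) == 70; at a 10% outlier rate or more
    # the score bottoms out at 0.
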
    def _format_bytes(self, bytes_count: int) -> str:
        """Format byte count with units"""
        if bytes_count >= 1_000_000_000:
            return f"{bytes_count / 1_000_000_000:.2f} GB"
        elif bytes_count >= 1_000_000:
            return f"{bytes_count / 1_000_000:.2f} MB"
        elif bytes_count >= 1_000:
            return f"{bytes_count / 1_000:.2f} KB"
        else:
            return f"{bytes_count} B"

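    # Note: decimal (SI) units are used above, so 1_500_000 bytes renders as
    # "1.50 MB" rather than the binary "1.43 MiB".
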
    def _format_timestamp(self, timestamp: float) -> str:
        """Format timestamp for display"""
        if timestamp == 0:
            return "N/A"
        dt = datetime.datetime.fromtimestamp(timestamp)
        return dt.strftime("%H:%M:%S.%f")[:-3]

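
if __name__ == "__main__":
    # Minimal smoke test: a sketch only, since the real analyzer class is not
    # shown in this file; a stub with an empty `flows` mapping stands in for it.
    # Run as a module (e.g. `python -m StreamLens.analyzer.reporting.flow_report`)
    # because of the relative import above.
    class _StubAnalyzer:
        flows = {}

    print(FlowReportGenerator(_StubAnalyzer()).generate_report(format_type="markdown"))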