"""
|
||
Flow Analysis Report Generator
|
||
Generates comprehensive flow analysis reports with markup formatting
|
||
"""
|
||
|
||
import datetime
|
||
from typing import Dict, List, Optional
|
||
from pathlib import Path
|
||
from ..models import FlowStats, FrameTypeStats
|
||
|
||
|
||
class FlowReportGenerator:
    """Generate comprehensive flow analysis reports"""

    def __init__(self, analyzer):
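        # Assumes the analyzer exposes a `flows` mapping of flow key ->
        # FlowStats (see _generate_markdown_report below).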
        self.analyzer = analyzer

    def generate_report(self, output_path: Optional[str] = None, format_type: str = "markdown") -> str:
        """Generate comprehensive flow analysis report"""
        if format_type == "markdown":
            return self._generate_markdown_report(output_path)
        elif format_type == "html":
            return self._generate_html_report(output_path)
        else:
            return self._generate_text_report(output_path)

    def _generate_markdown_report(self, output_path: Optional[str] = None) -> str:
        """Generate markdown-formatted report"""
        flows = list(self.analyzer.flows.values())

        # Sort flows by importance: enhanced-decoder flows first, then by
        # outlier count, then by packet count
        flows.sort(key=lambda x: (
            x.enhanced_analysis.decoder_type != "Standard",
            len(x.outlier_frames),
            x.frame_count
        ), reverse=True)

        report_lines = []

        # Header
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        report_lines.extend([
            "# StreamLens Flow Analysis Report",
            f"**Generated:** {timestamp}",
            f"**Total Flows:** {len(flows)}",
            f"**Analysis Engine:** {self.analyzer.__class__.__name__}",
            "",
            "---",
            ""
        ])

        # Executive Summary
        report_lines.extend(self._generate_executive_summary(flows))

        # Detailed Flow Analysis
        report_lines.extend([
            "## 📊 Detailed Flow Analysis",
            ""
        ])

        for i, flow in enumerate(flows, 1):
            report_lines.extend(self._generate_flow_section(flow, i))

        # Statistics Summary
        report_lines.extend(self._generate_statistics_summary(flows))

        report_content = "\n".join(report_lines)

        # Save to file if a path was provided
        if output_path:
            output_file = Path(output_path)
            output_file.write_text(report_content, encoding='utf-8')

        return report_content

    def _generate_executive_summary(self, flows: List[FlowStats]) -> List[str]:
        """Generate executive summary section"""
        if not flows:
            return ["## 📋 Executive Summary", "", "*No flows analyzed*", ""]

        total_packets = sum(flow.frame_count for flow in flows)
        total_bytes = sum(flow.total_bytes for flow in flows)
        enhanced_flows = [f for f in flows if f.enhanced_analysis.decoder_type != "Standard"]
        # Flows where more than 10% of frames are timing outliers
        high_outlier_flows = [f for f in flows if len(f.outlier_frames) > f.frame_count * 0.1]

        return [
            "## 📋 Executive Summary",
            "",
            f"- **Total Network Flows:** {len(flows)}",
            f"- **Total Packets Analyzed:** {total_packets:,}",
            f"- **Total Data Volume:** {self._format_bytes(total_bytes)}",
            f"- **Enhanced Protocol Flows:** {len(enhanced_flows)} ({len(enhanced_flows)/len(flows)*100:.1f}%)",
            f"- **Flows with Timing Issues:** {len(high_outlier_flows)} ({len(high_outlier_flows)/len(flows)*100:.1f}%)",
            "",
            "### 🎯 Key Findings",
            ""
        ]

    def _generate_flow_section(self, flow: FlowStats, flow_num: int) -> List[str]:
        """Generate detailed section for a single flow"""
        lines = []

        # Flow Header
        status_emoji = self._get_flow_status_emoji(flow)
        quality_score = self._get_quality_score(flow)

        lines.extend([
            f"### {status_emoji} Flow #{flow_num}: {flow.src_ip}:{flow.src_port} → {flow.dst_ip}:{flow.dst_port}",
            ""
        ])

        # Basic Information Table
        lines.extend([
            "| Attribute | Value |",
            "|-----------|-------|",
            f"| **Protocol** | {flow.transport_protocol} |",
            f"| **Classification** | {flow.traffic_classification} |",
            f"| **Packets** | {flow.frame_count:,} |",
            f"| **Volume** | {self._format_bytes(flow.total_bytes)} |",
            f"| **Quality Score** | {quality_score}% |",
            f"| **Duration** | {flow.duration:.2f}s |",
            f"| **First Seen** | {self._format_timestamp(flow.first_seen)} |",
            f"| **Last Seen** | {self._format_timestamp(flow.last_seen)} |",
            ""
        ])

        # Enhanced Analysis (if available)
        if flow.enhanced_analysis.decoder_type != "Standard":
            lines.extend(self._generate_enhanced_analysis_section(flow))

        # Frame Type Breakdown
        if flow.frame_types:
            lines.extend(self._generate_frame_types_section(flow))

        # Timing Analysis
        lines.extend(self._generate_timing_analysis_section(flow))

        lines.append("")
        return lines

    def _generate_enhanced_analysis_section(self, flow: FlowStats) -> List[str]:
        """Generate enhanced analysis section"""
        ea = flow.enhanced_analysis

        lines = [
            "#### 🔬 Enhanced Protocol Analysis",
            "",
            "| Metric | Value |",
            "|--------|-------|",
            f"| **Decoder Type** | {ea.decoder_type} |",
            f"| **Frame Quality** | {ea.avg_frame_quality:.1f}% |",
            f"| **Field Count** | {ea.field_count} |",
            f"| **Timing Accuracy** | {ea.timing_accuracy:.1f}% |",
            f"| **Signal Quality** | {ea.signal_quality:.1f}% |"
        ]

        # Chapter 10 decoders carry additional channel and clock metrics
        if ea.decoder_type.startswith("Chapter10"):
            lines.extend([
                f"| **Channel Count** | {ea.channel_count} |",
                f"| **Analog Channels** | {ea.analog_channels} |",
                f"| **PCM Channels** | {ea.pcm_channels} |",
                f"| **TMATS Frames** | {ea.tmats_frames} |",
                f"| **Clock Drift** | {ea.avg_clock_drift_ppm:.2f} ppm |",
                f"| **Timing Quality** | {ea.timing_quality} |"
            ])

        lines.extend(["", ""])
        return lines

    def _generate_frame_types_section(self, flow: FlowStats) -> List[str]:
        """Generate frame types breakdown section"""
        lines = [
            "#### 📦 Frame Type Analysis",
            "",
            "| Frame Type | Count | % | Avg ΔT | Std σ | Outliers | Outlier Frames |",
            "|------------|-------|---|---------|--------|----------|----------------|"
        ]

        # Sort frame types by count
        sorted_types = sorted(
            flow.frame_types.items(),
            key=lambda x: x[1].count,
            reverse=True
        )

        total_count = flow.frame_count
        for frame_type, stats in sorted_types:
            percentage = (stats.count / total_count * 100) if total_count > 0 else 0

            # Format timing values
            delta_t = ""
            if stats.avg_inter_arrival > 0:
                dt_ms = stats.avg_inter_arrival * 1000
                delta_t = f"{dt_ms:.1f}ms" if dt_ms < 1000 else f"{dt_ms/1000:.1f}s"

            sigma = ""
            if stats.std_inter_arrival > 0:
                sig_ms = stats.std_inter_arrival * 1000
                sigma = f"{sig_ms:.1f}ms" if sig_ms < 1000 else f"{sig_ms/1000:.1f}s"

            outliers = len(stats.outlier_frames)
            outlier_str = f"⚠️ {outliers}" if outliers > 0 else str(outliers)

            # Show the first five outlier frame numbers (sort before slicing
            # so the lowest frame numbers are listed)
            outlier_frames = ""
            if stats.outlier_frames:
                frames = sorted(stats.outlier_frames)[:5]
                outlier_frames = ", ".join(map(str, frames))
                if len(stats.outlier_frames) > 5:
                    outlier_frames += f", +{len(stats.outlier_frames) - 5}"

            lines.append(
                f"| `{frame_type}` | {stats.count:,} | {percentage:.1f}% | {delta_t} | {sigma} | {outlier_str} | {outlier_frames} |"
            )

        lines.extend(["", ""])
        return lines

    def _generate_timing_analysis_section(self, flow: FlowStats) -> List[str]:
        """Generate timing analysis section"""
        lines = [
            "#### ⏱️ Timing Analysis",
            ""
        ]

        if len(flow.inter_arrival_times) < 2:
            lines.extend([
                "*Insufficient timing data for analysis*",
                ""
            ])
            return lines

        # Overall timing metrics
        avg_ms = flow.avg_inter_arrival * 1000
        std_ms = flow.std_inter_arrival * 1000
        jitter_ms = flow.jitter * 1000
        outlier_pct = len(flow.outlier_frames) / flow.frame_count * 100 if flow.frame_count > 0 else 0

        lines.extend([
            "| Timing Metric | Value |",
            "|---------------|-------|",
            f"| **Average Inter-arrival** | {avg_ms:.2f}ms |",
            f"| **Standard Deviation** | {std_ms:.2f}ms |",
            f"| **Jitter** | {jitter_ms:.2f}ms |",
            f"| **Outlier Percentage** | {outlier_pct:.1f}% |",
            f"| **Total Outliers** | {len(flow.outlier_frames)} |",
            ""
        ])

        # Outlier Frame Details
        if flow.outlier_frames:
            lines.extend([
                "##### 🚨 Outlier Frames",
                "",
                f"**Frame Numbers:** {', '.join(map(str, sorted(flow.outlier_frames)))}",
                ""
            ])

            if flow.outlier_details:
                lines.extend([
                    "| Frame # | Inter-arrival Time | Deviation |",
                    "|---------|-------------------|-----------|"
                ])

                # Show up to 20 outliers in detail; sort the full list before
                # slicing so the lowest frame numbers are detailed first
                for frame_num, inter_time in sorted(flow.outlier_details)[:20]:
                    # Deviation expressed as a z-score (in σ) from the flow average
                    deviation = (inter_time - flow.avg_inter_arrival) / flow.std_inter_arrival if flow.std_inter_arrival > 0 else 0
                    lines.append(
                        f"| {frame_num} | {inter_time * 1000:.3f}ms | {deviation:.1f}σ |"
                    )

                if len(flow.outlier_details) > 20:
                    lines.append(f"| ... | +{len(flow.outlier_details) - 20} more | ... |")

                lines.append("")

        # Timing Quality Assessment
        if outlier_pct < 1:
            timing_assessment = "🟢 **Excellent** - Very stable timing"
        elif outlier_pct < 5:
            timing_assessment = "🟡 **Good** - Minor timing variations"
        elif outlier_pct < 10:
            timing_assessment = "🟠 **Fair** - Noticeable timing issues"
        else:
            timing_assessment = "🔴 **Poor** - Significant timing problems"

        lines.extend([
            f"**Timing Quality:** {timing_assessment}",
            ""
        ])

        return lines

    def _generate_statistics_summary(self, flows: List[FlowStats]) -> List[str]:
        """Generate overall statistics summary"""
        if not flows:
            return []

        # Calculate aggregate statistics
        total_packets = sum(flow.frame_count for flow in flows)
        total_bytes = sum(flow.total_bytes for flow in flows)
        total_outliers = sum(len(flow.outlier_frames) for flow in flows)

        # Protocol distribution
        protocol_counts = {}
        for flow in flows:
            proto = flow.transport_protocol
            protocol_counts[proto] = protocol_counts.get(proto, 0) + 1

        # Enhanced protocol distribution
        enhanced_types = {}
        for flow in flows:
            decoder_type = flow.enhanced_analysis.decoder_type
            if decoder_type != "Standard":
                enhanced_types[decoder_type] = enhanced_types.get(decoder_type, 0) + 1

        lines = [
            "---",
            "",
            "## 📈 Statistical Summary",
            "",
            "### Protocol Distribution",
            "",
            "| Protocol | Flows | Percentage |",
            "|----------|-------|------------|"
        ]

        for protocol, count in sorted(protocol_counts.items(), key=lambda x: x[1], reverse=True):
            percentage = count / len(flows) * 100
            lines.append(f"| {protocol} | {count} | {percentage:.1f}% |")

        if enhanced_types:
            lines.extend([
                "",
                "### Enhanced Protocol Analysis",
                "",
                "| Enhanced Type | Flows | Percentage |",
                "|---------------|-------|------------|"
            ])

            for enhanced_type, count in sorted(enhanced_types.items(), key=lambda x: x[1], reverse=True):
                percentage = count / len(flows) * 100
                lines.append(f"| {enhanced_type} | {count} | {percentage:.1f}% |")

        # Overall metrics; guard against flows that never recorded a timestamp
        # and captures with zero packets
        first_times = [f.first_seen for f in flows if f.first_seen > 0]
        last_times = [f.last_seen for f in flows if f.last_seen > 0]
        analysis_duration = (max(last_times) - min(first_times)) if first_times and last_times else 0.0
        outlier_rate = (total_outliers / total_packets * 100) if total_packets > 0 else 0.0

        lines.extend([
            "",
            "### Overall Metrics",
            "",
            f"- **Total Analysis Duration:** {analysis_duration:.2f}s",
            f"- **Average Packets per Flow:** {total_packets / len(flows):.1f}",
            f"- **Average Bytes per Flow:** {self._format_bytes(total_bytes // len(flows))}",
            f"- **Overall Outlier Rate:** {outlier_rate:.2f}%",
            "",
            "---",
            "",
            "*Report generated by StreamLens Network Analysis Tool*"
        ])

        return lines

    def _get_flow_status_emoji(self, flow: FlowStats) -> str:
        """Get emoji for flow status"""
        if flow.enhanced_analysis.decoder_type != "Standard":
            return "🔬"  # Enhanced decoder
        elif len(flow.outlier_frames) > flow.frame_count * 0.1:
            return "⚠️"  # Alert: more than 10% outliers
        elif len(flow.outlier_frames) > 0:
            return "⚡"  # Warning: some outliers
        else:
            return "✅"  # Normal

    def _get_quality_score(self, flow: FlowStats) -> int:
        """Calculate quality score for flow"""
        if flow.enhanced_analysis.decoder_type != "Standard":
            return int(flow.enhanced_analysis.avg_frame_quality)
        else:
            # Base quality on outlier percentage: each percentage point of
            # outliers deducts 10 points, floored at 0
            outlier_pct = len(flow.outlier_frames) / flow.frame_count * 100 if flow.frame_count > 0 else 0
            return max(0, int(100 - outlier_pct * 10))

    def _format_bytes(self, bytes_count: int) -> str:
        """Format byte count with decimal (SI) units"""
        if bytes_count >= 1_000_000_000:
            return f"{bytes_count / 1_000_000_000:.2f} GB"
        elif bytes_count >= 1_000_000:
            return f"{bytes_count / 1_000_000:.2f} MB"
        elif bytes_count >= 1_000:
            return f"{bytes_count / 1_000:.2f} KB"
        else:
            return f"{bytes_count} B"

    def _format_timestamp(self, timestamp: float) -> str:
        """Format timestamp for display"""
        if timestamp == 0:
            return "N/A"
        dt = datetime.datetime.fromtimestamp(timestamp)
        # Trim microseconds to millisecond precision
        return dt.strftime("%H:%M:%S.%f")[:-3]
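

# --- Usage sketch (illustrative only) ---
# A minimal smoke test of the Markdown path using stand-in objects in place
# of real FlowStats; the attribute names mirror the ones this module reads,
# but the stub values are hypothetical. Run via `python -m <package>.<module>`
# so the relative import at the top of the file resolves.
if __name__ == "__main__":
    from types import SimpleNamespace

    demo_flow = SimpleNamespace(
        src_ip="10.0.0.1", src_port=5000, dst_ip="10.0.0.2", dst_port=6000,
        transport_protocol="UDP", traffic_classification="Telemetry",
        frame_count=1000, total_bytes=1_500_000, duration=12.5,
        first_seen=1_700_000_000.0, last_seen=1_700_000_012.5,
        avg_inter_arrival=0.0125, std_inter_arrival=0.001, jitter=0.0008,
        inter_arrival_times=[0.0125] * 999,
        outlier_frames=[42, 311], outlier_details=[(42, 0.05), (311, 0.04)],
        frame_types={},
        enhanced_analysis=SimpleNamespace(decoder_type="Standard"),
    )
    analyzer = SimpleNamespace(flows={"demo": demo_flow})
    report = FlowReportGenerator(analyzer).generate_report(format_type="markdown")
    print(report[:400])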