Files
StreamLens/debug_outlier_detection.py

105 lines
4.5 KiB
Python
Raw Permalink Normal View History

2025-07-30 23:48:32 -04:00
#!/usr/bin/env python3
"""Debug outlier detection for specific flow"""
import sys
sys.path.append('.')
from analyzer.analysis import EthernetAnalyzer
from analyzer.utils import PCAPLoader
import statistics
def _deviation_in_sigmas(inter_time, mean, std_dev):
    """Return how many standard deviations ``inter_time`` lies from ``mean``.

    Returns 0 when ``std_dev`` is not positive so callers never divide by zero.
    """
    if std_dev > 0:
        return (inter_time - mean) / std_dev
    return 0


def _percentile(sorted_values, fraction):
    """Return the element at index ``int(len * fraction)`` of a non-empty ascending list.

    The index is clamped to the last element so the lookup can never run past
    the end of the list.
    """
    last = len(sorted_values) - 1
    return sorted_values[min(int(len(sorted_values) * fraction), last)]


def analyze_flow_timing(pcap_file, src_ip="192.168.4.89",
                        problematic_frames=(1576, 1582, 1634, 1640),
                        sigma_threshold=3.0):
    """Print a timing / outlier report for the first flow sent by ``src_ip``.

    The capture is loaded with ``PCAPLoader``, every packet is fed to an
    ``EthernetAnalyzer`` and, after ``calculate_statistics()``, the first flow
    whose ``src_ip`` matches is reported on stdout: outlier summary, mean and
    standard deviation of the inter-arrival times, details for each frame in
    ``problematic_frames``, the inter-arrival distribution and the first 20
    inter-arrival samples.

    Args:
        pcap_file: Path of the capture file handed to ``PCAPLoader``.
        src_ip: Source IP address used to select the flow.
        problematic_frames: Frame numbers to inspect individually. They are
            looked up by value in the flow's ``frame_numbers`` list.
        sigma_threshold: Sigma multiplier passed to the analyzer as
            ``outlier_threshold_sigma`` and used for the printed threshold.

    Returns:
        None. All results are printed.
    """
    analyzer = EthernetAnalyzer(outlier_threshold_sigma=sigma_threshold)

    loader = PCAPLoader(pcap_file)
    packets = loader.load_all()
    print(f"Loaded {len(packets)} packets from {pcap_file}")

    # Packets are numbered from 1 in capture order.
    # NOTE(review): relies on the analyzer's private per-packet entry point.
    for frame_number, packet in enumerate(packets, 1):
        analyzer._process_single_packet(packet, frame_number)

    analyzer.calculate_statistics()

    # First flow originating from the requested source address.
    target_flow = next(
        (flow for flow in analyzer.flows.values() if flow.src_ip == src_ip),
        None,
    )
    if target_flow is None:
        print(f"Flow from {src_ip} not found!")
        return
    print(f"\nFound flow: {target_flow.src_ip}:{target_flow.src_port} -> {target_flow.dst_ip}:{target_flow.dst_port}")

    outlier_frames = target_flow.outlier_frames
    print(f"Total packets in flow: {target_flow.frame_count}")
    print(f"Total outliers detected: {len(outlier_frames)}")
    print(f"Outlier frames: {outlier_frames}")

    mean = target_flow.avg_inter_arrival
    std_dev = target_flow.std_inter_arrival
    frame_numbers = target_flow.frame_numbers
    inter_arrival_times = target_flow.inter_arrival_times
    timestamps = target_flow.timestamps

    print("\n=== Timing Analysis ===")
    print(f"Average inter-arrival: {mean * 1000:.3f} ms")
    print(f"Std deviation: {std_dev * 1000:.3f} ms")
    print(f"Outlier threshold ({sigma_threshold:g}σ): {(mean + sigma_threshold * std_dev) * 1000:.3f} ms")

    print("\n=== Problematic Frame Analysis ===")
    for frame_number in problematic_frames:
        # Membership is decided by looking the frame number up by value.
        # Comparing it with the number of frames in the flow would wrongly
        # skip frames of flows shorter than the frame number itself.
        try:
            flow_idx = frame_numbers.index(frame_number)
        except ValueError:
            print(f"\nFrame {frame_number} not found in flow")
            continue

        # inter_arrival_times[k - 1] is the gap between flow frames k - 1 and
        # k, so the first frame of the flow has no sample.
        if flow_idx == 0 or flow_idx > len(inter_arrival_times):
            print(f"\nFrame {frame_number}: no inter-arrival time recorded")
            continue

        inter_time = inter_arrival_times[flow_idx - 1]
        deviation = _deviation_in_sigmas(inter_time, mean, std_dev)
        print(f"\nFrame {frame_number}:")
        print(f" Timestamp: {timestamps[flow_idx]:.6f}")
        print(f" Prev timestamp: {timestamps[flow_idx - 1]:.6f}")
        print(f" Inter-arrival: {inter_time * 1000:.3f} ms")
        print(f" Deviation: {deviation:.2f}σ")
        print(f" Is outlier: {frame_number in outlier_frames}")

    print("\n=== Inter-arrival Time Distribution ===")
    if inter_arrival_times:
        times_ms = [t * 1000 for t in inter_arrival_times]
        print(f"Min: {min(times_ms):.3f} ms")
        print(f"Max: {max(times_ms):.3f} ms")
        print(f"Median: {statistics.median(times_ms):.3f} ms")

        sorted_times = sorted(times_ms)
        print(f"90th percentile: {_percentile(sorted_times, 0.9):.3f} ms")
        print(f"95th percentile: {_percentile(sorted_times, 0.95):.3f} ms")
        print(f"99th percentile: {_percentile(sorted_times, 0.99):.3f} ms")

    # Sample i of inter_arrival_times belongs to flow frame i + 1, hence the
    # frame-number slice starts at 1.
    print("\n=== First 20 Inter-arrival Times ===")
    for frame_num, inter_time in zip(frame_numbers[1:21], inter_arrival_times[:20]):
        deviation = _deviation_in_sigmas(inter_time, mean, std_dev)
        outlier_mark = " *OUTLIER*" if frame_num in outlier_frames else ""
        print(f"Frame {frame_num}: {inter_time * 1000:.3f} ms ({deviation:.2f}σ){outlier_mark}")
if __name__ == "__main__":
    # Analyze the capture named on the command line; with no argument, fall
    # back to the known problematic capture file.
    cli_args = sys.argv[1:]
    capture_path = cli_args[0] if cli_args else "1 PTPGM.pcapng"
    analyze_flow_timing(capture_path)