Files
StreamLens/debug_outlier_detection.py

105 lines
4.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Debug outlier detection for specific flow"""
import sys
sys.path.append('.')
from analyzer.analysis import EthernetAnalyzer
from analyzer.utils import PCAPLoader
import statistics
def _sigma_deviation(inter_time, mean, std_dev):
    """Return how many standard deviations *inter_time* lies from *mean*.

    Returns 0 when *std_dev* is not positive, so a flow whose inter-arrival
    times never vary does not cause a division by zero.
    """
    if std_dev > 0:
        return (inter_time - mean) / std_dev
    return 0


def analyze_flow_timing(pcap_file, src_ip="192.168.4.89", problematic_frames=None):
    """Print a timing / outlier report for the first flow sent from *src_ip*.

    Loads every packet of *pcap_file*, feeds them through an
    ``EthernetAnalyzer`` and prints, for the first flow whose ``src_ip``
    matches: the flow summary, the mean / standard deviation of the
    inter-arrival times, a detailed look at selected frames, the
    inter-arrival distribution, and the first 20 inter-arrival samples.

    Args:
        pcap_file: Path of the capture file handed to ``PCAPLoader``.
        src_ip: Source IP address used to pick the flow to inspect.
        problematic_frames: Frame numbers to inspect in detail. Defaults to
            the frames that were originally under investigation
            (1576, 1582, 1634, 1640).

    Returns:
        None. All results are printed to stdout; if no flow matches
        *src_ip* a message is printed and the function returns early.
    """
    if problematic_frames is None:
        problematic_frames = [1576, 1582, 1634, 1640]

    # One constant drives both the analyzer and the threshold printed below,
    # so the report cannot drift from what the analyzer was configured with.
    outlier_sigma = 3.0

    # Create analyzer
    analyzer = EthernetAnalyzer(outlier_threshold_sigma=outlier_sigma)

    # Load PCAP
    loader = PCAPLoader(pcap_file)
    packets = loader.load_all()
    print(f"Loaded {len(packets)} packets from {pcap_file}")

    # Process packets; frame numbers handed to the analyzer are 1-based
    for i, packet in enumerate(packets, 1):
        analyzer._process_single_packet(packet, i)

    # Calculate statistics
    analyzer.calculate_statistics()

    # Find the specific flow (first match wins)
    target_flow = None
    for flow in analyzer.flows.values():
        if flow.src_ip == src_ip:
            target_flow = flow
            print(f"\nFound flow: {flow.src_ip}:{flow.src_port} -> {flow.dst_ip}:{flow.dst_port}")
            break

    if target_flow is None:
        print(f"Flow from {src_ip} not found!")
        return

    avg_inter_arrival = target_flow.avg_inter_arrival
    std_inter_arrival = target_flow.std_inter_arrival

    print(f"Total packets in flow: {target_flow.frame_count}")
    print(f"Total outliers detected: {len(target_flow.outlier_frames)}")
    print(f"Outlier frames: {target_flow.outlier_frames}")

    print("\n=== Timing Analysis ===")
    print(f"Average inter-arrival: {avg_inter_arrival * 1000:.3f} ms")
    print(f"Std deviation: {std_inter_arrival * 1000:.3f} ms")
    threshold = avg_inter_arrival + outlier_sigma * std_inter_arrival
    print(f"Outlier threshold ({outlier_sigma:g}σ): {threshold * 1000:.3f} ms")

    # Check timing for specific frames
    print("\n=== Problematic Frame Analysis ===")
    for frame_idx in problematic_frames:
        # Look the frame up by its number. A frame number is not bounded by
        # how many frames the flow holds, so no length pre-check is done:
        # index() raising ValueError is the only "not in this flow" signal.
        try:
            flow_idx = target_flow.frame_numbers.index(frame_idx)
        except ValueError:
            print(f"\nFrame {frame_idx} not found in flow")
            continue

        # Inter-arrival time k-1 is the gap between flow frames k-1 and k,
        # so the first frame of the flow (position 0) has no sample.
        if not 0 < flow_idx <= len(target_flow.inter_arrival_times):
            print(
                f"\nFrame {frame_idx}: no inter-arrival time recorded "
                f"(flow position {flow_idx})"
            )
            continue

        inter_time = target_flow.inter_arrival_times[flow_idx - 1]
        timestamp = target_flow.timestamps[flow_idx]
        prev_timestamp = target_flow.timestamps[flow_idx - 1]
        deviation = _sigma_deviation(inter_time, avg_inter_arrival, std_inter_arrival)

        print(f"\nFrame {frame_idx}:")
        print(f" Timestamp: {timestamp:.6f}")
        print(f" Prev timestamp: {prev_timestamp:.6f}")
        print(f" Inter-arrival: {inter_time * 1000:.3f} ms")
        print(f" Deviation: {deviation:.2f}σ")
        print(f" Is outlier: {frame_idx in target_flow.outlier_frames}")

    # Show inter-arrival time distribution
    print("\n=== Inter-arrival Time Distribution ===")
    if target_flow.inter_arrival_times:
        times_ms = [t * 1000 for t in target_flow.inter_arrival_times]
        print(f"Min: {min(times_ms):.3f} ms")
        print(f"Max: {max(times_ms):.3f} ms")
        print(f"Median: {statistics.median(times_ms):.3f} ms")

        # Show percentiles; int(n * fraction) is always < n for fraction < 1,
        # so the index stays in range even for a single sample.
        sorted_times = sorted(times_ms)
        n = len(sorted_times)
        for label, fraction in (("90th", 0.9), ("95th", 0.95), ("99th", 0.99)):
            print(f"{label} percentile: {sorted_times[int(n * fraction)]:.3f} ms")

    # Debug: Show first 20 inter-arrival times, each paired with the frame
    # that closes the gap (hence frame_numbers is offset by one).
    print("\n=== First 20 Inter-arrival Times ===")
    first_samples = zip(
        target_flow.frame_numbers[1:21],
        target_flow.inter_arrival_times[:20],
    )
    for frame_num, inter_time in first_samples:
        deviation = _sigma_deviation(inter_time, avg_inter_arrival, std_inter_arrival)
        outlier_mark = " *OUTLIER*" if frame_num in target_flow.outlier_frames else ""
        print(f"Frame {frame_num}: {inter_time * 1000:.3f} ms ({deviation:.2f}σ){outlier_mark}")
if __name__ == "__main__":
    # Analyze the capture named on the command line; with no argument,
    # fall back to the problematic capture file.
    cli_args = sys.argv[1:]
    capture_path = cli_args[0] if cli_args else "1 PTPGM.pcapng"
    analyze_flow_timing(capture_path)