#!/usr/bin/env python3
"""Debug outlier detection for specific flow"""

import statistics
import sys
from typing import Optional, Sequence

# Allow running the script from the project root without installing the package.
sys.path.append('.')

from analyzer.analysis import EthernetAnalyzer
from analyzer.utils import PCAPLoader

# Sigma multiplier shared by the analyzer configuration and the printed
# threshold, so the two can never drift apart.
OUTLIER_SIGMA = 3.0

# Frame numbers inspected when the caller does not supply its own list.
DEFAULT_PROBLEMATIC_FRAMES = (1576, 1582, 1634, 1640)


def _sigma_deviation(inter_time, flow):
    """Return how many standard deviations *inter_time* lies from the flow mean.

    Returns 0 when the flow's standard deviation is not positive, so the
    division is never attempted with a zero divisor.
    """
    if flow.std_inter_arrival > 0:
        return (inter_time - flow.avg_inter_arrival) / flow.std_inter_arrival
    return 0


def _percentile(sorted_values, fraction):
    """Return the element at position ``int(n * fraction)`` of *sorted_values*.

    The index is clamped to the last element as a safety net; *sorted_values*
    must be non-empty.
    """
    index = min(int(len(sorted_values) * fraction), len(sorted_values) - 1)
    return sorted_values[index]


def analyze_flow_timing(
    pcap_file: str,
    src_ip: str = "192.168.4.89",
    problematic_frames: Optional[Sequence[int]] = None,
) -> None:
    """Analyze timing for a specific flow and print a debug report.

    Loads every packet from *pcap_file*, feeds them to an ``EthernetAnalyzer``
    (packets are numbered starting at 1), and reports inter-arrival statistics
    for the first flow whose source IP equals *src_ip*.

    Args:
        pcap_file: Path of the capture file handed to ``PCAPLoader``.
        src_ip: Source IP address used to select the flow.
        problematic_frames: Frame numbers to inspect in detail. Defaults to
            ``DEFAULT_PROBLEMATIC_FRAMES`` when omitted.

    Returns:
        None. All results are written to stdout.
    """
    if problematic_frames is None:
        problematic_frames = DEFAULT_PROBLEMATIC_FRAMES

    # Create analyzer
    analyzer = EthernetAnalyzer(outlier_threshold_sigma=OUTLIER_SIGMA)

    # Load PCAP
    loader = PCAPLoader(pcap_file)
    packets = loader.load_all()
    print(f"Loaded {len(packets)} packets from {pcap_file}")

    # Process packets
    for frame_num, packet in enumerate(packets, 1):
        analyzer._process_single_packet(packet, frame_num)

    # Calculate statistics
    analyzer.calculate_statistics()

    # Find the specific flow (first one whose source IP matches)
    target_flow = next(
        (flow for flow in analyzer.flows.values() if flow.src_ip == src_ip),
        None,
    )
    if not target_flow:
        print(f"Flow from {src_ip} not found!")
        return

    print(
        f"\nFound flow: {target_flow.src_ip}:{target_flow.src_port} -> "
        f"{target_flow.dst_ip}:{target_flow.dst_port}"
    )
    print(f"Total packets in flow: {target_flow.frame_count}")
    print(f"Total outliers detected: {len(target_flow.outlier_frames)}")
    print(f"Outlier frames: {target_flow.outlier_frames}")

    avg = target_flow.avg_inter_arrival
    std = target_flow.std_inter_arrival
    print("\n=== Timing Analysis ===")
    print(f"Average inter-arrival: {avg * 1000:.3f} ms")
    print(f"Std deviation: {std * 1000:.3f} ms")
    print(
        f"Outlier threshold ({OUTLIER_SIGMA:g}σ): "
        f"{(avg + OUTLIER_SIGMA * std) * 1000:.3f} ms"
    )

    # Check timing for specific frames
    print("\n=== Problematic Frame Analysis ===")
    inter_arrivals = target_flow.inter_arrival_times
    for frame_idx in problematic_frames:
        # frame_idx is a frame *number*, not a position within the flow, so
        # it must not be compared against the flow length; a failed lookup is
        # the only reliable "not in this flow" signal.
        try:
            flow_idx = target_flow.frame_numbers.index(frame_idx)
        except ValueError:
            print(f"\nFrame {frame_idx} not found in flow")
            continue

        # Inter-arrival time i-1 is the gap between flow frames i-1 and i, so
        # the first frame of the flow has no inter-arrival time.
        if not 0 < flow_idx <= len(inter_arrivals):
            print(f"\nFrame {frame_idx}: no inter-arrival time available")
            continue

        inter_time = inter_arrivals[flow_idx - 1]
        timestamp = target_flow.timestamps[flow_idx]
        prev_timestamp = target_flow.timestamps[flow_idx - 1]
        deviation = _sigma_deviation(inter_time, target_flow)

        print(f"\nFrame {frame_idx}:")
        print(f"  Timestamp: {timestamp:.6f}")
        print(f"  Prev timestamp: {prev_timestamp:.6f}")
        print(f"  Inter-arrival: {inter_time * 1000:.3f} ms")
        print(f"  Deviation: {deviation:.2f}σ")
        print(f"  Is outlier: {frame_idx in target_flow.outlier_frames}")

    # Show inter-arrival time distribution
    print("\n=== Inter-arrival Time Distribution ===")
    if inter_arrivals:
        times_ms = [t * 1000 for t in inter_arrivals]
        print(f"Min: {min(times_ms):.3f} ms")
        print(f"Max: {max(times_ms):.3f} ms")
        print(f"Median: {statistics.median(times_ms):.3f} ms")

        # Show percentiles
        sorted_times = sorted(times_ms)
        print(f"90th percentile: {_percentile(sorted_times, 0.9):.3f} ms")
        print(f"95th percentile: {_percentile(sorted_times, 0.95):.3f} ms")
        print(f"99th percentile: {_percentile(sorted_times, 0.99):.3f} ms")

    # Debug: Show first 20 inter-arrival times. The frame list is offset by
    # one because the first frame of the flow has no preceding gap.
    print("\n=== First 20 Inter-arrival Times ===")
    for frame_num, inter_time in zip(
        target_flow.frame_numbers[1:21], inter_arrivals[:20]
    ):
        deviation = _sigma_deviation(inter_time, target_flow)
        outlier_mark = (
            " *OUTLIER*" if frame_num in target_flow.outlier_frames else ""
        )
        print(
            f"Frame {frame_num}: {inter_time * 1000:.3f} ms "
            f"({deviation:.2f}σ){outlier_mark}"
        )


if __name__ == "__main__":
    if len(sys.argv) > 1:
        analyze_flow_timing(sys.argv[1])
    else:
        # Default to the problematic file
        analyze_flow_timing("1 PTPGM.pcapng")