#!/usr/bin/env python3
"""Debug outlier count discrepancy.

Runs the same capture through two processing paths -- packet-by-packet on an
``EthernetAnalyzer`` and via ``BackgroundAnalyzer`` -- and prints the outlier
results of both for the flow with a given source IP so they can be compared.

Usage: script.py [PCAP_FILE [SRC_IP]]
"""

import sys
import time

# Make the current directory importable so the ``analyzer`` package below
# resolves when the script is launched from the directory that contains it.
sys.path.append('.')

from analyzer.analysis import EthernetAnalyzer
from analyzer.utils import PCAPLoader
from analyzer.analysis.background_analyzer import BackgroundAnalyzer


def _find_flow(analyzer, src_ip):
    """Return the first flow in ``analyzer.flows`` whose ``src_ip`` matches, or None."""
    return next(
        (flow for flow in analyzer.flows.values() if flow.src_ip == src_ip),
        None,
    )


def _print_flow_summary(flow, src_ip):
    """Print packet count, outlier frames and timing statistics for ``flow``.

    Prints an explicit notice instead when ``flow`` is None, so a wrong
    ``src_ip`` is not mistaken for an empty result.
    """
    if flow is None:
        print(f"No flow found with source IP {src_ip}")
        return

    avg = flow.avg_inter_arrival
    std = flow.std_inter_arrival
    print(f"Flow: {flow.src_ip}:{flow.src_port} -> {flow.dst_ip}:{flow.dst_port}")
    print(f"Packets: {flow.frame_count}")
    print(f"Outliers: {len(flow.outlier_frames)}")
    # Only the first 20 frames are shown to keep the output readable.
    print(f"Outlier frames: {sorted(flow.outlier_frames)[:20]}")
    # Values are multiplied by 1000 and labelled as milliseconds.
    print(f"Avg ΔT: {avg * 1000:.3f} ms")
    print(f"Std σ: {std * 1000:.3f} ms")
    print(f"3σ threshold: {(avg + 3 * std) * 1000:.3f} ms")


def _print_comparison(flow1, flow2):
    """Print the differences between the direct (flow1) and background (flow2) results."""
    print("\n=== COMPARISON ===")
    if flow1 is None or flow2 is None:
        print("Cannot compare: the flow was not found by both methods")
        return

    direct_frames = flow1.outlier_frames
    background_frames = flow2.outlier_frames
    print(f"Direct outliers: {len(direct_frames)}")
    print(f"Background outliers: {len(background_frames)}")

    count_mismatch = len(direct_frames) != len(background_frames)
    only_in_1 = set(direct_frames) - set(background_frames)
    only_in_2 = set(background_frames) - set(direct_frames)

    if not (count_mismatch or only_in_1 or only_in_2):
        print("✅ Outlier counts match!")
        return

    if count_mismatch:
        print("\n⚠️ OUTLIER COUNT MISMATCH!")
    else:
        # Equal counts can still hide a discrepancy when different frames
        # were flagged by each method.
        print("\n⚠️ Outlier counts match but the outlier frames differ!")

    if only_in_1:
        print(f"Only in direct: {sorted(only_in_1)}")
    if only_in_2:
        print(f"Only in background: {sorted(only_in_2)}")

    # Check timing differences
    print("\nTiming comparison:")
    print(f"Direct - Avg: {flow1.avg_inter_arrival * 1000:.6f} ms, Std: {flow1.std_inter_arrival * 1000:.6f} ms")
    print(f"Background - Avg: {flow2.avg_inter_arrival * 1000:.6f} ms, Std: {flow2.std_inter_arrival * 1000:.6f} ms")

    # Check inter-arrival times length
    print("\nInter-arrival times count:")
    print(f"Direct: {len(flow1.inter_arrival_times)}")
    print(f"Background: {len(flow2.inter_arrival_times)}")

    # Check first few inter-arrival times; zip stops at the shortest input,
    # so at most 10 pairs are printed.
    print("\nFirst 10 inter-arrival times comparison:")
    for i, raw1, raw2 in zip(range(10), flow1.inter_arrival_times, flow2.inter_arrival_times):
        t1 = raw1 * 1000
        t2 = raw2 * 1000
        diff = abs(t1 - t2)
        print(f"  [{i}] Direct: {t1:.6f} ms, Background: {t2:.6f} ms, Diff: {diff:.6f} ms")


def debug_outliers(pcap_file, src_ip="192.168.4.89"):
    """Debug outlier detection differences.

    Args:
        pcap_file: Path of the capture file handed to ``PCAPLoader`` and to
            ``BackgroundAnalyzer.start_parsing``.
        src_ip: Source IP used to select the flow to report in both runs.

    Returns:
        None. All results are printed to stdout.
    """
    print("=== METHOD 1: Direct Processing ===")

    # Method 1: feed every packet to the analyzer one at a time, numbering
    # frames from 1, then compute statistics explicitly.
    analyzer1 = EthernetAnalyzer(outlier_threshold_sigma=3.0)
    packets = PCAPLoader(pcap_file).load_all()
    for frame_number, packet in enumerate(packets, 1):
        analyzer1._process_single_packet(packet, frame_number)
    analyzer1.calculate_statistics()

    flow1 = _find_flow(analyzer1, src_ip)
    _print_flow_summary(flow1, src_ip)

    print("\n=== METHOD 2: Background Processing (TUI) ===")

    # Method 2: let BackgroundAnalyzer drive the parsing (like the TUI).
    analyzer2 = EthernetAnalyzer(outlier_threshold_sigma=3.0)
    bg_analyzer = BackgroundAnalyzer(analyzer2)
    bg_analyzer.start_parsing(pcap_file)

    # Poll until the background analyzer reports that parsing has finished.
    # NOTE(review): unlike method 1, calculate_statistics() is not called
    # explicitly here; presumably BackgroundAnalyzer does so -- confirm.
    while bg_analyzer.is_parsing:
        time.sleep(0.1)

    flow2 = _find_flow(analyzer2, src_ip)
    _print_flow_summary(flow2, src_ip)

    _print_comparison(flow1, flow2)


if __name__ == "__main__":
    cli_args = sys.argv[1:]
    pcap_path = cli_args[0] if cli_args else "1 PTPGM.pcapng"
    if len(cli_args) > 1:
        # Optional second argument overrides the default source IP.
        debug_outliers(pcap_path, cli_args[1])
    else:
        debug_outliers(pcap_path)