Files
StreamLens/debug_outlier_discrepancy.py

114 lines
4.4 KiB
Python
Raw Permalink Normal View History

2025-07-30 23:48:32 -04:00
#!/usr/bin/env python3
"""Debug outlier count discrepancy"""
import sys
sys.path.append('.')
from analyzer.analysis import EthernetAnalyzer
from analyzer.utils import PCAPLoader
from analyzer.analysis.background_analyzer import BackgroundAnalyzer
import time
def _find_flow_by_source(analyzer, src_ip):
    """Return the first flow in ``analyzer.flows`` whose ``src_ip`` matches, or None."""
    return next(
        (flow for flow in analyzer.flows.values() if flow.src_ip == src_ip),
        None,
    )


def _print_flow_summary(flow, src_ip):
    """Print packet/outlier counts and inter-arrival statistics for one flow.

    Prints an explicit notice instead when ``flow`` is None, so a missing
    flow is not mistaken for empty output.
    """
    if flow is None:
        print(f"No flow found with source IP {src_ip}")
        return

    print(f"Flow: {flow.src_ip}:{flow.src_port} -> {flow.dst_ip}:{flow.dst_port}")
    print(f"Packets: {flow.frame_count}")
    print(f"Outliers: {len(flow.outlier_frames)}")
    # Only the first 20 outlier frame numbers are shown to keep output short.
    print(f"Outlier frames: {sorted(flow.outlier_frames)[:20]}")
    print(f"Avg ΔT: {flow.avg_inter_arrival * 1000:.3f} ms")
    print(f"Std σ: {flow.std_inter_arrival * 1000:.3f} ms")
    print(f"3σ threshold: {(flow.avg_inter_arrival + 3 * flow.std_inter_arrival) * 1000:.3f} ms")


def _print_mismatch_details(direct_flow, background_flow):
    """Print which outlier frames differ and compare the timing data of both flows."""
    direct_frames = set(direct_flow.outlier_frames)
    background_frames = set(background_flow.outlier_frames)
    only_in_direct = direct_frames - background_frames
    only_in_background = background_frames - direct_frames

    if only_in_direct:
        print(f"Only in direct: {sorted(only_in_direct)}")
    if only_in_background:
        print(f"Only in background: {sorted(only_in_background)}")

    # Check timing differences
    print("\nTiming comparison:")
    print(f"Direct - Avg: {direct_flow.avg_inter_arrival * 1000:.6f} ms, Std: {direct_flow.std_inter_arrival * 1000:.6f} ms")
    print(f"Background - Avg: {background_flow.avg_inter_arrival * 1000:.6f} ms, Std: {background_flow.std_inter_arrival * 1000:.6f} ms")

    # Check inter-arrival times length
    direct_times = direct_flow.inter_arrival_times
    background_times = background_flow.inter_arrival_times
    print("\nInter-arrival times count:")
    print(f"Direct: {len(direct_times)}")
    print(f"Background: {len(background_times)}")

    # Check first few inter-arrival times (bounded by the shorter sequence)
    print("\nFirst 10 inter-arrival times comparison:")
    for i in range(min(10, len(direct_times), len(background_times))):
        t1 = direct_times[i] * 1000
        t2 = background_times[i] * 1000
        diff = abs(t1 - t2)
        print(f"  [{i}] Direct: {t1:.6f} ms, Background: {t2:.6f} ms, Diff: {diff:.6f} ms")


def debug_outliers(pcap_file, src_ip="192.168.4.89"):
    """Compare outlier detection between direct and background processing.

    The capture is analysed twice with a 3.0-sigma threshold: once by feeding
    every packet straight into an ``EthernetAnalyzer``, and once through a
    ``BackgroundAnalyzer``. For each run the first flow whose source IP equals
    ``src_ip`` is summarised, and the two outlier counts are then compared.

    Args:
        pcap_file: Capture file handed to ``PCAPLoader`` and to
            ``BackgroundAnalyzer.start_parsing``.
        src_ip: Source IP address used to pick the flow to inspect.

    Returns:
        None. All results are printed to stdout.
    """
    print("=== METHOD 1: Direct Processing ===")

    # Method 1: Direct processing (like my debug script)
    analyzer1 = EthernetAnalyzer(outlier_threshold_sigma=3.0)
    loader = PCAPLoader(pcap_file)
    packets = loader.load_all()

    # Frame numbers are 1-based.
    for i, packet in enumerate(packets, 1):
        analyzer1._process_single_packet(packet, i)

    analyzer1.calculate_statistics()

    flow1 = _find_flow_by_source(analyzer1, src_ip)
    _print_flow_summary(flow1, src_ip)

    print("\n=== METHOD 2: Background Processing (TUI) ===")

    # Method 2: Background processing (like TUI)
    analyzer2 = EthernetAnalyzer(outlier_threshold_sigma=3.0)
    bg_analyzer = BackgroundAnalyzer(analyzer2)
    bg_analyzer.start_parsing(pcap_file)

    # Wait for completion by polling.
    # NOTE(review): assumes is_parsing is already True when start_parsing
    # returns; otherwise this loop exits before parsing begins -- confirm.
    while bg_analyzer.is_parsing:
        time.sleep(0.1)

    flow2 = _find_flow_by_source(analyzer2, src_ip)
    _print_flow_summary(flow2, src_ip)

    # Compare results
    print("\n=== COMPARISON ===")
    if flow1 is None or flow2 is None:
        print("Comparison skipped: flow not found in one or both methods")
        return

    print(f"Direct outliers: {len(flow1.outlier_frames)}")
    print(f"Background outliers: {len(flow2.outlier_frames)}")

    # Only the counts are compared here; equal counts do not by themselves
    # prove that both methods flagged the same frames.
    if len(flow1.outlier_frames) != len(flow2.outlier_frames):
        print("\n⚠️ OUTLIER COUNT MISMATCH!")
        _print_mismatch_details(flow1, flow2)
    else:
        print("✅ Outlier counts match!")
if __name__ == "__main__":
    # Take the capture path from the first command-line argument when given;
    # otherwise fall back to the default capture file name.
    capture_path = sys.argv[1] if len(sys.argv) > 1 else "1 PTPGM.pcapng"
    debug_outliers(capture_path)