Files
StreamLens/debug_outlier_discrepancy.py

114 lines
4.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Debug outlier count discrepancy"""
import sys
sys.path.append('.')
from analyzer.analysis import EthernetAnalyzer
from analyzer.utils import PCAPLoader
from analyzer.analysis.background_analyzer import BackgroundAnalyzer
import time
# Sigma multiplier shared by both analyzers and by the printed threshold, so
# the reported threshold always reflects the configured value.
_OUTLIER_SIGMA = 3.0


def _find_flow_by_source(analyzer, src_ip):
    """Return the first flow in ``analyzer.flows`` whose ``src_ip`` matches, else None."""
    return next(
        (flow for flow in analyzer.flows.values() if flow.src_ip == src_ip),
        None,
    )


def _print_flow_summary(flow, src_ip):
    """Print endpoints, counts and inter-arrival statistics of *flow*.

    Prints a short notice instead when *flow* is None, so that a section
    header is never followed by silence.
    """
    if flow is None:
        print(f"No flow found with source IP {src_ip}")
        return

    threshold = flow.avg_inter_arrival + _OUTLIER_SIGMA * flow.std_inter_arrival
    print(f"Flow: {flow.src_ip}:{flow.src_port} -> {flow.dst_ip}:{flow.dst_port}")
    print(f"Packets: {flow.frame_count}")
    print(f"Outliers: {len(flow.outlier_frames)}")
    # Only the first 20 outlier frames are listed to keep the output short.
    print(f"Outlier frames: {sorted(flow.outlier_frames)[:20]}")
    # Values are scaled by 1000 and labelled "ms"; presumably the analyzer
    # stores them in seconds -- TODO confirm.
    print(f"Avg ΔT: {flow.avg_inter_arrival * 1000:.3f} ms")
    print(f"Std σ: {flow.std_inter_arrival * 1000:.3f} ms")
    print(f"{_OUTLIER_SIGMA:g}σ threshold: {threshold * 1000:.3f} ms")


def _print_discrepancy_details(direct_flow, background_flow):
    """Print frame-set differences and timing statistics of two flows side by side."""
    direct_frames = set(direct_flow.outlier_frames)
    background_frames = set(background_flow.outlier_frames)
    only_in_direct = direct_frames - background_frames
    only_in_background = background_frames - direct_frames
    if only_in_direct:
        print(f"Only in direct: {sorted(only_in_direct)}")
    if only_in_background:
        print(f"Only in background: {sorted(only_in_background)}")

    print("\nTiming comparison:")
    print(
        f"Direct - Avg: {direct_flow.avg_inter_arrival * 1000:.6f} ms, "
        f"Std: {direct_flow.std_inter_arrival * 1000:.6f} ms"
    )
    print(
        f"Background - Avg: {background_flow.avg_inter_arrival * 1000:.6f} ms, "
        f"Std: {background_flow.std_inter_arrival * 1000:.6f} ms"
    )

    print("\nInter-arrival times count:")
    print(f"Direct: {len(direct_flow.inter_arrival_times)}")
    print(f"Background: {len(background_flow.inter_arrival_times)}")

    print("\nFirst 10 inter-arrival times comparison:")
    # zip() stops at the shortest input, so range(10) caps the listing at ten
    # rows and a shorter series ends it earlier.
    for i, direct_t, background_t in zip(
        range(10),
        direct_flow.inter_arrival_times,
        background_flow.inter_arrival_times,
    ):
        t1 = direct_t * 1000
        t2 = background_t * 1000
        diff = abs(t1 - t2)
        print(f" [{i}] Direct: {t1:.6f} ms, Background: {t2:.6f} ms, Diff: {diff:.6f} ms")


def debug_outliers(pcap_file, src_ip="192.168.4.89") -> None:
    """Compare outlier detection between direct and background processing.

    The capture is analysed twice with separate ``EthernetAnalyzer``
    instances: once by feeding every loaded packet to
    ``_process_single_packet`` and calling ``calculate_statistics``, and once
    through ``BackgroundAnalyzer.start_parsing``. For each run the first flow
    whose ``src_ip`` equals *src_ip* is summarised, then the two results are
    compared. Everything is reported on stdout; nothing is returned.

    Args:
        pcap_file: Capture file handed to ``PCAPLoader`` and to
            ``BackgroundAnalyzer.start_parsing``.
        src_ip: Source IP address used to select the flow to inspect.
    """
    print("=== METHOD 1: Direct Processing ===")
    direct_analyzer = EthernetAnalyzer(outlier_threshold_sigma=_OUTLIER_SIGMA)
    packets = PCAPLoader(pcap_file).load_all()
    # Packets are numbered from 1 when handed to the analyzer.
    for packet_index, packet in enumerate(packets, 1):
        direct_analyzer._process_single_packet(packet, packet_index)
    direct_analyzer.calculate_statistics()
    direct_flow = _find_flow_by_source(direct_analyzer, src_ip)
    _print_flow_summary(direct_flow, src_ip)

    print("\n=== METHOD 2: Background Processing (TUI) ===")
    background_analyzer = EthernetAnalyzer(outlier_threshold_sigma=_OUTLIER_SIGMA)
    bg_analyzer = BackgroundAnalyzer(background_analyzer)
    bg_analyzer.start_parsing(pcap_file)
    # Poll until the background analyzer reports that parsing has finished.
    while bg_analyzer.is_parsing:
        time.sleep(0.1)
    background_flow = _find_flow_by_source(background_analyzer, src_ip)
    _print_flow_summary(background_flow, src_ip)

    print("\n=== COMPARISON ===")
    if direct_flow is None or background_flow is None:
        print("Cannot compare: the flow is missing from at least one method.")
        return

    direct_count = len(direct_flow.outlier_frames)
    background_count = len(background_flow.outlier_frames)
    print(f"Direct outliers: {direct_count}")
    print(f"Background outliers: {background_count}")

    if direct_count != background_count:
        print("\n⚠️ OUTLIER COUNT MISMATCH!")
    elif set(direct_flow.outlier_frames) != set(background_flow.outlier_frames):
        # Equal counts can still hide different flagged frames.
        print("\n⚠️ Outlier counts match but the flagged frames differ!")
    else:
        print("✅ Outlier counts match!")
        return

    _print_discrepancy_details(direct_flow, background_flow)
if __name__ == "__main__":
    # Take the capture path from the first command-line argument when one is
    # given; otherwise fall back to the default capture file name.
    capture_path = sys.argv[1] if len(sys.argv) > 1 else "1 PTPGM.pcapng"
    debug_outliers(capture_path)