Files
StreamLens/debug_realtime_outliers.py

116 lines
4.0 KiB
Python

#!/usr/bin/env python3
"""Debug real-time vs batch outlier calculation"""
import sys
sys.path.append('.')
from analyzer.analysis import EthernetAnalyzer
from analyzer.utils import PCAPLoader
from analyzer.analysis.background_analyzer import BackgroundAnalyzer
import time
def _new_analyzer(enable_realtime):
    """Create an EthernetAnalyzer with the 3-sigma threshold used by every run."""
    return EthernetAnalyzer(enable_realtime=enable_realtime, outlier_threshold_sigma=3.0)


def _analyze_directly(packets, enable_realtime):
    """Feed already-loaded packets to a fresh analyzer and return that analyzer."""
    analyzer = _new_analyzer(enable_realtime)
    # Frame numbers are 1-based, matching the numbering passed by the original script.
    for frame_number, packet in enumerate(packets, 1):
        analyzer._process_single_packet(packet, frame_number)
    # Statistics are computed once, after every packet has been processed.
    analyzer.calculate_statistics()
    return analyzer


def _analyze_in_background(pcap_file, enable_realtime):
    """Parse ``pcap_file`` through a BackgroundAnalyzer and return the wrapped analyzer."""
    analyzer = _new_analyzer(enable_realtime)
    background = BackgroundAnalyzer(analyzer)
    background.start_parsing(pcap_file)
    # Poll until the background parser reports that it has finished.
    while background.is_parsing:
        time.sleep(0.1)
    return analyzer


def _find_flow_by_source(analyzer, src_ip):
    """Return the first flow in ``analyzer.flows`` whose ``src_ip`` matches, else None."""
    for flow in analyzer.flows.values():
        if flow.src_ip == src_ip:
            return flow
    return None


def _report_flow(flow, src_ip):
    """Print the endpoints and outlier summary of ``flow``, or a notice when it is None."""
    if flow is None:
        # Previously a missing flow produced no output at all, which made an
        # empty section indistinguishable from a crash or a skipped test.
        print(f"No flow found with source IP {src_ip}")
        return
    print(f"Flow: {flow.src_ip}:{flow.src_port} -> {flow.dst_ip}:{flow.dst_port}")
    print(f"Outliers: {len(flow.outlier_frames)}")
    print(f"Outlier frames: {sorted(flow.outlier_frames)}")


def test_realtime_vs_batch(pcap_file, src_ip="192.168.4.89", expected_outliers=19):
    """Compare outlier detection across four processing modes for one flow.

    The capture is analysed four times: direct processing with real-time
    statistics disabled and enabled, and BackgroundAnalyzer-driven parsing
    with real-time statistics disabled and enabled. For each run the first
    flow whose source address equals ``src_ip`` is printed together with its
    outlier frames, and a final section compares the outlier counts.

    Args:
        pcap_file: Path of the capture file to analyse.
        src_ip: Source IP address selecting the flow to inspect.
        expected_outliers: Outlier count to look for in the comparison.
            Defaults to 19, the value the script previously hard-coded.

    Returns:
        None. All results are written to standard output.
    """
    labels = ["Batch", "Real-time", "Background", "Background+Real-time"]
    flows = []

    print("=== TEST 1: Batch Processing (Normal) ===")
    # The capture is loaded once and the same packet list is reused by TEST 2.
    packets = PCAPLoader(pcap_file).load_all()
    flows.append(_find_flow_by_source(_analyze_directly(packets, False), src_ip))
    _report_flow(flows[-1], src_ip)

    print("\n=== TEST 2: Real-time Processing ===")
    flows.append(_find_flow_by_source(_analyze_directly(packets, True), src_ip))
    _report_flow(flows[-1], src_ip)

    print("\n=== TEST 3: Background Processing (TUI-style) ===")
    flows.append(_find_flow_by_source(_analyze_in_background(pcap_file, False), src_ip))
    _report_flow(flows[-1], src_ip)

    print("\n=== TEST 4: Background Processing with Real-time ===")
    flows.append(_find_flow_by_source(_analyze_in_background(pcap_file, True), src_ip))
    _report_flow(flows[-1], src_ip)

    print("\n=== COMPARISON ===")
    missing = [label for label, flow in zip(labels, flows) if flow is None]
    if missing:
        # A comparison is only meaningful when every mode produced the flow.
        print(f"Comparison skipped; no matching flow in: {', '.join(missing)}")
        return

    counts = [len(flow.outlier_frames) for flow in flows]
    for label, count in zip(labels, counts):
        print(f"{label}: {count} outliers")

    # Report every mode that hits the expected count, not just the first one.
    matching = [label for label, count in zip(labels, counts) if count == expected_outliers]
    if matching:
        print(f"\n✅ Found {expected_outliers} outliers in: {', '.join(matching)}")
    else:
        print(f"\n❌ No method shows {expected_outliers} outliers")
if __name__ == "__main__":
    # Analyse the capture named on the command line; without an argument,
    # fall back to the default capture file name.
    cli_args = sys.argv[1:]
    capture_path = cli_args[0] if cli_args else "1 PTPGM.pcapng"
    test_realtime_vs_batch(capture_path)