progress?
364  analyzer/analysis/background_analyzer.py  Normal file
@@ -0,0 +1,364 @@
"""
Background PCAP analyzer with thread pool support for progressive loading
"""

import threading
import queue
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Optional, List
import time
from dataclasses import dataclass
import logging

try:
    from scapy.all import rdpcap, PcapReader, Packet
except ImportError:
    print("Error: scapy library required. Install with: pip install scapy")
    import sys
    sys.exit(1)

from .core import EthernetAnalyzer


@dataclass
class ParsingProgress:
    """Progress information for PCAP parsing"""
    total_packets: int
    processed_packets: int
    percent_complete: float
    packets_per_second: float
    elapsed_time: float
    estimated_time_remaining: float
    is_complete: bool = False
    error: Optional[str] = None

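
# Illustrative sketch only (not part of the committed module's API): a minimal
# progress callback consuming the ParsingProgress dataclass above. Printing to
# stdout is an assumption; a GUI would schedule a widget update instead.
def example_progress_callback(progress: ParsingProgress) -> None:
    if progress.error:
        print(f"Parse error: {progress.error}")
    elif progress.is_complete:
        print(f"Done: {progress.processed_packets} packets processed")
    else:
        print(f"{progress.percent_complete:5.1f}% "
              f"({progress.packets_per_second:,.0f} pkt/s, "
              f"ETA {progress.estimated_time_remaining:.1f}s)")
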

class BackgroundAnalyzer:
    """Analyzer that processes PCAP files in background threads"""
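
    # Pipeline overview (descriptive note):
    #   1. start_parsing() launches a daemon reader thread that first counts the
    #      packets in the file, then streams batches of (frame_num, packet)
    #      tuples onto a bounded queue.
    #   2. num_threads worker futures drain the queue and feed each packet to
    #      the core analyzer's flow manager under flow_lock.
    #   3. A monitor thread publishes ParsingProgress roughly every 0.5 s and
    #      triggers the final statistics pass once all workers finish.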

    def __init__(self, analyzer: EthernetAnalyzer,
                 num_threads: int = 4,
                 batch_size: int = 1000,
                 progress_callback: Optional[Callable[[ParsingProgress], None]] = None,
                 flow_update_callback: Optional[Callable[[], None]] = None):
        """
        Initialize background analyzer

        Args:
            analyzer: Core analyzer instance
            num_threads: Number of worker threads
            batch_size: Packets to process per batch
            progress_callback: Callback for progress updates
            flow_update_callback: Callback for flow data updates
        """
        self.analyzer = analyzer
        self.num_threads = num_threads
        self.batch_size = batch_size
        self.progress_callback = progress_callback
        self.flow_update_callback = flow_update_callback

        # Threading components
        self.executor = ThreadPoolExecutor(max_workers=num_threads)
        self.packet_queue = queue.Queue(maxsize=num_threads * 2)
        self.stop_event = threading.Event()
        self.parse_lock = threading.Lock()

        # Progress tracking
        self.total_packets = 0
        self.processed_packets = 0
        self.start_time = None
        self.is_parsing = False

        # Flow update synchronization
        self.flow_lock = threading.RLock()

        # Flow update batching
        self.packets_since_update = 0
        self.update_batch_size = 50  # Trigger a UI flow update every 50 packets
        self.update_lock = threading.Lock()

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def start_parsing(self, pcap_file: str) -> None:
        """Start parsing PCAP file in background"""
        if self.is_parsing:
            self.logger.warning("Already parsing a file")
            return

        self.is_parsing = True
        self.stop_event.clear()
        self.start_time = time.time()
        self.processed_packets = 0

        # Start reader thread
        reader_thread = threading.Thread(
            target=self._read_pcap_file,
            args=(pcap_file,),
            daemon=True
        )
        reader_thread.start()
        self.reader_thread = reader_thread

        # Start worker threads
        futures = []
        for _ in range(self.num_threads):
            future = self.executor.submit(self._process_packet_batches)
            futures.append(future)

        # Monitor progress in separate thread
        monitor_thread = threading.Thread(
            target=self._monitor_progress,
            args=(futures,),
            daemon=True
        )
        monitor_thread.start()
        self.monitor_thread = monitor_thread

    def _read_pcap_file(self, pcap_file: str) -> None:
        """Read PCAP file and queue packets for processing"""
        try:
            self.logger.info(f"Starting to read {pcap_file}")

            # First, get total packet count for progress tracking
            with PcapReader(pcap_file) as reader:
                # Quick pass to count packets
                count = 0
                for _ in reader:
                    count += 1
                self.total_packets = count

            self.logger.info(f"Found {self.total_packets} packets to process")

            # Now read and queue packets
            with PcapReader(pcap_file) as reader:
                batch = []
                batch_num = 0

                for i, packet in enumerate(reader):
                    if self.stop_event.is_set():
                        break

                    batch.append((i + 1, packet))

                    if len(batch) >= self.batch_size:
                        self.packet_queue.put(batch)
                        batch = []
                        batch_num += 1

                # Queue remaining packets
                if batch:
                    self.packet_queue.put(batch)

        except Exception as e:
            self.logger.error(f"Error reading PCAP: {e}")
            self._report_progress(error=str(e))
        finally:
            # Signal end of packets
            for _ in range(self.num_threads):
                self.packet_queue.put(None)

    def _process_packet_batches(self) -> None:
        """Worker thread to process packet batches"""
        while not self.stop_event.is_set():
            try:
                batch = self.packet_queue.get(timeout=0.5)  # Shorter timeout for faster exit
                if batch is None:  # End signal
                    break

                # Process batch of packets
                for frame_num, packet in batch:
                    if self.stop_event.is_set():
                        break

                    try:
                        # Thread-safe packet processing
                        with self.flow_lock:
                            self.analyzer.flow_manager.process_packet(packet, frame_num)

                        # Update progress
                        with self.parse_lock:
                            self.processed_packets += 1

                        # Check if we should trigger a flow update
                        should_update = False
                        with self.update_lock:
                            self.packets_since_update += 1
                            if self.packets_since_update >= self.update_batch_size:
                                self.packets_since_update = 0
                                should_update = True

                        # Trigger flow update callback if needed
                        if should_update and self.flow_update_callback:
                            try:
                                self.flow_update_callback()
                            except Exception as e:
                                self.logger.error(f"Error in flow update callback: {e}")

                    except Exception as e:
                        self.logger.error(f"Error processing packet {frame_num}: {e}")
                        continue

            except queue.Empty:
                # Check stop event more frequently
                if self.stop_event.is_set():
                    break
                continue
            except KeyboardInterrupt:
                self.logger.info("Packet processing interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error processing batch: {e}")
                if self.stop_event.is_set():
                    break

    def _monitor_progress(self, futures: List) -> None:
        """Monitor parsing progress and send updates"""
        last_update_time = time.time()
        last_packet_count = 0

        while self.is_parsing and not self.stop_event.is_set():
            try:
                current_time = time.time()

                # Update every 0.5 seconds
                if current_time - last_update_time >= 0.5:
                    with self.parse_lock:
                        current_packets = self.processed_packets

                    # Calculate metrics
                    elapsed = current_time - self.start_time
                    packets_processed = current_packets - last_packet_count
                    time_delta = current_time - last_update_time
                    packets_per_second = packets_processed / time_delta if time_delta > 0 else 0

                    # Update for next iteration
                    last_update_time = current_time
                    last_packet_count = current_packets

                    # Report progress
                    self._report_progress(
                        packets_per_second=packets_per_second,
                        elapsed_time=elapsed
                    )

                # Check if all workers are done
                if all(f.done() for f in futures):
                    break

                time.sleep(0.1)
            except KeyboardInterrupt:
                self.logger.info("Monitor thread interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error in monitor thread: {e}")
                break

        # Final update
        self.is_parsing = False
        self._report_progress(is_complete=True)

        # Final flow update
        if self.flow_update_callback:
            try:
                self.flow_update_callback()
            except Exception as e:
                self.logger.error(f"Error in final flow update callback: {e}")

        # Calculate final statistics
        with self.flow_lock:
            self.analyzer.statistics_engine.calculate_all_statistics()

    def _report_progress(self, packets_per_second: float = 0,
                         elapsed_time: float = 0,
                         is_complete: bool = False,
                         error: Optional[str] = None) -> None:
        """Report parsing progress"""
        with self.parse_lock:
            processed = self.processed_packets
            total = self.total_packets

        if total > 0:
            percent = (processed / total) * 100

            # Estimate time remaining
            if packets_per_second > 0 and processed < total:
                remaining_packets = total - processed
                eta = remaining_packets / packets_per_second
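                # Worked example: 50,000 packets remaining at 10,000 pkt/s -> eta = 5.0 s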
            else:
                eta = 0
        else:
            percent = 0
            eta = 0

        progress = ParsingProgress(
            total_packets=total,
            processed_packets=processed,
            percent_complete=percent,
            packets_per_second=packets_per_second,
            elapsed_time=elapsed_time,
            estimated_time_remaining=eta,
            is_complete=is_complete,
            error=error
        )

        if self.progress_callback:
            self.progress_callback(progress)

    def stop_parsing(self) -> None:
        """Stop background parsing"""
        self.logger.info("Stopping background parsing")
        self.stop_event.set()
        self.is_parsing = False

    def get_current_flows(self):
        """Get current flows (thread-safe)"""
        with self.flow_lock:
            return dict(self.analyzer.flows)

    def get_summary(self):
        """Get current summary statistics (thread-safe)"""
        with self.flow_lock:
            return self.analyzer.get_summary()

    def cleanup(self):
        """Cleanup resources"""
        self.logger.info("Starting cleanup...")
        self.stop_parsing()

        try:
            # Clear the queue to unblock waiting workers
            while not self.packet_queue.empty():
                try:
                    self.packet_queue.get_nowait()
                except queue.Empty:
                    break

            # Send stop signals to all workers
            for _ in range(self.num_threads):
                try:
                    self.packet_queue.put(None, timeout=0.1)
                except queue.Full:
                    pass

            # Wait briefly for threads to see stop signal
            time.sleep(0.1)

            # Shut down the executor without waiting; workers exit once they
            # see the stop event or the None sentinel
            try:
                self.executor.shutdown(wait=False)
            except Exception:
                pass

            # Reader and monitor threads are daemonic, so they are not joined
            # here; they terminate with the interpreter
            if hasattr(self, 'reader_thread') and self.reader_thread.is_alive():
                pass

            if hasattr(self, 'monitor_thread') and self.monitor_thread.is_alive():
                pass

            self.logger.info("Cleanup complete")
        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}")
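

# Illustrative usage sketch only (not part of the committed module's API). It
# assumes EthernetAnalyzer() takes no constructor arguments and that
# "capture.pcap" exists; because of the relative import above, run it as a
# module, e.g. `python -m analyzer.analysis.background_analyzer`.
if __name__ == "__main__":
    core = EthernetAnalyzer()  # assumption: default construction
    done = threading.Event()

    def _on_progress(p: ParsingProgress) -> None:
        # Relay progress to stdout; a GUI would schedule a widget update instead
        print(f"{p.percent_complete:5.1f}% complete, {p.packets_per_second:,.0f} pkt/s")
        if p.is_complete or p.error:
            done.set()

    bg = BackgroundAnalyzer(core, num_threads=4, batch_size=1000,
                            progress_callback=_on_progress)
    bg.start_parsing("capture.pcap")  # hypothetical input file
    try:
        done.wait()
    finally:
        bg.cleanup()
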
@@ -86,6 +86,17 @@ class FlowManager:
        flow.total_bytes += packet_size
        flow.protocols.update(protocols)

        # Update timeline statistics
        if flow.frame_count == 1:
            # First packet in flow
            flow.first_seen = timestamp
            flow.last_seen = timestamp
            flow.duration = 0.0
        else:
            # Update last seen and duration
            flow.last_seen = timestamp
            flow.duration = flow.last_seen - flow.first_seen

        # Enhanced protocol detection
        dissection_results = self._dissect_packet(packet, frame_num)
        enhanced_protocols = self._extract_enhanced_protocols(dissection_results)

@@ -29,6 +29,12 @@ class StatisticsEngine:

    def _calculate_single_flow_statistics(self, flow: FlowStats) -> None:
        """Calculate statistics for a single flow"""
        # Ensure timeline statistics are calculated
        if len(flow.timestamps) >= 2:
            flow.duration = flow.timestamps[-1] - flow.timestamps[0]
            flow.first_seen = flow.timestamps[0]
            flow.last_seen = flow.timestamps[-1]

        if len(flow.inter_arrival_times) < 2:
            return

@@ -36,9 +42,19 @@ class StatisticsEngine:
        flow.avg_inter_arrival = statistics.mean(flow.inter_arrival_times)
        flow.std_inter_arrival = statistics.stdev(flow.inter_arrival_times)

        # Calculate jitter as coefficient of variation (normalized standard deviation)
        if flow.avg_inter_arrival > 0:
            flow.jitter = flow.std_inter_arrival / flow.avg_inter_arrival
        else:
            flow.jitter = 0.0
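        # Worked example: avg inter-arrival 10 ms with std 2 ms gives jitter = 0.2
        # (dimensionless; 0 would mean perfectly regular packet spacing)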

        # Detect outliers: frames whose inter-arrival time exceeds the mean by
        # more than outlier_threshold_sigma standard deviations
        threshold = flow.avg_inter_arrival + (self.outlier_threshold_sigma * flow.std_inter_arrival)

        # Clear existing outliers to recalculate
        flow.outlier_frames.clear()
        flow.outlier_details.clear()

        for i, inter_time in enumerate(flow.inter_arrival_times):
            if inter_time > threshold:
                # Frame number is i+2 because inter_arrival_times[i] is between frame i+1 and i+2