# StreamLens/analyzer/analysis/background_analyzer.py
"""
Background PCAP analyzer with thread pool support for progressive loading
"""
import threading
import queue
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, Optional, List
import time
from dataclasses import dataclass
import logging

try:
    from scapy.all import PcapReader
except ImportError:
    print("Error: scapy library required. Install with: pip install scapy")
    import sys
    sys.exit(1)

from .core import EthernetAnalyzer

@dataclass
class ParsingProgress:
    """Progress information for PCAP parsing."""
    total_packets: int
    processed_packets: int
    percent_complete: float
    packets_per_second: float
    elapsed_time: float
    estimated_time_remaining: float
    is_complete: bool = False
    error: Optional[str] = None
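
# A sketch of a consumer: the progress callback receives a ParsingProgress
# snapshot and can render it however the UI layer prefers, e.g.:
#
#     def on_progress(p: ParsingProgress) -> None:
#         print(f"{p.percent_complete:.1f}% at {p.packets_per_second:.0f} pkt/s")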

class BackgroundAnalyzer:
    """Analyzer that processes PCAP files in background threads."""

    def __init__(self, analyzer: EthernetAnalyzer,
                 num_threads: int = 4,
                 batch_size: int = 1000,
                 progress_callback: Optional[Callable[[ParsingProgress], None]] = None,
                 flow_update_callback: Optional[Callable[[], None]] = None):
        """
        Initialize the background analyzer.

        Args:
            analyzer: Core analyzer instance
            num_threads: Number of worker threads
            batch_size: Number of packets to process per batch
            progress_callback: Called with a ParsingProgress on each update
            flow_update_callback: Called when flow data changes
        """
        self.analyzer = analyzer
        self.num_threads = num_threads
        self.batch_size = batch_size
        self.progress_callback = progress_callback
        self.flow_update_callback = flow_update_callback

        # Threading components
        self.executor = ThreadPoolExecutor(max_workers=num_threads)
        self.packet_queue = queue.Queue(maxsize=num_threads * 2)
        self.stop_event = threading.Event()
        self.parse_lock = threading.Lock()

        # Progress tracking
        self.total_packets = 0
        self.processed_packets = 0
        self.start_time = None
        self.is_parsing = False

        # Flow update synchronization
        self.flow_lock = threading.RLock()

        # Flow update batching: fire the UI callback once per
        # update_batch_size packets instead of on every packet
        self.packets_since_update = 0
        self.update_batch_size = 50
        self.update_lock = threading.Lock()

        # basicConfig is a no-op if the host application has already
        # configured logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
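
    # Pipeline overview (all stages started by start_parsing):
    #
    #   reader thread -> packet_queue -> worker pool -> monitor thread
    #
    # The reader batches packets onto a bounded queue, the workers drain
    # it and feed the core analyzer under flow_lock, and the monitor
    # publishes ParsingProgress snapshots roughly twice per second.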

    def start_parsing(self, pcap_file: str) -> None:
        """Start parsing a PCAP file in the background."""
        if self.is_parsing:
            self.logger.warning("Already parsing a file")
            return

        self.is_parsing = True
        self.stop_event.clear()
        self.start_time = time.time()
        self.processed_packets = 0

        # Start reader thread
        reader_thread = threading.Thread(
            target=self._read_pcap_file,
            args=(pcap_file,),
            daemon=True
        )
        reader_thread.start()
        self.reader_thread = reader_thread

        # Start worker threads
        futures = []
        for _ in range(self.num_threads):
            future = self.executor.submit(self._process_packet_batches)
            futures.append(future)

        # Monitor progress in a separate thread
        monitor_thread = threading.Thread(
            target=self._monitor_progress,
            args=(futures,),
            daemon=True
        )
        monitor_thread.start()
        self.monitor_thread = monitor_thread

    def _read_pcap_file(self, pcap_file: str) -> None:
        """Read the PCAP file and queue packet batches for processing."""
        try:
            self.logger.info(f"Starting to read {pcap_file}")

            # First pass: count packets so progress can be reported as a
            # percentage. This reads the whole file once before parsing.
            with PcapReader(pcap_file) as reader:
                count = 0
                for _ in reader:
                    count += 1
                self.total_packets = count
            self.logger.info(f"Found {self.total_packets} packets to process")

            # Second pass: read and queue packets in batches
            with PcapReader(pcap_file) as reader:
                batch = []
                for i, packet in enumerate(reader):
                    if self.stop_event.is_set():
                        break
                    batch.append((i + 1, packet))
                    if len(batch) >= self.batch_size:
                        self.packet_queue.put(batch)
                        batch = []
                # Queue any remaining packets
                if batch:
                    self.packet_queue.put(batch)
        except Exception as e:
            self.logger.error(f"Error reading PCAP: {e}")
            self._report_progress(error=str(e))
        finally:
            # Signal end of input with one sentinel per worker. Use a
            # timeout so a full queue cannot block shutdown if workers
            # have already exited.
            for _ in range(self.num_threads):
                try:
                    self.packet_queue.put(None, timeout=1.0)
                except queue.Full:
                    break

    def _process_packet_batches(self) -> None:
        """Worker loop: drain packet batches from the queue and process them."""
        while not self.stop_event.is_set():
            try:
                # Short timeout so workers can exit promptly on stop
                batch = self.packet_queue.get(timeout=0.5)
                if batch is None:  # End-of-input sentinel
                    break

                for frame_num, packet in batch:
                    if self.stop_event.is_set():
                        break
                    try:
                        # Thread-safe packet processing
                        with self.flow_lock:
                            self.analyzer.flow_manager.process_packet(packet, frame_num)

                        # Update progress
                        with self.parse_lock:
                            self.processed_packets += 1

                        # Check whether to trigger a batched flow update
                        should_update = False
                        with self.update_lock:
                            self.packets_since_update += 1
                            if self.packets_since_update >= self.update_batch_size:
                                self.packets_since_update = 0
                                should_update = True

                        if should_update and self.flow_update_callback:
                            try:
                                self.flow_update_callback()
                            except Exception as e:
                                self.logger.error(f"Error in flow update callback: {e}")
                    except Exception as e:
                        self.logger.error(f"Error processing packet {frame_num}: {e}")
                        continue
            except queue.Empty:
                # Timed out waiting for a batch; re-check the stop event
                if self.stop_event.is_set():
                    break
                continue
            except KeyboardInterrupt:
                self.logger.info("Packet processing interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error processing batch: {e}")
                if self.stop_event.is_set():
                    break

    def _monitor_progress(self, futures: List) -> None:
        """Monitor parsing progress and send periodic updates."""
        last_update_time = time.time()
        last_packet_count = 0

        while self.is_parsing and not self.stop_event.is_set():
            try:
                current_time = time.time()

                # Report at most every 0.5 seconds
                if current_time - last_update_time >= 0.5:
                    with self.parse_lock:
                        current_packets = self.processed_packets

                    # Calculate throughput over the last interval
                    elapsed = current_time - self.start_time
                    packets_processed = current_packets - last_packet_count
                    time_delta = current_time - last_update_time
                    packets_per_second = packets_processed / time_delta if time_delta > 0 else 0

                    last_update_time = current_time
                    last_packet_count = current_packets

                    self._report_progress(
                        packets_per_second=packets_per_second,
                        elapsed_time=elapsed
                    )

                # Stop once all workers are done
                if all(f.done() for f in futures):
                    break

                time.sleep(0.1)
            except KeyboardInterrupt:
                self.logger.info("Monitor thread interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error in monitor thread: {e}")
                break

        # Final update
        self.is_parsing = False
        self._report_progress(is_complete=True)

        # Final flow update so the UI sees the last partial batch
        if self.flow_update_callback:
            try:
                self.flow_update_callback()
            except Exception as e:
                self.logger.error(f"Error in final flow update callback: {e}")

        # Calculate final statistics
        with self.flow_lock:
            self.analyzer.statistics_engine.calculate_all_statistics()

    def _report_progress(self, packets_per_second: float = 0,
                         elapsed_time: float = 0,
                         is_complete: bool = False,
                         error: Optional[str] = None) -> None:
        """Build a ParsingProgress snapshot and deliver it to the callback."""
        with self.parse_lock:
            processed = self.processed_packets
            total = self.total_packets

        if total > 0:
            percent = (processed / total) * 100
            # Estimate time remaining from current throughput
            if packets_per_second > 0 and processed < total:
                remaining_packets = total - processed
                eta = remaining_packets / packets_per_second
            else:
                eta = 0
        else:
            percent = 0
            eta = 0

        progress = ParsingProgress(
            total_packets=total,
            processed_packets=processed,
            percent_complete=percent,
            packets_per_second=packets_per_second,
            elapsed_time=elapsed_time,
            estimated_time_remaining=eta,
            is_complete=is_complete,
            error=error
        )

        if self.progress_callback:
            self.progress_callback(progress)

    def stop_parsing(self) -> None:
        """Stop background parsing."""
        self.logger.info("Stopping background parsing")
        self.stop_event.set()
        self.is_parsing = False

    def get_current_flows(self):
        """Get a snapshot of current flows (thread-safe)."""
        with self.flow_lock:
            return dict(self.analyzer.flows)

    def get_summary(self):
        """Get current summary statistics (thread-safe)."""
        with self.flow_lock:
            return self.analyzer.get_summary()

    def cleanup(self):
        """Release resources and stop all background threads."""
        self.logger.info("Starting cleanup...")
        self.stop_parsing()
        try:
            # Drain the queue to unblock workers waiting on get()
            while not self.packet_queue.empty():
                try:
                    self.packet_queue.get_nowait()
                except queue.Empty:
                    break

            # Send one sentinel per worker
            for _ in range(self.num_threads):
                try:
                    self.packet_queue.put(None, timeout=0.1)
                except queue.Full:
                    pass

            # Give threads a moment to observe the stop signal
            time.sleep(0.1)

            # Shut down the pool without waiting for workers to finish.
            # Note this does not kill running threads; they exit on their
            # own once they see the stop event or a sentinel.
            try:
                self.executor.shutdown(wait=False)
            except Exception:
                pass

            # Reader and monitor threads are daemons, so they are not
            # joined here; they will not keep the process alive on exit.
            self.logger.info("Cleanup complete")
        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}")