""" Background PCAP analyzer with thread pool support for progressive loading """ import threading import queue from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Callable, Optional, List import time from dataclasses import dataclass import logging try: from scapy.all import rdpcap, PcapReader, Packet except ImportError: print("Error: scapy library required. Install with: pip install scapy") import sys sys.exit(1) from .core import EthernetAnalyzer @dataclass class ParsingProgress: """Progress information for PCAP parsing""" total_packets: int processed_packets: int percent_complete: float packets_per_second: float elapsed_time: float estimated_time_remaining: float is_complete: bool = False error: Optional[str] = None class BackgroundAnalyzer: """Analyzer that processes PCAP files in background threads""" def __init__(self, analyzer: EthernetAnalyzer, num_threads: int = 4, batch_size: int = 1000, progress_callback: Optional[Callable[[ParsingProgress], None]] = None, flow_update_callback: Optional[Callable[[], None]] = None): """ Initialize background analyzer Args: analyzer: Core analyzer instance num_threads: Number of worker threads batch_size: Packets to process per batch progress_callback: Callback for progress updates flow_update_callback: Callback for flow data updates """ self.analyzer = analyzer self.num_threads = num_threads self.batch_size = batch_size self.progress_callback = progress_callback self.flow_update_callback = flow_update_callback # Threading components self.executor = ThreadPoolExecutor(max_workers=num_threads) self.packet_queue = queue.Queue(maxsize=num_threads * 2) self.stop_event = threading.Event() self.parse_lock = threading.Lock() # Progress tracking self.total_packets = 0 self.processed_packets = 0 self.start_time = None self.is_parsing = False # Flow update synchronization self.flow_lock = threading.RLock() # Flow update batching self.packets_since_update = 0 self.update_batch_size = 50 # Update UI every 50 packets (more frequent) self.update_lock = threading.Lock() logging.basicConfig(level=logging.INFO) self.logger = logging.getLogger(__name__) def start_parsing(self, pcap_file: str) -> None: """Start parsing PCAP file in background""" if self.is_parsing: self.logger.warning("Already parsing a file") return self.is_parsing = True self.stop_event.clear() self.start_time = time.time() self.processed_packets = 0 # Start reader thread reader_thread = threading.Thread( target=self._read_pcap_file, args=(pcap_file,), daemon=True ) reader_thread.start() self.reader_thread = reader_thread # Start worker threads futures = [] for _ in range(self.num_threads): future = self.executor.submit(self._process_packet_batches) futures.append(future) # Monitor progress in separate thread monitor_thread = threading.Thread( target=self._monitor_progress, args=(futures,), daemon=True ) monitor_thread.start() self.monitor_thread = monitor_thread def _read_pcap_file(self, pcap_file: str) -> None: """Read PCAP file and queue packets for processing""" try: self.logger.info(f"Starting to read {pcap_file}") # First, get total packet count for progress tracking with PcapReader(pcap_file) as reader: # Quick pass to count packets count = 0 for _ in reader: count += 1 self.total_packets = count self.logger.info(f"Found {self.total_packets} packets to process") # Now read and queue packets with PcapReader(pcap_file) as reader: batch = [] batch_num = 0 for i, packet in enumerate(reader): if self.stop_event.is_set(): break batch.append((i + 1, packet)) if 

class BackgroundAnalyzer:
    """Analyzer that processes PCAP files in background threads."""

    def __init__(self,
                 analyzer: EthernetAnalyzer,
                 num_threads: int = 4,
                 batch_size: int = 1000,
                 progress_callback: Optional[Callable[[ParsingProgress], None]] = None,
                 flow_update_callback: Optional[Callable[[], None]] = None):
        """
        Initialize background analyzer.

        Args:
            analyzer: Core analyzer instance
            num_threads: Number of worker threads
            batch_size: Packets to process per batch
            progress_callback: Callback for progress updates
            flow_update_callback: Callback for flow data updates
        """
        self.analyzer = analyzer
        self.num_threads = num_threads
        self.batch_size = batch_size
        self.progress_callback = progress_callback
        self.flow_update_callback = flow_update_callback

        # Threading components
        self.executor = ThreadPoolExecutor(max_workers=num_threads)
        self.packet_queue = queue.Queue(maxsize=num_threads * 2)
        self.stop_event = threading.Event()
        self.parse_lock = threading.Lock()

        # Progress tracking
        self.total_packets = 0
        self.processed_packets = 0
        self.start_time = None
        self.is_parsing = False

        # Flow update synchronization
        self.flow_lock = threading.RLock()

        # Flow update batching: notify the UI every update_batch_size packets
        self.packets_since_update = 0
        self.update_batch_size = 50
        self.update_lock = threading.Lock()

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def start_parsing(self, pcap_file: str) -> None:
        """Start parsing a PCAP file in the background."""
        if self.is_parsing:
            self.logger.warning("Already parsing a file")
            return

        self.is_parsing = True
        self.stop_event.clear()
        self.start_time = time.time()
        self.processed_packets = 0

        # Start reader thread
        reader_thread = threading.Thread(
            target=self._read_pcap_file,
            args=(pcap_file,),
            daemon=True
        )
        reader_thread.start()
        self.reader_thread = reader_thread

        # Start worker threads
        futures = []
        for _ in range(self.num_threads):
            future = self.executor.submit(self._process_packet_batches)
            futures.append(future)

        # Monitor progress in a separate thread
        monitor_thread = threading.Thread(
            target=self._monitor_progress,
            args=(futures,),
            daemon=True
        )
        monitor_thread.start()
        self.monitor_thread = monitor_thread
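
    # Pipeline layout (created by start_parsing):
    #
    #   reader thread --> packet_queue (batches) --> worker pool (num_threads)
    #                                                    |
    #                                                    +--> flow_update_callback
    #                                                         every update_batch_size packets
    #
    #   A separate monitor thread samples processed_packets and reports a
    #   ParsingProgress via progress_callback roughly twice per second.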

    def _read_pcap_file(self, pcap_file: str) -> None:
        """Read the PCAP file and queue packet batches for processing."""
        try:
            self.logger.info(f"Starting to read {pcap_file}")

            # First pass: count packets so progress can be reported
            with PcapReader(pcap_file) as reader:
                count = 0
                for _ in reader:
                    count += 1
                self.total_packets = count

            self.logger.info(f"Found {self.total_packets} packets to process")

            # Second pass: read and queue packets in batches
            with PcapReader(pcap_file) as reader:
                batch = []
                for i, packet in enumerate(reader):
                    if self.stop_event.is_set():
                        break

                    batch.append((i + 1, packet))
                    if len(batch) >= self.batch_size:
                        self.packet_queue.put(batch)
                        batch = []

                # Queue any remaining packets
                if batch:
                    self.packet_queue.put(batch)

        except Exception as e:
            self.logger.error(f"Error reading PCAP: {e}")
            self._report_progress(error=str(e))
        finally:
            # Signal end of packets: one sentinel per worker
            for _ in range(self.num_threads):
                self.packet_queue.put(None)

    def _process_packet_batches(self) -> None:
        """Worker thread: process packet batches from the queue."""
        while not self.stop_event.is_set():
            try:
                # Short timeout so workers notice stop_event quickly
                batch = self.packet_queue.get(timeout=0.5)
                if batch is None:  # End signal
                    break

                # Process a batch of packets
                for frame_num, packet in batch:
                    if self.stop_event.is_set():
                        break

                    try:
                        # Thread-safe packet processing
                        with self.flow_lock:
                            self.analyzer.flow_manager.process_packet(packet, frame_num)

                        # Update progress
                        with self.parse_lock:
                            self.processed_packets += 1

                        # Check whether to trigger a flow update
                        should_update = False
                        with self.update_lock:
                            self.packets_since_update += 1
                            if self.packets_since_update >= self.update_batch_size:
                                self.packets_since_update = 0
                                should_update = True

                        # Trigger the flow update callback if needed
                        if should_update and self.flow_update_callback:
                            try:
                                self.flow_update_callback()
                            except Exception as e:
                                self.logger.error(f"Error in flow update callback: {e}")

                    except Exception as e:
                        self.logger.error(f"Error processing packet {frame_num}: {e}")
                        continue

            except queue.Empty:
                # Re-check the stop event between queue polls
                if self.stop_event.is_set():
                    break
                continue
            except KeyboardInterrupt:
                self.logger.info("Packet processing interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error processing batch: {e}")
                if self.stop_event.is_set():
                    break

    def _monitor_progress(self, futures: List) -> None:
        """Monitor parsing progress and send periodic updates."""
        last_update_time = time.time()
        last_packet_count = 0

        while self.is_parsing and not self.stop_event.is_set():
            try:
                current_time = time.time()

                # Update every 0.5 seconds
                if current_time - last_update_time >= 0.5:
                    with self.parse_lock:
                        current_packets = self.processed_packets

                    # Calculate throughput since the last update
                    elapsed = current_time - self.start_time
                    packets_processed = current_packets - last_packet_count
                    time_delta = current_time - last_update_time
                    packets_per_second = packets_processed / time_delta if time_delta > 0 else 0

                    # Roll the window forward
                    last_update_time = current_time
                    last_packet_count = current_packets

                    # Report progress
                    self._report_progress(
                        packets_per_second=packets_per_second,
                        elapsed_time=elapsed
                    )

                # Stop once all workers are done
                if all(f.done() for f in futures):
                    break

                time.sleep(0.1)

            except KeyboardInterrupt:
                self.logger.info("Monitor thread interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error in monitor thread: {e}")
                break

        # Final update
        self.is_parsing = False
        self._report_progress(is_complete=True)

        # Final flow update
        if self.flow_update_callback:
            try:
                self.flow_update_callback()
            except Exception as e:
                self.logger.error(f"Error in final flow update callback: {e}")

        # Calculate final statistics
        with self.flow_lock:
            self.analyzer.statistics_engine.calculate_all_statistics()

    def _report_progress(self,
                         packets_per_second: float = 0,
                         elapsed_time: float = 0,
                         is_complete: bool = False,
                         error: Optional[str] = None) -> None:
        """Build a ParsingProgress snapshot and hand it to the callback."""
        with self.parse_lock:
            processed = self.processed_packets
            total = self.total_packets

        if total > 0:
            percent = (processed / total) * 100

            # Estimate time remaining
            if packets_per_second > 0 and processed < total:
                remaining_packets = total - processed
                eta = remaining_packets / packets_per_second
            else:
                eta = 0
        else:
            percent = 0
            eta = 0

        progress = ParsingProgress(
            total_packets=total,
            processed_packets=processed,
            percent_complete=percent,
            packets_per_second=packets_per_second,
            elapsed_time=elapsed_time,
            estimated_time_remaining=eta,
            is_complete=is_complete,
            error=error
        )

        if self.progress_callback:
            self.progress_callback(progress)

    def stop_parsing(self) -> None:
        """Stop background parsing."""
        self.logger.info("Stopping background parsing")
        self.stop_event.set()
        self.is_parsing = False

    def get_current_flows(self):
        """Get current flows (thread-safe)."""
        with self.flow_lock:
            return dict(self.analyzer.flows)

    def get_summary(self):
        """Get current summary statistics (thread-safe)."""
        with self.flow_lock:
            return self.analyzer.get_summary()

    def cleanup(self):
        """Release threads and queue resources."""
        self.logger.info("Starting cleanup...")
        self.stop_parsing()

        try:
            # Drain the queue to unblock workers waiting on put/get
            while not self.packet_queue.empty():
                try:
                    self.packet_queue.get_nowait()
                except queue.Empty:
                    break

            # Send stop sentinels to all workers
            for _ in range(self.num_threads):
                try:
                    self.packet_queue.put(None, timeout=0.1)
                except queue.Full:
                    pass

            # Give threads a moment to observe the stop signal
            time.sleep(0.1)

            # Shut down the pool without waiting; workers exit on their own
            # once they see the stop event or a sentinel
            try:
                self.executor.shutdown(wait=False)
            except Exception:
                pass

            # Reader and monitor threads are daemons: joining them without a
            # timeout could hang, and they die with the process anyway, so
            # they are intentionally not joined here.

            self.logger.info("Cleanup complete")

        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}")
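
# --- Usage sketch (illustrative only, not part of the original module) ---
# Shows how the pieces fit together, reusing the print_progress helper above.
# Assumptions: EthernetAnalyzer() can be constructed with no arguments, and
# "capture.pcap" is a placeholder path; adjust both for real use. Because
# this module uses a relative import, run it as part of its package
# (e.g. `python -m <your_package>.<this_module>`), not as a standalone script.
if __name__ == "__main__":
    done = threading.Event()

    def on_progress(progress: ParsingProgress) -> None:
        print_progress(progress)
        if progress.is_complete or progress.error:
            done.set()

    background = BackgroundAnalyzer(
        analyzer=EthernetAnalyzer(),  # assumption: no-arg constructor
        progress_callback=on_progress,
    )
    background.start_parsing("capture.pcap")  # placeholder file name
    try:
        done.wait()
    finally:
        background.cleanup()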