"""
|
|
Background PCAP analyzer with thread pool support for progressive loading
|
|
"""
|
|
|
|
import threading
|
|
import queue
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from typing import Callable, Optional, List
|
|
import time
|
|
from dataclasses import dataclass
|
|
import logging
|
|
|
|
try:
|
|
from scapy.all import rdpcap, PcapReader, Packet
|
|
except ImportError:
|
|
print("Error: scapy library required. Install with: pip install scapy")
|
|
import sys
|
|
sys.exit(1)
|
|
|
|
from .core import EthernetAnalyzer
|
|
|
|
|
|


@dataclass
class ParsingProgress:
    """Progress information for PCAP parsing."""
    total_packets: int
    processed_packets: int
    percent_complete: float
    packets_per_second: float
    elapsed_time: float
    estimated_time_remaining: float
    is_complete: bool = False
    error: Optional[str] = None
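

# Illustrative helper (an addition, not part of the original API): shows how
# the ParsingProgress fields above combine into a one-line status string,
# e.g. for a progress_callback that writes to a terminal.
def format_progress(p: ParsingProgress) -> str:
    """Render a ParsingProgress snapshot as a human-readable status line."""
    if p.error:
        return f"Error: {p.error}"
    if p.is_complete:
        return f"Done: {p.processed_packets:,} packets processed"
    return (f"{p.percent_complete:5.1f}% "
            f"({p.processed_packets:,}/{p.total_packets:,} packets, "
            f"{p.packets_per_second:,.0f} pkt/s, "
            f"ETA {p.estimated_time_remaining:.0f}s)")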


class BackgroundAnalyzer:
    """Analyzer that processes PCAP files in background threads."""

    def __init__(self, analyzer: EthernetAnalyzer,
                 num_threads: int = 4,
                 batch_size: int = 1000,
                 progress_callback: Optional[Callable[[ParsingProgress], None]] = None,
                 flow_update_callback: Optional[Callable[[], None]] = None):
        """
        Initialize the background analyzer.

        Args:
            analyzer: Core analyzer instance
            num_threads: Number of worker threads
            batch_size: Number of packets to process per batch
            progress_callback: Called with a ParsingProgress on each update
            flow_update_callback: Called whenever batched flow data changes
        """
        self.analyzer = analyzer
        self.num_threads = num_threads
        self.batch_size = batch_size
        self.progress_callback = progress_callback
        self.flow_update_callback = flow_update_callback

        # Threading components
        self.executor = ThreadPoolExecutor(max_workers=num_threads)
        self.packet_queue = queue.Queue(maxsize=num_threads * 2)
        self.stop_event = threading.Event()
        self.parse_lock = threading.Lock()

        # Progress tracking
        self.total_packets = 0
        self.processed_packets = 0
        self.start_time = None
        self.is_parsing = False

        # Flow update synchronization
        self.flow_lock = threading.RLock()

        # Flow update batching
        self.packets_since_update = 0
        self.update_batch_size = 50  # Trigger a UI flow update every 50 packets
        self.update_lock = threading.Lock()

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
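
    # Threading note (clarifying comment, not original code): progress_callback
    # fires on the monitor thread (or the reader thread on a read error), and
    # flow_update_callback fires on worker threads. GUI consumers should
    # forward these events to their own main loop (e.g. via a queue.Queue
    # polled by the UI) rather than touching widgets directly.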

    def start_parsing(self, pcap_file: str) -> None:
        """Start parsing a PCAP file in the background."""
        if self.is_parsing:
            self.logger.warning("Already parsing a file")
            return

        self.is_parsing = True
        self.stop_event.clear()
        self.start_time = time.time()
        self.processed_packets = 0

        # Start reader thread
        reader_thread = threading.Thread(
            target=self._read_pcap_file,
            args=(pcap_file,),
            daemon=True
        )
        reader_thread.start()
        self.reader_thread = reader_thread

        # Start worker threads
        futures = []
        for _ in range(self.num_threads):
            future = self.executor.submit(self._process_packet_batches)
            futures.append(future)

        # Monitor progress in a separate thread
        monitor_thread = threading.Thread(
            target=self._monitor_progress,
            args=(futures,),
            daemon=True
        )
        monitor_thread.start()
        self.monitor_thread = monitor_thread

    def _read_pcap_file(self, pcap_file: str) -> None:
        """Read the PCAP file and queue packet batches for processing."""
        try:
            self.logger.info(f"Starting to read {pcap_file}")

            # First pass: count packets so progress can be reported as a
            # percentage (this reads the whole file once before parsing starts)
            with PcapReader(pcap_file) as reader:
                count = 0
                for _ in reader:
                    count += 1
                self.total_packets = count

            self.logger.info(f"Found {self.total_packets} packets to process")

            # Second pass: read packets and queue them in batches
            with PcapReader(pcap_file) as reader:
                batch = []

                for i, packet in enumerate(reader):
                    if self.stop_event.is_set():
                        break

                    batch.append((i + 1, packet))

                    if len(batch) >= self.batch_size:
                        self.packet_queue.put(batch)
                        batch = []

                # Queue any remaining packets
                if batch:
                    self.packet_queue.put(batch)

        except Exception as e:
            self.logger.error(f"Error reading PCAP: {e}")
            self._report_progress(error=str(e))
        finally:
            # Send one end-of-stream sentinel per worker
            for _ in range(self.num_threads):
                self.packet_queue.put(None)

    def _process_packet_batches(self) -> None:
        """Worker loop: pull batches from the queue and feed them to the analyzer."""
        while not self.stop_event.is_set():
            try:
                # Short timeout so workers notice the stop event quickly
                batch = self.packet_queue.get(timeout=0.5)
                if batch is None:  # End-of-stream sentinel
                    break

                # Process the batch of packets
                for frame_num, packet in batch:
                    if self.stop_event.is_set():
                        break

                    try:
                        # Thread-safe packet processing
                        with self.flow_lock:
                            self.analyzer.flow_manager.process_packet(packet, frame_num)

                        # Update progress
                        with self.parse_lock:
                            self.processed_packets += 1

                        # Decide whether to trigger a flow update
                        should_update = False
                        with self.update_lock:
                            self.packets_since_update += 1
                            if self.packets_since_update >= self.update_batch_size:
                                self.packets_since_update = 0
                                should_update = True

                        # Trigger the flow update callback if needed
                        if should_update and self.flow_update_callback:
                            try:
                                self.flow_update_callback()
                            except Exception as e:
                                self.logger.error(f"Error in flow update callback: {e}")

                    except Exception as e:
                        self.logger.error(f"Error processing packet {frame_num}: {e}")
                        continue

            except queue.Empty:
                # Re-check the stop event on every timeout
                if self.stop_event.is_set():
                    break
                continue
            except KeyboardInterrupt:
                self.logger.info("Packet processing interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error processing batch: {e}")
                if self.stop_event.is_set():
                    break

    def _monitor_progress(self, futures: List) -> None:
        """Monitor parsing progress and send periodic updates."""
        last_update_time = time.time()
        last_packet_count = 0

        while self.is_parsing and not self.stop_event.is_set():
            try:
                current_time = time.time()

                # Update every 0.5 seconds
                if current_time - last_update_time >= 0.5:
                    with self.parse_lock:
                        current_packets = self.processed_packets

                    # Calculate throughput over the last interval
                    elapsed = current_time - self.start_time
                    packets_processed = current_packets - last_packet_count
                    time_delta = current_time - last_update_time
                    packets_per_second = packets_processed / time_delta if time_delta > 0 else 0

                    # Roll the window forward
                    last_update_time = current_time
                    last_packet_count = current_packets

                    # Report progress
                    self._report_progress(
                        packets_per_second=packets_per_second,
                        elapsed_time=elapsed
                    )

                # Check whether all workers are done
                if all(f.done() for f in futures):
                    break

                time.sleep(0.1)
            except KeyboardInterrupt:
                self.logger.info("Monitor thread interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error in monitor thread: {e}")
                break

        # Final update
        self.is_parsing = False
        self._report_progress(is_complete=True)

        # Final flow update
        if self.flow_update_callback:
            try:
                self.flow_update_callback()
            except Exception as e:
                self.logger.error(f"Error in final flow update callback: {e}")

        # Calculate final statistics
        with self.flow_lock:
            self.analyzer.statistics_engine.calculate_all_statistics()

    def _report_progress(self, packets_per_second: float = 0,
                         elapsed_time: float = 0,
                         is_complete: bool = False,
                         error: Optional[str] = None) -> None:
        """Build a ParsingProgress snapshot and hand it to the progress callback."""
        with self.parse_lock:
            processed = self.processed_packets
            total = self.total_packets

        if total > 0:
            percent = (processed / total) * 100

            # Estimate time remaining
            if packets_per_second > 0 and processed < total:
                remaining_packets = total - processed
                eta = remaining_packets / packets_per_second
            else:
                eta = 0
        else:
            percent = 0
            eta = 0

        progress = ParsingProgress(
            total_packets=total,
            processed_packets=processed,
            percent_complete=percent,
            packets_per_second=packets_per_second,
            elapsed_time=elapsed_time,
            estimated_time_remaining=eta,
            is_complete=is_complete,
            error=error
        )

        if self.progress_callback:
            self.progress_callback(progress)

    def stop_parsing(self) -> None:
        """Stop background parsing."""
        self.logger.info("Stopping background parsing")
        self.stop_event.set()
        self.is_parsing = False

    def get_current_flows(self):
        """Get a snapshot of current flows (thread-safe)."""
        with self.flow_lock:
            return dict(self.analyzer.flows)

    def get_summary(self):
        """Get current summary statistics (thread-safe)."""
        with self.flow_lock:
            return self.analyzer.get_summary()

    def cleanup(self):
        """Release threading resources."""
        self.logger.info("Starting cleanup...")
        self.stop_parsing()

        try:
            # Drain the queue to unblock workers waiting on a full/empty queue
            while not self.packet_queue.empty():
                try:
                    self.packet_queue.get_nowait()
                except queue.Empty:
                    break

            # Send a sentinel to each worker so blocked get() calls return
            for _ in range(self.num_threads):
                try:
                    self.packet_queue.put(None, timeout=0.1)
                except queue.Full:
                    pass

            # Give threads a moment to observe the stop signal
            time.sleep(0.1)

            # Shut down the executor without waiting for workers to finish
            # (this does not forcibly kill threads; they exit via stop_event
            # or the sentinels above)
            try:
                self.executor.shutdown(wait=False)
            except Exception:
                pass

            # The reader and monitor threads are daemons: they are deliberately
            # not joined here, and any stragglers are terminated when the
            # interpreter exits.

            self.logger.info("Cleanup complete")
        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}")