progress?
364  analyzer/analysis/background_analyzer.py  Normal file
@@ -0,0 +1,364 @@
"""
Background PCAP analyzer with thread pool support for progressive loading
"""

import threading
import queue
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Optional, List
import time
from dataclasses import dataclass
import logging

try:
    from scapy.all import rdpcap, PcapReader, Packet
except ImportError:
    print("Error: scapy library required. Install with: pip install scapy")
    import sys
    sys.exit(1)

from .core import EthernetAnalyzer


@dataclass
class ParsingProgress:
    """Progress information for PCAP parsing"""
    total_packets: int
    processed_packets: int
    percent_complete: float
    packets_per_second: float
    elapsed_time: float
    estimated_time_remaining: float
    is_complete: bool = False
    error: Optional[str] = None

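
# Illustrative sketch only (not part of the committed module's API): a minimal
# progress callback consuming the ParsingProgress dataclass above. Printing to
# stdout is an assumption; a GUI would schedule a widget update instead.
def example_progress_callback(progress: ParsingProgress) -> None:
    if progress.error:
        print(f"Parse error: {progress.error}")
    elif progress.is_complete:
        print(f"Done: {progress.processed_packets} packets processed")
    else:
        print(f"{progress.percent_complete:5.1f}% "
              f"({progress.packets_per_second:,.0f} pkt/s, "
              f"ETA {progress.estimated_time_remaining:.1f}s)")
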

class BackgroundAnalyzer:
    """Analyzer that processes PCAP files in background threads"""
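
    # Pipeline overview (descriptive note):
    #   1. start_parsing() launches a daemon reader thread that first counts the
    #      packets in the file, then streams batches of (frame_num, packet)
    #      tuples onto a bounded queue.
    #   2. num_threads worker futures drain the queue and feed each packet to
    #      the core analyzer's flow manager under flow_lock.
    #   3. A monitor thread publishes ParsingProgress roughly every 0.5 s and
    #      triggers the final statistics pass once all workers finish.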

    def __init__(self, analyzer: EthernetAnalyzer,
                 num_threads: int = 4,
                 batch_size: int = 1000,
                 progress_callback: Optional[Callable[[ParsingProgress], None]] = None,
                 flow_update_callback: Optional[Callable[[], None]] = None):
        """
        Initialize background analyzer

        Args:
            analyzer: Core analyzer instance
            num_threads: Number of worker threads
            batch_size: Packets to process per batch
            progress_callback: Callback for progress updates
            flow_update_callback: Callback for flow data updates
        """
        self.analyzer = analyzer
        self.num_threads = num_threads
        self.batch_size = batch_size
        self.progress_callback = progress_callback
        self.flow_update_callback = flow_update_callback

        # Threading components
        self.executor = ThreadPoolExecutor(max_workers=num_threads)
        self.packet_queue = queue.Queue(maxsize=num_threads * 2)
        self.stop_event = threading.Event()
        self.parse_lock = threading.Lock()

        # Progress tracking
        self.total_packets = 0
        self.processed_packets = 0
        self.start_time = None
        self.is_parsing = False

        # Flow update synchronization
        self.flow_lock = threading.RLock()

        # Flow update batching
        self.packets_since_update = 0
        self.update_batch_size = 50  # Trigger a UI flow update every 50 packets
        self.update_lock = threading.Lock()

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def start_parsing(self, pcap_file: str) -> None:
        """Start parsing PCAP file in background"""
        if self.is_parsing:
            self.logger.warning("Already parsing a file")
            return

        self.is_parsing = True
        self.stop_event.clear()
        self.start_time = time.time()
        self.processed_packets = 0

        # Start reader thread
        reader_thread = threading.Thread(
            target=self._read_pcap_file,
            args=(pcap_file,),
            daemon=True
        )
        reader_thread.start()
        self.reader_thread = reader_thread

        # Start worker threads
        futures = []
        for _ in range(self.num_threads):
            future = self.executor.submit(self._process_packet_batches)
            futures.append(future)

        # Monitor progress in separate thread
        monitor_thread = threading.Thread(
            target=self._monitor_progress,
            args=(futures,),
            daemon=True
        )
        monitor_thread.start()
        self.monitor_thread = monitor_thread

    def _read_pcap_file(self, pcap_file: str) -> None:
        """Read PCAP file and queue packets for processing"""
        try:
            self.logger.info(f"Starting to read {pcap_file}")

            # First, get total packet count for progress tracking
            with PcapReader(pcap_file) as reader:
                # Quick pass to count packets
                count = 0
                for _ in reader:
                    count += 1
                self.total_packets = count

            self.logger.info(f"Found {self.total_packets} packets to process")

            # Now read and queue packets
            with PcapReader(pcap_file) as reader:
                batch = []
                batch_num = 0

                for i, packet in enumerate(reader):
                    if self.stop_event.is_set():
                        break

                    batch.append((i + 1, packet))

                    if len(batch) >= self.batch_size:
                        self.packet_queue.put(batch)
                        batch = []
                        batch_num += 1

                # Queue remaining packets
                if batch:
                    self.packet_queue.put(batch)

        except Exception as e:
            self.logger.error(f"Error reading PCAP: {e}")
            self._report_progress(error=str(e))
        finally:
            # Signal end of packets
            for _ in range(self.num_threads):
                self.packet_queue.put(None)

    def _process_packet_batches(self) -> None:
        """Worker thread to process packet batches"""
        while not self.stop_event.is_set():
            try:
                batch = self.packet_queue.get(timeout=0.5)  # Shorter timeout for faster exit
                if batch is None:  # End signal
                    break

                # Process batch of packets
                for frame_num, packet in batch:
                    if self.stop_event.is_set():
                        break

                    try:
                        # Thread-safe packet processing
                        with self.flow_lock:
                            self.analyzer.flow_manager.process_packet(packet, frame_num)

                        # Update progress
                        with self.parse_lock:
                            self.processed_packets += 1

                        # Check if we should trigger a flow update
                        should_update = False
                        with self.update_lock:
                            self.packets_since_update += 1
                            if self.packets_since_update >= self.update_batch_size:
                                self.packets_since_update = 0
                                should_update = True

                        # Trigger flow update callback if needed
                        if should_update and self.flow_update_callback:
                            try:
                                self.flow_update_callback()
                            except Exception as e:
                                self.logger.error(f"Error in flow update callback: {e}")

                    except Exception as e:
                        self.logger.error(f"Error processing packet {frame_num}: {e}")
                        continue

            except queue.Empty:
                # Check stop event more frequently
                if self.stop_event.is_set():
                    break
                continue
            except KeyboardInterrupt:
                self.logger.info("Packet processing interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error processing batch: {e}")
                if self.stop_event.is_set():
                    break

    def _monitor_progress(self, futures: List) -> None:
        """Monitor parsing progress and send updates"""
        last_update_time = time.time()
        last_packet_count = 0

        while self.is_parsing and not self.stop_event.is_set():
            try:
                current_time = time.time()

                # Update every 0.5 seconds
                if current_time - last_update_time >= 0.5:
                    with self.parse_lock:
                        current_packets = self.processed_packets

                    # Calculate metrics
                    elapsed = current_time - self.start_time
                    packets_processed = current_packets - last_packet_count
                    time_delta = current_time - last_update_time
                    packets_per_second = packets_processed / time_delta if time_delta > 0 else 0

                    # Update for next iteration
                    last_update_time = current_time
                    last_packet_count = current_packets

                    # Report progress
                    self._report_progress(
                        packets_per_second=packets_per_second,
                        elapsed_time=elapsed
                    )

                # Check if all workers are done
                if all(f.done() for f in futures):
                    break

                time.sleep(0.1)
            except KeyboardInterrupt:
                self.logger.info("Monitor thread interrupted")
                break
            except Exception as e:
                self.logger.error(f"Error in monitor thread: {e}")
                break

        # Final update
        self.is_parsing = False
        self._report_progress(is_complete=True)

        # Final flow update
        if self.flow_update_callback:
            try:
                self.flow_update_callback()
            except Exception as e:
                self.logger.error(f"Error in final flow update callback: {e}")

        # Calculate final statistics
        with self.flow_lock:
            self.analyzer.statistics_engine.calculate_all_statistics()

    def _report_progress(self, packets_per_second: float = 0,
                         elapsed_time: float = 0,
                         is_complete: bool = False,
                         error: Optional[str] = None) -> None:
        """Report parsing progress"""
        with self.parse_lock:
            processed = self.processed_packets
            total = self.total_packets

        if total > 0:
            percent = (processed / total) * 100

            # Estimate time remaining
            if packets_per_second > 0 and processed < total:
                remaining_packets = total - processed
                eta = remaining_packets / packets_per_second
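                # Worked example: 50,000 packets remaining at 10,000 pkt/s -> eta = 5.0 s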
            else:
                eta = 0
        else:
            percent = 0
            eta = 0

        progress = ParsingProgress(
            total_packets=total,
            processed_packets=processed,
            percent_complete=percent,
            packets_per_second=packets_per_second,
            elapsed_time=elapsed_time,
            estimated_time_remaining=eta,
            is_complete=is_complete,
            error=error
        )

        if self.progress_callback:
            self.progress_callback(progress)

    def stop_parsing(self) -> None:
        """Stop background parsing"""
        self.logger.info("Stopping background parsing")
        self.stop_event.set()
        self.is_parsing = False

    def get_current_flows(self):
        """Get current flows (thread-safe)"""
        with self.flow_lock:
            return dict(self.analyzer.flows)

    def get_summary(self):
        """Get current summary statistics (thread-safe)"""
        with self.flow_lock:
            return self.analyzer.get_summary()

    def cleanup(self):
        """Cleanup resources"""
        self.logger.info("Starting cleanup...")
        self.stop_parsing()

        try:
            # Clear the queue to unblock waiting workers
            while not self.packet_queue.empty():
                try:
                    self.packet_queue.get_nowait()
                except queue.Empty:
                    break

            # Send stop signals to all workers
            for _ in range(self.num_threads):
                try:
                    self.packet_queue.put(None, timeout=0.1)
                except queue.Full:
                    pass

            # Wait briefly for threads to see stop signal
            time.sleep(0.1)

            # Shut down the executor without waiting; workers exit once they
            # see the stop event or the None sentinel
            try:
                self.executor.shutdown(wait=False)
            except Exception:
                pass

            # Reader and monitor threads are daemonic, so they are not joined
            # here; they terminate with the interpreter
            if hasattr(self, 'reader_thread') and self.reader_thread.is_alive():
                pass

            if hasattr(self, 'monitor_thread') and self.monitor_thread.is_alive():
                pass

            self.logger.info("Cleanup complete")
        except Exception as e:
            self.logger.error(f"Error during cleanup: {e}")
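

# Illustrative usage sketch only (not part of the committed module's API). It
# assumes EthernetAnalyzer() takes no constructor arguments and that
# "capture.pcap" exists; because of the relative import above, run it as a
# module, e.g. `python -m analyzer.analysis.background_analyzer`.
if __name__ == "__main__":
    core = EthernetAnalyzer()  # assumption: default construction
    done = threading.Event()

    def _on_progress(p: ParsingProgress) -> None:
        # Relay progress to stdout; a GUI would schedule a widget update instead
        print(f"{p.percent_complete:5.1f}% complete, {p.packets_per_second:,.0f} pkt/s")
        if p.is_complete or p.error:
            done.set()

    bg = BackgroundAnalyzer(core, num_threads=4, batch_size=1000,
                            progress_callback=_on_progress)
    bg.start_parsing("capture.pcap")  # hypothetical input file
    try:
        done.wait()
    finally:
        bg.cleanup()
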
@@ -86,6 +86,17 @@ class FlowManager:
        flow.total_bytes += packet_size
        flow.protocols.update(protocols)

        # Update timeline statistics
        if flow.frame_count == 1:
            # First packet in flow
            flow.first_seen = timestamp
            flow.last_seen = timestamp
            flow.duration = 0.0
        else:
            # Update last seen and duration
            flow.last_seen = timestamp
            flow.duration = flow.last_seen - flow.first_seen

        # Enhanced protocol detection
        dissection_results = self._dissect_packet(packet, frame_num)
        enhanced_protocols = self._extract_enhanced_protocols(dissection_results)

@@ -29,6 +29,12 @@ class StatisticsEngine:

    def _calculate_single_flow_statistics(self, flow: FlowStats) -> None:
        """Calculate statistics for a single flow"""
        # Ensure timeline statistics are calculated
        if len(flow.timestamps) >= 2:
            flow.duration = flow.timestamps[-1] - flow.timestamps[0]
            flow.first_seen = flow.timestamps[0]
            flow.last_seen = flow.timestamps[-1]

        if len(flow.inter_arrival_times) < 2:
            return

@@ -36,9 +42,19 @@ class StatisticsEngine:
        flow.avg_inter_arrival = statistics.mean(flow.inter_arrival_times)
        flow.std_inter_arrival = statistics.stdev(flow.inter_arrival_times)

        # Calculate jitter as coefficient of variation (normalized standard deviation)
        if flow.avg_inter_arrival > 0:
            flow.jitter = flow.std_inter_arrival / flow.avg_inter_arrival
        else:
            flow.jitter = 0.0
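        # Worked example: avg inter-arrival 10 ms with std 2 ms gives jitter = 0.2
        # (dimensionless; 0 would mean perfectly regular packet spacing)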

        # Detect outliers: frames whose inter-arrival time exceeds the mean by
        # more than outlier_threshold_sigma standard deviations
        threshold = flow.avg_inter_arrival + (self.outlier_threshold_sigma * flow.std_inter_arrival)

        # Clear existing outliers to recalculate
        flow.outlier_frames.clear()
        flow.outlier_details.clear()

        for i, inter_time in enumerate(flow.inter_arrival_times):
            if inter_time > threshold:
                # Frame number is i+2 because inter_arrival_times[i] is between frame i+1 and i+2