working to analyze timing issues

commit 4c6e23bff8 (parent 70c2a1b9d3)
2025-07-25 15:52:16 -04:00
31 changed files with 3197 additions and 0 deletions

analyzer/analysis/__init__.py Normal file

@@ -0,0 +1,9 @@
"""
Analysis components for the Ethernet Traffic Analyzer
"""
from .core import EthernetAnalyzer
from .statistics import StatisticsEngine
from .flow_manager import FlowManager
__all__ = ['EthernetAnalyzer', 'StatisticsEngine', 'FlowManager']
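
For orientation, a minimal usage sketch of the API this package exposes (the
pcap path here is hypothetical):

    from analyzer.analysis import EthernetAnalyzer

    analyzer = EthernetAnalyzer(outlier_threshold_sigma=3.0)
    analyzer.analyze_pcap("capture.pcap")    # hypothetical capture file
    analyzer.calculate_statistics()          # timing stats + outlier detection
    print(analyzer.get_summary_statistics())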

analyzer/analysis/core.py Normal file

@@ -0,0 +1,115 @@
"""
Core analysis engine for the Ethernet Traffic Analyzer
"""
import sys
from typing import Dict, List, Optional
try:
from scapy.all import rdpcap, sniff, Packet
except ImportError:
print("Error: scapy library required. Install with: pip install scapy")
sys.exit(1)
from .flow_manager import FlowManager
from .statistics import StatisticsEngine
from ..models import AnalysisResult, FlowStats
class EthernetAnalyzer:
"""Main analyzer class for ethernet traffic analysis"""
def __init__(self, enable_realtime: bool = False, outlier_threshold_sigma: float = 3.0):
self.statistics_engine = StatisticsEngine(outlier_threshold_sigma=outlier_threshold_sigma, enable_realtime=enable_realtime)
self.flow_manager = FlowManager(self.statistics_engine)
self.all_packets: List[Packet] = []
self.is_live = False
self.stop_capture = False
# Expose flows for backward compatibility
self.flows = self.flow_manager.flows
# Create a simple dissector for backward compatibility
self.dissector = SimpleFrameDissector(self.flow_manager)
def analyze_pcap(self, pcap_file: str) -> None:
"""Analyze a pcap file"""
print(f"Loading pcap file: {pcap_file}")
try:
packets = rdpcap(pcap_file)
self.all_packets = packets
print(f"Loaded {len(packets)} packets")
self._process_packets(packets)
except Exception as e:
print(f"Error loading pcap file: {e}")
sys.exit(1)
    def start_live_capture(self, interface: Optional[str] = None, filter_str: Optional[str] = None) -> None:
"""Start live packet capture"""
self.is_live = True
print(f"Starting live capture on interface: {interface or 'default'}")
def packet_handler(packet):
if self.stop_capture:
return
self.all_packets.append(packet)
self._process_single_packet(packet, len(self.all_packets))
try:
sniff(iface=interface, filter=filter_str, prn=packet_handler,
stop_filter=lambda x: self.stop_capture)
except Exception as e:
print(f"Error during live capture: {e}")
def _process_packets(self, packets: List[Packet]) -> None:
"""Process a list of packets"""
for i, packet in enumerate(packets, 1):
self._process_single_packet(packet, i)
def _process_single_packet(self, packet: Packet, frame_num: int) -> None:
"""Process a single packet"""
self.flow_manager.process_packet(packet, frame_num)
def calculate_statistics(self) -> None:
"""Calculate timing statistics and detect outliers"""
self.statistics_engine.calculate_flow_statistics(self.flows)
def get_summary(self) -> Dict:
"""Get analysis summary"""
flow_summary = self.flow_manager.get_flows_summary()
return {
'total_packets': len(self.all_packets),
'unique_flows': flow_summary['total_flows'],
'unique_ips': flow_summary['unique_ips'],
'flows': flow_summary['flows']
}
def get_analysis_result(self) -> AnalysisResult:
"""Get structured analysis result"""
summary = self.get_summary()
return AnalysisResult(
total_packets=summary['total_packets'],
unique_flows=summary['unique_flows'],
unique_ips=summary['unique_ips'],
flows=summary['flows']
)
    def get_high_jitter_flows(self, threshold: float = 0.1) -> List[FlowStats]:
"""Get flows with high timing jitter"""
return self.statistics_engine.identify_high_jitter_flows(self.flows, threshold)
def get_summary_statistics(self) -> Dict:
"""Get summary statistics across all flows"""
return self.statistics_engine.get_flow_summary_statistics(self.flows)
class SimpleFrameDissector:
"""Simple frame dissector for backward compatibility"""
def __init__(self, flow_manager: FlowManager):
self.flow_manager = flow_manager
def dissect_frame(self, packet: Packet, frame_num: int) -> Dict:
"""Dissect a frame using the flow manager's dissection system"""
return self.flow_manager._dissect_packet(packet, frame_num)
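
Since start_live_capture() blocks inside scapy's sniff(), a bounded capture can
run it on a worker thread and flip the stop_capture flag that the stop_filter
polls; a sketch, assuming an "eth0" interface exists:

    import threading
    import time

    from analyzer.analysis import EthernetAnalyzer

    analyzer = EthernetAnalyzer(enable_realtime=True)
    worker = threading.Thread(
        target=analyzer.start_live_capture,
        kwargs={"interface": "eth0", "filter_str": "udp"},  # assumed interface
        daemon=True,
    )
    worker.start()
    time.sleep(10)                # capture for roughly ten seconds
    analyzer.stop_capture = True  # checked per packet, so capture stops on the next one
    worker.join(timeout=5)
    print(analyzer.get_summary()["total_packets"])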

analyzer/analysis/flow_manager.py Normal file

@@ -0,0 +1,328 @@
"""
Flow tracking and management
"""
from typing import Dict, Set, Tuple
from ..models import FlowStats, FrameTypeStats
from ..protocols import Chapter10Dissector, PTPDissector, IENADissector, StandardProtocolDissectors
try:
from scapy.all import Packet, IP, UDP, TCP
except ImportError:
print("Error: scapy library required. Install with: pip install scapy")
import sys
sys.exit(1)
class FlowManager:
"""Manages network flows and frame type classification"""
def __init__(self, statistics_engine=None):
self.flows: Dict[Tuple[str, str], FlowStats] = {}
self.statistics_engine = statistics_engine
# Initialize dissectors
self.specialized_dissectors = {
'chapter10': Chapter10Dissector(),
'ptp': PTPDissector(),
'iena': IENADissector()
}
self.standard_dissectors = StandardProtocolDissectors()
def process_packet(self, packet: Packet, frame_num: int) -> None:
"""Process a single packet and update flow statistics"""
if not packet.haslayer(IP):
return
ip_layer = packet[IP]
src_ip = ip_layer.src
dst_ip = ip_layer.dst
timestamp = float(packet.time)
packet_size = len(packet)
# Determine basic protocol
protocols = self._detect_basic_protocols(packet)
# Create flow key
flow_key = (src_ip, dst_ip)
# Initialize flow stats if new
if flow_key not in self.flows:
self.flows[flow_key] = FlowStats(
src_ip=src_ip,
dst_ip=dst_ip,
frame_count=0,
timestamps=[],
frame_numbers=[],
inter_arrival_times=[],
avg_inter_arrival=0.0,
std_inter_arrival=0.0,
outlier_frames=[],
outlier_details=[],
total_bytes=0,
protocols=set(),
detected_protocol_types=set(),
frame_types={}
)
# Update flow stats
flow = self.flows[flow_key]
flow.frame_count += 1
flow.timestamps.append(timestamp)
flow.frame_numbers.append(frame_num)
flow.total_bytes += packet_size
flow.protocols.update(protocols)
# Enhanced protocol detection
dissection_results = self._dissect_packet(packet, frame_num)
enhanced_protocols = self._extract_enhanced_protocols(dissection_results)
flow.detected_protocol_types.update(enhanced_protocols)
# Add fallback protocol detection
fallback_protocols = self._detect_fallback_protocols(packet, dissection_results)
flow.detected_protocol_types.update(fallback_protocols)
# Classify and track frame types
frame_type = self._classify_frame_type(packet, dissection_results)
self._update_frame_type_stats(flow, frame_type, frame_num, timestamp, packet_size)
# Calculate inter-arrival time
if len(flow.timestamps) > 1:
inter_arrival = timestamp - flow.timestamps[-2]
flow.inter_arrival_times.append(inter_arrival)
# Update real-time statistics if enabled
if self.statistics_engine and self.statistics_engine.enable_realtime:
self.statistics_engine.update_realtime_statistics(flow_key, flow)
def _detect_basic_protocols(self, packet: Packet) -> Set[str]:
"""Detect basic transport protocols"""
protocols = set()
if packet.haslayer(UDP):
protocols.add('UDP')
if packet.haslayer(TCP):
protocols.add('TCP')
if not protocols:
protocols.add('OTHER')
return protocols
def _dissect_packet(self, packet: Packet, frame_num: int) -> Dict:
"""Comprehensive packet dissection"""
result = {
'frame_number': frame_num,
'timestamp': float(packet.time),
'size': len(packet),
'layers': {},
'protocols': []
}
# Apply standard dissectors
standard_layers = self.standard_dissectors.dissect_all(packet)
result['layers'].update(standard_layers)
# Apply specialized protocol dissectors
for name, dissector in self.specialized_dissectors.items():
try:
if dissector.can_dissect(packet):
dissection = dissector.dissect(packet)
if dissection:
result['layers'][name] = dissection.fields
result['protocols'].append(dissection.protocol.name)
if dissection.errors:
result['layers'][name]['errors'] = dissection.errors
if dissection.payload:
result['layers'][name]['payload_size'] = len(dissection.payload)
except Exception as e:
result['layers'][name] = {'error': str(e)}
return result
def _extract_enhanced_protocols(self, dissection: Dict) -> Set[str]:
"""Extract enhanced protocol types from dissection"""
protocols = set()
if dissection.get('protocols'):
protocols.update(dissection['protocols'])
return protocols
def _detect_fallback_protocols(self, packet: Packet, dissection: Dict) -> Set[str]:
"""Detect protocol types with fallback to generic descriptions"""
protocol_types = set()
if packet.haslayer(UDP):
udp_layer = packet[UDP]
sport, dport = udp_layer.sport, udp_layer.dport
# Check for common protocols by port
port_protocols = {
(67, 68): 'DHCP',
(53,): 'DNS',
(123,): 'NTP',
(161, 162): 'SNMP',
(69,): 'TFTP',
(319, 320): 'PTP',
(50000, 50001): 'IENA'
}
for ports, protocol in port_protocols.items():
if sport in ports or dport in ports:
protocol_types.add(protocol)
break
else:
protocol_types.add('UDP')
if packet.haslayer(TCP):
tcp_layer = packet[TCP]
sport, dport = tcp_layer.sport, tcp_layer.dport
tcp_protocols = {
(80,): 'HTTP',
(443,): 'HTTPS',
(22,): 'SSH',
(23,): 'Telnet',
(21,): 'FTP',
(25,): 'SMTP',
(110,): 'POP3',
(143,): 'IMAP'
}
for ports, protocol in tcp_protocols.items():
if sport in ports or dport in ports:
protocol_types.add(protocol)
break
else:
protocol_types.add('TCP')
        # Check for IGMP/ICMP by IP protocol number and flag multicast destinations
        if packet.haslayer(IP):
            ip_layer = packet[IP]
            if ip_layer.proto == 2:  # IGMP protocol number
                protocol_types.add('IGMP')
            elif ip_layer.proto == 1:  # ICMP protocol number
                protocol_types.add('ICMP')
            # Multicast destinations occupy 224.0.0.0/4 (first octet 224-239),
            # not just the 224.x and 239.x prefixes
            first_octet = int(ip_layer.dst.split('.')[0])
            if 224 <= first_octet <= 239:
                protocol_types.add('Multicast')
        return protocol_types
def _classify_frame_type(self, packet: Packet, dissection: Dict) -> str:
"""Classify the frame type based on dissection results"""
layers = dissection.get('layers', {})
# Check for Chapter 10 first
if 'chapter10' in layers and not layers['chapter10'].get('error'):
ch10_info = layers['chapter10']
# Check if it's a TMATS frame
if self._is_tmats_frame(packet, ch10_info):
return 'TMATS'
else:
return 'CH10-Data'
# Check for other specialized protocols
if 'ptp' in layers and not layers['ptp'].get('error'):
ptp_info = layers['ptp']
msg_type = ptp_info.get('message_type_name', 'Unknown')
return f'PTP-{msg_type}'
if 'iena' in layers and not layers['iena'].get('error'):
iena_info = layers['iena']
packet_type = iena_info.get('packet_type_name', 'Unknown')
return f'IENA-{packet_type}'
# Fallback to basic protocol classification
if packet.haslayer(UDP):
udp_layer = packet[UDP]
sport, dport = udp_layer.sport, udp_layer.dport
if sport == 53 or dport == 53:
return 'DNS'
elif sport in [67, 68] or dport in [67, 68]:
return 'DHCP'
elif sport == 123 or dport == 123:
return 'NTP'
else:
return 'UDP'
if packet.haslayer(TCP):
tcp_layer = packet[TCP]
sport, dport = tcp_layer.sport, tcp_layer.dport
if sport == 80 or dport == 80:
return 'HTTP'
elif sport == 443 or dport == 443:
return 'HTTPS'
else:
return 'TCP'
# Check for other protocols
if packet.haslayer(IP):
ip_layer = packet[IP]
if ip_layer.proto == 2:
return 'IGMP'
elif ip_layer.proto == 1:
return 'ICMP'
return 'OTHER'
def _is_tmats_frame(self, packet: Packet, ch10_info: Dict) -> bool:
"""Check if a Chapter 10 frame contains TMATS data"""
data_type = ch10_info.get('data_type', 0)
# Data type 0x01 is typically TMATS
if data_type == 0x01:
return True
# Also check for TMATS text patterns in the payload
if packet.haslayer('Raw'):
from scapy.all import Raw
raw_data = bytes(packet[Raw])
# Look for TMATS-like patterns (ASCII text with TMATS keywords)
try:
# Check if we can find TMATS signature patterns
text_sample = raw_data[50:200] # Sample middle section to avoid headers
if b'\\' in text_sample and (b':' in text_sample or b';' in text_sample):
# Look for TMATS-style key-value pairs
if any(keyword in text_sample.upper() for keyword in [b'TMATS', b'R-', b'G-', b'P-', b'T-']):
return True
            except Exception:
                pass
return False
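
    # For reference, the heuristic above keys on fragments shaped like TMATS
    # attribute text (illustrative example, not taken from a real capture):
    #     G\PN:Example Program;  G\106:07;  R-1\ID:DataSource1;
    # i.e. backslash-delimited attribute names with ':' value separators and
    # ';' terminators, which is what the b'\\' / b':' / b';' checks look for.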
def _update_frame_type_stats(self, flow: FlowStats, frame_type: str,
frame_num: int, timestamp: float, packet_size: int):
"""Update statistics for a specific frame type"""
if frame_type not in flow.frame_types:
flow.frame_types[frame_type] = FrameTypeStats(frame_type=frame_type)
ft_stats = flow.frame_types[frame_type]
ft_stats.count += 1
ft_stats.total_bytes += packet_size
ft_stats.timestamps.append(timestamp)
ft_stats.frame_numbers.append(frame_num)
# Calculate inter-arrival time for this frame type
if len(ft_stats.timestamps) > 1:
inter_arrival = timestamp - ft_stats.timestamps[-2]
ft_stats.inter_arrival_times.append(inter_arrival)
def get_flows_summary(self) -> Dict:
"""Get summary of all flows"""
unique_ips = set()
for flow in self.flows.values():
unique_ips.add(flow.src_ip)
unique_ips.add(flow.dst_ip)
return {
'total_flows': len(self.flows),
'unique_ips': len(unique_ips),
'flows': self.flows
}
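
A short sketch of inspecting the resulting flow table after a capture (again
with a hypothetical pcap path):

    from analyzer.analysis import EthernetAnalyzer

    analyzer = EthernetAnalyzer()
    analyzer.analyze_pcap("capture.pcap")  # hypothetical capture file
    analyzer.calculate_statistics()
    for (src, dst), flow in analyzer.flows.items():
        print(f"{src} -> {dst}: {flow.frame_count} frames, "
              f"{flow.total_bytes} bytes, types={sorted(flow.frame_types)}")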

analyzer/analysis/statistics.py Normal file

@@ -0,0 +1,240 @@
"""
Statistical analysis engine for timing and outlier detection
"""
import statistics
from typing import Any, Dict, List
from ..models import FlowStats, FrameTypeStats
class StatisticsEngine:
"""Handles statistical calculations and outlier detection"""
def __init__(self, outlier_threshold_sigma: float = 3.0, enable_realtime: bool = False):
"""
Initialize statistics engine
Args:
outlier_threshold_sigma: Number of standard deviations for outlier detection
enable_realtime: Enable real-time running statistics calculation
"""
self.outlier_threshold_sigma = outlier_threshold_sigma
self.enable_realtime = enable_realtime
self.realtime_stats = {} # Cache for running statistics
def calculate_flow_statistics(self, flows: Dict[tuple, FlowStats]) -> None:
"""Calculate timing statistics and detect outliers for all flows"""
for flow in flows.values():
self._calculate_single_flow_statistics(flow)
def _calculate_single_flow_statistics(self, flow: FlowStats) -> None:
"""Calculate statistics for a single flow"""
if len(flow.inter_arrival_times) < 2:
return
# Calculate average and std deviation for overall flow
flow.avg_inter_arrival = statistics.mean(flow.inter_arrival_times)
flow.std_inter_arrival = statistics.stdev(flow.inter_arrival_times)
# Detect outliers (frames with inter-arrival time > threshold * std deviations from mean)
threshold = flow.avg_inter_arrival + (self.outlier_threshold_sigma * flow.std_inter_arrival)
for i, inter_time in enumerate(flow.inter_arrival_times):
if inter_time > threshold:
                # inter_arrival_times[i] is the gap that ends at the flow's
                # (i+2)-th frame, which sits at index i + 1 in frame_numbers
                frame_number = flow.frame_numbers[i + 1]
flow.outlier_frames.append(frame_number)
flow.outlier_details.append((frame_number, inter_time))
# Calculate statistics for each frame type
for frame_type, ft_stats in flow.frame_types.items():
self._calculate_frame_type_statistics(ft_stats)
def _calculate_frame_type_statistics(self, ft_stats: FrameTypeStats) -> None:
"""Calculate statistics for a specific frame type"""
if len(ft_stats.inter_arrival_times) < 2:
return
ft_stats.avg_inter_arrival = statistics.mean(ft_stats.inter_arrival_times)
ft_stats.std_inter_arrival = statistics.stdev(ft_stats.inter_arrival_times)
# Detect outliers for this frame type
ft_threshold = ft_stats.avg_inter_arrival + (self.outlier_threshold_sigma * ft_stats.std_inter_arrival)
for i, inter_time in enumerate(ft_stats.inter_arrival_times):
if inter_time > ft_threshold:
frame_number = ft_stats.frame_numbers[i + 1]
ft_stats.outlier_frames.append(frame_number)
ft_stats.outlier_details.append((frame_number, inter_time))
def get_flow_summary_statistics(self, flows: Dict[tuple, FlowStats]) -> Dict[str, float]:
"""Get summary statistics across all flows"""
all_inter_arrivals = []
total_packets = 0
total_outliers = 0
for flow in flows.values():
all_inter_arrivals.extend(flow.inter_arrival_times)
total_packets += flow.frame_count
total_outliers += len(flow.outlier_frames)
if not all_inter_arrivals:
return {}
return {
'overall_avg_inter_arrival': statistics.mean(all_inter_arrivals),
'overall_std_inter_arrival': statistics.stdev(all_inter_arrivals) if len(all_inter_arrivals) > 1 else 0,
'total_packets': total_packets,
'total_outliers': total_outliers,
'outlier_percentage': (total_outliers / total_packets * 100) if total_packets > 0 else 0
}
def identify_high_jitter_flows(self, flows: Dict[tuple, FlowStats],
jitter_threshold: float = 0.1) -> List[FlowStats]:
"""Identify flows with high timing jitter"""
high_jitter_flows = []
for flow in flows.values():
if flow.avg_inter_arrival > 0:
# Calculate coefficient of variation (CV) as a measure of jitter
cv = flow.std_inter_arrival / flow.avg_inter_arrival
if cv > jitter_threshold:
high_jitter_flows.append(flow)
# Sort by coefficient of variation (highest first)
high_jitter_flows.sort(key=lambda f: f.std_inter_arrival / f.avg_inter_arrival
if f.avg_inter_arrival > 0 else 0, reverse=True)
return high_jitter_flows
def calculate_inter_arrival_percentiles(self, flow: FlowStats) -> Dict[str, float]:
"""Calculate percentiles for inter-arrival times"""
if not flow.inter_arrival_times:
return {}
times = sorted(flow.inter_arrival_times)
n = len(times)
def percentile(p: float) -> float:
k = (n - 1) * p / 100
f = int(k)
c = k - f
if f == n - 1:
return times[f]
return times[f] * (1 - c) + times[f + 1] * c
return {
'p50': percentile(50), # Median
'p90': percentile(90),
'p95': percentile(95),
'p99': percentile(99),
'min': min(times),
'max': max(times)
}
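
    # Worked check (illustrative): with sorted times [1, 2, 3, 4, 5] (n = 5),
    # percentile(50) computes k = (5 - 1) * 0.5 = 2.0, f = 2, c = 0.0, and
    # returns times[2] = 3 -- the median, as expected.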
def update_realtime_statistics(self, flow_key: tuple, flow: FlowStats) -> None:
"""Update real-time running statistics for a flow"""
        # Nothing to update until the flow has at least one inter-arrival sample
        if not self.enable_realtime or not flow.inter_arrival_times:
            return
# Initialize if first time
if flow_key not in self.realtime_stats:
self.realtime_stats[flow_key] = {
'count': 0,
'sum': 0.0,
'sum_squares': 0.0,
'outlier_count': 0,
'last_avg': 0.0,
'last_std': 0.0
}
stats = self.realtime_stats[flow_key]
# Use most recent inter-arrival time
new_time = flow.inter_arrival_times[-1]
stats['count'] += 1
stats['sum'] += new_time
stats['sum_squares'] += new_time * new_time
# Calculate running average and standard deviation
if stats['count'] >= 2:
avg = stats['sum'] / stats['count']
variance = (stats['sum_squares'] / stats['count']) - (avg * avg)
std = variance ** 0.5 if variance > 0 else 0.0
# Update flow statistics with running values
flow.avg_inter_arrival = avg
flow.std_inter_arrival = std
# Check for outliers in real-time
threshold = avg + (self.outlier_threshold_sigma * std)
if new_time > threshold:
frame_number = flow.frame_numbers[-1]
if frame_number not in flow.outlier_frames:
flow.outlier_frames.append(frame_number)
flow.outlier_details.append((frame_number, new_time))
stats['outlier_count'] += 1
stats['last_avg'] = avg
stats['last_std'] = std
# Update frame type statistics
for frame_type, ft_stats in flow.frame_types.items():
self._update_realtime_frame_type_stats(flow_key, frame_type, ft_stats)
def _update_realtime_frame_type_stats(self, flow_key: tuple, frame_type: str, ft_stats: FrameTypeStats) -> None:
"""Update real-time statistics for frame types"""
        if not ft_stats.inter_arrival_times:
            return
ft_key = (flow_key, frame_type)
if ft_key not in self.realtime_stats:
self.realtime_stats[ft_key] = {
'count': 0,
'sum': 0.0,
'sum_squares': 0.0,
'outlier_count': 0,
'last_avg': 0.0,
'last_std': 0.0
}
        stats = self.realtime_stats[ft_key]
        # update_realtime_statistics() calls this for every frame type on every
        # packet; only consume a sample if this type actually gained a new one,
        # otherwise the stale last value would be re-counted each time
        if len(ft_stats.inter_arrival_times) <= stats['count']:
            return
        new_time = ft_stats.inter_arrival_times[-1]
stats['count'] += 1
stats['sum'] += new_time
stats['sum_squares'] += new_time * new_time
if stats['count'] >= 2:
avg = stats['sum'] / stats['count']
variance = (stats['sum_squares'] / stats['count']) - (avg * avg)
std = variance ** 0.5 if variance > 0 else 0.0
ft_stats.avg_inter_arrival = avg
ft_stats.std_inter_arrival = std
# Check for frame type outliers
threshold = avg + (self.outlier_threshold_sigma * std)
if new_time > threshold:
frame_number = ft_stats.frame_numbers[-1]
if frame_number not in ft_stats.outlier_frames:
ft_stats.outlier_frames.append(frame_number)
ft_stats.outlier_details.append((frame_number, new_time))
stats['outlier_count'] += 1
stats['last_avg'] = avg
stats['last_std'] = std
    def get_realtime_summary(self) -> Dict[str, Any]:
"""Get summary of real-time statistics"""
if not self.enable_realtime:
return {}
        # Flow keys are (src_ip, dst_ip) string pairs; frame-type keys are
        # (flow_key, frame_type), so both have length 2 -- distinguish on the
        # first element's type instead
        total_flows = len([k for k in self.realtime_stats if isinstance(k, tuple) and isinstance(k[0], str)])
total_outliers = sum(stats['outlier_count'] for stats in self.realtime_stats.values())
return {
'realtime_enabled': True,
'tracked_flows': total_flows,
'total_outliers': total_outliers,
'update_frequency': 'per_packet'
}
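
The running update above relies on the textbook E[x^2] - E[x]^2 identity, which
can lose precision when inter-arrival times are large relative to their spread
(catastrophic cancellation). Welford's online algorithm is a numerically stable
drop-in alternative; a minimal sketch, with names that are illustrative rather
than part of this commit:

    class RunningStats:
        """Welford's online mean/variance: stable and O(1) per update."""

        def __init__(self) -> None:
            self.count = 0
            self.mean = 0.0
            self.m2 = 0.0  # running sum of squared deviations from the mean

        def update(self, x: float) -> None:
            self.count += 1
            delta = x - self.mean
            self.mean += delta / self.count
            self.m2 += delta * (x - self.mean)

        @property
        def std(self) -> float:
            # Sample standard deviation, matching statistics.stdev()
            return (self.m2 / (self.count - 1)) ** 0.5 if self.count > 1 else 0.0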