# StreamLens/analyzer/analysis/flow_manager.py
"""
Flow tracking and management
"""
from typing import Any, Dict, Set, Tuple
from ..models import FlowStats, FrameTypeStats
from ..protocols import Chapter10Dissector, PTPDissector, IENADissector, StandardProtocolDissectors
from ..protocols.enhanced_chapter10 import EnhancedChapter10Decoder
from ..plugins.ch10_timing_analysis import Chapter10TimingAnalysisPlugin
try:
from scapy.all import Packet, IP, UDP, TCP
except ImportError:
print("Error: scapy library required. Install with: pip install scapy")
import sys
sys.exit(1)
class FlowManager:
    """Tracks network flows and classifies frame types per flow."""

    def __init__(self, statistics_engine=None):
        """Create a flow manager.

        Args:
            statistics_engine: optional engine used for real-time statistics
                updates (see process_packet); may be None.
        """
        # Flow table keyed by (src_ip, dst_ip).
        self.flows: Dict[Tuple[str, str], FlowStats] = {}
        self.statistics_engine = statistics_engine
        # Specialized protocol dissectors tried on every packet.
        self.specialized_dissectors = {
            'chapter10': Chapter10Dissector(),
            'ptp': PTPDissector(),
            'iena': IENADissector(),
        }
        self.standard_dissectors = StandardProtocolDissectors()
        # Enhanced Chapter 10 decoding and timing analysis.
        self.enhanced_ch10_decoder = EnhancedChapter10Decoder()
        self.ch10_timing_plugin = Chapter10TimingAnalysisPlugin()
def process_packet(self, packet: Packet, frame_num: int) -> None:
    """Fold a single packet into the per-flow statistics.

    Non-IP packets are ignored. A new FlowStats entry is created the first
    time a (src_ip, dst_ip) pair is seen; afterwards counters, timeline,
    protocol detection, frame-type stats and enhanced CH10 analysis are
    all updated for that flow.
    """
    if not packet.haslayer(IP):
        return

    ip = packet[IP]
    ts = float(packet.time)
    size = len(packet)

    # Transport layer ports / protocol name.
    transport = self._extract_transport_info(packet)

    key = (ip.src, ip.dst)
    flow = self.flows.get(key)
    if flow is None:
        flow = FlowStats(
            src_ip=ip.src,
            dst_ip=ip.dst,
            src_port=transport['src_port'],
            dst_port=transport['dst_port'],
            transport_protocol=transport['protocol'],
            traffic_classification=self._classify_traffic(ip.dst),
            frame_count=0,
            timestamps=[],
            frame_numbers=[],
            inter_arrival_times=[],
            avg_inter_arrival=0.0,
            std_inter_arrival=0.0,
            outlier_frames=[],
            outlier_details=[],
            total_bytes=0,
            protocols=set(),
            detected_protocol_types=set(),
            frame_types={},
        )
        self.flows[key] = flow

    # Basic counters.
    flow.frame_count += 1
    flow.timestamps.append(ts)
    flow.frame_numbers.append(frame_num)
    flow.total_bytes += size
    flow.protocols.update(self._detect_basic_protocols(packet))

    # Timeline: first/last seen and duration.
    if flow.frame_count == 1:
        flow.first_seen = ts
        flow.last_seen = ts
        flow.duration = 0.0
    else:
        flow.last_seen = ts
        flow.duration = flow.last_seen - flow.first_seen

    # Enhanced protocol detection (specialized dissectors + port fallback).
    dissection = self._dissect_packet(packet, frame_num)
    flow.detected_protocol_types.update(self._extract_enhanced_protocols(dissection))
    flow.detected_protocol_types.update(self._detect_fallback_protocols(packet, dissection))

    # Per-frame-type bookkeeping.
    frame_type = self._classify_frame_type(packet, dissection)
    self._update_frame_type_stats(flow, frame_type, frame_num, ts, size)

    # Deep Chapter 10 analysis (decoder + timing plugin).
    self._perform_enhanced_analysis(packet, flow, frame_num, transport)

    # Inter-arrival gap vs. the previous packet of this flow.
    if len(flow.timestamps) > 1:
        flow.inter_arrival_times.append(ts - flow.timestamps[-2])

    # Optional real-time statistics hook.
    if self.statistics_engine and self.statistics_engine.enable_realtime:
        self.statistics_engine.update_realtime_statistics(key, flow)
def _detect_basic_protocols(self, packet: Packet) -> Set[str]:
    """Return the transport-layer tags present on *packet*.

    Yields 'UDP' and/or 'TCP'; 'OTHER' when neither layer is present.
    """
    found: Set[str] = set()
    for layer, label in ((UDP, 'UDP'), (TCP, 'TCP')):
        if packet.haslayer(layer):
            found.add(label)
    return found if found else {'OTHER'}
def _dissect_packet(self, packet: Packet, frame_num: int) -> Dict:
"""Comprehensive packet dissection"""
result = {
'frame_number': frame_num,
'timestamp': float(packet.time),
'size': len(packet),
'layers': {},
'protocols': []
}
# Apply standard dissectors
standard_layers = self.standard_dissectors.dissect_all(packet)
result['layers'].update(standard_layers)
# Apply specialized protocol dissectors
for name, dissector in self.specialized_dissectors.items():
try:
if dissector.can_dissect(packet):
dissection = dissector.dissect(packet)
if dissection:
result['layers'][name] = dissection.fields
result['protocols'].append(dissection.protocol.name)
if dissection.errors:
result['layers'][name]['errors'] = dissection.errors
if dissection.payload:
result['layers'][name]['payload_size'] = len(dissection.payload)
except Exception as e:
result['layers'][name] = {'error': str(e)}
return result
def _extract_enhanced_protocols(self, dissection: Dict) -> Set[str]:
"""Extract enhanced protocol types from dissection"""
protocols = set()
if dissection.get('protocols'):
protocols.update(dissection['protocols'])
return protocols
def _detect_fallback_protocols(self, packet: Packet, dissection: Dict) -> Set[str]:
    """Detect protocol types by well-known port / IP protocol number.

    Fallback classification when the specialized dissectors did not
    identify the packet: UDP/TCP traffic is labelled by well-known port
    (generic 'UDP'/'TCP' when no port matches), IGMP/ICMP by IP protocol
    number, and multicast destinations get an extra 'Multicast' tag.
    """
    protocol_types: Set[str] = set()

    if packet.haslayer(UDP):
        udp = packet[UDP]
        # Well-known UDP services: (port tuple) -> label.
        udp_ports = {
            (67, 68): 'DHCP',
            (53,): 'DNS',
            (123,): 'NTP',
            (161, 162): 'SNMP',
            (69,): 'TFTP',
            (319, 320): 'PTP',
            (50000, 50001): 'IENA',
        }
        for ports, protocol in udp_ports.items():
            if udp.sport in ports or udp.dport in ports:
                protocol_types.add(protocol)
                break
        else:
            protocol_types.add('UDP')

    if packet.haslayer(TCP):
        tcp = packet[TCP]
        tcp_ports = {
            (80,): 'HTTP',
            (443,): 'HTTPS',
            (22,): 'SSH',
            (23,): 'Telnet',
            (21,): 'FTP',
            (25,): 'SMTP',
            (110,): 'POP3',
            (143,): 'IMAP',
        }
        for ports, protocol in tcp_ports.items():
            if tcp.sport in ports or tcp.dport in ports:
                protocol_types.add(protocol)
                break
        else:
            protocol_types.add('TCP')

    if packet.haslayer(IP):
        ip = packet[IP]
        if ip.proto == 2:   # IGMP protocol number
            protocol_types.add('IGMP')
        elif ip.proto == 1:  # ICMP protocol number
            protocol_types.add('ICMP')
        # Tag multicast destinations using the full 224.0.0.0/4 class-D
        # range, consistent with _classify_traffic (the previous prefix
        # test only matched 224.* and 239.*, missing 225.*-238.*).
        first_octet = ip.dst.split('.', 1)[0]
        if first_octet.isdigit() and 224 <= int(first_octet) <= 239:
            protocol_types.add('Multicast')

    return protocol_types
def _classify_frame_type(self, packet: Packet, dissection: Dict) -> str:
"""Classify the frame type based on dissection results"""
layers = dissection.get('layers', {})
# Check for Chapter 10 first
if 'chapter10' in layers and not layers['chapter10'].get('error'):
ch10_info = layers['chapter10']
# Check if it's a TMATS frame
if self._is_tmats_frame(packet, ch10_info):
return 'TMATS'
else:
2025-07-28 08:14:15 -04:00
# Use enhanced decoder information if available
if 'decoded_payload' in ch10_info:
decoded = ch10_info['decoded_payload']
data_type_name = decoded.get('data_type_name', 'CH10-Data')
2025-07-30 23:48:32 -04:00
# For timing analysis purposes, group frames by their actual timing behavior
# rather than their semantic meaning. Based on debug analysis:
# - Some timing frames have ~26s intervals (high-level timing)
# - Other frames (including some timing) have ~100ms intervals (data stream)
# Keep high-level timing frames separate (they have very different timing)
2025-07-28 08:14:15 -04:00
if 'ACTTS' in data_type_name:
return 'CH10-ACTTS'
2025-07-30 23:48:32 -04:00
# Note: Extended Timing frames often have the same ~100ms timing as data frames
# so they should be grouped with CH10-Data for accurate timing analysis
elif 'Sync' in data_type_name and 'Custom' in data_type_name:
return 'CH10-Sync'
elif 'Clock' in data_type_name and 'Custom' in data_type_name:
return 'CH10-Clock'
elif ('Time' in data_type_name or 'Timing' in data_type_name) and 'Custom' in data_type_name:
# Custom timing frames often have the 26s interval pattern
if 'Time' in data_type_name:
return 'CH10-Time'
else:
return 'CH10-Timing'
# Special data types that should remain separate
2025-07-28 08:14:15 -04:00
elif 'GPS NMEA' in data_type_name:
return 'CH10-GPS'
elif 'EAG ACMI' in data_type_name:
return 'CH10-ACMI'
elif 'Ethernet' in data_type_name:
return 'CH10-Ethernet'
elif 'Image' in data_type_name:
return 'CH10-Image'
elif 'UART' in data_type_name:
return 'CH10-UART'
elif 'CAN' in data_type_name:
return 'CH10-CAN'
2025-07-30 23:48:32 -04:00
# Everything else gets grouped as CH10-Data for consistent timing analysis
# This includes: Multi-Source, regular timing frames, custom data types, etc.
else:
return 'CH10-Data'
2025-07-28 08:14:15 -04:00
2025-07-25 15:52:16 -04:00
return 'CH10-Data'
# Check for other specialized protocols
if 'ptp' in layers and not layers['ptp'].get('error'):
ptp_info = layers['ptp']
msg_type = ptp_info.get('message_type_name', 'Unknown')
return f'PTP-{msg_type}'
if 'iena' in layers and not layers['iena'].get('error'):
iena_info = layers['iena']
packet_type = iena_info.get('packet_type_name', 'Unknown')
return f'IENA-{packet_type}'
# Fallback to basic protocol classification
if packet.haslayer(UDP):
udp_layer = packet[UDP]
sport, dport = udp_layer.sport, udp_layer.dport
if sport == 53 or dport == 53:
return 'DNS'
elif sport in [67, 68] or dport in [67, 68]:
return 'DHCP'
elif sport == 123 or dport == 123:
return 'NTP'
else:
return 'UDP'
if packet.haslayer(TCP):
tcp_layer = packet[TCP]
sport, dport = tcp_layer.sport, tcp_layer.dport
if sport == 80 or dport == 80:
return 'HTTP'
elif sport == 443 or dport == 443:
return 'HTTPS'
else:
return 'TCP'
# Check for other protocols
if packet.haslayer(IP):
ip_layer = packet[IP]
if ip_layer.proto == 2:
return 'IGMP'
elif ip_layer.proto == 1:
return 'ICMP'
return 'OTHER'
def _is_tmats_frame(self, packet: Packet, ch10_info: Dict) -> bool:
"""Check if a Chapter 10 frame contains TMATS data"""
data_type = ch10_info.get('data_type', 0)
# Data type 0x01 is typically TMATS
if data_type == 0x01:
return True
# Also check for TMATS text patterns in the payload
if packet.haslayer('Raw'):
from scapy.all import Raw
raw_data = bytes(packet[Raw])
# Look for TMATS-like patterns (ASCII text with TMATS keywords)
try:
# Check if we can find TMATS signature patterns
text_sample = raw_data[50:200] # Sample middle section to avoid headers
if b'\\' in text_sample and (b':' in text_sample or b';' in text_sample):
# Look for TMATS-style key-value pairs
if any(keyword in text_sample.upper() for keyword in [b'TMATS', b'R-', b'G-', b'P-', b'T-']):
return True
except:
pass
return False
def _update_frame_type_stats(self, flow: FlowStats, frame_type: str,
frame_num: int, timestamp: float, packet_size: int):
"""Update statistics for a specific frame type"""
if frame_type not in flow.frame_types:
flow.frame_types[frame_type] = FrameTypeStats(frame_type=frame_type)
ft_stats = flow.frame_types[frame_type]
ft_stats.count += 1
ft_stats.total_bytes += packet_size
ft_stats.timestamps.append(timestamp)
ft_stats.frame_numbers.append(frame_num)
# Calculate inter-arrival time for this frame type
if len(ft_stats.timestamps) > 1:
inter_arrival = timestamp - ft_stats.timestamps[-2]
ft_stats.inter_arrival_times.append(inter_arrival)
def _extract_transport_info(self, packet: Packet) -> Dict[str, Any]:
    """Extract transport protocol name and ports from *packet*.

    Returns a dict with keys 'protocol' (str), 'src_port' and 'dst_port'
    (int; 0 when the transport layer carries no ports). The return
    annotation previously used the builtin `any` function instead of
    `typing.Any`; fixed here.
    """
    info: Dict[str, Any] = {
        'protocol': 'Unknown',
        'src_port': 0,
        'dst_port': 0,
    }
    if packet.haslayer(UDP):
        layer = packet[UDP]
        info['protocol'] = 'UDP'
        info['src_port'] = layer.sport
        info['dst_port'] = layer.dport
    elif packet.haslayer(TCP):
        layer = packet[TCP]
        info['protocol'] = 'TCP'
        info['src_port'] = layer.sport
        info['dst_port'] = layer.dport
    elif packet.haslayer(IP):
        proto = packet[IP].proto
        # Common IP protocol numbers; anything else keeps a numeric tag.
        names = {1: 'ICMP', 2: 'IGMP', 6: 'TCP', 17: 'UDP'}
        info['protocol'] = names.get(proto, f'IP-{proto}')
    return info
def _perform_enhanced_analysis(self, packet: Packet, flow: FlowStats, frame_num: int, transport_info: Dict):
    """Run the enhanced Chapter 10 decoder and timing plugin on this packet."""
    # If the basic dissectors already flagged CH10 traffic, skip the
    # decoder's own confidence probe.
    already_ch10 = ("CHAPTER10" in flow.detected_protocol_types
                    or "CH10" in flow.detected_protocol_types)
    confidence = 1.0 if already_ch10 else self.enhanced_ch10_decoder.can_decode(packet, transport_info)
    if confidence <= 0.5:
        return

    # Full field extraction for this frame.
    frame_data = self.enhanced_ch10_decoder.decode_frame(packet, transport_info)
    if not frame_data:
        return

    # Mark the flow as enhanced-decoded on first success.
    if flow.enhanced_analysis.decoder_type == "Standard":
        flow.enhanced_analysis.decoder_type = "Chapter10_Enhanced"
        flow.detected_protocol_types.add("Chapter10")

    # Feed the timing-analysis plugin with flow context.
    if len(flow.timestamps) > 1:
        flow_duration = flow.timestamps[-1] - flow.timestamps[0]
    else:
        flow_duration = 1.0
    flow_context = {
        'flow_duration': flow_duration,
        'flow_key': f"{flow.src_ip}->{flow.dst_ip}",
    }
    timing_result = self.ch10_timing_plugin.analyze_frame(frame_data, flow_context)

    self._update_enhanced_analysis_data(flow, frame_data, timing_result)
def _update_enhanced_analysis_data(self, flow: FlowStats, frame_data, timing_result):
"""Update the enhanced analysis data structure"""
enhanced = flow.enhanced_analysis
# Store sample decoded field data for display (keep first few samples)
if len(enhanced.sample_decoded_fields) < 5: # Store up to 5 sample frames
frame_sample = {}
# Get all available fields from this frame
for field_name in self.enhanced_ch10_decoder.supported_fields:
field_value = frame_data.get_field(field_name.name)
if field_value is not None:
frame_sample[field_name.name] = field_value
if frame_sample: # Only store if we got some data
enhanced.sample_decoded_fields[f"frame_{len(enhanced.sample_decoded_fields)}"] = frame_sample
# Update available field names list
if not enhanced.available_field_names:
enhanced.available_field_names = [field.name for field in self.enhanced_ch10_decoder.supported_fields]
# Update timing analysis
if timing_result.internal_timestamp is not None:
enhanced.has_internal_timing = True
# Update running averages for timing
if timing_result.clock_drift_ppm is not None:
if enhanced.avg_clock_drift_ppm == 0:
enhanced.avg_clock_drift_ppm = timing_result.clock_drift_ppm
else:
# Simple running average
enhanced.avg_clock_drift_ppm = (enhanced.avg_clock_drift_ppm + timing_result.clock_drift_ppm) / 2
enhanced.max_clock_drift_ppm = max(enhanced.max_clock_drift_ppm, abs(timing_result.clock_drift_ppm))
# Update timing quality (use most recent)
enhanced.timing_quality = timing_result.timing_quality
# Update anomaly rate
if timing_result.anomaly_detected:
enhanced.anomaly_rate = (enhanced.anomaly_rate * (flow.frame_count - 1) + 1) / flow.frame_count
else:
enhanced.anomaly_rate = (enhanced.anomaly_rate * (flow.frame_count - 1)) / flow.frame_count
# Update confidence score
if enhanced.avg_confidence_score == 0:
enhanced.avg_confidence_score = timing_result.confidence_score
else:
enhanced.avg_confidence_score = (enhanced.avg_confidence_score + timing_result.confidence_score) / 2
# Update frame quality
frame_quality = frame_data.get_field('frame_quality_score', 0)
if frame_quality > 0:
if enhanced.avg_frame_quality == 0:
enhanced.avg_frame_quality = frame_quality
else:
enhanced.avg_frame_quality = (enhanced.avg_frame_quality + frame_quality) / 2
# Update error counts
if frame_data.get_field('rtc_sync_error', False):
enhanced.rtc_sync_errors += 1
if frame_data.get_field('format_error', False):
enhanced.format_errors += 1
if frame_data.get_field('overflow_error', False):
enhanced.overflow_errors += 1
# Update channel information
channel_id = frame_data.get_field('channel_id', 0)
if channel_id > 0:
enhanced.channel_count = max(enhanced.channel_count, channel_id)
# Update data type counters
if frame_data.get_field('is_analog_data', False):
enhanced.analog_channels = max(enhanced.analog_channels, 1)
if frame_data.get_field('is_pcm_data', False):
enhanced.pcm_channels = max(enhanced.pcm_channels, 1)
if frame_data.get_field('is_tmats_data', False):
enhanced.tmats_frames += 1
# Set primary data type
data_type_name = frame_data.get_field('data_type_name', 'Unknown')
if enhanced.primary_data_type == "Unknown":
enhanced.primary_data_type = data_type_name
2025-07-26 16:51:37 -04:00
def _classify_traffic(self, dst_ip: str) -> str:
"""Classify traffic as Unicast, Multicast, or Broadcast based on destination IP"""
try:
# Check for broadcast address
if dst_ip == '255.255.255.255':
return 'Broadcast'
# Check for multicast ranges
if dst_ip.startswith('224.') or dst_ip.startswith('239.'):
return 'Multicast'
# Check for other multicast ranges (224.0.0.0 to 239.255.255.255)
ip_parts = dst_ip.split('.')
if len(ip_parts) == 4:
first_octet = int(ip_parts[0])
if 224 <= first_octet <= 239:
return 'Multicast'
# Everything else is unicast
return 'Unicast'
except (ValueError, IndexError):
# If IP parsing fails, default to unknown
return 'Unknown'
2025-07-25 15:52:16 -04:00
def get_flows_summary(self) -> Dict:
    """Summarize tracked flows: flow count, distinct endpoints, flow table."""
    endpoints = set()
    for flow in self.flows.values():
        endpoints.update((flow.src_ip, flow.dst_ip))
    return {
        'total_flows': len(self.flows),
        'unique_ips': len(endpoints),
        'flows': self.flows,
    }