Files
StreamLens/analyzer/utils/signal_visualizer.py
2025-07-26 00:02:25 -04:00

627 lines
25 KiB
Python

"""
Signal visualization module for Chapter 10 telemetry data
"""
import threading
import queue
import time
from typing import Dict, List, Optional, Tuple, Any, TYPE_CHECKING
from dataclasses import dataclass
import struct
try:
import numpy as np
except ImportError:
print("Error: numpy library required. Install with: pip install numpy")
import sys
sys.exit(1)
# Lazy loading flags - matplotlib will only be imported when needed
MATPLOTLIB_AVAILABLE = None
matplotlib = None
plt = None
animation = None
def _ensure_matplotlib_loaded(force_backend=None):
    """Import matplotlib on first use, caching the outcome in module globals.

    Returns True when matplotlib (and a usable backend) is available,
    False otherwise. Subsequent calls return the cached answer.
    """
    global MATPLOTLIB_AVAILABLE, matplotlib, plt, animation
    # A previous attempt already decided the answer - reuse it.
    if MATPLOTLIB_AVAILABLE is not None:
        return MATPLOTLIB_AVAILABLE
    try:
        import os
        # Honor the opt-out environment variable before importing anything heavy.
        if os.environ.get('STREAMLENS_DISABLE_VISUALIZATION'):
            raise ImportError("Visualization disabled via environment variable")
        import matplotlib as mpl
        matplotlib = mpl
        if force_backend:
            # A specific backend was requested (e.g., for GUI mode).
            matplotlib.use(force_backend)
        else:
            # TUI mode: prefer the non-interactive Agg backend to avoid GUI
            # windows, then fall back to common interactive toolkits.
            failures = []
            for candidate in ('Agg', 'TkAgg', 'Qt5Agg'):
                try:
                    matplotlib.use(candidate)
                    break
                except Exception as err:
                    failures.append(err)
            else:
                raise ImportError(f"No suitable matplotlib backend available. Tried Agg ({failures[0]}), TkAgg ({failures[1]}), Qt5Agg ({failures[2]})")
        import matplotlib.pyplot as mplot
        import matplotlib.animation as manim
        plt = mplot
        animation = manim
        MATPLOTLIB_AVAILABLE = True
        return True
    except ImportError as e:
        MATPLOTLIB_AVAILABLE = False
        print(f"Warning: matplotlib not available: {e}. Signal visualization disabled.")
        return False
# Imported only for static type checking, so scapy and the flow-stats model
# are not pulled in at runtime.
if TYPE_CHECKING:
    from ..models.flow_stats import FlowStats
    from scapy.all import Packet
@dataclass
class TMATSMetadata:
    """TMATS metadata for signal reconstruction.

    Attributes:
        channel_configs: channel id -> parameter dict (gain, name, units, ...).
        sample_rate: global sample rate in Hz, if a G- rate attribute was parsed.
        data_format: global data format string, if a G- format attribute was parsed.
        channels: channel ids; derived from channel_configs when not supplied.
    """
    channel_configs: Dict[str, Dict[str, Any]]
    sample_rate: Optional[float] = None
    data_format: Optional[str] = None
    # Optional because None is a deliberate sentinel meaning "derive the list
    # from channel_configs" in __post_init__ (the original annotation claimed
    # List[str] while defaulting to None).
    channels: Optional[List[str]] = None

    def __post_init__(self):
        # Default the channel list to every channel that has a config entry.
        if self.channels is None:
            self.channels = list(self.channel_configs.keys())
@dataclass
class SignalData:
    """Represents decoded signal data"""
    # Sample times in seconds, one entry per sample (generated by the decoders
    # as np.arange(n) / sample_rate).
    timestamps: np.ndarray
    channels: Dict[str, np.ndarray]  # channel_name -> data array
    # TMATS metadata used for scaling/naming; may have empty channel_configs.
    metadata: TMATSMetadata
    # Samples per second used to build `timestamps`.
    sample_rate: float
class TMATSParser:
    """Parser for TMATS (Telemetry Attributes Transfer Standard) data"""

    # Map TMATS parameter codes to descriptive attribute names.
    _PARAM_MAP = {
        'G': 'gain',
        'N': 'name',
        'EU': 'units',
        'MN': 'min_value',
        'MX': 'max_value',
        'OF': 'offset',
        'FS': 'full_scale'
    }
    # Parameters whose values should be stored as floats when possible.
    _NUMERIC_PARAMS = frozenset(['gain', 'min_value', 'max_value', 'offset', 'full_scale'])

    def __init__(self):
        # Reserved for accumulated raw TMATS attributes (currently unused).
        self.tmats_data = {}

    def parse_tmats_frame(self, payload: bytes) -> Optional['TMATSMetadata']:
        """Parse TMATS data from Chapter 10 payload.

        Returns parsed metadata, or None when no ASCII TMATS text is found
        or parsing fails.
        """
        try:
            # Skip Chapter 10 header and look for ASCII text
            text_start = self._find_text_start(payload)
            if text_start is None:
                return None
            text_data = payload[text_start:].decode('ascii', errors='ignore')
            return self._parse_tmats_text(text_data)
        except Exception as e:
            # Best-effort parser: report and fall back to "no metadata".
            print(f"TMATS parsing error: {e}")
            return None

    def _find_text_start(self, payload: bytes) -> Optional[int]:
        """Find start of ASCII text in payload.

        Scans the first ~100 bytes for a 20-byte window that decodes as
        strict ASCII and contains TMATS-style '\\' plus ':' or ';' markers.
        """
        for i in range(min(100, len(payload) - 10)):
            try:
                sample = payload[i:i+20].decode('ascii', errors='strict')
                if '\\' in sample and (':' in sample or ';' in sample):
                    return i
            except UnicodeDecodeError:
                # Non-ASCII bytes in this window; keep scanning.
                continue
        return None

    def _parse_tmats_text(self, text: str) -> 'TMATSMetadata':
        """Parse TMATS text format into a TMATSMetadata object."""
        channel_configs = {}
        sample_rate = None
        data_format = None
        # Split into lines and parse key-value pairs
        lines = text.split('\\')[1:]  # Split on backslash, skip first empty element
        for line in lines:
            line = line.strip()
            if not line:
                continue
            # Look for key-value pairs separated by colon or semicolon
            if ':' in line:
                key, value = line.split(':', 1)
            elif ';' in line:
                key, value = line.split(';', 1)
            else:
                continue
            key = key.strip()
            value = value.strip().rstrip(';')
            if key.startswith('R-'):
                # R-parameters are channel-related
                self._parse_channel_parameter(key, value, channel_configs)
            elif key.startswith('G-'):
                # G-parameters are global
                if 'SAMPLE' in key.upper() or 'RATE' in key.upper():
                    try:
                        sample_rate = float(value)
                    except ValueError:
                        # Non-numeric rate attribute; leave sample_rate unset.
                        pass
                elif 'FORMAT' in key.upper():
                    data_format = value
        return TMATSMetadata(
            channel_configs=channel_configs,
            sample_rate=sample_rate,
            data_format=data_format
        )

    def _parse_channel_parameter(self, key: str, value: str, configs: Dict):
        """Parse channel-specific TMATS parameters into `configs`.

        Keys look like "R-1\\G" or "R-CH1\\N": channel id before the
        backslash, parameter code after it.
        """
        parts = key.split('\\')
        if len(parts) < 2:
            return
        channel_part = parts[0]  # e.g., "R-1" or "R-CH1"
        param_part = parts[1]  # e.g., "G", "N", "EU"
        # Extract channel identifier
        if channel_part.startswith('R-'):
            channel_id = channel_part[2:]
        else:
            return
        if channel_id not in configs:
            configs[channel_id] = {}
        param_name = self._PARAM_MAP.get(param_part, param_part.lower())
        # Store numeric parameters as floats; keep the raw string otherwise.
        if param_name in self._NUMERIC_PARAMS:
            try:
                value = float(value)
            except ValueError:
                pass
        configs[channel_id][param_name] = value
class Chapter10SignalDecoder:
    """Decoder for Chapter 10 analog and PCM data"""

    # Chapter 10 packet header length in bytes; sample data follows it.
    HEADER_LEN = 24
    # Fallback rate (Hz) when TMATS does not supply a usable sample rate.
    DEFAULT_SAMPLE_RATE = 1000.0

    def __init__(self, tmats_metadata: Optional['TMATSMetadata'] = None):
        self.tmats_metadata = tmats_metadata

    def decode_analog_data(self, payload: bytes, channel_id: int, data_type: int) -> Optional['SignalData']:
        """Decode analog format data.

        Returns decoded SignalData, or None if the payload is too short,
        the data type is unhandled, or decoding fails.
        """
        try:
            # Skip Chapter 10 header and look for data
            data_start = self.HEADER_LEN
            if len(payload) <= data_start:
                return None
            raw_data = payload[data_start:]
            # Determine data format based on data_type
            if data_type == 0x72:  # Analog Format 2
                return self._decode_analog_format2(raw_data, channel_id)
            elif data_type in [0x73, 0x74, 0x75]:  # Other analog formats
                return self._decode_analog_generic(raw_data, channel_id, data_type)
            return None
        except Exception as e:
            print(f"Analog decode error: {e}")
            return None

    def decode_pcm_data(self, payload: bytes, channel_id: int) -> Optional['SignalData']:
        """Decode PCM format data (basic; not format-specific yet)."""
        try:
            # Skip Chapter 10 header
            data_start = self.HEADER_LEN
            if len(payload) <= data_start:
                return None
            raw_data = payload[data_start:]
            # Basic PCM decoding - this would need to be enhanced based on specific format
            return self._decode_pcm_generic(raw_data, channel_id)
        except Exception as e:
            print(f"PCM decode error: {e}")
            return None

    def _effective_sample_rate(self) -> float:
        """Return the TMATS sample rate when positive, else the default."""
        if self.tmats_metadata and self.tmats_metadata.sample_rate and self.tmats_metadata.sample_rate > 0:
            return self.tmats_metadata.sample_rate
        return self.DEFAULT_SAMPLE_RATE

    def _channel_config(self, channel_id: int) -> Optional[Dict[str, Any]]:
        """Return the TMATS config dict for a channel, or None if absent."""
        if self.tmats_metadata and str(channel_id) in self.tmats_metadata.channel_configs:
            return self.tmats_metadata.channel_configs[str(channel_id)]
        return None

    def _decode_analog_format2(self, raw_data: bytes, channel_id: int) -> Optional['SignalData']:
        """Decode Analog Format 2 data as 16-bit little-endian signed samples."""
        if len(raw_data) < 4:
            return None
        try:
            num_samples = len(raw_data) // 2
            if num_samples == 0:
                return None
            # Truncate to a whole number of 16-bit samples: struct.unpack
            # requires an exact-size buffer, so an odd trailing byte would
            # otherwise abort the whole decode.
            samples = struct.unpack(f'<{num_samples}h', raw_data[:num_samples * 2])
            data_array = np.array(samples, dtype=np.float32)
            # Apply TMATS scaling if available
            config = self._channel_config(channel_id)
            if config is not None:
                gain = config.get('gain', 1.0)
                offset = config.get('offset', 0.0)
                data_array = data_array * gain + offset
            # Generate timestamps (would be more sophisticated in real implementation)
            sample_rate = self._effective_sample_rate()
            timestamps = np.arange(len(data_array)) / sample_rate
            channel_name = f"CH{channel_id}"
            if config is not None:
                channel_name = config.get('name', channel_name)
            return SignalData(
                timestamps=timestamps,
                channels={channel_name: data_array},
                metadata=self.tmats_metadata or TMATSMetadata({}),
                sample_rate=sample_rate
            )
        except Exception as e:
            print(f"Format 2 decode error: {e}")
            return None

    def _decode_analog_generic(self, raw_data: bytes, channel_id: int, data_type: int) -> Optional['SignalData']:
        """Generic analog data decoder."""
        # This would be implemented based on specific format requirements
        return self._decode_analog_format2(raw_data, channel_id)  # Fallback for now

    def _decode_pcm_generic(self, raw_data: bytes, channel_id: int) -> Optional['SignalData']:
        """Generic PCM decoder (16-bit little-endian unsigned samples)."""
        # Basic PCM implementation - would need format-specific handling
        try:
            num_samples = len(raw_data) // 2
            if num_samples == 0:
                return None
            # Truncate to a whole number of samples (see _decode_analog_format2).
            samples = struct.unpack(f'<{num_samples}H', raw_data[:num_samples * 2])  # Unsigned 16-bit
            data_array = np.array(samples, dtype=np.float32)
            sample_rate = self._effective_sample_rate()
            timestamps = np.arange(len(data_array)) / sample_rate
            channel_name = f"PCM_CH{channel_id}"
            return SignalData(
                timestamps=timestamps,
                channels={channel_name: data_array},
                metadata=self.tmats_metadata or TMATSMetadata({}),
                sample_rate=sample_rate
            )
        except Exception as e:
            print(f"PCM decode error: {e}")
            return None
class SignalVisualizer:
    """Thread-safe matplotlib-based signal visualizer for Chapter 10 data"""

    # Chapter 10 sync pattern 0xEB25, little-endian on the wire.
    _SYNC_BYTES = struct.pack('<H', 0xEB25)

    def __init__(self):
        self.active_windows = {}  # flow_key -> matplotlib Figure kept open in GUI mode
        self.tmats_cache = {}  # reserved for caching parsed TMATS per flow
        self.visualization_queue = queue.Queue()
        self._processing_visualizations = False
        self._force_file_output = False  # Can be set externally to force file output
        self._in_tui_context = False  # Track if we're in TUI context

    def visualize_flow_signals(self, flow: 'FlowStats', packets: List['Packet'], gui_mode: bool = False) -> None:
        """Visualize signals from a Chapter 10 flow.

        In TUI mode this extracts TMATS metadata and signal data from the
        flow's packets and renders a plot (to a file when non-interactive).
        In GUI mode this is a no-op: embedded plots must use the _extract
        methods directly.
        """
        # IMPORTANT: For GUI mode with embedded plots, this method should NOT be called
        # Embedded plots should use the _extract methods directly
        if gui_mode:
            print("WARNING: visualize_flow_signals called in GUI mode - should use embedded plots instead")
            return  # Don't create floating windows in GUI mode
        # For TUI mode, use Agg backend to avoid GUI windows
        if not _ensure_matplotlib_loaded():
            print("Matplotlib not available - cannot visualize signals")
            return
        flow_key = f"{flow.src_ip}->{flow.dst_ip}"
        # Extract TMATS metadata from flow
        tmats_metadata = self._extract_tmats_from_flow(packets)
        # Extract and decode signal data
        signal_data = self._extract_signals_from_flow(packets, tmats_metadata)
        if not signal_data:
            print(f"No decodable Chapter 10 signal data found in flow {flow_key}")
            return
        # Create or update visualization window
        self._create_signal_window(flow_key, signal_data, flow)

    def _extract_tmats_from_flow(self, packets: List['Packet']) -> Optional['TMATSMetadata']:
        """Extract TMATS metadata from Chapter 10 packets in flow.

        Returns the first successfully parsed TMATS frame, or None.
        """
        parser = TMATSParser()
        for packet in packets:
            if not hasattr(packet, 'haslayer') or not packet.haslayer('Raw'):
                continue
            # Local import keeps scapy out of module import time.
            from scapy.all import Raw
            raw_data = bytes(packet[Raw])
            # Look for TMATS patterns
            if b'TMATS' in raw_data or b'R-' in raw_data:
                tmats_metadata = parser.parse_tmats_frame(raw_data)
                if tmats_metadata:
                    return tmats_metadata
        return None

    def _extract_signals_from_flow(self, packets: List['Packet'], tmats_metadata: Optional['TMATSMetadata']) -> List['SignalData']:
        """Extract signal data from Chapter 10 packets and consolidate by channel"""
        decoder = Chapter10SignalDecoder(tmats_metadata)
        # Collect decoded fragments keyed by (channel_id, data_type).
        channel_data = {}
        for packet in packets:
            if not hasattr(packet, 'haslayer') or not packet.haslayer('Raw'):
                continue
            from scapy.all import Raw
            raw_data = bytes(packet[Raw])
            # Try to parse as Chapter 10
            ch10_offset = self._find_chapter10_sync(raw_data)
            if ch10_offset is None:
                continue
            try:
                # Parse header to get data type and channel
                header_start = ch10_offset
                if len(raw_data) < header_start + 24:
                    continue
                channel_id = struct.unpack('<H', raw_data[header_start+2:header_start+4])[0]
                data_type = struct.unpack('<H', raw_data[header_start+12:header_start+14])[0]
                # Decode based on data type
                if data_type in [0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78]:  # Analog formats
                    signal_data = decoder.decode_analog_data(raw_data[ch10_offset:], channel_id, data_type)
                elif data_type in [0x04, 0x08]:  # PCM formats
                    signal_data = decoder.decode_pcm_data(raw_data[ch10_offset:], channel_id)
                else:
                    continue
                if signal_data:
                    channel_data.setdefault((channel_id, data_type), []).append(signal_data)
            except Exception:
                # Malformed packet; skip it and keep scanning the flow.
                continue
        # Consolidate fragments so each channel yields one continuous signal.
        consolidated_signals = []
        for (channel_id, data_type), signal_list in channel_data.items():
            if not signal_list:
                continue
            consolidated_signal = self._consolidate_channel_signals(signal_list, channel_id, data_type)
            if consolidated_signal:
                consolidated_signals.append(consolidated_signal)
        return consolidated_signals

    def _consolidate_channel_signals(self, signal_list: List['SignalData'], channel_id: int, data_type: int) -> Optional['SignalData']:
        """Consolidate multiple SignalData objects from the same channel into one continuous signal"""
        if not signal_list:
            return None
        # Use the first signal's metadata as the base
        base_signal = signal_list[0]
        all_timestamps = []
        all_channel_data = {}
        # Initialize channel data dictionaries
        for channel_name in base_signal.channels.keys():
            all_channel_data[channel_name] = []
        # Sort signals by their first timestamp to maintain chronological order
        signal_list = sorted(signal_list, key=lambda s: s.timestamps[0] if len(s.timestamps) > 0 else 0)
        # Track time offset for continuous timeline
        time_offset = 0.0
        for i, signal_data in enumerate(signal_list):
            if i == 0:
                # First signal - use timestamps as-is
                all_timestamps.extend(signal_data.timestamps)
            else:
                # Subsequent signals - add time offset to create continuous timeline
                if len(all_timestamps) > 0:
                    # Use safe sample rate (avoid division by None or zero)
                    safe_sample_rate = signal_data.sample_rate if signal_data.sample_rate and signal_data.sample_rate > 0 else 1000.0
                    time_offset = all_timestamps[-1] + (1.0 / safe_sample_rate)
                # Add offset timestamps
                offset_timestamps = signal_data.timestamps + time_offset
                all_timestamps.extend(offset_timestamps)
            # Concatenate channel data
            for channel_name, data in signal_data.channels.items():
                if channel_name in all_channel_data:
                    all_channel_data[channel_name].extend(data)
        # Convert lists to numpy arrays
        consolidated_timestamps = np.array(all_timestamps)
        consolidated_channels = {}
        for channel_name, data_list in all_channel_data.items():
            if data_list:  # Only include channels that have data
                consolidated_channels[channel_name] = np.array(data_list)
        if not consolidated_channels:
            return None
        # Create consolidated SignalData object
        return SignalData(
            timestamps=consolidated_timestamps,
            channels=consolidated_channels,
            metadata=base_signal.metadata,
            sample_rate=base_signal.sample_rate
        )

    def _find_chapter10_sync(self, raw_data: bytes) -> Optional[int]:
        """Find Chapter 10 sync pattern in raw data.

        Returns the byte offset of the first little-endian 0xEB25 sync word,
        or None if absent. Uses bytes.find (C-speed scan) instead of a
        per-offset struct.unpack loop.
        """
        offset = raw_data.find(self._SYNC_BYTES)
        return offset if offset != -1 else None

    def _create_signal_window(self, flow_key: str, signal_data_list: List['SignalData'], flow: 'FlowStats'):
        """Create matplotlib window for signal visualization.

        Chooses between synchronous rendering (file output) and a background
        thread depending on the active matplotlib backend and context.
        """
        # Check the current backend to determine output method
        backend = matplotlib.get_backend()
        if backend == 'Agg':
            # Non-interactive backend - always save to files
            self._run_signal_window(flow_key, signal_data_list, flow)
        elif backend in ['TkAgg', 'Qt5Agg', 'Qt4Agg', 'GTKAgg', 'MacOSX']:
            # Interactive backends - different handling for TUI vs GUI
            if self._in_tui_context:
                print(f"Note: Interactive matplotlib backend detected ({backend})")
                print("Saving plots as files to avoid threading issues with TUI")
                # Run visualization synchronously to avoid threading issues
                self._run_signal_window(flow_key, signal_data_list, flow)
            else:
                # GUI mode - can use interactive display safely
                self._run_signal_window(flow_key, signal_data_list, flow)
        else:
            # Other backends, use threading safely
            thread = threading.Thread(
                target=self._run_signal_window,
                args=(flow_key, signal_data_list, flow),
                daemon=True
            )
            thread.start()

    def _run_signal_window(self, flow_key: str, signal_data_list: List['SignalData'], flow: 'FlowStats'):
        """Run signal visualization (thread-safe).

        Plots one subplot per SignalData; saves to a PNG file for the Agg
        backend / TUI context, otherwise keeps the figure referenced for
        GUI-mode callers (never calls plt.show()).
        """
        try:
            if not signal_data_list:
                print("No signal data to visualize")
                return
            fig, axes = plt.subplots(len(signal_data_list), 1, figsize=(12, 8))
            if len(signal_data_list) == 1:
                # plt.subplots returns a bare Axes for a single row; normalize.
                axes = [axes]
            fig.suptitle(f'Chapter 10 Signals - Flow: {flow_key}', fontsize=14)
            for idx, signal_data in enumerate(signal_data_list):
                ax = axes[idx] if idx < len(axes) else axes[-1]
                # Plot each channel in the signal data
                for channel_name, data in signal_data.channels.items():
                    ax.plot(signal_data.timestamps, data, label=channel_name, linewidth=0.8)
                ax.set_xlabel('Time (s)')
                ax.set_ylabel('Amplitude')
                ax.grid(True, alpha=0.3)
                ax.legend()
                # Add metadata info
                if signal_data.metadata and signal_data.metadata.channel_configs:
                    config_info = []
                    for ch_id, config in signal_data.metadata.channel_configs.items():
                        if 'units' in config:
                            config_info.append(f"CH{ch_id}: {config.get('units', 'Unknown')}")
                    if config_info:
                        ax.set_title(f"Channels: {', '.join(config_info)}")
            plt.tight_layout()
            # Handle display based on backend
            backend = matplotlib.get_backend()
            if backend == 'Agg' or self._in_tui_context:
                # Save to file for non-interactive backend or TUI context
                filename = f"signal_plot_{flow_key.replace('->', '_to_').replace('.', '_')}.png"
                plt.savefig(filename, dpi=300, bbox_inches='tight')
                # Bug fix: report the actual output path (the message previously
                # contained a dead placeholder instead of the filename).
                print(f"Signal plot saved to {filename}")
                plt.close(fig)
            else:
                # Store reference but DO NOT show for GUI mode embedded plots
                # GUI mode should only use embedded widgets, not floating windows
                self.active_windows[flow_key] = fig
                # Do not call plt.show() - this should only be used for TUI mode file output
        except Exception as e:
            print(f"Signal visualization error: {e}")

    def close_flow_window(self, flow_key: str):
        """Close visualization window for a flow"""
        if flow_key in self.active_windows:
            plt.close(self.active_windows[flow_key])
            del self.active_windows[flow_key]

    def close_all_windows(self):
        """Close all visualization windows"""
        for fig in self.active_windows.values():
            plt.close(fig)
        self.active_windows.clear()
# Global visualizer instance
# Module-level singleton shared by all callers of this module.
signal_visualizer = SignalVisualizer()