# dtsstreaming/tmats_packet.py

#!/usr/bin/env python3
"""
TMATS Packet class for IRIG106 Chapter 10 TMATS frame parsing
"""

import struct
from typing import Dict, Optional, List

try:
    from scapy.layers.inet import IP, UDP
except ImportError:
    print("Error: scapy library not found. Install with: pip install scapy")
    exit(1)


class TMATSPacket:
    """Represents an IRIG106 Chapter 10 TMATS packet.

    The constructor pulls addressing information and the UDP payload out of
    a scapy packet and then tries to interpret that payload as TMATS text.

    Attributes set by ``__init__``:
        raw_packet: The scapy packet that was passed in.
        original_frame_num: Frame number in the PCAP file, if supplied.
        timestamp: ``packet.time`` converted to ``float``.
        packet_size: ``len(packet)``.
        src_ip, dst_ip, src_port, dst_port: Addressing fields; empty
            strings / zeros when the packet has no IP+UDP layers.
        payload: UDP payload bytes (empty when there is no IP+UDP layer).
        ch10_header: Dict of decoded header fields when a sync word was
            found and the header could be unpacked, otherwise ``None``.
            Always defined once parsing has run.
        tmats_info: Dict describing the TMATS text, or ``None``.
        is_tmats: ``True`` when ``tmats_info`` is not ``None``.
    """

    # Payloads shorter than this are rejected without further inspection.
    _MIN_PAYLOAD_LEN = 12
    # Byte image of the 0xEB25 sync word when stored little-endian.
    _CH10_SYNC_BYTES = b'\x25\xEB'
    # TMATS data is taken to start this many bytes after the sync word.
    _CH10_HEADER_LEN = 24
    # The sync word is only searched for at offsets 0 .. window-1.
    _SYNC_SEARCH_WINDOW = 16
    # sync, channel id, packet length, data length, then four single bytes
    # (header version, sequence number, packet flags, data type).
    _HEADER_FIELDS = struct.Struct('<HHIIBBBB')
    # Substrings whose presence marks decoded text as TMATS-like.
    _TMATS_PATTERNS = ('\\', 'R-1\\', 'G\\', 'T-', 'P-', 'COMMENT:', 'DSI', 'DST')
    # Continuation-frame scan: how many start offsets to try, the sample
    # size decoded at each offset, and the characters that accept a sample.
    _ASCII_SCAN_LIMIT = 32
    _ASCII_SAMPLE_LEN = 10
    _ASCII_MARKERS = frozenset('RGTP\\-:')
    # Data offset assumed when the scan finds nothing (based on observation).
    _FALLBACK_DATA_START = 12

    def __init__(self, packet, original_frame_num: Optional[int] = None):
        """
        Initialize TMATS packet from raw scapy packet

        Args:
            packet: Raw scapy packet
            original_frame_num: Original frame number in PCAP file
        """
        self.raw_packet = packet
        self.original_frame_num: Optional[int] = original_frame_num

        # Extract basic packet info
        self.timestamp = float(packet.time)
        self.packet_size = len(packet)

        # Extract IP/UDP info if available
        if packet.haslayer(IP) and packet.haslayer(UDP):
            ip_layer = packet[IP]
            udp_layer = packet[UDP]

            self.src_ip = ip_layer.src
            self.dst_ip = ip_layer.dst
            self.src_port = udp_layer.sport
            self.dst_port = udp_layer.dport
            self.payload = bytes(udp_layer.payload)
        else:
            self.src_ip = ""
            self.dst_ip = ""
            self.src_port = 0
            self.dst_port = 0
            self.payload = bytes()

        # Defined up front so the attribute exists even for packets that
        # are rejected before any header parsing happens.
        self.ch10_header: Optional[Dict] = None

        # Parse TMATS content
        self.tmats_info = self._parse_tmats_content()
        self.is_tmats = self.tmats_info is not None

    def _parse_tmats_content(self) -> Optional[Dict]:
        """Parse TMATS content from payload.

        Returns:
            A dict with keys ``raw_data``, ``ascii_content``,
            ``data_start_offset``, ``data_length``, ``has_ch10_header`` and
            ``is_continuation`` when the payload decodes to text containing
            at least one TMATS-like pattern; otherwise ``None``.

        Side effects:
            Sets ``self.ch10_header`` (to ``None`` unless a header was found
            and unpacked).
        """
        # Reset first: every exit path below must leave the attribute set.
        self.ch10_header = None

        payload_len = len(self.payload)
        if payload_len < self._MIN_PAYLOAD_LEN:
            return None

        ch10_offset = self._find_ch10_sync()
        if ch10_offset is not None:
            # Full Chapter 10 header found; data follows the 24-byte header.
            self._parse_ch10_header(ch10_offset)
            data_start = ch10_offset + self._CH10_HEADER_LEN
        else:
            # Continuation frame: no sync word, so locate the text heuristically.
            data_start = self._find_ascii_data_start()

        if data_start is None or data_start >= payload_len:
            return None

        tmats_data = self.payload[data_start:]
        # errors='ignore' drops non-ASCII bytes instead of raising.
        ascii_content = tmats_data.decode('ascii', errors='ignore')

        if not any(pattern in ascii_content for pattern in self._TMATS_PATTERNS):
            return None

        return {
            'raw_data': tmats_data,
            'ascii_content': ascii_content,
            'data_start_offset': data_start,
            'data_length': len(tmats_data),
            'has_ch10_header': ch10_offset is not None,
            'is_continuation': ch10_offset is None
        }

    def _find_ch10_sync(self) -> Optional[int]:
        """Return the offset of the first sync word near the payload start.

        Only offsets below ``min(16, len(payload) - 24)`` are considered, so
        a match always has more than a full header's worth of bytes after it.
        Returns ``None`` when no sync word is found in that window.
        """
        window = min(self._SYNC_SEARCH_WINDOW,
                     len(self.payload) - self._CH10_HEADER_LEN)
        if window <= 0:
            return None
        # The end bound is exclusive and the pattern is two bytes long, so
        # ``window + 1`` admits match offsets 0 .. window-1.
        found = self.payload.find(self._CH10_SYNC_BYTES, 0, window + 1)
        return found if found != -1 else None

    def _parse_ch10_header(self, offset: int) -> None:
        """Parse Chapter 10 header if present.

        Decodes the first 16 bytes at ``offset`` into ``self.ch10_header``.
        If the payload is too short for that, ``self.ch10_header`` is set to
        ``None`` instead of raising.

        Args:
            offset: Payload offset of the sync word.
        """
        try:
            (sync_pattern, channel_id, packet_length, data_length,
             header_version, sequence_number, packet_flags,
             data_type) = self._HEADER_FIELDS.unpack_from(self.payload, offset)
        except (struct.error, IndexError):
            self.ch10_header = None
            return

        self.ch10_header = {
            'sync_pattern': f'0x{sync_pattern:04X}',
            'channel_id': channel_id,
            'packet_length': packet_length,
            'data_length': data_length,
            'header_version': header_version,
            'sequence_number': sequence_number,
            'packet_flags': f'0x{packet_flags:02X}',
            'data_type': f'0x{data_type:02X}',
            'ch10_offset': offset
        }

    def _find_ascii_data_start(self) -> Optional[int]:
        """Find where ASCII TMATS data starts in continuation frames.

        Tries each of the first 32 offsets that still has more than 10 bytes
        after it. The first offset whose 10-byte sample is pure ASCII and
        contains one of ``R G T P \\ - :`` wins.

        Returns:
            The winning offset; otherwise 12 when the payload is longer than
            12 bytes (fallback based on observation); otherwise ``None``.
        """
        payload_len = len(self.payload)
        # ``i + sample_len < payload_len`` bounds the usable start offsets.
        last_start = min(self._ASCII_SCAN_LIMIT,
                         payload_len - self._ASCII_SAMPLE_LEN)
        for start in range(last_start):
            sample = self.payload[start:start + self._ASCII_SAMPLE_LEN]
            try:
                decoded = sample.decode('ascii')
            except UnicodeDecodeError:
                # Sample holds a non-ASCII byte; still inside a binary header.
                continue
            if not self._ASCII_MARKERS.isdisjoint(decoded):
                return start

        if payload_len > self._FALLBACK_DATA_START:
            return self._FALLBACK_DATA_START
        return None

    def get_ascii_content(self) -> str:
        """Get the ASCII content of the TMATS data ("" when not TMATS)."""
        if self.tmats_info:
            return self.tmats_info['ascii_content']
        return ""

    def is_continuation_frame(self) -> bool:
        """Check if this is a TMATS continuation frame (without Ch10 header)"""
        if self.tmats_info:
            return self.tmats_info['is_continuation']
        return False

    def has_chapter10_header(self) -> bool:
        """Check if this frame has a full Chapter 10 header"""
        if self.tmats_info:
            return self.tmats_info['has_ch10_header']
        return False


class TMATSAssembler:
    """Assembles TMATS data from multiple frames.

    Frames are collected with ``add_frame`` and stitched together by
    ``assemble``, which orders them by timestamp, cuts the text stream into
    files at END markers and collapses near-identical files.

    Attributes:
        tmats_frames: Accepted frames, in insertion order.
        tmats_files: Every file produced by the last ``assemble`` call,
            including duplicates and a trailing partial file.
        assembled_content: Text returned by the last ``assemble`` call.
    """

    # Substrings that close the TMATS file currently being collected.
    _END_PATTERNS = ('TMATS END', 'TMATS_END', '-----END', 'END----')
    # A cleaned line is kept if it starts with one of these ...
    _LINE_PREFIXES = ('G\\', 'R-', 'V-', 'T-', 'P-', 'COMMENT:')
    # ... or contains one of these.
    _LINE_MARKERS = ('TMATS END', '----')
    # Two files count as the same when their line-set similarity exceeds this.
    _SIMILARITY_THRESHOLD = 0.8

    def __init__(self):
        self.tmats_frames: List["TMATSPacket"] = []
        self.assembled_content = ""
        self.tmats_files: List[str] = []

    def add_frame(self, tmats_packet: "TMATSPacket") -> None:
        """Add a TMATS frame to the assembler.

        Frames whose ``is_tmats`` flag is false are silently ignored.
        """
        if tmats_packet.is_tmats:
            self.tmats_frames.append(tmats_packet)

    def assemble(self) -> str:
        """Assemble TMATS frames into complete TMATS files, stopping at END markers.

        Returns:
            "" when there are no frames. The raw text of the file when only
            one unique file exists. Otherwise the cleaned text of every
            unique file, with a banner in front of each file after the first.

        Side effects:
            Replaces ``self.tmats_files`` and ``self.assembled_content``
            (both are left untouched when there are no frames).
        """
        if not self.tmats_frames:
            return ""

        # Sort frames by timestamp to ensure correct order (stable sort, so
        # frames with equal timestamps keep their insertion order).
        ordered = sorted(self.tmats_frames, key=lambda frame: frame.timestamp)

        # A marker may be split across two frames. Carrying the last
        # (longest marker - 1) characters forward lets the next frame
        # complete it, without ever holding a whole, already-seen marker.
        carry_len = max(len(pattern) for pattern in self._END_PATTERNS) - 1

        files: List[str] = []
        pieces: List[str] = []
        carry = ""
        for frame in ordered:
            content = frame.get_ascii_content()
            if not content:
                continue

            pieces.append(content)
            window = carry + content
            if self._contains_tmats_end(window):
                # Complete TMATS file found; start collecting a new one.
                files.append(''.join(pieces))
                pieces = []
                carry = ""
            else:
                carry = window[-carry_len:]

        # Handle any remaining partial TMATS content
        if pieces:
            files.append(''.join(pieces))

        self.tmats_files = files

        unique_files = self._get_unique_tmats_files()
        if len(unique_files) == 1:
            self.assembled_content = unique_files[0]
        else:
            # Zero or several unique files; the formatter maps [] to "".
            self.assembled_content = self._format_multiple_tmats_files(unique_files)

        return self.assembled_content

    def _contains_tmats_end(self, content: str) -> bool:
        """Check if content contains a TMATS END marker"""
        return any(pattern in content for pattern in self._END_PATTERNS)

    def _get_unique_tmats_files(self) -> List[str]:
        """Get unique TMATS files, removing duplicates.

        Files are compared by their cleaned form. A file is dropped when its
        cleaned form is empty or equivalent to an earlier kept file. The
        returned entries are the original (uncleaned) strings.
        """
        unique_files: List[str] = []
        # Cleaned forms of the kept files, cached to avoid re-cleaning.
        kept_cleaned: List[str] = []

        for tmats_file in self.tmats_files:
            cleaned = self._clean_tmats_content(tmats_file)
            if not cleaned.strip():
                continue
            if any(self._are_tmats_equivalent(cleaned, seen) for seen in kept_cleaned):
                continue
            unique_files.append(tmats_file)
            kept_cleaned.append(cleaned)

        return unique_files

    def _clean_tmats_content(self, content: str) -> str:
        """Clean TMATS content for comparison by removing junk characters.

        Non-printable characters other than newlines are removed. Each line
        is then stripped and kept only if it has at least 3 characters and
        either starts with a known TMATS prefix or contains an END/dash
        marker. Kept lines are joined with newlines.
        """
        printable = ''.join(c for c in content if c.isprintable() or c == '\n')

        kept_lines = []
        for line in printable.split('\n'):
            stripped = line.strip()
            # Skip lines that are mostly junk characters
            if len(stripped) < 3:
                continue
            if (stripped.startswith(self._LINE_PREFIXES)
                    or any(marker in stripped for marker in self._LINE_MARKERS)):
                kept_lines.append(stripped)

        return '\n'.join(kept_lines)

    def _are_tmats_equivalent(self, content1: str, content2: str) -> bool:
        """Check if two TMATS contents are equivalent (accounting for minor differences).

        Each content is reduced to its set of non-blank, stripped lines.
        The two are equivalent when shared lines divided by all distinct
        lines exceeds 0.8. An empty side is never equivalent to anything.
        """
        lines1 = {line.strip() for line in content1.split('\n') if line.strip()}
        lines2 = {line.strip() for line in content2.split('\n') if line.strip()}

        if not lines1 or not lines2:
            return False

        similarity = len(lines1 & lines2) / len(lines1 | lines2)
        return similarity > self._SIMILARITY_THRESHOLD

    def _format_multiple_tmats_files(self, tmats_files: List[str]) -> str:
        """Format multiple TMATS files with separators.

        Every file is emitted in cleaned form. Files after the first are
        preceded by a ``TMATS FILE #n`` banner; the first has no banner.
        """
        if not tmats_files:
            return ""

        if len(tmats_files) == 1:
            return self._clean_tmats_content(tmats_files[0])

        formatted_parts = []
        for i, tmats_file in enumerate(tmats_files):
            if i > 0:
                formatted_parts.append(f"\n{'='*60}\nTMATS FILE #{i+1}\n{'='*60}\n")
            formatted_parts.append(self._clean_tmats_content(tmats_file))

        return ''.join(formatted_parts)

    def get_frame_count(self) -> int:
        """Get the number of TMATS frames"""
        return len(self.tmats_frames)

    def get_file_count(self) -> int:
        """Get the number of unique TMATS files found.

        Retransmitted copies of the same file count once; use
        ``len(self.tmats_files)`` for the raw count including duplicates.
        """
        return len(self._get_unique_tmats_files())

    def get_total_length(self) -> int:
        """Get the total length of assembled TMATS data"""
        return len(self.assembled_content)