# Files: dtsstreaming/pcap_analyzer.py
# 489 lines, 20 KiB, Python
#!/usr/bin/env python3
"""
PCAP Analyzer for IRIG106 Chapter 10 and IEEE1588 PTP frames
Analyzes ethernet traffic with Chapter 10 streaming data and PTP frames
"""
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import argparse
import statistics
from chapter10_packet import Chapter10Packet
from ptp_packet import PTPPacket
from tmats_packet import TMATSPacket, TMATSAssembler
try:
import scapy.all as scapy
from scapy.layers.inet import IP, UDP
from scapy.layers.l2 import Ether
except ImportError:
print("Error: scapy library not found. Install with: pip install scapy")
exit(1)
try:
import pandas as pd
except ImportError:
print("Error: pandas library not found. Install with: pip install pandas")
exit(1)
try:
import numpy as np
except ImportError:
print("Error: numpy library not found. Install with: pip install numpy")
exit(1)
class PcapAnalyzer:
    """Main analyzer class for PCAP files.

    Reads a capture with scapy and classifies each UDP packet as IRIG106
    Chapter 10 streaming data, IEEE1588 PTP, and/or a TMATS frame (a packet
    may fall into more than one category).  Offers summary, table, and
    statistical reports over the collected packets.
    """

    def __init__(self, pcap_file: str):
        # Path to the capture file; it is only read when analyze() is called.
        self.pcap_file = pcap_file
        self.ch10_packets: List[Chapter10Packet] = []
        self.ptp_packets: List[PTPPacket] = []
        # Collects TMATS frames (including continuation frames) for later assembly.
        self.tmats_assembler = TMATSAssembler()

    def analyze(self):
        """Read the PCAP file and classify every packet.

        Populates self.ch10_packets, self.ptp_packets, and the TMATS
        assembler.  A failure to read the capture is reported to stdout and
        the method returns without raising.
        """
        print(f"Analyzing PCAP file: {self.pcap_file}")
        try:
            packets = scapy.rdpcap(self.pcap_file)
        except Exception as e:
            # Best-effort CLI tool: report and bail out rather than traceback.
            print(f"Error reading PCAP file: {e}")
            return
        print(f"Total packets: {len(packets)}")
        for i, packet in enumerate(packets):
            if i % 1000 == 0:
                # Progress indicator for large captures.
                print(f"Processing packet {i}...")
            self._process_packet(packet, i)
        print(f"Found {len(self.ch10_packets)} Chapter 10 packets")
        print(f"Found {len(self.ptp_packets)} PTP packets")
        print(f"Found {self.tmats_assembler.get_frame_count()} TMATS frames")

    def _process_packet(self, packet, packet_index):
        """Classify one scapy packet; non-IP/UDP traffic is ignored.

        The same packet may be recorded as PTP, Chapter 10, and TMATS —
        the checks below are independent, not mutually exclusive.
        """
        if not packet.haslayer(IP) or not packet.haslayer(UDP):
            return
        udp_layer = packet[UDP]
        src_port = udp_layer.sport
        dst_port = udp_layer.dport
        payload = bytes(udp_layer.payload)
        original_frame_num = packet_index + 1  # Frame numbers are 1-based
        # PTP messages travel on UDP ports 319 (event) / 320 (general).
        if src_port in (319, 320) or dst_port in (319, 320):
            ptp_packet = PTPPacket(packet)
            if ptp_packet.ptp_header:
                self.ptp_packets.append(ptp_packet)
        # Potential Chapter 10 packet: need at least 28 bytes for header + prefix.
        if len(payload) >= 28:
            ch10_packet = Chapter10Packet(packet, original_frame_num)
            if ch10_packet.ch10_header:
                self.ch10_packets.append(ch10_packet)
        # TMATS packets can be smaller (includes continuation frames).
        if len(payload) >= 12:
            tmats_packet = TMATSPacket(packet, original_frame_num)
            if tmats_packet.is_tmats:
                self.tmats_assembler.add_frame(tmats_packet)

    def display_ch10_summary(self):
        """Display Chapter 10 summary statistics (counts, channels, sizes, rates)."""
        if not self.ch10_packets:
            print("No Chapter 10 packets found")
            return
        print("\n" + "=" * 80)
        print("CHAPTER 10 SUMMARY")
        print("=" * 80)
        # Basic counts
        total_packets = len(self.ch10_packets)
        print(f"Total Chapter 10 packets: {total_packets}")
        # Time span — the list is guaranteed non-empty after the guard above,
        # so `duration` is always bound before the rate section uses it.
        start_time = min(pkt.timestamp for pkt in self.ch10_packets)
        end_time = max(pkt.timestamp for pkt in self.ch10_packets)
        duration = end_time - start_time
        print(f"Time span: {duration:.3f} seconds")
        print(f"Start time: {datetime.fromtimestamp(start_time).strftime('%H:%M:%S.%f')[:-3]}")
        print(f"End time: {datetime.fromtimestamp(end_time).strftime('%H:%M:%S.%f')[:-3]}")
        # Channel / data-type distribution (only packets with a parsed header).
        channels = {}
        data_types = {}
        for pkt in self.ch10_packets:
            if pkt.ch10_header is not None:
                ch_id = pkt.ch10_header['channel_id']
                data_type = pkt.ch10_header['data_type']
                channels[ch_id] = channels.get(ch_id, 0) + 1
                data_types[data_type] = data_types.get(data_type, 0) + 1
        print("\nChannel distribution:")
        for ch_id in sorted(channels.keys()):
            count = channels[ch_id]
            percentage = (count / total_packets) * 100
            print(f" Channel {ch_id}: {count} packets ({percentage:.1f}%)")
        print("\nData type distribution:")
        for data_type in sorted(data_types.keys()):
            count = data_types[data_type]
            percentage = (count / total_packets) * 100
            print(f" Type {data_type}: {count} packets ({percentage:.1f}%)")
        # Size statistics
        sizes = [pkt.packet_size for pkt in self.ch10_packets]
        data_lengths = [pkt.ch10_header['data_length'] for pkt in self.ch10_packets
                        if pkt.ch10_header is not None]
        print("\nPacket size statistics:")
        print(f" Average: {statistics.mean(sizes):.1f} bytes")
        print(f" Min: {min(sizes)} bytes")
        print(f" Max: {max(sizes)} bytes")
        print(f" Total data: {sum(data_lengths):,} bytes")
        # Rate calculations (skip for a single-timestamp capture, duration == 0).
        if duration > 0:
            packet_rate = total_packets / duration
            data_rate = sum(data_lengths) / duration
            print("\nRate statistics:")
            print(f" Packet rate: {packet_rate:.1f} packets/sec")
            print(f" Data rate: {data_rate/1024:.1f} KB/sec")

    def display_ch10_table(self):
        """Display Chapter 10 packets in table format."""
        if not self.ch10_packets:
            print("No Chapter 10 packets found")
            return
        print("\n" + "=" * 120)
        print("CHAPTER 10 PACKET ANALYSIS")
        print("=" * 120)
        # Create DataFrame for better table display
        data = []
        for i, pkt in enumerate(self.ch10_packets):
            if pkt.ch10_header is not None:
                header = pkt.ch10_header
                data.append({
                    'Packet#': i + 1,
                    'Timestamp': datetime.fromtimestamp(pkt.timestamp).strftime('%H:%M:%S.%f')[:-3],
                    'Src IP': pkt.src_ip,
                    'Dst IP': pkt.dst_ip,
                    'Src Port': pkt.src_port,
                    'Dst Port': pkt.dst_port,
                    'Channel ID': header['channel_id'],
                    'Seq Num': header['sequence_number'],
                    'Data Type': header['data_type'],
                    'Pkt Length': header['packet_length'],
                    'Data Length': header['data_length'],
                    'Flags': header['packet_flags'],
                    'Size': pkt.packet_size
                })
        df = pd.DataFrame(data)
        print(df.to_string(index=False))

    def display_ptp_summary(self):
        """Display PTP summary statistics (message types, domains, sources, rates)."""
        if not self.ptp_packets:
            print("No PTP packets found")
            return
        print("\n" + "=" * 80)
        print("IEEE1588 PTP SUMMARY")
        print("=" * 80)
        # Basic counts
        total_packets = len(self.ptp_packets)
        print(f"Total PTP packets: {total_packets}")
        # Time span — non-empty after the guard, so `duration` is always bound.
        start_time = min(pkt.timestamp for pkt in self.ptp_packets)
        end_time = max(pkt.timestamp for pkt in self.ptp_packets)
        duration = end_time - start_time
        print(f"Time span: {duration:.3f} seconds")
        print(f"Start time: {datetime.fromtimestamp(start_time).strftime('%H:%M:%S.%f')[:-3]}")
        print(f"End time: {datetime.fromtimestamp(end_time).strftime('%H:%M:%S.%f')[:-3]}")
        # Message type / domain / source distributions.
        msg_types = {}
        domains = {}
        sources = {}
        for pkt in self.ptp_packets:
            if pkt.ptp_header is not None:
                msg_type = pkt.ptp_header['message_type']
                domain = pkt.ptp_header['domain_number']
                source = pkt.src_ip
                msg_types[msg_type] = msg_types.get(msg_type, 0) + 1
                domains[domain] = domains.get(domain, 0) + 1
                sources[source] = sources.get(source, 0) + 1
        print("\nMessage type distribution:")
        for msg_type in sorted(msg_types.keys()):
            count = msg_types[msg_type]
            percentage = (count / total_packets) * 100
            print(f" {msg_type}: {count} packets ({percentage:.1f}%)")
        print("\nDomain distribution:")
        for domain in sorted(domains.keys()):
            count = domains[domain]
            percentage = (count / total_packets) * 100
            print(f" Domain {domain}: {count} packets ({percentage:.1f}%)")
        print("\nSource IP distribution:")
        for source in sorted(sources.keys()):
            count = sources[source]
            percentage = (count / total_packets) * 100
            print(f" {source}: {count} packets ({percentage:.1f}%)")
        # Rate calculations
        if duration > 0:
            packet_rate = total_packets / duration
            print("\nRate statistics:")
            print(f" Overall PTP rate: {packet_rate:.1f} packets/sec")
            # Sync message rate
            sync_count = msg_types.get('Sync', 0)
            if sync_count > 0:
                sync_rate = sync_count / duration
                print(f" Sync message rate: {sync_rate:.1f} packets/sec")

    def display_ptp_table(self):
        """Display PTP packets in table format."""
        if not self.ptp_packets:
            print("No PTP packets found")
            return
        print("\n" + "=" * 100)
        print("IEEE1588 PTP PACKET ANALYSIS")
        print("=" * 100)
        data = []
        for i, pkt in enumerate(self.ptp_packets):
            if pkt.ptp_header is not None:
                header = pkt.ptp_header
                data.append({
                    'Packet#': i + 1,
                    'Timestamp': datetime.fromtimestamp(pkt.timestamp).strftime('%H:%M:%S.%f')[:-3],
                    'Src IP': pkt.src_ip,
                    'Dst IP': pkt.dst_ip,
                    'Message Type': header['message_type'],
                    'Domain': header['domain_number'],
                    'Sequence ID': header['sequence_id'],
                    'Flags': header['flags'],
                    'Correction': header['correction_field'],
                    'Interval': header['log_message_interval']
                })
        df = pd.DataFrame(data)
        print(df.to_string(index=False))

    def statistical_analysis(self):
        """Perform statistical analysis for intermittent issue detection."""
        print("\n" + "=" * 80)
        print("STATISTICAL ANALYSIS")
        print("=" * 80)
        if self.ch10_packets:
            self._analyze_ch10_statistics()
        if self.ptp_packets:
            self._analyze_ptp_statistics()

    def _analyze_ch10_statistics(self):
        """Analyze Chapter 10 packet timing, channel, and sequence statistics."""
        print("\nChapter 10 Statistics:")
        print("-" * 40)
        # Timing analysis
        timestamps = [pkt.timestamp for pkt in self.ch10_packets]
        if len(timestamps) > 1:
            intervals = [b - a for a, b in zip(timestamps, timestamps[1:])]
            mean_interval = statistics.mean(intervals)
            # FIX: statistics.stdev requires at least two samples; with exactly
            # two packets there is only one interval and this used to raise
            # StatisticsError.  Report 0.0 in that degenerate case.
            std_interval = statistics.stdev(intervals) if len(intervals) > 1 else 0.0
            print(f"Packet count: {len(self.ch10_packets)}")
            print(f"Time span: {timestamps[-1] - timestamps[0]:.3f} seconds")
            print(f"Average interval: {mean_interval*1000:.3f} ms")
            print(f"Min interval: {min(intervals)*1000:.3f} ms")
            print(f"Max interval: {max(intervals)*1000:.3f} ms")
            print(f"Std deviation: {std_interval*1000:.3f} ms")
            # Detect potential issues: intervals more than 3 sigma from the mean.
            outliers = []
            outlier_frames = []
            for i, interval in enumerate(intervals):
                if abs(interval - mean_interval) > 3 * std_interval:
                    outliers.append(interval * 1000)  # Convert to ms
                    # Get the original frame number of the second packet in the interval
                    original_frame = self.ch10_packets[i + 1].original_frame_num
                    outlier_frames.append(original_frame)
            if outliers:
                print(f"WARNING: {len(outliers)} timing outliers detected!")
                print("Outlier details:")
                for i, (frame_num, interval_ms) in enumerate(zip(outlier_frames, outliers)):
                    if i < 10:  # Show first 10 outliers
                        print(f" Frame {frame_num}: {interval_ms:.3f} ms interval")
                    elif i == 10:
                        print(f" ... and {len(outliers) - 10} more outliers")
        # Channel ID analysis
        channel_ids = [pkt.ch10_header['channel_id'] for pkt in self.ch10_packets
                       if pkt.ch10_header is not None]
        unique_channels = set(channel_ids)
        print(f"Unique channels: {sorted(unique_channels)}")
        for ch_id in unique_channels:
            count = channel_ids.count(ch_id)
            print(f" Channel {ch_id}: {count} packets")
        # Sequence number analysis.
        # NOTE(review): sequence numbers are compared across ALL channels in
        # capture order; Chapter 10 sequence counters are typically per-channel,
        # so interleaved channels may report spurious gaps — confirm intent.
        seq_numbers = [pkt.ch10_header['sequence_number'] for pkt in self.ch10_packets
                       if pkt.ch10_header is not None]
        if len(set(seq_numbers)) < len(seq_numbers):
            duplicates = len(seq_numbers) - len(set(seq_numbers))
            print(f"WARNING: {duplicates} duplicate sequence numbers detected!")
        # Check for sequence gaps (8-bit counter, wraps at 256).
        seq_gaps = []
        valid_packets = [pkt for pkt in self.ch10_packets if pkt.ch10_header is not None]
        for i in range(1, len(seq_numbers)):
            expected = (seq_numbers[i - 1] + 1) % 256
            if seq_numbers[i] != expected:
                original_frame = valid_packets[i].original_frame_num
                seq_gaps.append((original_frame, seq_numbers[i - 1], seq_numbers[i]))
        if seq_gaps:
            print(f"WARNING: {len(seq_gaps)} sequence number gaps detected!")
            print("Sequence gap details:")
            for i, (frame_num, prev, curr) in enumerate(seq_gaps):
                if i < 10:  # Show first 10 gaps
                    print(f" Frame {frame_num}: expected {(prev + 1) % 256}, got {curr}")
                elif i == 10:
                    print(f" ... and {len(seq_gaps) - 10} more gaps")

    def _analyze_ptp_statistics(self):
        """Analyze PTP message distribution and Sync-message timing."""
        print("\nPTP Statistics:")
        print("-" * 40)
        # Message type distribution
        msg_types = [pkt.ptp_header['message_type'] for pkt in self.ptp_packets
                     if pkt.ptp_header is not None]
        unique_types = set(msg_types)
        print(f"Total PTP packets: {len(self.ptp_packets)}")
        print("Message type distribution:")
        for msg_type in unique_types:
            count = msg_types.count(msg_type)
            print(f" {msg_type}: {count} packets")
        # Timing analysis for Sync messages
        sync_packets = [pkt for pkt in self.ptp_packets
                        if pkt.ptp_header is not None and pkt.ptp_header['message_type'] == 'Sync']
        if len(sync_packets) > 1:
            sync_times = [pkt.timestamp for pkt in sync_packets]
            sync_intervals = [b - a for a, b in zip(sync_times, sync_times[1:])]
            # FIX: same stdev guard as the Chapter 10 analysis — exactly two
            # Sync messages yield one interval and stdev would raise.
            sync_std = statistics.stdev(sync_intervals) if len(sync_intervals) > 1 else 0.0
            print("\nSync message analysis:")
            print(f" Count: {len(sync_packets)}")
            print(f" Average interval: {statistics.mean(sync_intervals)*1000:.3f} ms")
            print(f" Min interval: {min(sync_intervals)*1000:.3f} ms")
            print(f" Max interval: {max(sync_intervals)*1000:.3f} ms")
            print(f" Std deviation: {sync_std*1000:.3f} ms")

    def display_tmats_content(self):
        """Display assembled TMATS content plus simple line statistics."""
        if self.tmats_assembler.get_frame_count() == 0:
            print("No TMATS frames found")
            return
        print("\n" + "=" * 80)
        print("TMATS (TELEMETRY ATTRIBUTES TRANSFER STANDARD) CONTENT")
        print("=" * 80)
        print(f"TMATS frames found: {self.tmats_assembler.get_frame_count()}")
        # Assemble the TMATS content
        assembled_content = self.tmats_assembler.assemble()
        if assembled_content:
            print(f"TMATS files found: {self.tmats_assembler.get_file_count()}")
            print(f"Total TMATS length: {len(assembled_content)} characters")
            print("\nTMATS Content:")
            print("-" * 80)
            # The assembled content is already cleaned by the assembler
            lines = assembled_content.split('\n')
            for line in lines:
                if line.strip():  # Only print non-empty lines
                    print(line)
            print("-" * 80)
            # Show some statistics
            attribute_lines = [line for line in lines if '\\' in line and ':' in line]
            comment_lines = [line for line in lines if line.strip().startswith('COMMENT:')]
            print(f"Total lines: {len([l for l in lines if l.strip()])}")
            print(f"Attribute lines: {len(attribute_lines)}")
            print(f"Comment lines: {len(comment_lines)}")
        else:
            print("No TMATS content could be assembled")
def main():
    """CLI entry point: parse options, analyze the capture, print reports."""
    parser = argparse.ArgumentParser(description='Analyze PCAP files with Chapter 10 and PTP data')
    parser.add_argument('pcap_file', help='Path to PCAP file')
    parser.add_argument('--ch10-only', action='store_true', help='Show only Chapter 10 analysis')
    parser.add_argument('--ptp-only', action='store_true', help='Show only PTP analysis')
    parser.add_argument('--stats-only', action='store_true', help='Show only statistical analysis')
    parser.add_argument('--summary-only', action='store_true', help='Show only summary information')
    parser.add_argument('--no-tables', action='store_true', help='Skip detailed packet tables')
    parser.add_argument('--tmats', action='store_true', help='Display TMATS (Telemetry Attributes Transfer Standard) content')
    parser.add_argument('--tmats-only', action='store_true', help='Show only TMATS content')
    opts = parser.parse_args()

    analyzer = PcapAnalyzer(opts.pcap_file)
    analyzer.analyze()

    # TMATS-only mode short-circuits every other report.
    if opts.tmats_only:
        analyzer.display_tmats_content()
        return

    want_ch10 = not opts.ptp_only
    want_ptp = not opts.ch10_only

    # Summaries come first unless the user asked for statistics alone.
    if not opts.stats_only:
        if want_ch10:
            analyzer.display_ch10_summary()
        if want_ptp:
            analyzer.display_ptp_summary()

    # Detailed per-packet tables unless any suppressing flag is set.
    if not (opts.stats_only or opts.summary_only or opts.no_tables):
        if want_ch10:
            analyzer.display_ch10_table()
        if want_ptp:
            analyzer.display_ptp_table()

    # Optional TMATS dump alongside the other reports.
    if opts.tmats:
        analyzer.display_tmats_content()

    # Statistical analysis closes the output unless summaries-only was requested.
    if not opts.summary_only:
        analyzer.statistical_analysis()
# Run the CLI entry point only when executed as a script, not on import.
if __name__ == '__main__':
    main()