dts-2026Presentation/embed_speaker_notes.py

#!/usr/bin/env python3
"""
Embed speaker notes into all three presentation PowerPoint files.
This script reads the speaker notes from the markdown files and adds them
to the corresponding slides in each PowerPoint presentation.
"""

from pptx import Presentation
from pptx.util import Inches, Pt
import re
import os


def extract_speaker_notes(markdown_path):
    """
    Extract speaker notes from a markdown file.

    Slides are introduced by "## SLIDE <n>:" headers. Within a slide's
    section, the notes are the run of blockquote lines (lines starting with
    ">") that directly follows a "### Speaker Notes:" heading.

    Args:
        markdown_path: Path to the UTF-8 encoded markdown file.

    Returns:
        A dict mapping slide numbers (int) to their cleaned speaker notes
        (str). Slides that have no speaker-notes block are omitted.
    """
    with open(markdown_path, "r", encoding="utf-8") as f:
        content = f.read()

    header_re = re.compile(r"## SLIDE (\d+):")
    # A notes line ends at a newline OR at the end of the searched text, so a
    # final line without a trailing newline is still captured.
    notes_re = re.compile(r"### Speaker Notes:\s*\n((?:>[^\n]*(?:\n|\Z))+)")
    quote_prefix_re = re.compile(r"^>\s*")
    stage_direction_re = re.compile(r"\*\[(.*?)\]\*")

    headers = list(header_re.finditer(content))
    notes = {}

    for index, header in enumerate(headers):
        # Restrict the search to this slide's own section. Without the bound,
        # a slide lacking a notes block would pick up the notes of the next
        # slide (and that slide would then end up with none).
        if index + 1 < len(headers):
            section_end = headers[index + 1].start()
        else:
            section_end = len(content)

        block = notes_re.search(content, header.end(), section_end)
        if block is None:
            continue

        cleaned_lines = []
        for line in block.group(1).split("\n"):
            # Remove the leading blockquote marker and following whitespace
            cleaned = quote_prefix_re.sub("", line)
            # Turn *[stage direction]* into [stage direction], keeping the text
            cleaned = stage_direction_re.sub(r"[\1]", cleaned)
            # Drop any remaining emphasis markers
            cleaned = cleaned.replace("*", "")
            cleaned_lines.append(cleaned)

        notes[int(header.group(1))] = "\n".join(cleaned_lines).strip()

    return notes


def add_notes_to_pptx(pptx_path, notes_dict):
    """
    Add speaker notes to a PowerPoint presentation and save it in place.

    Args:
        pptx_path: Path of the .pptx file to load; the same path is
            overwritten when the presentation is saved.
        notes_dict: Mapping of 1-based slide position to the notes text for
            that slide. Positions with no matching slide are ignored.

    Returns:
        The number of slides whose notes text was actually written.
    """
    prs = Presentation(pptx_path)

    slides_updated = 0
    for i, slide in enumerate(prs.slides, 1):
        if i not in notes_dict:
            continue

        # Accessing notes_slide gets the existing notes slide or creates one.
        text_frame = slide.notes_slide.notes_text_frame
        if text_frame is None:
            # python-pptx returns None when the notes slide has no notes
            # placeholder; skip the slide instead of failing the whole run
            # with an AttributeError before anything is saved.
            print(f"  WARNING: Slide {i} has no notes placeholder; notes skipped")
            continue

        text_frame.text = notes_dict[i]
        slides_updated += 1

    # Save the presentation
    prs.save(pptx_path)
    return slides_updated


def process_presentation(folder_name, pptx_name):
    """
    Embed one presentation's markdown speaker notes into its PowerPoint file.

    Both files are resolved relative to the directory containing this
    script: the markdown is always named "script_and_speaker_notes.md" and
    lives next to the .pptx, inside folder_name when one is given.

    Returns True once the notes have been extracted and written, or False
    if either input file does not exist.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # A falsy folder_name means the files sit directly beside this script.
    deck_dir = os.path.join(script_dir, folder_name) if folder_name else script_dir

    markdown_path = os.path.join(deck_dir, "script_and_speaker_notes.md")
    pptx_path = os.path.join(deck_dir, pptx_name)

    print(f"\nProcessing: {pptx_name}")
    print(f"  Markdown: {markdown_path}")
    print(f"  PowerPoint: {pptx_path}")

    # Bail out on the first missing input, markdown checked before the deck.
    for label, path in (("Markdown", markdown_path), ("PowerPoint", pptx_path)):
        if not os.path.exists(path):
            print(f"  ERROR: {label} file not found!")
            return False

    # Pull the notes out of the markdown script
    extracted = extract_speaker_notes(markdown_path)
    print(f"  Extracted notes for {len(extracted)} slides")

    # Write them into the deck
    updated = add_notes_to_pptx(pptx_path, extracted)
    print(f"  Updated {updated} slides with speaker notes")

    return True


def main():
    """Process every known presentation and print a summary of the run."""
    banner = "=" * 60

    print(banner)
    print("Embedding Speaker Notes into PowerPoint Presentations")
    print(banner)

    # (folder, pptx file name) pairs, each folder sitting beside this script
    presentations = [
        ("The_Stories_Our_Data_Tells", "The_Stories_Our_Data_Tells.pptx"),
        ("From_Tragedy_to_Triumph", "From_Tragedy_to_Triumph.pptx"),
        ("When_the_Data_Surprised_Us", "When_the_Data_Surprised_Us.pptx"),
    ]

    # Presentations are processed in list order; count the ones that succeed.
    success_count = sum(
        1
        for folder, pptx in presentations
        if process_presentation(folder, pptx)
    )

    print("\n" + banner)
    print(
        f"Complete! Successfully processed {success_count}/{len(presentations)} presentations."
    )
    print(banner)

# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()