# Private/Parsers/Invoke-SrtParser.ps1

function Invoke-SrtParser {
    <#
    .SYNOPSIS
        Parses SRT text content into an array of SrtEntry objects.
    .DESCRIPTION
        The content is passed through ConvertTo-NormalizedText and then split into
        blocks on runs of two or more newlines. Each non-empty block is read as:

            [sequence number]     optional; a warning is recorded when it is absent
            <start> --> <end>     HH:mm:ss,fff (a dot before the milliseconds is
                                  accepted, with a warning)
            text lines            zero or more

        Blocks that cannot be parsed are skipped and a warning is recorded instead.
        Accepted entries are numbered consecutively from 1 (SrtEntry.Index); a
        skipped block does not consume an index.
    .PARAMETER Content
        The raw SRT text to parse.
    .PARAMETER Warnings
        Hashtable that receives warning messages, keyed by the current entry index.
        Each key holds a single message, so a later warning for the same index
        replaces an earlier one. Because skipped blocks do not advance the index,
        a warning recorded for a skipped block can be replaced by a warning from
        the block that follows it.
    .OUTPUTS
        SrtEntry[]
    #>

    [OutputType('SrtEntry[]')]
    param(
        [Parameter(Mandatory)]
        [string] $Content,

        [hashtable] $Warnings = @{}
    )

    # NOTE(review): the splitting below assumes ConvertTo-NormalizedText yields
    # "`n"-only line endings -- confirm against that helper.
    $normalized = ConvertTo-NormalizedText -Text $Content
    $entries    = [System.Collections.Generic.List[SrtEntry]]::new()

    # Split on blank lines between blocks (two or more newlines)
    $blocks = $normalized -split '\n{2,}'

    $index = 1
    foreach ($block in $blocks) {
        $block = $block.Trim()
        if ([string]::IsNullOrWhiteSpace($block)) { continue }

        $lines = $block -split '\n'
        if ($lines.Count -lt 2) {
            $Warnings[$index] = "Block ${index} has fewer than 2 lines, skipping."
            continue
        }

        $entry       = [SrtEntry]::new()
        $entry.Index = $index

        # Line 0: block number (may be missing or out of order)
        $numberLine   = $lines[0].Trim()
        $parsedNumber = 0
        if ($numberLine -match '^\d+$') {
            $timeLineIndex = 1
            # TryParse instead of an [int] cast: a digit run that does not fit in
            # Int32 would otherwise raise an error mid-block and leave
            # $timeLineIndex unset (or stale from the previous block).
            if ([int]::TryParse($numberLine, [ref]$parsedNumber)) {
                $entry.BlockNumber = $parsedNumber
            } else {
                $Warnings[$index]  = "Block ${index} has an unparseable sequence number: $numberLine"
                $entry.BlockNumber = $index
            }
        } else {
            # No number line -- try treating first line as timestamp
            $Warnings[$index] = "Block ${index} is missing a sequence number."
            $entry.BlockNumber = $index
            $timeLineIndex     = 0
        }

        # Defensive: cannot trigger while the two-line minimum above is enforced.
        if ($timeLineIndex -ge $lines.Count) {
            $Warnings[$index] = "Block ${index} has no timestamp line."
            continue
        }

        # Timestamp line: HH:mm:ss,fff --> HH:mm:ss,fff
        $timeLine = $lines[$timeLineIndex].Trim()
        if ($timeLine -match '(\d{2}:\d{2}:\d{2}[,\.]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[,\.]\d{3})') {
            try {
                $entry.Start = ConvertFrom-SrtTimestamp -Timestamp $Matches[1]
                $entry.End   = ConvertFrom-SrtTimestamp -Timestamp $Matches[2]
            } catch {
                $msg = $_.Exception.Message
                $Warnings[$index] = "Block ${index} timestamp parse failed: $msg"
                continue
            }

            # Flag if dot separator was used instead of comma
            if ($timeLine -match '\d{2}:\d{2}:\d{2}\.') {
                $Warnings[$index] = "Block ${index} uses dot separator instead of comma in timestamp."
            }
        } else {
            $Warnings[$index] = "Block ${index} has invalid timestamp line: $timeLine"
            continue
        }

        # Remaining lines are subtitle text. Only slice when at least one text line
        # exists: for a block that ends at the timestamp, the range
        # ($timeLineIndex + 1)..($lines.Count - 1) would run backwards and pick up
        # the timestamp line itself as subtitle text.
        $firstTextIndex = $timeLineIndex + 1
        if ($firstTextIndex -lt $lines.Count) {
            $textLines = $lines[$firstTextIndex..($lines.Count - 1)] | ForEach-Object { $_.TrimEnd() }
        } else {
            $textLines = [string[]]@()
        }
        $entry.Lines   = $textLines
        $entry.RawText = $textLines -join [System.Environment]::NewLine

        # Detect HTML tags
        if ($entry.RawText -match '<(b|i|u|font)\b') {
            $entry.HasHtmlTags = $true
        }

        $entries.Add($entry)
        $index++
    }

    return $entries.ToArray()
}