Public/Utilities/Optimize-SubtitleFile.ps1
|
function Optimize-SubtitleFile {
    <#
    .SYNOPSIS
        Non-destructive cleanup: removes duplicates, sorts by start time, trims whitespace.

    .DESCRIPTION
        Performs the following optimizations, in this order:
        - Trim leading/trailing whitespace from each line
        - Normalize each line to Unicode NFC
        - Drop lines that are empty after trimming
        - Remove entries with no visible text
        - Sort entries by start time, then by end time
        - Remove exact duplicate entries (same start, end, and text)
        - Re-index from 1

        Text is trimmed and normalized before duplicates are detected, so entries
        that differ only in surrounding whitespace or Unicode composition are
        treated as duplicates.

        The entries of the supplied object are modified in place, and the same
        object is written back to the pipeline.

    .PARAMETER InputObject
        A SubtitleFile object.

    .OUTPUTS
        SubtitleFile. The object that was passed in, with its Entries replaced by
        the cleaned, re-indexed entries.

    .EXAMPLE
        Import-SubtitleFile 'movie.srt' | Optimize-SubtitleFile
    #>
    [CmdletBinding()]
    [OutputType('SubtitleFile')]
    param(
        [Parameter(Mandatory, ValueFromPipeline)]
        [SubtitleFile] $InputObject
    )

    process {
        # Piping (rather than wrapping with @() directly) yields an empty array
        # when Entries is $null; null entries are skipped defensively.
        $entries = @($InputObject.Entries | Where-Object { $null -ne $_ })

        # Trim and normalize first so that duplicate detection compares the final text.
        foreach ($entry in $entries) {
            # @() keeps the result an array even when zero or one line survives,
            # so the .Count check below does not depend on how Lines is typed.
            $entry.Lines = [string[]] @(
                $entry.Lines |
                    Where-Object { $null -ne $_ } |
                    ForEach-Object { $_.Trim().Normalize([System.Text.NormalizationForm]::FormC) } |
                    Where-Object { $_ -ne '' }
            )
        }

        # Remove entries with no visible text
        $visible = @($entries | Where-Object { $_.Lines.Count -gt 0 })

        # Sort by start time, then end time
        $sorted = @($visible | Sort-Object Start, End)

        # Remove duplicates (same start+end+text); the first occurrence in sorted order wins.
        $seen = [System.Collections.Generic.HashSet[string]]::new()
        $deduped = [System.Collections.Generic.List[SubtitleEntry]]::new()
        foreach ($entry in $sorted) {
            # Lines are joined with a NUL character, which is assumed not to occur in
            # subtitle text. Joining with a printable character such as '|' would make
            # ('a|b') and ('a', 'b') produce the same key.
            $text = $entry.Lines -join "`0"
            $key = '{0}|{1}|{2}' -f $entry.Start.Ticks, $entry.End.Ticks, $text
            if ($seen.Add($key)) {
                $deduped.Add($entry)
            }
        }

        $cleaned = @($deduped)

        # Re-index from 1; SRT entries also carry a block number that must stay in step.
        $i = 1
        foreach ($entry in $cleaned) {
            $entry.Index = $i
            if ($entry -is [SrtEntry]) {
                $entry.BlockNumber = $i
            }
            $i++
        }

        $InputObject.Entries = $cleaned
        Write-SubtitleLog -Message "Optimized: $($cleaned.Count) entries after dedup/sort/trim." -Level Info
        return $InputObject
    }
}