Private/Translation/Invoke-TranslationPriming.ps1
|
function Invoke-TranslationPriming {
    <#
    .SYNOPSIS
        Analyzes a subtitle file sample and returns structured content context for translation.

    .DESCRIPTION
        Sends a representative sample of entries to the AI provider and asks it to analyze
        the content: type, tone, register, domain terminology, speaker patterns, and
        cultural notes.

        The returned object is intended to be kept in Session.ContentContext and consumed by
        Build-TranslationSystemPrompt to produce a richer, content-aware system prompt.
        NOTE: this function neither reads nor writes Session.ContentContext itself; storing
        the result and reusing it for later calls in the same session is left to the caller.

    .PARAMETER InputObject
        The subtitle file whose Entries are sampled.

    .PARAMETER Session
        Session hashtable. Only Session.Provider is read here.

    .PARAMETER ApiKey
        API key passed through unchanged to the provider adapter.

    .PARAMETER SourceLanguage
        Source language label for the prompt. Empty means 'auto-detect'.

    .PARAMETER TargetLanguage
        Target language label for the prompt. Empty means 'unknown'.

    .PARAMETER SampleSize
        Maximum number of entries to include in the sample (default 20).

    .OUTPUTS
        PSCustomObject with labeled fields from the AI analysis, or $null when there is
        nothing to sample, the provider is unsupported, or the provider call fails.
    #>
    [OutputType([PSCustomObject])]
    param(
        [Parameter(Mandatory)] [SubtitleFile] $InputObject,
        [Parameter(Mandatory)] [hashtable]    $Session,
        [Parameter(Mandatory)] [SecureString] $ApiKey,
        [string] $SourceLanguage = '',
        [string] $TargetLanguage = '',
        [int]    $SampleSize     = 20
    )

    # Resolve the adapter first so an unsupported provider fails loudly instead of
    # silently producing an all-defaults result.
    $provider = $Session.Provider
    $adapterCommand = if ($provider) {
        switch ($provider.Name) {
            'Anthropic' { 'Invoke-AnthropicTranslation' }
            'OpenAI'    { 'Invoke-OpenAITranslation' }
            'Google'    { 'Invoke-GoogleTranslation' }
        }
    }
    if (-not $adapterCommand) {
        Write-Warning "Translation priming skipped: unsupported or missing provider '$($provider.Name)'. Proceeding without content context."
        return $null
    }

    # Collect evenly-spaced sample entries.
    # The entry list is divided into $effectiveN equal-width buckets and the first
    # non-blank entry of each bucket is taken. This covers the whole file (an
    # [int]-rounded step can stop well short of the end) and cannot divide by zero.
    $entries    = $InputObject.Entries
    $total      = $entries.Count
    $effectiveN = [Math]::Max(0, [Math]::Min($SampleSize, $total))

    $sampleLines = [System.Collections.Generic.List[string]]::new()
    for ($bucket = 0; $bucket -lt $effectiveN; $bucket++) {
        $start = [int][Math]::Floor([double]$bucket * $total / $effectiveN)
        $end   = [int][Math]::Floor([double]($bucket + 1) * $total / $effectiveN)
        for ($i = $start; $i -lt $end; $i++) {
            $text = $entries[$i].Lines -join ' '
            if ($text.Trim()) {
                $sampleLines.Add("[$($i + 1)] $text")
                break
            }
        }
    }

    if ($sampleLines.Count -eq 0) {
        # Nothing to analyze: do not spend an API call on an empty sample.
        Write-Warning "Translation priming skipped: no non-empty subtitle entries to sample. Proceeding without content context."
        return $null
    }

    $srcLabel = if ($SourceLanguage) { $SourceLanguage } else { 'auto-detect' }
    $tgtLabel = if ($TargetLanguage) { $TargetLanguage } else { 'unknown' }

    $systemPrompt = @"
You are a professional subtitle analyst. Analyze the subtitle sample below and return ONLY the labeled fields — no prose, no preamble.

Source language: $srcLabel
Target language: $tgtLabel

Required output (one per line, exactly as shown):
CONTENT_TYPE: <film|series|documentary|animation|news|sports|educational|other>
CONTENT_TITLE: <title if inferable, else UNKNOWN>
DOMINANT_TONE: <dramatic|comedic|action|romantic|neutral|tense|documentary|mixed>
REGISTER: <formal|informal|colloquial|technical|mixed>
TARGET_AUDIENCE: <general|children|adult|professional|academic>
PACING: <fast|moderate|slow> (how quickly dialogue moves)
DOMAIN_TERMS: <comma-separated list of domain-specific or recurring terms to translate consistently, or NONE>
SPEAKER_PATTERNS: <description of how many speakers, any named characters, group vs solo dialogue>
CULTURAL_NOTES: <idioms, references, humor that need localization care, or NONE>
TRANSLATION_WARNINGS: <any structural challenges (e.g., wordplay, acrostics, number puns), or NONE>
"@

    $userContent = "Subtitle sample ($($sampleLines.Count) entries):`n`n" + ($sampleLines -join "`n")

    Write-Verbose "Running translation priming analysis ($($sampleLines.Count) sample entries)..."

    # Use higher temperature for creative analysis. A copy of the provider is used so
    # the session's own provider object keeps its configured temperature.
    $analysisProv = [TranslationProvider]::new()
    $analysisProv.Name              = $provider.Name
    $analysisProv.Model             = $provider.Model
    $analysisProv.BaseUrl           = $provider.BaseUrl
    $analysisProv.MaxTokensPerBatch = $provider.MaxTokensPerBatch
    $analysisProv.RateLimitRpm      = $provider.RateLimitRpm
    $analysisProv.Temperature       = 0.7

    $adapterArgs = @{
        SystemPrompt = $systemPrompt
        UserContent  = $userContent
        Provider     = $analysisProv
        ApiKey       = $ApiKey
    }
    $adapterResult = & $adapterCommand @adapterArgs

    if ($null -eq $adapterResult) {
        Write-Warning "Translation priming failed: provider returned no result. Proceeding without content context."
        return $null
    }
    if ($adapterResult.FinishReason -eq 'error') {
        Write-Warning "Translation priming failed: $($adapterResult.Content). Proceeding without content context."
        return $null
    }

    # Parse labeled lines ("KEY: value") into a hashtable. Leading whitespace is
    # tolerated; blank values are skipped so the ?? defaults below still apply.
    # When a label repeats, the last occurrence wins.
    $ctx = @{}
    foreach ($line in ($adapterResult.Content -split "`n")) {
        if ($line -match '^\s*([A-Z_]+):\s*(.+)$') {
            $label = $Matches[1]
            $value = $Matches[2].Trim()
            if ($value) {
                $ctx[$label] = $value
            }
        }
    }

    $result = [PSCustomObject]@{
        ContentType         = $ctx['CONTENT_TYPE']         ?? 'unknown'
        ContentTitle        = $ctx['CONTENT_TITLE']        ?? 'UNKNOWN'
        DominantTone        = $ctx['DOMINANT_TONE']        ?? 'neutral'
        Register            = $ctx['REGISTER']             ?? 'mixed'
        TargetAudience      = $ctx['TARGET_AUDIENCE']      ?? 'general'
        Pacing              = $ctx['PACING']               ?? 'moderate'
        DomainTerms         = $ctx['DOMAIN_TERMS']         ?? 'NONE'
        SpeakerPatterns     = $ctx['SPEAKER_PATTERNS']     ?? ''
        CulturalNotes       = $ctx['CULTURAL_NOTES']       ?? 'NONE'
        TranslationWarnings = $ctx['TRANSLATION_WARNINGS'] ?? 'NONE'
        RawAnalysis         = $adapterResult.Content
    }

    Write-Verbose "Priming complete. Content: $($result.ContentType) / Tone: $($result.DominantTone) / Register: $($result.Register)"
    return $result
}