Private/Invoke-SummaryPipeline.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved. # Licensed under the MIT License. See LICENSE file in the project root. # Shared pipeline worker for POV summary extraction. # Single source of truth for: CHESS → RAG → Prompt → FIRE/Single-shot → Density → Unmapped resolution # Called by Invoke-POVSummary (interactive) and Invoke-DocumentSummary (batch). # Dot-sourced by AITriad.psm1 — do NOT export. function Invoke-SummaryPipeline { <# .SYNOPSIS Core summary extraction pipeline — shared by all callers. .DESCRIPTION Executes the 7-stage extraction pipeline: 1. CHESS pre-classification (identify relevant POVs) 2. RAG node selection (filter taxonomy to relevant nodes) 3. AutoFire Stage 1 sniff (pre-extraction) 4. Prompt construction (system + taxonomy + schema + document) 5. AI extraction (FIRE iterative or single-shot with density retry) 6. AutoFire Stage 2 sniff + re-run (post-extraction) 7. Unmapped concept resolution Does NOT handle: path resolution, file writing, metadata updates, console reporting, or batch coordination. Those are caller concerns. .PARAMETER SnapshotText The document text to summarize. .PARAMETER DocId Document slug ID (for logging). .PARAMETER Metadata Parsed metadata.json hashtable. .PARAMETER ApiKey Resolved AI API key. .PARAMETER Model AI model identifier. .PARAMETER Temperature Sampling temperature. .PARAMETER TaxonomyVersion Current taxonomy version string. .PARAMETER SystemPromptTemplate System prompt template with density placeholders. .PARAMETER OutputSchema JSON schema string for the AI response. .PARAMETER FullTaxonomy Bypass RAG — inject all taxonomy nodes. .PARAMETER IterativeExtraction Force FIRE iterative extraction. .PARAMETER AutoFire Enable two-stage FIRE sniff. .PARAMETER TaxonomyJsonOverride Pre-computed taxonomy JSON (for chunked pipeline passing parent-level taxonomy). #> [CmdletBinding()] param( [Parameter(Mandatory)][string]$SnapshotText, [Parameter(Mandatory)][string]$DocId, [Parameter(Mandatory)][object]$Metadata, [Parameter(Mandatory)][string]$ApiKey, [Parameter(Mandatory)][string]$Model, [double]$Temperature = 0.1, [Parameter(Mandatory)][string]$TaxonomyVersion, [Parameter(Mandatory)][string]$SystemPromptTemplate, [Parameter(Mandatory)][string]$OutputSchema, [switch]$FullTaxonomy, [switch]$IterativeExtraction, [switch]$AutoFire, [string]$TaxonomyJsonOverride = '' ) Set-StrictMode -Version Latest $ErrorActionPreference = 'Stop' $PipelineStart = Get-Date $WordCount = ($SnapshotText -split '\s+').Count $EstimatedTokens = [int]($SnapshotText.Length / 4) # ── Stage 1: CHESS + RAG taxonomy selection ─────────────────────────────── $TaxonomyJson = $null if ($TaxonomyJsonOverride) { $TaxonomyJson = $TaxonomyJsonOverride Write-Verbose "Pipeline: using caller-provided taxonomy context" } elseif ($FullTaxonomy) { $TaxFiles = @("accelerationist.json", "safetyist.json", "skeptic.json", "situations.json") $TaxContext = [ordered]@{} $TaxDir = Get-TaxonomyDir foreach ($F in $TaxFiles) { $P = Join-Path $TaxDir $F if (Test-Path $P) { $TaxContext[$F] = Get-Content $P -Raw | ConvertFrom-Json } } $TaxonomyJson = $TaxContext | ConvertTo-Json -Depth 20 -Compress:$false Write-Verbose "Pipeline: full taxonomy injected" } else { $QueryWords = ($SnapshotText -split '\s+') | Select-Object -First 500 $QueryText = "$($Metadata.title). $($QueryWords -join ' ')" try { $RelevantPovs = Get-DocumentPovClassification -QueryText $QueryText -ApiKey $ApiKey Write-Verbose "Pipeline: CHESS classified POVs: $($RelevantPovs -join ', ')" $TaxonomyJson = Get-RelevantTaxonomyNodes -Query $QueryText ` -Threshold 0.30 -MaxTotal 200 -MinPerCategory 3 ` -POV $RelevantPovs -IncludeSituations -Format context Write-Verbose "Pipeline: RAG selected ~$([int]($TaxonomyJson.Length / 4)) tokens of taxonomy" } catch { Write-Verbose "Pipeline: RAG failed ($($_.Exception.Message)) — falling back to full taxonomy" $TaxFiles = @("accelerationist.json", "safetyist.json", "skeptic.json", "situations.json") $TaxContext = [ordered]@{} $TaxDir = Get-TaxonomyDir foreach ($F in $TaxFiles) { $P = Join-Path $TaxDir $F if (Test-Path $P) { $TaxContext[$F] = Get-Content $P -Raw | ConvertFrom-Json } } $TaxonomyJson = $TaxContext | ConvertTo-Json -Depth 20 -Compress:$false } } # ── Stage 2: AutoFire Stage 1 — pre-extraction sniff (always runs) ─────── if (-not $IterativeExtraction) { if ($Metadata.PSObject.Properties['source_type']) { $SourceType = $Metadata.source_type } else { $SourceType = 'unknown' } $Sniff1 = Test-FireRequired -WordCount $WordCount -IsChunked:($EstimatedTokens -gt 20000) -SourceType $SourceType if ($Sniff1.ShouldFire) { Write-Verbose "Pipeline AutoFire Stage 1: $($Sniff1.Reason) — switching to FIRE" $IterativeExtraction = $true } } # ── Stage 3: Prompt construction ────────────────────────────────────────── $SystemPrompt = Build-DensityScaledPrompt -WordCount $WordCount -Template $SystemPromptTemplate # Policy registry context $PolicyBlock = '' $PolicyPath = Join-Path (Get-TaxonomyDir) 'policy_actions.json' if (Test-Path $PolicyPath) { try { $PolicyReg = Get-Content -Raw -Path $PolicyPath | ConvertFrom-Json if ($PolicyReg.policies -and $PolicyReg.policies.Count -gt 0) { $PolicyLines = $PolicyReg.policies | ForEach-Object { "$($_.id): $($_.action)" } $PB = $PolicyLines -join "`n" if ($PB.Length -gt 5000) { $PB = $PB.Substring(0, 5000) + "`n... (truncated)" } $PolicyBlock = "`n=== POLICY REGISTRY (use pol-NNN IDs when referencing policy actions) ===`n$PB" } } catch { } } if ($Metadata.PSObject.Properties['pov_tags']) { $PovTagLine = $Metadata.pov_tags -join ', ' } else { $PovTagLine = '' } if ($Metadata.PSObject.Properties['topic_tags']) { $TopicTagLine = $Metadata.topic_tags -join ', ' } else { $TopicTagLine = '' } if ($Metadata.PSObject.Properties['title']) { $TitleLine = $Metadata.title } else { $TitleLine = $DocId } $FullPrompt = @" $SystemPrompt === TAXONOMY (version $TaxonomyVersion) === $TaxonomyJson $PolicyBlock === OUTPUT SCHEMA (your response must match this structure) === $OutputSchema === DOCUMENT: $DocId === Title: $TitleLine POV tags (pre-classified): $PovTagLine Topic tags: $TopicTagLine --- DOCUMENT CONTENT --- $SnapshotText "@ Write-Verbose "Pipeline: prompt assembled ($([int]($FullPrompt.Length / 4)) tokens est.)" # ── Stage 4: AI extraction ──────────────────────────────────────────────── $SummaryObject = $null $FireStats = $null $AiBackend = '' if ($IterativeExtraction) { Write-Verbose "Pipeline: using FIRE iterative extraction" $FireResult = Invoke-IterativeExtraction ` -Prompt $FullPrompt -Model $Model -ApiKey $ApiKey -Temperature $Temperature $SummaryObject = $FireResult.Summary $AiBackend = $FireResult.Backend $FireStats = $FireResult.FireStats } else { # Single-shot with density validation + retry Write-Verbose "Pipeline: using single-shot extraction" $DensityFloors = Get-DensityFloors -WordCount $WordCount $MaxRetries = 1 $DensityRetryNudge = '' for ($Attempt = 0; $Attempt -le $MaxRetries; $Attempt++) { $AttemptPrompt = $FullPrompt if ($Attempt -gt 0 -and $DensityRetryNudge) { Write-Verbose "Pipeline: density retry $Attempt/$MaxRetries" $AttemptPrompt = $FullPrompt + "`n`n" + $DensityRetryNudge } $AiResult = Invoke-AIApi ` -Prompt $AttemptPrompt ` -Model $Model ` -ApiKey $ApiKey ` -Temperature $Temperature ` -MaxTokens 32768 ` -JsonMode ` -TimeoutSec 120 if ($null -eq $AiResult) { return @{ Success = $false; DocId = $DocId; Error = 'API call returned null' } } $AiBackend = $AiResult.Backend Write-Verbose "Pipeline: response from $AiBackend" $CleanedText = $AiResult.Text -replace '(?s)^```json\s*', '' -replace '(?s)\s*```$', '' try { $SummaryObject = $CleanedText.Trim() | ConvertFrom-Json } catch { # Try repair $Repaired = Repair-TruncatedJson -Text $CleanedText try { $SummaryObject = $Repaired | ConvertFrom-Json } catch { $SummaryObject = $null } } if ($null -eq $SummaryObject) { return @{ Success = $false; DocId = $DocId; Error = 'InvalidJson' } } # Density check $DensityCheck = Test-SummaryDensity -SummaryObject $SummaryObject -Floors $DensityFloors if ($DensityCheck.Pass) { break } $DensityRetryNudge = Build-DensityRetryNudge -Shortfalls $DensityCheck.Shortfalls Write-Verbose "Pipeline: density check failed — $($DensityCheck.Shortfalls -join '; ')" if ($Attempt -eq $MaxRetries) { Write-Verbose 'Pipeline: accepting under-dense result after retries' } } } if ($null -eq $SummaryObject) { return @{ Success = $false; DocId = $DocId; Error = 'No summary produced' } } # ── Stage 5: AutoFire Stage 2 — post-extraction sniff (always runs) ────── if (-not $IterativeExtraction -and $null -ne $SummaryObject) { $Sniff2 = Test-FireRequired -SummaryObject $SummaryObject if ($Sniff2.ShouldFire) { Write-Verbose "Pipeline AutoFire Stage 2: $($Sniff2.Reason) — re-running with FIRE" $FireResult = Invoke-IterativeExtraction ` -Prompt $FullPrompt -Model $Model -ApiKey $ApiKey -Temperature $Temperature if ($FireResult.Summary) { $SummaryObject = $FireResult.Summary $AiBackend = $FireResult.Backend $FireStats = $FireResult.FireStats } } } # ── Stage 6: Unmapped concept resolution ────────────────────────────────── if ($SummaryObject.unmapped_concepts -and @($SummaryObject.unmapped_concepts).Count -gt 0) { try { $Resolution = Resolve-UnmappedConcepts -UnmappedConcepts @($SummaryObject.unmapped_concepts) if ($Resolution.Resolved.Count -gt 0) { $SummaryObject.unmapped_concepts = $Resolution.Remaining Write-Verbose "Pipeline: resolved $($Resolution.Resolved.Count) unmapped concept(s)" } } catch { Write-Verbose "Pipeline: unmapped resolution failed — $($_.Exception.Message)" } } # ── Stage 7: Collect stats and return ───────────────────────────────────── $Elapsed = (Get-Date) - $PipelineStart $Camps = @('accelerationist', 'safetyist', 'skeptic') $TotalPoints = 0; $NullNodes = 0 foreach ($Camp in $Camps) { $CampData = $SummaryObject.pov_summaries.$Camp if ($CampData -and $CampData.key_points) { $TotalPoints += @($CampData.key_points).Count $NullNodes += @($CampData.key_points | Where-Object { $null -eq $_.taxonomy_node_id }).Count } } if ($SummaryObject.factual_claims) { $FactualCount = @($SummaryObject.factual_claims).Count } else { $FactualCount = 0 } if ($SummaryObject.unmapped_concepts) { $UnmappedCount = @($SummaryObject.unmapped_concepts).Count } else { $UnmappedCount = 0 } return @{ Success = $true DocId = $DocId Summary = $SummaryObject Backend = $AiBackend TotalPoints = $TotalPoints NullNodes = $NullNodes FactualCount = $FactualCount UnmappedCount = $UnmappedCount ElapsedSeconds = [Math]::Round($Elapsed.TotalSeconds, 1) FireStats = $FireStats TaxonomyJson = $TaxonomyJson UsedFire = [bool]$IterativeExtraction } } |