# Private/Invoke-DocumentSummary.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

<#
.SYNOPSIS
    Generates a multi-POV AI summary for a single document.
.DESCRIPTION
    Core summarization worker called by Invoke-POVSummary and Invoke-BatchSummary.
    Reads the document's snapshot.md, builds a density-scaled prompt with the
    current taxonomy, and calls the AI API to produce a structured JSON summary
    containing per-POV key_points, factual_claims, and unmapped_concepts.
 
    Documents under ~20,000 estimated tokens use a single API call with density
    validation and one retry. Larger documents are automatically split into
    semantically coherent chunks (via Split-DocumentChunks), each chunk is
    summarized independently, and results are merged (via Merge-ChunkSummaries)
    with deduplication.
 
    The output is written to summaries/<doc-id>.json and the source's
    metadata.json is updated with summary_status='current'.
.PARAMETER Doc
    Hashtable with document context: DocId, Meta, PovTags, SnapshotFile, MetaFile.
    Built by the calling cmdlet from the source directory.
.PARAMETER ApiKey
    AI API key for the configured backend.
.PARAMETER Model
    AI model identifier (e.g., 'gemini-3.1-flash-lite'). Must be registered in
    ai-models.json.
.PARAMETER Temperature
    Sampling temperature for the AI call. Lower values produce more deterministic
    output.
.PARAMETER TaxonomyVersion
    Current taxonomy version string (from TAXONOMY_VERSION file).
.PARAMETER TaxonomyJson
    Serialized JSON of the full taxonomy, injected into the prompt for node
    mapping.
.PARAMETER SystemPromptTemplate
    The system prompt template with {{WORD_COUNT}}, {{KP_MIN}}, etc. placeholders
    for density scaling.
.PARAMETER ChunkSystemPromptTemplate
    Optional override prompt for chunk-level summarization. If empty, the
    'pov-summary-chunk-system' prompt is loaded from Prompts/.
.PARAMETER OutputSchema
    JSON schema string that the AI response must conform to.
.PARAMETER SummariesDir
    Absolute path to the summaries output directory.
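.PARAMETER Now
    ISO timestamp for the generated_at field.
.PARAMETER IterativeExtraction
    Switch forwarded to Invoke-SummaryPipeline when the document is summarized
    in a single call.
.PARAMETER AutoFire
    Switch forwarded to Invoke-SummaryPipeline when the document is summarized
    in a single call.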
.EXAMPLE
    # Typically called internally by Invoke-POVSummary:
    $Result = Invoke-DocumentSummary -Doc $DocContext -ApiKey $Key -Model 'gemini-3.1-flash-lite' `
        -Temperature 0.1 -TaxonomyVersion '4.2' -TaxonomyJson $TaxJson `
        -SystemPromptTemplate $Prompt -OutputSchema $Schema -SummariesDir $OutDir -Now (Get-Date -Format 'o')
    if ($Result.Success) { Write-Host "Generated $($Result.TotalPoints) key points" }
#>

function Invoke-DocumentSummary {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)][hashtable]$Doc,
        [Parameter(Mandatory)][string]$ApiKey,
        [Parameter(Mandatory)][string]$Model,
        [Parameter(Mandatory)][double]$Temperature,
        [Parameter(Mandatory)][string]$TaxonomyVersion,
        [Parameter(Mandatory)][string]$TaxonomyJson,
        [Parameter(Mandatory)][string]$SystemPromptTemplate,
        [string]$ChunkSystemPromptTemplate = '',
        [Parameter(Mandatory)][string]$OutputSchema,
        [Parameter(Mandatory)][string]$SummariesDir,
        [Parameter(Mandatory)][string]$Now,
        [switch]$IterativeExtraction,
        [switch]$AutoFire
    )

    # Entry point for summarizing one document. Reads the snapshot, estimates its
    # size, and routes to either Invoke-ChunkedSummary (large documents) or
    # Invoke-SummaryPipeline + Finalize-Summary (everything else).
    # Returns a hashtable; on the failure paths visible here it carries
    # Success = $false, DocId and Error.

    Set-StrictMode -Version Latest

    $ThisDocId = $Doc.DocId
    $Meta      = $Doc.Meta
    $ChunkThresholdTokens = 20000   # Documents above this get chunked
    $script:ContextRotStages = @()  # accumulator for context-rot instrumentation

    Write-Host "`n ┌─ $ThisDocId" -ForegroundColor White
    Write-Host " │ pov: $($Doc.PovTags -join ', ') | model: $Model" -ForegroundColor Gray

    # -- Load snapshot --------------------------------------------------------
    # -LiteralPath: a snapshot path containing [ ] or * must not be treated as a wildcard.
    $SnapshotText = Get-Content -LiteralPath $Doc.SnapshotFile -Raw
    if ([string]::IsNullOrWhiteSpace($SnapshotText)) {
        Write-Host " └─ SKIP $ThisDocId — snapshot.md is empty" -ForegroundColor Yellow
        return @{ Success = $false; DocId = $ThisDocId; Error = 'EmptySnapshot' }
    }
    # Rough token estimate: four characters per token.
    $EstimatedTokens = [int]($SnapshotText.Length / 4)
    Write-Host " │ snapshot: $($SnapshotText.Length) chars (~$EstimatedTokens tokens est.)" -ForegroundColor Gray

    # -- Decide: single-call or chunked pipeline ------------------------------
    if ($EstimatedTokens -gt $ChunkThresholdTokens) {
        Write-Host " │ ✨ Large document — using chunked pipeline" -ForegroundColor Cyan

        # Invoke-ChunkedSummary does not declare -IterativeExtraction / -AutoFire.
        # Splatting $PSBoundParameters unfiltered would fail parameter binding
        # whenever a caller passed either switch, so forward everything else.
        $SingleCallOnly = @('IterativeExtraction', 'AutoFire')
        $ChunkedParams  = @{}
        foreach ($Entry in $PSBoundParameters.GetEnumerator()) {
            if ($Entry.Key -in $SingleCallOnly) {
                Write-Verbose "-$($Entry.Key) is not used by the chunked pipeline; ignoring for $ThisDocId"
                continue
            }
            $ChunkedParams[$Entry.Key] = $Entry.Value
        }
        return Invoke-ChunkedSummary @ChunkedParams
    }

    # ========================================================================
    # SINGLE-CALL PATH (small documents) — delegates to shared pipeline
    # ========================================================================

    Write-Host " │ Running extraction pipeline..." -ForegroundColor Gray

    $PipelineResult = Invoke-SummaryPipeline `
        -SnapshotText          $SnapshotText `
        -DocId                 $ThisDocId `
        -Metadata              $Meta `
        -ApiKey                $ApiKey `
        -Model                 $Model `
        -Temperature           $Temperature `
        -TaxonomyVersion       $TaxonomyVersion `
        -SystemPromptTemplate  $SystemPromptTemplate `
        -OutputSchema          $OutputSchema `
        -IterativeExtraction:$IterativeExtraction `
        -AutoFire:$AutoFire

    if (-not $PipelineResult.Success) {
        Write-Host " └─ ✗ FAILED: $ThisDocId — $($PipelineResult.Error)" -ForegroundColor Red
        return @{ Success = $false; DocId = $ThisDocId; Error = $PipelineResult.Error }
    }

    $Elapsed = [TimeSpan]::FromSeconds($PipelineResult.ElapsedSeconds)
    Write-Host " │ ✓ Pipeline complete ($($PipelineResult.Backend)): $([int]$Elapsed.TotalSeconds)s" -ForegroundColor Green

    # The taxonomy JSON handed to Finalize-Summary is the one the pipeline
    # reports back, not the -TaxonomyJson argument of this function.
    return Finalize-Summary -SummaryObject $PipelineResult.Summary -ThisDocId $ThisDocId `
        -TaxonomyVersion $TaxonomyVersion -Model $Model -Temperature $Temperature `
        -Now $Now -SummariesDir $SummariesDir -Doc $Doc -Elapsed $Elapsed `
        -TaxonomyJson $PipelineResult.TaxonomyJson `
        -FireStats $PipelineResult.FireStats
}

# ============================================================================
# CHUNKED PIPELINE (large documents)
# ============================================================================

function Invoke-ChunkedSummary {
    <#
    .SYNOPSIS
        Summarizes a large document by splitting it into chunks, summarizing each
        chunk with its own AI call, and merging the per-chunk results.
    .DESCRIPTION
        Re-reads the snapshot file, splits it with Split-DocumentChunks, and for
        every chunk selects taxonomy context (Get-RelevantTaxonomyNodes, falling
        back to Build-CompactTaxonomy), calls Invoke-AIApi, and parses the reply
        with Parse-AIResponse. Chunks that fail are counted and skipped; the run
        only fails when no chunk succeeds. Successful chunks are merged with
        Merge-ChunkSummaries, checked against discounted density floors (warning
        only), and handed to Finalize-Summary. Context-rot metrics are appended to
        $script:ContextRotStages along the way.
    .PARAMETER Doc
        Hashtable with document context; DocId, Meta, PovTags and SnapshotFile are read here.
    .PARAMETER ApiKey
        API key passed to Get-RelevantTaxonomyNodes and Invoke-AIApi.
    .PARAMETER Model
        Model identifier passed to Invoke-AIApi.
    .PARAMETER Temperature
        Sampling temperature passed to Invoke-AIApi.
    .PARAMETER TaxonomyVersion
        Version string written into each chunk prompt header.
    .PARAMETER TaxonomyJson
        Passed through to Finalize-Summary; not used for the per-chunk prompts.
    .PARAMETER SystemPromptTemplate
        Accepted for signature parity with Invoke-DocumentSummary; not read in this function.
    .PARAMETER ChunkSystemPromptTemplate
        Optional chunk system prompt. When empty, Get-Prompt 'pov-summary-chunk-system' is used.
    .PARAMETER OutputSchema
        Schema text appended to the chunk system instruction.
    .PARAMETER SummariesDir
        Output directory passed to Parse-AIResponse and Finalize-Summary.
    .PARAMETER Now
        Timestamp passed to Finalize-Summary.
    .OUTPUTS
        The result of Finalize-Summary, or a hashtable with Success = $false,
        DocId and Error when every chunk fails.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)][hashtable]$Doc,
        [Parameter(Mandatory)][string]$ApiKey,
        [Parameter(Mandatory)][string]$Model,
        [Parameter(Mandatory)][double]$Temperature,
        [Parameter(Mandatory)][string]$TaxonomyVersion,
        [Parameter(Mandatory)][string]$TaxonomyJson,
        [Parameter(Mandatory)][string]$SystemPromptTemplate,
        [string]$ChunkSystemPromptTemplate = '',
        [Parameter(Mandatory)][string]$OutputSchema,
        [Parameter(Mandatory)][string]$SummariesDir,
        [Parameter(Mandatory)][string]$Now
    )

    $ThisDocId = $Doc.DocId
    $Meta      = $Doc.Meta
    # -LiteralPath: a snapshot path containing [ ] or * must not be treated as a wildcard.
    $SnapshotText = Get-Content -LiteralPath $Doc.SnapshotFile -Raw

    # -- Split into chunks ----------------------------------------------------
    $Chunks = @(Split-DocumentChunks -Text $SnapshotText -MaxChunkTokens 6000 -MinChunkTokens 1500)
    $ChunkCount = $Chunks.Count
    Write-Host " │ split into $ChunkCount chunks" -ForegroundColor Cyan

    # -- Context-rot: chunking metrics ----------------------------------------
    $InputTokensEst = [int]($SnapshotText.Length / 4)
    $ChunkTokensSum = 0
    foreach ($c in $Chunks) { $ChunkTokensSum += [int]($c.Length / 4) }
    $script:ContextRotStages += @(New-ContextRotStage `
        -Stage 'chunking' -InUnits 'tokens_est' -InCount $InputTokensEst `
        -OutUnits 'tokens_est' -OutCount $ChunkTokensSum `
        -Flags @{ chunk_count = $ChunkCount })

    # -- Load chunk-specific system prompt ------------------------------------
    if ($ChunkSystemPromptTemplate) {
        $ChunkSystemPrompt = $ChunkSystemPromptTemplate
    } else {
        $ChunkSystemPrompt = Get-Prompt -Name 'pov-summary-chunk-system'
    }
    $DocHeader = Build-DocHeader -Doc $Doc -Meta $Meta -ThisDocId $ThisDocId

    # -- Process each chunk sequentially (API rate limits) --------------------
    $StartTime = Get-Date
    $ChunkResults = [System.Collections.Generic.List[object]]::new()
    $FailedChunks = 0
    $ChunkRAGMetrics = [System.Collections.Generic.List[object]]::new()
    $ChunkExtractionStats = @{ TotalPoints = 0; NullNodes = 0; FactualClaims = 0; UnmappedConcepts = 0; PromptChars = 0 }

    for ($i = 0; $i -lt $ChunkCount; $i++) {
        $ChunkNum = $i + 1
        $ChunkText = $Chunks[$i]
        $ChunkTokens = [int]($ChunkText.Length / 4)

        Write-Host " │ chunk $ChunkNum/$ChunkCount (~$ChunkTokens tokens)..." -ForegroundColor Gray -NoNewline

        # Per-chunk relevance filtering: use chunk text as query for better node selection
        $ChunkTaxonomy = $null
        $script:LastRAGMetrics = $null
        try {
            $ChunkRelevant = Get-RelevantTaxonomyNodes -Query $ChunkText `
                -MaxTotal 150 -TopK 60 -MinPerCategory 2 `
                -IncludeSituations -Format context -ApiKey $ApiKey
            if ($ChunkRelevant) {
                $ChunkTaxonomy = $ChunkRelevant
                Write-Verbose " Chunk $ChunkNum`: RAG-filtered to ~40 nodes"
            }
        }
        catch {
            # Best-effort: any RAG failure falls through to the compact taxonomy below.
            Write-Verbose " Chunk $ChunkNum`: RAG fallback — using compact taxonomy"
        }
        # $script:LastRAGMetrics is read here after the RAG call; this function only resets it.
        if ($script:LastRAGMetrics) {
            $null = $ChunkRAGMetrics.Add($script:LastRAGMetrics)
            $script:LastRAGMetrics = $null
        }
        if (-not $ChunkTaxonomy) {
            $ChunkTaxonomy = Build-CompactTaxonomy
        }

        $ChunkSysInstruction = @"
$ChunkSystemPrompt
 
=== OUTPUT SCHEMA (your response must match this structure) ===
$OutputSchema
"@


        $ChunkPrompt = @"
=== TAXONOMY (version $TaxonomyVersion) ===
$ChunkTaxonomy
 
$DocHeader
 
--- DOCUMENT SECTION $ChunkNum OF $ChunkCount ---
$ChunkText
"@


        try {
            $AIResult = Invoke-AIApi `
                -Prompt     $ChunkPrompt `
                -SystemInstruction $ChunkSysInstruction `
                -Model      $Model `
                -ApiKey     $ApiKey `
                -Temperature $Temperature `
                -MaxTokens  65536 `
                -JsonMode `
                -TimeoutSec 600 `
                -MaxRetries 3 `
                -RetryDelays @(5, 15, 45)

            if ($null -eq $AIResult) {
                Write-Host " ✗ null response" -ForegroundColor Red
                $FailedChunks++
                continue
            }

            if ($AIResult.PSObject.Properties['Truncated'] -and $AIResult.Truncated) {
                Write-Host " ⚠ TRUNCATED — output exceeded model limit, extraction may be incomplete" -ForegroundColor Yellow
            }

            $ChunkObj = Parse-AIResponse -RawText $AIResult.Text -ThisDocId "$ThisDocId-chunk$ChunkNum" -SummariesDir $SummariesDir
            if ($null -eq $ChunkObj) {
                Write-Host " ✗ bad JSON" -ForegroundColor Red
                $FailedChunks++
                continue
            }

            $ChunkResults.Add($ChunkObj)

            # Everything below reads optional fields through PSObject.Properties.
            # The caller runs under Set-StrictMode -Version Latest, where a direct
            # read of an absent property throws; that used to land in the catch
            # block and count the chunk as failed after it had been added above.
            $ChunkPts = 0
            $ChunkNulls = 0
            $PovProp = $ChunkObj.PSObject.Properties['pov_summaries']
            if ($PovProp -and $PovProp.Value) {
                foreach ($c in @('accelerationist','safetyist','skeptic')) {
                    $CampProp = $PovProp.Value.PSObject.Properties[$c]
                    if (-not $CampProp -or -not $CampProp.Value) { continue }
                    $KpProp = $CampProp.Value.PSObject.Properties['key_points']
                    if (-not $KpProp -or -not $KpProp.Value) { continue }
                    $pts = @($KpProp.Value)
                    $ChunkPts += $pts.Count
                    # A key point with no taxonomy_node_id property counts the same as an explicit null.
                    $ChunkNulls += @($pts | Where-Object {
                        $IdProp = $_.PSObject.Properties['taxonomy_node_id']
                        (-not $IdProp) -or ($null -eq $IdProp.Value)
                    }).Count
                }
            }
            $FactsProp    = $ChunkObj.PSObject.Properties['factual_claims']
            $UnmappedProp = $ChunkObj.PSObject.Properties['unmapped_concepts']
            $ChunkFacts = if ($FactsProp -and $FactsProp.Value) { @($FactsProp.Value).Count } else { 0 }
            $ChunkUnmapped = if ($UnmappedProp -and $UnmappedProp.Value) { @($UnmappedProp.Value).Count } else { 0 }
            $ChunkExtractionStats.TotalPoints += $ChunkPts
            $ChunkExtractionStats.NullNodes += $ChunkNulls
            $ChunkExtractionStats.FactualClaims += $ChunkFacts
            $ChunkExtractionStats.UnmappedConcepts += $ChunkUnmapped
            $ChunkExtractionStats.PromptChars += $ChunkPrompt.Length
            if ($ChunkPts -eq 0) {
                Write-Host " ⚠ 0 points (chunk may be non-substantive or extraction failed silently)" -ForegroundColor Yellow
            } elseif ($ChunkPts -lt 3) {
                Write-Host " ⚠ $ChunkPts points (sparse)" -ForegroundColor Yellow
            } else {
                Write-Host " ✓ $ChunkPts points" -ForegroundColor Green
            }

        } catch {
            Write-Host " ✗ $_" -ForegroundColor Red
            $FailedChunks++
        }
    }

    $Elapsed = (Get-Date) - $StartTime

    if ($ChunkResults.Count -eq 0) {
        Write-Host " └─ ✗ All $ChunkCount chunks failed" -ForegroundColor Red
        return @{ Success = $false; DocId = $ThisDocId; Error = "All $ChunkCount chunks failed" }
    }

    if ($FailedChunks -gt 0) {
        Write-Host " │ ⚠ $FailedChunks/$ChunkCount chunks failed (proceeding with $($ChunkResults.Count) successful)" -ForegroundColor Yellow
    }

    # -- Context-rot: aggregated per-chunk RAG + extraction metrics -----------
    if ($ChunkRAGMetrics.Count -gt 0) {
        $TotalIn = 0; $TotalOut = 0; $TotalForced = 0
        $BeliefsSum = 0; $DesiresSum = 0; $IntentionsSum = 0
        $MinNodes = [int]::MaxValue; $MaxNodes = 0
        foreach ($rm in $ChunkRAGMetrics) {
            $TotalIn += $rm.in_count; $TotalOut += $rm.out_count
            $TotalForced += ($rm.flags.below_threshold_forced ?? 0)
            $BeliefsSum += ($rm.flags.beliefs_selected ?? 0)
            $DesiresSum += ($rm.flags.desires_selected ?? 0)
            $IntentionsSum += ($rm.flags.intentions_selected ?? 0)
            if ($rm.out_count -lt $MinNodes) { $MinNodes = [int]$rm.out_count }
            if ($rm.out_count -gt $MaxNodes) { $MaxNodes = [int]$rm.out_count }
        }
        # In/Out counts recorded for the stage are per-chunk averages.
        $script:ContextRotStages += @(New-ContextRotStage `
            -Stage 'rag_filtering' -InUnits 'nodes' -InCount ([int]($TotalIn / $ChunkRAGMetrics.Count)) `
            -OutUnits 'nodes' -OutCount ([int]($TotalOut / $ChunkRAGMetrics.Count)) `
            -Flags @{
                chunk_count            = $ChunkRAGMetrics.Count
                avg_nodes_selected     = [Math]::Round($TotalOut / $ChunkRAGMetrics.Count, 0)
                min_nodes_selected     = $MinNodes
                max_nodes_selected     = $MaxNodes
                total_below_threshold  = $TotalForced
                avg_beliefs            = [Math]::Round($BeliefsSum / $ChunkRAGMetrics.Count, 0)
                avg_desires            = [Math]::Round($DesiresSum / $ChunkRAGMetrics.Count, 0)
                avg_intentions         = [Math]::Round($IntentionsSum / $ChunkRAGMetrics.Count, 0)
            })
    }
    if ($ChunkExtractionStats.TotalPoints -gt 0 -or $ChunkExtractionStats.PromptChars -gt 0) {
        $TotalItems = $ChunkExtractionStats.TotalPoints + $ChunkExtractionStats.FactualClaims + $ChunkExtractionStats.UnmappedConcepts
        $NullRate = if ($ChunkExtractionStats.TotalPoints -gt 0) {
            [Math]::Round($ChunkExtractionStats.NullNodes / $ChunkExtractionStats.TotalPoints, 4)
        } else { 0 }
        $script:ContextRotStages += @(New-ContextRotStage `
            -Stage 'extraction' -InUnits 'prompt_chars' -InCount $ChunkExtractionStats.PromptChars `
            -OutUnits 'items' -OutCount $TotalItems `
            -Flags @{
                null_node_rate    = $NullRate
                total_points      = $ChunkExtractionStats.TotalPoints
                factual_claims    = $ChunkExtractionStats.FactualClaims
                unmapped_concepts = $ChunkExtractionStats.UnmappedConcepts
                chunk_count       = $ChunkResults.Count
            })
    }

    # -- Merge chunk results --------------------------------------------------
    Write-Host " │ merging $($ChunkResults.Count) chunk results..." -ForegroundColor Cyan
    $MergedObject = Merge-ChunkSummaries -ChunkResults @($ChunkResults)

    # Capture context-rot merge metrics before stripping the internal field
    if ($MergedObject['_merge_metrics']) {
        $script:ContextRotStages += @($MergedObject['_merge_metrics'])
        $MergedObject.Remove('_merge_metrics')
    }

    # Convert ordered hashtable to PSCustomObject for consistent downstream handling
    $SummaryObject = [PSCustomObject]$MergedObject

    Write-Host " │ ✓ Merged ($([int]$Elapsed.TotalSeconds)s total, $ChunkCount chunks)" -ForegroundColor Green

    # -- Density check on merged result (warn only, no retry for chunked) ----
    # Scale floors by merge discount — chunked extraction produces less per-unit
    # than single-pass because each chunk has limited cross-chunk context
    $WordCount = ($SnapshotText -split '\s+').Count
    $DensityFloors = Get-DensityFloors -WordCount $WordCount
    $MergeDiscount = 1 / [Math]::Sqrt($ChunkCount)
    $DensityFloors.KpMin = [Math]::Max(3, [int]($DensityFloors.KpMin * $MergeDiscount))
    $DensityFloors.UcMin = [Math]::Max(1, [int]($DensityFloors.UcMin * $MergeDiscount))
    $DensityFloors.TotalFloor = [Math]::Max(6, [int]($DensityFloors.TotalFloor * $MergeDiscount))
    $DensityCheck = Test-SummaryDensity -SummaryObject $SummaryObject -Floors $DensityFloors
    if (-not $DensityCheck.Pass) {
        Write-Host " │ ⚠ Merged density below floor: $($DensityCheck.Shortfalls -join '; ')" -ForegroundColor Yellow
    }

    return Finalize-Summary -SummaryObject $SummaryObject -ThisDocId $ThisDocId `
        -TaxonomyVersion $TaxonomyVersion -Model $Model -Temperature $Temperature `
        -Now $Now -SummariesDir $SummariesDir -Doc $Doc -Elapsed $Elapsed -ChunkCount $ChunkCount `
        -TaxonomyJson $TaxonomyJson
}

# ============================================================================
# SHARED HELPERS
# ============================================================================

function Get-DensityFloors {
    <#
    .SYNOPSIS
        Derives minimum extraction counts from a document's word count.
    .PARAMETER WordCount
        Number of words in the document.
    .OUTPUTS
        Hashtable with three integer entries:
          KpMin      - one per 500 words, at least 3
          UcMin      - one per 2000 words, at least 2
          TotalFloor - twice KpMin, at least 6
    #>
    param([int]$WordCount)

    # Key-point floor: proportional to length, clamped from below.
    $KeyPointFloor = [int]($WordCount / 500)
    if ($KeyPointFloor -lt 3) { $KeyPointFloor = 3 }

    # Unmapped-concept floor.
    $UnmappedFloor = [int]($WordCount / 2000)
    if ($UnmappedFloor -lt 2) { $UnmappedFloor = 2 }

    # Combined floor across all camps.
    $CombinedFloor = $KeyPointFloor * 2
    if ($CombinedFloor -lt 6) { $CombinedFloor = 6 }

    $Result = @{}
    $Result['KpMin']      = $KeyPointFloor
    $Result['UcMin']      = $UnmappedFloor
    $Result['TotalFloor'] = $CombinedFloor
    return $Result
}

function Test-SummaryDensity {
    <#
    .SYNOPSIS
        Checks a summary object against minimum extraction-density floors.
    .DESCRIPTION
        Counts key_points per camp and unmapped_concepts, then reports:
          - any camp with zero key_points that has not declared all three
            categories (Desires, Beliefs, Intentions) in empty_cells;
          - a total key_point count below Floors.TotalFloor (default 6);
          - an unmapped_concepts count below Floors.UcMin.
        Fields are read the same way whether a node is a PSCustomObject or an
        IDictionary, and absent fields are treated as empty, so the check does
        not throw under Set-StrictMode when the model omits a camp.
    .PARAMETER SummaryObject
        Summary to inspect (PSCustomObject or dictionary, possibly with nested dictionaries).
    .PARAMETER Floors
        Hashtable of floors; TotalFloor and UcMin are read.
    .OUTPUTS
        Hashtable with Pass (bool) and Shortfalls (string array).
    #>
    param(
        [object]$SummaryObject,
        [hashtable]$Floors
    )

    # Reads a named field from either a dictionary or an object; yields nothing when absent.
    $ReadField = {
        param($Obj, [string]$Name)
        if ($null -eq $Obj) { return }
        if ($Obj -is [System.Collections.IDictionary]) {
            if ($Obj.Contains($Name)) { return $Obj[$Name] }
            return
        }
        $Prop = $Obj.PSObject.Properties[$Name]
        if ($Prop) { return $Prop.Value }
    }

    if ($null -eq $Floors) { $Floors = @{} }

    $Camps = @('accelerationist','safetyist','skeptic')
    $Shortfalls = [System.Collections.Generic.List[string]]::new()

    # Collect empty_cells declared by the model (licensed emptiness per REC-1)
    $EmptyCellSet = [System.Collections.Generic.HashSet[string]]::new()
    $EmptyCells = & $ReadField $SummaryObject 'empty_cells'
    if ($EmptyCells) {
        foreach ($ec in @($EmptyCells)) {
            $EcCamp     = & $ReadField $ec 'camp'
            $EcCategory = & $ReadField $ec 'category'
            # Entries missing either part can never match a "camp|category" key below.
            if ($null -ne $EcCamp -and $null -ne $EcCategory) {
                [void]$EmptyCellSet.Add("$EcCamp|$EcCategory")
            }
        }
    }

    # Total key_points across all camps (replaces per-camp floors)
    $PovSummaries = & $ReadField $SummaryObject 'pov_summaries'
    $TotalKp = 0
    foreach ($Camp in $Camps) {
        $CampData  = & $ReadField $PovSummaries $Camp
        $KeyPoints = & $ReadField $CampData 'key_points'
        $Count = 0
        if ($KeyPoints) {
            $Count = @($KeyPoints).Count
        }
        $TotalKp += $Count

        # Flag camps with zero points and no empty_cell declaration
        if ($Count -eq 0) {
            $AllDeclared = $true
            foreach ($cat in @('Desires','Beliefs','Intentions')) {
                if (-not $EmptyCellSet.Contains("$Camp|$cat")) {
                    $AllDeclared = $false
                    break
                }
            }
            if (-not $AllDeclared) {
                $null = $Shortfalls.Add("$Camp has 0 key_points without empty_cells declarations")
            }
        }
    }

    $TotalFloor = if ($Floors.ContainsKey('TotalFloor')) { $Floors.TotalFloor } else { 6 }
    if ($TotalKp -lt $TotalFloor) {
        $null = $Shortfalls.Add("total key_points: $TotalKp < $TotalFloor across all camps")
    }

    $UcCount = 0
    $Unmapped = & $ReadField $SummaryObject 'unmapped_concepts'
    if ($Unmapped) {
        $UcCount = @($Unmapped).Count
    }
    # With no UcMin supplied there is no unmapped-concept floor to enforce.
    $UcMin = if ($Floors.ContainsKey('UcMin')) { $Floors.UcMin } else { 0 }
    if ($UcCount -lt $UcMin) {
        $null = $Shortfalls.Add("unmapped_concepts: $UcCount < $UcMin min")
    }

    return @{
        Pass       = ($Shortfalls.Count -eq 0)
        Shortfalls = @($Shortfalls)
    }
}

function Build-DensityRetryNudge {
    <#
    .SYNOPSIS
        Composes the retry instruction text listing which density floors were missed.
    .PARAMETER Shortfalls
        Shortfall descriptions; each becomes one " - ..." bullet line.
    .OUTPUTS
        A single string with lines joined by LF.
    #>
    param([string[]]$Shortfalls)

    $Text = [System.Collections.Generic.List[string]]::new()

    # Preamble
    $Text.Add("IMPORTANT: Your previous response did not meet extraction effort requirements.")
    $Text.Add("Specific shortfalls:")

    # One bullet per reported shortfall
    foreach ($Item in $Shortfalls) {
        $Text.Add(" - $Item")
    }

    # Fixed instructions, separated from the bullets by a blank line
    $Text.Add("")
    $Text.Add("Go back through the document and extract MORE points. Examine ALL nine BDI")
    $Text.Add("cells (3 POV camps x 3 categories). For each cell, extract every distinct")
    $Text.Add("point the document actually contains. If a cell is genuinely empty, declare")
    $Text.Add("it in the empty_cells array with a reason. Every distinct claim, argument,")
    $Text.Add("or piece of evidence should be its own key_point or factual_claim.")
    $Text.Add("Include a canonical_proposition for every key_point.")

    return [string]::Join("`n", $Text)
}

function Build-DensityScaledPrompt {
    <#
    .SYNOPSIS
        Fills the density placeholders of a prompt template for a document of a given size.
    .DESCRIPTION
        Minimums come from Get-DensityFloors; maximums are computed here
        (key points: one per 200 words, at least 8; unmapped: one per 800 words,
        at least 5). Replaces {{WORD_COUNT}}, {{KP_MIN}}, {{KP_MAX}}, {{UC_MIN}},
        {{UC_MAX}} and {{TOTAL_FLOOR}} and logs the chosen ranges.
    .PARAMETER WordCount
        Word count of the document.
    .PARAMETER Template
        Prompt template containing the placeholders.
    .OUTPUTS
        The template text with placeholders substituted.
    #>
    param(
        [int]$WordCount,
        [string]$Template
    )

    $Floors     = Get-DensityFloors -WordCount $WordCount
    $kpMin      = $Floors.KpMin
    $ucMin      = $Floors.UcMin
    $totalFloor = $Floors.TotalFloor
    $kpMax      = [Math]::Max(8,  [int]($WordCount / 200))
    $ucMax      = [Math]::Max(5,  [int]($WordCount / 800))

    Write-Host " │ ~$WordCount words → key_points $kpMin-$kpMax/camp (target), total floor $totalFloor, unmapped $ucMin-$ucMax" -ForegroundColor Gray

    # Placeholder name -> value, applied in the listed order.
    $Substitutions = [ordered]@{
        'WORD_COUNT'  = $WordCount
        'KP_MIN'      = $kpMin
        'KP_MAX'      = $kpMax
        'UC_MIN'      = $ucMin
        'UC_MAX'      = $ucMax
        'TOTAL_FLOOR' = $totalFloor
    }

    $Rendered = $Template
    foreach ($Token in $Substitutions.Keys) {
        # Escaped pattern matches the literal {{TOKEN}} text; -replace stays case-insensitive.
        $Pattern  = [regex]::Escape('{{' + $Token + '}}')
        $Rendered = $Rendered -replace $Pattern, $Substitutions[$Token]
    }
    return $Rendered
}

function Build-CompactTaxonomy {
    <#
    .SYNOPSIS
        Renders a short text listing of taxonomy nodes for use in a prompt.
    .DESCRIPTION
        Reads accelerationist.json, safetyist.json, skeptic.json and situations.json
        from the directory returned by Get-TaxonomyDir (files that do not exist are
        skipped) and writes one line per node: the id, the category in square
        brackets when the node has one, and the label. Each file gets its own
        "--- Name ---" section header.
    .OUTPUTS
        The listing as a single string.
    #>

    $TaxonomyRoot = Get-TaxonomyDir
    $Buffer = [System.Text.StringBuilder]::new()
    $null = $Buffer.AppendLine("=== COMPACT TAXONOMY (id | category | label — full descriptions omitted for brevity) ===")
    $null = $Buffer.AppendLine("")

    # File name -> section title, in output order.
    $Sections = @(
        @{ File = 'accelerationist.json'; Title = 'Accelerationist' }
        @{ File = 'safetyist.json';       Title = 'Safetyist' }
        @{ File = 'skeptic.json';         Title = 'Skeptic' }
        @{ File = 'situations.json';      Title = 'Situations' }
    )

    foreach ($Section in $Sections) {
        $SourcePath = Join-Path $TaxonomyRoot $Section.File
        if (Test-Path $SourcePath) {
            $Parsed = Get-Content $SourcePath -Raw | ConvertFrom-Json
            $null = $Buffer.AppendLine("--- $($Section.Title) ---")

            foreach ($Node in $Parsed.nodes) {
                # Category tag is only emitted when the property exists and is non-empty.
                $CategoryTag = ''
                if ($null -ne $Node.PSObject.Properties['category'] -and $Node.category) {
                    $CategoryTag = "[$($Node.category)]"
                }
                $null = $Buffer.AppendLine(" $($Node.id) $CategoryTag $($Node.label)")
            }

            $null = $Buffer.AppendLine("")
        }
    }

    Write-Verbose "Pipeline: compact taxonomy built (~$([int]($Buffer.Length / 4)) tokens est.)"
    return $Buffer.ToString()
}

function Build-DocHeader {
    <#
    .SYNOPSIS
        Formats the four-line document header placed in front of document text in a prompt.
    .PARAMETER Doc
        Document context hashtable; PovTags is read and comma-joined.
    .PARAMETER Meta
        Metadata object; title is used when truthy, and topic_tags when the
        property exists and is non-empty.
    .PARAMETER ThisDocId
        Document id; also the fallback title.
    .OUTPUTS
        The header text.
    #>
    param(
        [hashtable]$Doc,
        [object]$Meta,
        [string]$ThisDocId
    )

    # Title falls back to the document id.
    $Title = $ThisDocId
    if ($Meta.title) { $Title = $Meta.title }

    $PovTags = $Doc.PovTags -join ', '

    # Topic tags fall back to a fixed marker.
    $TopicTags = '(none)'
    $HasTopicTags = ($null -ne $Meta.PSObject.Properties['topic_tags']) -and $Meta.topic_tags
    if ($HasTopicTags) { $TopicTags = $Meta.topic_tags -join ', ' }

    return @"
=== DOCUMENT: $ThisDocId ===
Title: $Title
POV tags (pre-classified): $PovTags
Topic tags: $TopicTags
"@

}

function Parse-AIResponse {
    <#
    .SYNOPSIS
        Parses the raw text of an AI reply as JSON, tolerating Markdown code fences.
    .DESCRIPTION
        Trims the text, strips a leading ``` or ```json fence and a trailing ```
        fence, and parses the remainder with ConvertFrom-Json. If that fails,
        Repair-TruncatedJson is tried on the raw text. If both fail, the raw text
        is saved to "<ThisDocId>.debug-raw.txt" in SummariesDir for inspection.
    .PARAMETER RawText
        Text returned by the model.
    .PARAMETER ThisDocId
        Identifier used to name the debug file.
    .PARAMETER SummariesDir
        Directory that receives the debug file.
    .OUTPUTS
        The parsed object, or $null when the text could not be parsed or repaired.
    #>
    param(
        [string]$RawText,
        [string]$ThisDocId,
        [string]$SummariesDir
    )

    # Trim BEFORE stripping fences: the patterns are anchored, so whitespace or a
    # newline ahead of the opening fence would otherwise leave the fence in place.
    # The language tag is optional so a bare ``` fence is handled as well.
    $CleanText = "$RawText".Trim()
    $CleanText = $CleanText -replace '^```(?:json)?\s*', '' -replace '\s*```$', ''
    $CleanText = $CleanText.Trim()

    try {
        return ($CleanText | ConvertFrom-Json -ErrorAction Stop)
    } catch {
        Write-Host " │ ⚠ JSON parse failed — attempting repair" -ForegroundColor Yellow
        $Repaired = Repair-TruncatedJson -Text $RawText
        if ($Repaired) {
            try {
                return ($Repaired | ConvertFrom-Json -ErrorAction Stop)
            } catch {
                # Repair produced text that still is not valid JSON; fall through
                # to the debug dump below.
            }
        }
        $DebugPath = Join-Path $SummariesDir "${ThisDocId}.debug-raw.txt"
        Write-Utf8NoBom -Path $DebugPath -Value $RawText 
        Write-Host " │ ✗ Invalid JSON. Raw saved: $DebugPath" -ForegroundColor Red
        return $null
    }
}

# Canonical stance values. Resolve-Stance returns one of these, and
# Finalize-Summary passes any key_point stance outside this list through
# Resolve-Stance.
$script:ValidStances = @('strongly_aligned','aligned','neutral','opposed','strongly_opposed','not_applicable')

# Keyword hints used by Resolve-Stance to map free-form stance text onto a
# canonical stance. Keys are canonical stance names; values are the keywords
# that select that stance. Resolve-Stance turns '_' and '-' into spaces in both
# the input and each keyword before comparing, so 'strongly_align' and
# 'strongly align' are the same hint. There is no 'neutral' entry: Resolve-Stance
# returns 'neutral' when nothing here matches.
$script:StanceKeywordMap = @{
    strongly_aligned = @('strongly_align','strongly_support','fully_align','fully_support',
                         'enthusiastic','wholehearted','completely_agree','fully_endorse',
                         'strongly agree','fully agree','totally agree','completely support')
    aligned          = @('align','support','agree','endorse','advocate','favor','embrace',
                         'concur','approve','accept','positive','promote','back','champion',
                         'sympathetic','pro','affirmative','encourage')
    opposed          = @('oppose','disagree','reject','dispute','challenge','contest','resist',
                         'deny','refute','counter','critical','against','negative','skeptical',
                         'pushback','push back','object','sucks','bad','wrong','flawed','harmful')
    strongly_opposed = @('strongly_oppose','strongly_disagree','vehemently','adamant',
                         'completely_reject','fundamentally_oppose','categorically',
                         'strongly reject','totally reject','completely disagree','dangerous',
                         'catastrophic','existential threat','reckless','unconscionable')
    not_applicable   = @('not_applicable','n/a','irrelevant','no_stance','no stance','unrelated',
                         'does not address','not relevant','outside scope')
}

function Resolve-Stance {
    <#
    .SYNOPSIS
        Normalizes a free-form stance string to one of the canonical stance values.
    .DESCRIPTION
        Resolution order:
          1. Blank input -> 'neutral'.
          2. Input that equals a canonical stance once case, '_', '-' and
             whitespace differences are removed -> that stance.
          3. First keyword hit from $script:StanceKeywordMap, checking the
             'strongly_*' stances before their weaker forms, then not_applicable.
          4. Otherwise 'neutral'.
        A keyword only counts when it begins at the start of a word. It may still
        be a prefix ('support' matches 'supportive'), but it no longer matches
        inside another word, so 'disapprove' and 'unacceptable' are not read as
        'aligned' through 'approve' / 'accept'.
    .PARAMETER Raw
        Stance text as produced by the model.
    .OUTPUTS
        One of the values in $script:ValidStances.
    #>
    param([string]$Raw)

    if ([string]::IsNullOrWhiteSpace($Raw)) { return 'neutral' }

    # Lower-case and treat '_' / '-' as spaces so all spellings compare alike.
    $Lower = $Raw.Trim().ToLowerInvariant() -replace '[_\-]',' '

    if ($Lower -in $script:ValidStances) { return $Lower }
    $Normalized = $Lower -replace '\s+','_'
    if ($Normalized -in $script:ValidStances) { return $Normalized }

    # Stronger stances first, so 'strongly agree' is not caught by plain 'agree'.
    foreach ($Stance in 'strongly_opposed','strongly_aligned','opposed','aligned','not_applicable') {
        foreach ($Kw in $script:StanceKeywordMap[$Stance]) {
            $KwNorm = $Kw -replace '[_\-]',' '
            # Negative lookbehind: the keyword must not be preceded by a letter or digit.
            $Pattern = '(?<![a-z0-9])' + [regex]::Escape($KwNorm)
            if ($Lower -match $Pattern) { return $Stance }
        }
    }

    return 'neutral'
}

function Finalize-Summary {
    param(
        [object]$SummaryObject,
        [string]$ThisDocId,
        [string]$TaxonomyVersion,
        [string]$Model,
        [double]$Temperature,
        [string]$Now,
        [string]$SummariesDir,
        [hashtable]$Doc,
        [TimeSpan]$Elapsed,
        [int]$ChunkCount = 0,
        [string]$TaxonomyJson = '',
        [object]$FireStats = $null
    )

    # -- Schema validation (Gap 3.4) --------------------------------------------
    # Use a helper that works on both PSCustomObject and OrderedDictionary
    # (Merge-ChunkSummaries returns OrderedDictionary, single-call returns PSCustomObject)
    function Has-Field($Obj, [string]$Name) {
        if ($Obj -is [System.Collections.IDictionary]) { return $Obj.Contains($Name) }
        if ($Obj.PSObject.Properties[$Name]) { return $true }
        return $false
    }
    function Get-Field($Obj, [string]$Name) {
        if ($Obj -is [System.Collections.IDictionary]) { return $Obj[$Name] }
        return $Obj.$Name
    }

    $Camps        = @('accelerationist','safetyist','skeptic')
    $SchemaErrors = [System.Collections.Generic.List[string]]::new()

    if (-not (Has-Field $SummaryObject 'pov_summaries')) {
        $SchemaErrors.Add("Missing required field: pov_summaries")
    } else {
        $PovSums = Get-Field $SummaryObject 'pov_summaries'
        foreach ($Camp in $Camps) {
            $CampData = Get-Field $PovSums $Camp
            if ($null -eq $CampData) {
                $SchemaErrors.Add("Missing camp in pov_summaries: $Camp")
            } else {
                $KP = Get-Field $CampData 'key_points'
                if ($null -eq $KP) {
                    $SchemaErrors.Add("pov_summaries.$Camp.key_points missing or not an array")
                } else {
                    # Force-array: ConvertFrom-Json unwraps single-element arrays to scalars
                    $KP = @($KP)
                    if ($CampData -is [System.Collections.IDictionary]) { $CampData['key_points'] = $KP }
                    else { $CampData.key_points = $KP }
                }
            }
        }
    }
    if (-not (Has-Field $SummaryObject 'factual_claims')) {
        $SchemaErrors.Add("Missing required field: factual_claims")
    } else {
        $FC = Get-Field $SummaryObject 'factual_claims'
        # Force-array: ConvertFrom-Json unwraps single-element arrays to scalars
        if ($null -ne $FC) {
            $FC = @($FC)
            # Write back the forced array so downstream code sees an array
            if ($SummaryObject -is [System.Collections.IDictionary]) { $SummaryObject['factual_claims'] = $FC }
            else { $SummaryObject.factual_claims = $FC }
        } else {
            $SchemaErrors.Add("Missing required field: factual_claims")
        }
    }
    if (-not (Has-Field $SummaryObject 'unmapped_concepts')) {
        # Not an error — field may be absent if all concepts were resolved. Default to empty array.
        if ($SummaryObject -is [System.Collections.IDictionary]) { $SummaryObject['unmapped_concepts'] = @() }
        else { $SummaryObject | Add-Member -NotePropertyName 'unmapped_concepts' -NotePropertyValue @() -Force }
    } else {
        $UC = Get-Field $SummaryObject 'unmapped_concepts'
        # Force-array: ConvertFrom-Json unwraps single-element arrays to scalars; null → empty
        $UC = if ($null -ne $UC) { @($UC) } else { @() }
        if ($SummaryObject -is [System.Collections.IDictionary]) { $SummaryObject['unmapped_concepts'] = $UC }
        else { $SummaryObject.unmapped_concepts = $UC }
    }

    foreach ($Err in $SchemaErrors) {
        Write-Host " │ ✗ Schema: $Err" -ForegroundColor Red
    }

    if ($SchemaErrors.Count -gt 0 -and -not (Has-Field $SummaryObject 'pov_summaries')) {
        return @{ Success = $false; DocId = $ThisDocId; Error = "Schema validation failed: $($SchemaErrors -join '; ')" }
    }

    # -- Validate stance values and gather counts --------------------------------
    $TotalPoints  = 0
    $NullNodes    = 0

    foreach ($Camp in $Camps) {
        $CampData = $SummaryObject.pov_summaries.$Camp
        if ($CampData) {
            if ($CampData.key_points) {
                foreach ($kp in $CampData.key_points) {
                    if ($kp.stance -notin $script:ValidStances) { $kp.stance = Resolve-Stance $kp.stance }
                }
                $TotalPoints += @($CampData.key_points).Count
                $NullNodes   += @($CampData.key_points | Where-Object { $null -eq $_.taxonomy_node_id }).Count
            }
        }
    }

    # -- Validate taxonomy_node_id existence (Gap 3.1) ---------------------------
    $InvalidNodeIds = [System.Collections.Generic.List[string]]::new()
    $NodeIdSet = Get-TaxonomyNodeIdSet
    if ($null -ne $NodeIdSet -and $NodeIdSet.Count -gt 0) {
        foreach ($Camp in $Camps) {
            $CampData = $SummaryObject.pov_summaries.$Camp
            if (-not $CampData -or -not $CampData.key_points) { continue }
            foreach ($kp in $CampData.key_points) {
                if (-not $kp.taxonomy_node_id) { continue }
                if (-not $NodeIdSet.Contains($kp.taxonomy_node_id)) {
                    $InvalidNodeIds.Add("$Camp/$($kp.taxonomy_node_id)")
                    $BadId = $kp.taxonomy_node_id
                    $kp.taxonomy_node_id = $null
                    $NullNodes++
                    if (-not $SummaryObject.PSObject.Properties['unmapped_concepts']) {
                        $SummaryObject | Add-Member -NotePropertyName 'unmapped_concepts' -NotePropertyValue @() -Force
                    }
                    $SummaryObject.unmapped_concepts = @($SummaryObject.unmapped_concepts) + @(
                        [PSCustomObject]@{
                            suggested_label = $BadId
                            concept         = $kp.point
                            reason          = "Hallucinated node ID '$BadId' not found in taxonomy — moved from $Camp key_points"
                        }
                    )
                }
            }
        }
        if ($InvalidNodeIds.Count -gt 0) {
            Write-Host " │ ⚠ Hallucinated node IDs moved to unmapped: $($InvalidNodeIds -join ', ')" -ForegroundColor Yellow
        }
    }

    # -- Cross-camp consistency check (Gap 3.3) --------------------------------
    $NodeStanceMap = @{}
    $CrossCampWarnings = [System.Collections.Generic.List[string]]::new()
    foreach ($Camp in $Camps) {
        $CampData = $SummaryObject.pov_summaries.$Camp
        if (-not $CampData -or -not $CampData.key_points) { continue }
        foreach ($kp in $CampData.key_points) {
            if (-not $kp.taxonomy_node_id) { continue }
            $Key = "$($kp.taxonomy_node_id)|$($kp.stance)"
            if ($NodeStanceMap.ContainsKey($Key)) {
                $PriorCamp = $NodeStanceMap[$Key]
                if ($PriorCamp -ne $Camp) {
                    $CrossCampWarnings.Add("Cross-camp agreement: $PriorCamp + $Camp both '$($kp.stance)' on $($kp.taxonomy_node_id)")
                }
            } else {
                $NodeStanceMap[$Key] = $Camp
            }
        }
    }
    foreach ($Warn in $CrossCampWarnings) {
        Write-Host " │ ⚠ $Warn" -ForegroundColor Yellow
    }

    $SoProps = $SummaryObject.PSObject.Properties
    if ($SoProps['factual_claims'] -and $null -ne $SummaryObject.factual_claims) { $FactualClaims = $SummaryObject.factual_claims } else { $FactualClaims = @() }
    if ($SoProps['unmapped_concepts'] -and $null -ne $SummaryObject.unmapped_concepts) { $UnmappedConcs = $SummaryObject.unmapped_concepts } else { $UnmappedConcs = @() }
    $FactualCount   = @($FactualClaims).Count
    $UnmappedCount  = @($UnmappedConcs).Count

    if ($ChunkCount -gt 0) { $ChunkLabel = " ($ChunkCount chunks)" } else { $ChunkLabel = '' }
    Write-Host " │ points: $TotalPoints ($NullNodes unmapped) factual: $FactualCount new_concepts: $UnmappedCount$ChunkLabel" -ForegroundColor Gray

    # -- Cross-POV fuzzy match on unmapped concepts ----------------------------
    if ($UnmappedCount -gt 0) {
        try {
            $Resolution = Resolve-UnmappedConcepts -UnmappedConcepts @($SummaryObject.unmapped_concepts)
            if (@($Resolution.Resolved).Count -gt 0) {
                foreach ($R in @($Resolution.Resolved)) {
                    Write-Host " │ ✔ Resolved: '$($R.ConceptLabel)' → $($R.MatchedNodeId) (score $($R.Score))" -ForegroundColor Green
                }
                $UnmappedConcs = @($Resolution.Remaining)
                if ($SoProps['unmapped_concepts']) {
                    $SummaryObject.unmapped_concepts = $UnmappedConcs
                } else {
                    $SummaryObject | Add-Member -NotePropertyName 'unmapped_concepts' -NotePropertyValue $UnmappedConcs -Force
                }
                $UnmappedCount = $UnmappedConcs.Count
            }
        }
        catch {
            Write-Host " │ ⚠ Unmapped concept resolution failed: $($_.Exception.Message)" -ForegroundColor Yellow
        }
    }

    # -- Write summaries/<doc-id>.json ----------------------------------------
    # Detect RAG vs full taxonomy from the context format
    $IsRagFiltered = $TaxonomyJson -match '^\s*=== RELEVANT TAXONOMY NODES'
    if ($IsRagFiltered) {
        $EstNodeCount = ([regex]::Matches($TaxonomyJson, '^\s{2}\w', [System.Text.RegularExpressions.RegexOptions]::Multiline)).Count
    }
    else {
        $EstNodeCount = ([regex]::Matches($TaxonomyJson, '"id"\s*:')).Count
    }

    if ($ChunkCount -gt 0) { $TaxFilter = 'rag_per_chunk' } elseif ($IsRagFiltered) { $TaxFilter = 'rag' } else { $TaxFilter = 'full' }

    $UsedFire = $null -ne $FireStats
    if ($UsedFire) { $ExtractionMode = 'fire' } else { $ExtractionMode = 'single_shot' }
    $ModelInfo = [ordered]@{
        model             = $Model
        temperature       = $Temperature
        max_tokens        = 32768
        extraction_mode   = $ExtractionMode
        taxonomy_filter   = $TaxFilter
        taxonomy_nodes    = $EstNodeCount
    }

    if ($UsedFire) {
        $ModelInfo['fire_confidence_threshold'] = 0.7
        $ModelInfo['fire_stats'] = [ordered]@{
            api_calls          = $FireStats.total_api_calls
            iterations         = $FireStats.total_iterations
            claims_total       = $FireStats.claims_total
            claims_confident   = $FireStats.claims_confident
            claims_iterated    = $FireStats.claims_iterated
            elapsed_seconds    = $FireStats.elapsed_seconds
            termination_reason = $FireStats.termination_reason
        }
    }

    if ($ChunkCount -gt 0) {
        $ModelInfo['chunked']     = $true
        $ModelInfo['chunk_count'] = $ChunkCount
    }

    if ($SoProps['pov_summaries']) { $PovSummariesVal = $SummaryObject.pov_summaries } else { $PovSummariesVal = [ordered]@{} }
    # -- Build context-rot metrics from stages collected during processing ----
    $ContextRotStages = @($script:ContextRotStages)
    $ContextRotObj = if ($ContextRotStages.Count -gt 0) {
        New-ContextRotMetrics -Pipeline 'summary' -DocId $ThisDocId -Stages $ContextRotStages
    } else { $null }

    $FinalSummary = [ordered]@{
        doc_id            = $ThisDocId
        taxonomy_version  = $TaxonomyVersion
        generated_at      = $Now
        model_info        = $ModelInfo
        context_rot       = $ContextRotObj
        pov_summaries     = $PovSummariesVal
        factual_claims    = @($FactualClaims)
        unmapped_concepts = @($UnmappedConcs)
    }
    $AllWarnings = [System.Collections.Generic.List[string]]::new()
    foreach ($e in $SchemaErrors)       { $AllWarnings.Add("Schema: $e") }
    foreach ($n in $InvalidNodeIds)     { $AllWarnings.Add("Hallucinated node ID: $n") }
    foreach ($w in $CrossCampWarnings)  { $AllWarnings.Add($w) }
    if ($AllWarnings.Count -gt 0) {
        $FinalSummary['warnings'] = @($AllWarnings)
    }

    $SummaryPath = Join-Path $SummariesDir "${ThisDocId}.json"
    try {
        Write-Utf8NoBom -Path $SummaryPath -Value ($FinalSummary | ConvertTo-Json -Depth 20) 
    }
    catch {
        Write-Host " └─ ✗ Failed to write summary: $($_.Exception.Message)" -ForegroundColor Red
        return @{ Success = $false; DocId = $ThisDocId; Error = "Failed to write summary file: $($_.Exception.Message)" }
    }

    # -- Update metadata.json -------------------------------------------------
    try {
        $MetaRaw     = Get-Content $Doc.MetaFile -Raw
        $MetaUpdated = $MetaRaw | ConvertFrom-Json -AsHashtable
        $MetaUpdated['summary_version'] = $TaxonomyVersion
        $MetaUpdated['summary_status']  = 'current'
        $MetaUpdated['summary_updated'] = $Now

        # Summary statistics for Source objects
        $claimsByPov = @{ accelerationist = 0; safetyist = 0; skeptic = 0; situations = 0 }
        foreach ($claim in @($FactualClaims)) {
            if (-not $claim.PSObject.Properties['linked_taxonomy_nodes']) { continue }
            foreach ($nodeId in @($claim.linked_taxonomy_nodes)) {
                if     ($nodeId -like 'acc-*') { $claimsByPov['accelerationist']++ }
                elseif ($nodeId -like 'saf-*') { $claimsByPov['safetyist']++ }
                elseif ($nodeId -like 'skp-*') { $claimsByPov['skeptic']++ }
                elseif ($nodeId -like 'sit-*') { $claimsByPov['situations']++ }
            }
        }
        $MetaUpdated['total_claims']      = $FactualCount
        $MetaUpdated['claims_by_pov']     = $claimsByPov
        $MetaUpdated['total_facts']       = $TotalPoints
        $MetaUpdated['unmapped_concepts'] = $UnmappedCount
        if ($ContextRotObj) {
            $WorstStage = $ContextRotStages | Sort-Object { $_.ratio } | Select-Object -First 1
            $MetaUpdated['context_rot'] = [ordered]@{
                cumulative_retention = $ContextRotObj.cumulative_retention
                worst_stage          = $WorstStage.stage
                worst_ratio          = $WorstStage.ratio
            }
        }

        Write-Utf8NoBom -Path $Doc.MetaFile -Value ($MetaUpdated | ConvertTo-Json -Depth 10) 
    }
    catch {
        Write-Host " │ ⚠ Summary written but metadata update failed: $($_.Exception.Message)" -ForegroundColor Yellow
    }

    Write-Host " └─ ✓ Done: summaries/$ThisDocId.json" -ForegroundColor Green

    return @{
        Success       = $true
        DocId         = $ThisDocId
        TotalPoints   = $TotalPoints
        NullNodes     = $NullNodes
        FactualCount  = $FactualCount
        UnmappedCount = $UnmappedCount
        ElapsedSecs   = [int]$Elapsed.TotalSeconds
        ChunkCount    = $ChunkCount
    }
}