Private/Invoke-DocumentSummary.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

# Per-document AI summarization worker.
# Small documents (<= 20K tokens) use a single LLM call.
# Large documents are split into chunks, processed in parallel, and merged.

function Invoke-DocumentSummary {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)][hashtable]$Doc,
        [Parameter(Mandatory)][string]$ApiKey,
        [Parameter(Mandatory)][string]$Model,
        [Parameter(Mandatory)][double]$Temperature,
        [Parameter(Mandatory)][string]$TaxonomyVersion,
        [Parameter(Mandatory)][string]$TaxonomyJson,
        [Parameter(Mandatory)][string]$SystemPromptTemplate,
        [string]$ChunkSystemPromptTemplate = '',
        [Parameter(Mandatory)][string]$OutputSchema,
        [Parameter(Mandatory)][string]$SummariesDir,
        [Parameter(Mandatory)][string]$Now
    )

    Set-StrictMode -Version Latest

    $ThisDocId = $Doc.DocId
    $Meta      = $Doc.Meta
    $ChunkThresholdTokens = 20000   # Documents above this get chunked

    Write-Host "`n `u{250C}`u{2500} $ThisDocId" -ForegroundColor White
    Write-Host " `u{2502} pov: $($Doc.PovTags -join ', ') | model: $Model" -ForegroundColor Gray

    # -- Load snapshot --------------------------------------------------------
    $SnapshotText = Get-Content $Doc.SnapshotFile -Raw
    if ([string]::IsNullOrWhiteSpace($SnapshotText)) {
        Write-Host " `u{2514}`u{2500} SKIP $ThisDocId `u{2014} snapshot.md is empty" -ForegroundColor Yellow
        return @{ Success = $false; DocId = $ThisDocId; Error = 'EmptySnapshot' }
    }
    $EstimatedTokens = [int]($SnapshotText.Length / 4)
    Write-Host " `u{2502} snapshot: $($SnapshotText.Length) chars (~$EstimatedTokens tokens est.)" -ForegroundColor Gray

    # -- Decide: single-call or chunked pipeline ------------------------------
    if ($EstimatedTokens -gt $ChunkThresholdTokens) {
        Write-Host " `u{2502} `u{2728} Large document — using chunked pipeline" -ForegroundColor Cyan
        return Invoke-ChunkedSummary @PSBoundParameters
    }

    # ========================================================================
    # SINGLE-CALL PATH (small documents)
    # ========================================================================

    # -- Build density-scaled system prompt -----------------------------------
    $WordCount = ($SnapshotText -split '\s+').Count
    $DensityFloors = Get-DensityFloors -WordCount $WordCount
    $SystemPrompt = Build-DensityScaledPrompt -WordCount $WordCount -Template $SystemPromptTemplate

    # -- Build prompt ---------------------------------------------------------
    $DocHeader = Build-DocHeader -Doc $Doc -Meta $Meta -ThisDocId $ThisDocId
    $FullPrompt = @"
$SystemPrompt

=== TAXONOMY (version $TaxonomyVersion) ===
$TaxonomyJson

=== OUTPUT SCHEMA (your response must match this structure) ===
$OutputSchema

$DocHeader

--- DOCUMENT CONTENT ---
$SnapshotText
"@


    # -- Call AI API (with density validation + retry) -------------------------
    $MaxDensityRetries = 1
    $StartTime = Get-Date
    $SummaryObject = $null

    for ($Attempt = 0; $Attempt -le $MaxDensityRetries; $Attempt++) {
        $AttemptPrompt = $FullPrompt
        if ($Attempt -gt 0) {
            Write-Host " `u{2502} `u{21BB} Retry $Attempt/$MaxDensityRetries — density too low" -ForegroundColor Yellow
            $AttemptPrompt = $FullPrompt + "`n`n" + $DensityRetryNudge
        }

        $AIResult = Invoke-AIApi `
            -Prompt     $AttemptPrompt `
            -Model      $Model `
            -ApiKey     $ApiKey `
            -Temperature $Temperature `
            -MaxTokens  65536 `
            -JsonMode `
            -TimeoutSec 300 `
            -MaxRetries 3 `
            -RetryDelays @(5, 15, 45)

        if ($null -eq $AIResult) {
            Write-Host " `u{2514}`u{2500} `u{2717} FAILED: $ThisDocId" -ForegroundColor Red
            return @{ Success = $false; DocId = $ThisDocId; Error = 'API call returned null' }
        }

        $Elapsed = (Get-Date) - $StartTime
        Write-Host " `u{2502} `u{2713} Response ($($AIResult.Backend)): $([int]$Elapsed.TotalSeconds)s" -ForegroundColor Green

        $SummaryObject = Parse-AIResponse -RawText $AIResult.Text -ThisDocId $ThisDocId -SummariesDir $SummariesDir
        if ($null -eq $SummaryObject) {
            return @{ Success = $false; DocId = $ThisDocId; Error = 'InvalidJson' }
        }

        $DensityCheck = Test-SummaryDensity -SummaryObject $SummaryObject -Floors $DensityFloors
        if ($DensityCheck.Pass) {
            break
        }

        # Build a nudge message for the retry with specific shortfalls
        $DensityRetryNudge = Build-DensityRetryNudge -Shortfalls $DensityCheck.Shortfalls
        Write-Host " `u{2502} `u{26A0} Density check FAILED: $($DensityCheck.Shortfalls -join '; ')" -ForegroundColor Yellow

        if ($Attempt -eq $MaxDensityRetries) {
            Write-Host " `u{2502} `u{26A0} Accepting under-dense result after $($Attempt + 1) attempt(s)" -ForegroundColor Yellow
        }
    }

    return Finalize-Summary -SummaryObject $SummaryObject -ThisDocId $ThisDocId `
        -TaxonomyVersion $TaxonomyVersion -Model $Model -Temperature $Temperature `
        -Now $Now -SummariesDir $SummariesDir -Doc $Doc -Elapsed $Elapsed
}

# ============================================================================
# CHUNKED PIPELINE (large documents)
# ============================================================================

function Invoke-ChunkedSummary {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)][hashtable]$Doc,
        [Parameter(Mandatory)][string]$ApiKey,
        [Parameter(Mandatory)][string]$Model,
        [Parameter(Mandatory)][double]$Temperature,
        [Parameter(Mandatory)][string]$TaxonomyVersion,
        [Parameter(Mandatory)][string]$TaxonomyJson,
        [Parameter(Mandatory)][string]$SystemPromptTemplate,
        [string]$ChunkSystemPromptTemplate = '',
        [Parameter(Mandatory)][string]$OutputSchema,
        [Parameter(Mandatory)][string]$SummariesDir,
        [Parameter(Mandatory)][string]$Now
    )

    $ThisDocId = $Doc.DocId
    $Meta      = $Doc.Meta
    $SnapshotText = Get-Content $Doc.SnapshotFile -Raw

    # -- Split into chunks ----------------------------------------------------
    $Chunks = @(Split-DocumentChunks -Text $SnapshotText -MaxChunkTokens 15000 -MinChunkTokens 2000)
    $ChunkCount = $Chunks.Count
    Write-Host " `u{2502} split into $ChunkCount chunks" -ForegroundColor Cyan

    # -- Load chunk-specific system prompt ------------------------------------
    $ChunkSystemPrompt = if ($ChunkSystemPromptTemplate) {
        $ChunkSystemPromptTemplate
    } else {
        Get-Prompt -Name 'pov-summary-chunk-system'
    }
    $DocHeader = Build-DocHeader -Doc $Doc -Meta $Meta -ThisDocId $ThisDocId

    # -- Process each chunk sequentially (API rate limits) --------------------
    $StartTime = Get-Date
    $ChunkResults = [System.Collections.Generic.List[object]]::new()
    $FailedChunks = 0

    for ($i = 0; $i -lt $ChunkCount; $i++) {
        $ChunkNum = $i + 1
        $ChunkText = $Chunks[$i]
        $ChunkTokens = [int]($ChunkText.Length / 4)

        Write-Host " `u{2502} chunk $ChunkNum/$ChunkCount (~$ChunkTokens tokens)..." -ForegroundColor Gray -NoNewline

        $ChunkPrompt = @"
$ChunkSystemPrompt

=== TAXONOMY (version $TaxonomyVersion) ===
$TaxonomyJson

=== OUTPUT SCHEMA (your response must match this structure) ===
$OutputSchema

$DocHeader

--- DOCUMENT SECTION $ChunkNum OF $ChunkCount ---
$ChunkText
"@


        try {
            $AIResult = Invoke-AIApi `
                -Prompt     $ChunkPrompt `
                -Model      $Model `
                -ApiKey     $ApiKey `
                -Temperature $Temperature `
                -MaxTokens  65536 `
                -JsonMode `
                -TimeoutSec 300 `
                -MaxRetries 3 `
                -RetryDelays @(5, 15, 45)

            if ($null -eq $AIResult) {
                Write-Host " `u{2717} null response" -ForegroundColor Red
                $FailedChunks++
                continue
            }

            $ChunkObj = Parse-AIResponse -RawText $AIResult.Text -ThisDocId "$ThisDocId-chunk$ChunkNum" -SummariesDir $SummariesDir
            if ($null -eq $ChunkObj) {
                Write-Host " `u{2717} bad JSON" -ForegroundColor Red
                $FailedChunks++
                continue
            }

            $ChunkResults.Add($ChunkObj)
            $ChunkPts = 0
            foreach ($c in @('accelerationist','safetyist','skeptic')) {
                if ($ChunkObj.pov_summaries.$c -and $ChunkObj.pov_summaries.$c.key_points) {
                    $ChunkPts += @($ChunkObj.pov_summaries.$c.key_points).Count
                }
            }
            Write-Host " `u{2713} $ChunkPts points" -ForegroundColor Green

        } catch {
            Write-Host " `u{2717} $_" -ForegroundColor Red
            $FailedChunks++
        }
    }

    $Elapsed = (Get-Date) - $StartTime

    if ($ChunkResults.Count -eq 0) {
        Write-Host " `u{2514}`u{2500} `u{2717} All $ChunkCount chunks failed" -ForegroundColor Red
        return @{ Success = $false; DocId = $ThisDocId; Error = "All $ChunkCount chunks failed" }
    }

    if ($FailedChunks -gt 0) {
        Write-Host " `u{2502} `u{26A0} $FailedChunks/$ChunkCount chunks failed (proceeding with $($ChunkResults.Count) successful)" -ForegroundColor Yellow
    }

    # -- Merge chunk results --------------------------------------------------
    Write-Host " `u{2502} merging $($ChunkResults.Count) chunk results..." -ForegroundColor Cyan
    $MergedObject = Merge-ChunkSummaries -ChunkResults @($ChunkResults)

    # Convert ordered hashtable to PSCustomObject for consistent downstream handling
    $SummaryObject = [PSCustomObject]$MergedObject

    Write-Host " `u{2502} `u{2713} Merged ($([int]$Elapsed.TotalSeconds)s total, $ChunkCount chunks)" -ForegroundColor Green

    # -- Density check on merged result (warn only, no retry for chunked) ----
    $WordCount = ($SnapshotText -split '\s+').Count
    $DensityFloors = Get-DensityFloors -WordCount $WordCount
    $DensityCheck = Test-SummaryDensity -SummaryObject $SummaryObject -Floors $DensityFloors
    if (-not $DensityCheck.Pass) {
        Write-Host " `u{2502} `u{26A0} Merged density below floor: $($DensityCheck.Shortfalls -join '; ')" -ForegroundColor Yellow
    }

    return Finalize-Summary -SummaryObject $SummaryObject -ThisDocId $ThisDocId `
        -TaxonomyVersion $TaxonomyVersion -Model $Model -Temperature $Temperature `
        -Now $Now -SummariesDir $SummariesDir -Doc $Doc -Elapsed $Elapsed -ChunkCount $ChunkCount
}

# ============================================================================
# SHARED HELPERS
# ============================================================================

function Get-DensityFloors {
    param([int]$WordCount)

    return @{
        KpMin = [Math]::Max(3,  [int]($WordCount / 500))
        FcMin = [Math]::Max(3,  [int]($WordCount / 800))
        UcMin = [Math]::Max(2,  [int]($WordCount / 2000))
    }
}

function Test-SummaryDensity {
    param(
        [object]$SummaryObject,
        [hashtable]$Floors
    )

    $Camps = @('accelerationist','safetyist','skeptic')
    $Shortfalls = [System.Collections.Generic.List[string]]::new()

    foreach ($Camp in $Camps) {
        $CampData = $SummaryObject.pov_summaries.$Camp
        $Count = 0
        if ($CampData -and $CampData.key_points) {
            $Count = @($CampData.key_points).Count
        }
        if ($Count -lt $Floors.KpMin) {
            $null = $Shortfalls.Add("$Camp key_points: $Count < $($Floors.KpMin) min")
        }
    }

    $FcCount = 0
    if ($SummaryObject.factual_claims) {
        $FcCount = @($SummaryObject.factual_claims).Count
    }
    if ($FcCount -lt $Floors.FcMin) {
        $null = $Shortfalls.Add("factual_claims: $FcCount < $($Floors.FcMin) min")
    }

    $UcCount = 0
    if ($SummaryObject.unmapped_concepts) {
        $UcCount = @($SummaryObject.unmapped_concepts).Count
    }
    if ($UcCount -lt $Floors.UcMin) {
        $null = $Shortfalls.Add("unmapped_concepts: $UcCount < $($Floors.UcMin) min")
    }

    return @{
        Pass       = ($Shortfalls.Count -eq 0)
        Shortfalls = @($Shortfalls)
    }
}

function Build-DensityRetryNudge {
    param([string[]]$Shortfalls)

    $Lines = @(
        "IMPORTANT: Your previous response was REJECTED because it did not meet the"
        "required output density minimums. Specific shortfalls:"
    )
    foreach ($s in $Shortfalls) {
        $Lines += " - $s"
    }
    $Lines += @(
        ""
        "Go back through the document and extract MORE points. The document contains"
        "substantially more content than you captured. Read each section, paragraph,"
        "and data point carefully. Every distinct claim, argument, or piece of evidence"
        "should be its own key_point or factual_claim."
    )
    return ($Lines -join "`n")
}

function Build-DensityScaledPrompt {
    param(
        [int]$WordCount,
        [string]$Template
    )

    $Floors = Get-DensityFloors -WordCount $WordCount
    $kpMin = $Floors.KpMin
    $kpMax = [Math]::Max(8,  [int]($WordCount / 200))
    $fcMin = $Floors.FcMin
    $fcMax = [Math]::Max(8,  [int]($WordCount / 300))
    $ucMin = $Floors.UcMin
    $ucMax = [Math]::Max(5,  [int]($WordCount / 800))

    Write-Host " `u{2502} ~$WordCount words `u{2192} key_points $kpMin-$kpMax/camp, claims $fcMin-$fcMax, unmapped $ucMin-$ucMax" -ForegroundColor Gray

    return $Template `
        -replace '{{WORD_COUNT}}', $WordCount `
        -replace '{{KP_MIN}}',     $kpMin `
        -replace '{{KP_MAX}}',     $kpMax `
        -replace '{{FC_MIN}}',     $fcMin `
        -replace '{{FC_MAX}}',     $fcMax `
        -replace '{{UC_MIN}}',     $ucMin `
        -replace '{{UC_MAX}}',     $ucMax
}

function Build-DocHeader {
    param(
        [hashtable]$Doc,
        [object]$Meta,
        [string]$ThisDocId
    )

    $Title    = if ($Meta.title) { $Meta.title } else { $ThisDocId }
    $PovTags  = $Doc.PovTags -join ', '
    $TopicTags = if ($null -ne $Meta.PSObject.Properties['topic_tags'] -and $Meta.topic_tags) { $Meta.topic_tags -join ', ' } else { '(none)' }

    return @"
=== DOCUMENT: $ThisDocId ===
Title: $Title
POV tags (pre-classified): $PovTags
Topic tags: $TopicTags
"@

}

function Parse-AIResponse {
    param(
        [string]$RawText,
        [string]$ThisDocId,
        [string]$SummariesDir
    )

    $CleanText = $RawText -replace '(?s)^```json\s*', '' -replace '(?s)\s*```$', ''
    $CleanText = $CleanText.Trim()

    try {
        return ($CleanText | ConvertFrom-Json -Depth 20)
    } catch {
        Write-Host " `u{2502} `u{26A0} JSON parse failed `u{2014} attempting repair" -ForegroundColor Yellow
        $Repaired = Repair-TruncatedJson -Text $RawText
        if ($Repaired) {
            try {
                return ($Repaired | ConvertFrom-Json -Depth 20)
            } catch {
                # fall through
            }
        }
        $DebugPath = Join-Path $SummariesDir "${ThisDocId}.debug-raw.txt"
        Set-Content -Path $DebugPath -Value $RawText -Encoding UTF8
        Write-Host " `u{2502} `u{2717} Invalid JSON. Raw saved: $DebugPath" -ForegroundColor Red
        return $null
    }
}

function Finalize-Summary {
    param(
        [object]$SummaryObject,
        [string]$ThisDocId,
        [string]$TaxonomyVersion,
        [string]$Model,
        [double]$Temperature,
        [string]$Now,
        [string]$SummariesDir,
        [hashtable]$Doc,
        [TimeSpan]$Elapsed,
        [int]$ChunkCount = 0
    )

    # Validate stance values and gather counts
    $ValidStances = @('strongly_aligned','aligned','neutral','opposed','strongly_opposed','not_applicable')
    $Camps        = @('accelerationist','safetyist','skeptic')
    $TotalPoints  = 0
    $NullNodes    = 0

    foreach ($Camp in $Camps) {
        $CampData = $SummaryObject.pov_summaries.$Camp
        if ($CampData) {
            if ($CampData.key_points) {
                foreach ($kp in $CampData.key_points) {
                    if ($kp.stance -notin $ValidStances) { $kp.stance = 'neutral' }
                }
                $TotalPoints += @($CampData.key_points).Count
                $NullNodes   += @($CampData.key_points | Where-Object { $null -eq $_.taxonomy_node_id }).Count
            }
        }
    }

    $FactualCount   = if ($SummaryObject.factual_claims)    { @($SummaryObject.factual_claims).Count }    else { 0 }
    $UnmappedCount  = if ($SummaryObject.unmapped_concepts) { @($SummaryObject.unmapped_concepts).Count } else { 0 }

    $ChunkLabel = if ($ChunkCount -gt 0) { " ($ChunkCount chunks)" } else { '' }
    Write-Host " `u{2502} points: $TotalPoints ($NullNodes unmapped) factual: $FactualCount new_concepts: $UnmappedCount$ChunkLabel" -ForegroundColor Gray

    # -- Write summaries/<doc-id>.json ----------------------------------------
    $FinalSummary = [ordered]@{
        doc_id            = $ThisDocId
        taxonomy_version  = $TaxonomyVersion
        generated_at      = $Now
        ai_model          = $Model
        temperature       = $Temperature
        pov_summaries     = $SummaryObject.pov_summaries
        factual_claims    = $SummaryObject.factual_claims
        unmapped_concepts = $SummaryObject.unmapped_concepts
    }

    if ($ChunkCount -gt 0) {
        $FinalSummary['chunked'] = $true
        $FinalSummary['chunk_count'] = $ChunkCount
    }

    $SummaryPath = Join-Path $SummariesDir "${ThisDocId}.json"
    try {
        Set-Content -Path $SummaryPath -Value ($FinalSummary | ConvertTo-Json -Depth 20) -Encoding UTF8
    }
    catch {
        Write-Host " `u{2514}`u{2500} `u{2717} Failed to write summary: $($_.Exception.Message)" -ForegroundColor Red
        return @{ Success = $false; DocId = $ThisDocId; Error = "Failed to write summary file: $($_.Exception.Message)" }
    }

    # -- Update metadata.json -------------------------------------------------
    try {
        $MetaRaw     = Get-Content $Doc.MetaFile -Raw
        $MetaUpdated = $MetaRaw | ConvertFrom-Json -AsHashtable
        $MetaUpdated['summary_version'] = $TaxonomyVersion
        $MetaUpdated['summary_status']  = 'current'
        $MetaUpdated['summary_updated'] = $Now
        Set-Content -Path $Doc.MetaFile -Value ($MetaUpdated | ConvertTo-Json -Depth 10) -Encoding UTF8
    }
    catch {
        Write-Host " `u{2502} `u{26A0} Summary written but metadata update failed: $($_.Exception.Message)" -ForegroundColor Yellow
    }

    Write-Host " `u{2514}`u{2500} `u{2713} Done: summaries/$ThisDocId.json" -ForegroundColor Green

    return @{
        Success       = $true
        DocId         = $ThisDocId
        TotalPoints   = $TotalPoints
        NullNodes     = $NullNodes
        FactualCount  = $FactualCount
        UnmappedCount = $UnmappedCount
        ElapsedSecs   = [int]$Elapsed.TotalSeconds
        ChunkCount    = $ChunkCount
    }
}