Public/Test-ExtractionQuality.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Test-ExtractionQuality {
    <#
    .SYNOPSIS
        Measures AI extraction quality against human-annotated gold-standard data.
    .DESCRIPTION
        Compares AI-generated summaries against gold-standard annotations to compute:
        - Key Point Recall: % of expected key_points found in actual
        - Key Point Precision: % of actual key_points that match an expected one
        - Mapping Accuracy: % of actual key_points with correct taxonomy_node_id
        - Factual Claim Recall: % of expected claims found
        - Unmapped Concept Recall: % of expected unmapped concepts detected
    .PARAMETER DocId
        Test a single document.
    .PARAMETER All
        Test all documents with gold-standard files.
    .PARAMETER GoldDir
        Path to gold-standard directory. Default: tests/gold-standard/
    .PARAMETER PassThru
        Return results object for piping.
    .EXAMPLE
        Test-ExtractionQuality -DocId 'ai-safety-debate-2026'
    .EXAMPLE
        Test-ExtractionQuality -All
    #>

    [CmdletBinding()]
    param(
        [string]$DocId = '',
        [switch]$All,
        [string]$GoldDir = '',
        [switch]$PassThru
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    if ([string]::IsNullOrWhiteSpace($GoldDir)) {
        $GoldDir = Join-Path (Join-Path $script:RepoRoot 'tests') 'gold-standard'
    }

    if (-not (Test-Path $GoldDir)) {
        New-ActionableError -Goal 'locate gold-standard directory' `
            -Problem "Directory not found: $GoldDir" `
            -Location 'Test-ExtractionQuality' `
            -NextSteps @('Create tests/gold-standard/ and add annotated .gold.json files') -Throw
    }

    $SummariesDir = Get-SummariesDir

    # ── Collect gold files ────────────────────────────────────────────────────
    if ($DocId) {
        $Path = Join-Path $GoldDir "$DocId.gold.json"
        if (-not (Test-Path $Path)) {
            New-ActionableError -Goal "load gold standard for $DocId" `
                -Problem "Gold file not found: $Path" `
                -Location 'Test-ExtractionQuality' `
                -NextSteps @("Create $Path from _template.gold.json") -Throw
        }
        $GoldFiles = @(Get-Item $Path)
    }
    elseif ($All) {
        $GoldFiles = @(Get-ChildItem -Path $GoldDir -Filter '*.gold.json' -File |
            Where-Object { $_.Name -ne '_template.gold.json' } |
            Sort-Object Name)
    }
    else {
        New-ActionableError -Goal 'run extraction quality test' `
            -Problem 'Specify -DocId or -All' `
            -Location 'Test-ExtractionQuality' `
            -NextSteps @('Use -DocId <slug> for one document', 'Use -All for all gold-standard documents') -Throw
        $GoldFiles = @()
    }

    if ($GoldFiles.Count -eq 0) {
        Write-Host " No gold-standard files found in $GoldDir" -ForegroundColor Yellow
        Write-Host " Create .gold.json files from _template.gold.json" -ForegroundColor Gray
        return
    }

    Write-Host "`n EXTRACTION QUALITY TEST ($($GoldFiles.Count) document(s))" -ForegroundColor Cyan
    Write-Host " $('─' * 50)" -ForegroundColor DarkGray

    $AllResults = [System.Collections.Generic.List[PSObject]]::new()

    foreach ($GoldFile in $GoldFiles) {
        $GoldDocId = $GoldFile.BaseName -replace '\.gold$', ''
        $Gold = Get-Content -Raw -Path $GoldFile.FullName | ConvertFrom-Json

        # Load actual summary
        $SumPath = Join-Path $SummariesDir "$GoldDocId.json"
        if (-not (Test-Path $SumPath)) {
            Write-Host " $GoldDocId`: SKIP — no summary file" -ForegroundColor DarkGray
            continue
        }

        $Summary = Get-Content -Raw -Path $SumPath | ConvertFrom-Json

        # ── Key Point Recall & Precision ──────────────────────────────────
        $ExpectedKP = @($Gold.expected_key_points)
        $ActualKP = [System.Collections.Generic.List[string]]::new()

        foreach ($Camp in @('accelerationist', 'safetyist', 'skeptic')) {
            $CampData = $Summary.pov_summaries.$Camp
            if ($CampData -and $CampData.key_points) {
                foreach ($KP in @($CampData.key_points)) {
                    if ($KP.taxonomy_node_id) {
                        $ActualKP.Add($KP.taxonomy_node_id)
                    }
                }
            }
        }

        $ExpectedNodeIds = @($ExpectedKP | ForEach-Object { $_.taxonomy_node_id } | Where-Object { $_ })
        $MatchedExpected = @($ExpectedNodeIds | Where-Object { $_ -in $ActualKP })
        $MatchedActual = @($ActualKP | Where-Object { $_ -in $ExpectedNodeIds })

        if ($ExpectedNodeIds.Count -gt 0) { $KPRecall = [Math]::Round($MatchedExpected.Count / $ExpectedNodeIds.Count * 100, 1) } else { $KPRecall = 0 }
        if ($ActualKP.Count -gt 0) { $KPPrecision = [Math]::Round($MatchedActual.Count / $ActualKP.Count * 100, 1) } else { $KPPrecision = 0 }

        # ── Mapping Accuracy ──────────────────────────────────────────────
        $CorrectMappings = $MatchedActual.Count
        if ($ActualKP.Count -gt 0) { $MappingAccuracy = [Math]::Round($CorrectMappings / $ActualKP.Count * 100, 1) } else { $MappingAccuracy = 0 }

        # ── Factual Claim Recall ──────────────────────────────────────────
        $ExpectedClaims = @($Gold.expected_factual_claims)
        if ($Summary.factual_claims) { $ActualClaims = @($Summary.factual_claims) } else { $ActualClaims = @() }

        $ClaimMatches = 0
        foreach ($EC in $ExpectedClaims) {
            $ECNodes = @($EC.linked_taxonomy_nodes)
            # Match if any actual claim shares linked taxonomy nodes
            foreach ($AC in $ActualClaims) {
                if ($AC.PSObject.Properties['linked_taxonomy_nodes']) { $ACNodes = @($AC.linked_taxonomy_nodes) } else { $ACNodes = @() }
                $Overlap = @($ECNodes | Where-Object { $_ -in $ACNodes })
                if ($Overlap.Count -gt 0) {
                    $ClaimMatches++
                    break
                }
            }
        }

        if ($ExpectedClaims.Count -gt 0) { $ClaimRecall = [Math]::Round($ClaimMatches / $ExpectedClaims.Count * 100, 1) } else { $ClaimRecall = 0 }

        # ── Unmapped Concept Recall ───────────────────────────────────────
        $ExpectedUnmapped = @($Gold.expected_unmapped_concepts)
        if ($Summary.unmapped_concepts) { $ActualUnmapped = @($Summary.unmapped_concepts) } else { $ActualUnmapped = @() }

        $UnmappedMatches = 0
        foreach ($EU in $ExpectedUnmapped) {
            $ExpPov = $EU.suggested_pov
            # Match if any actual unmapped concept has the same suggested POV
            foreach ($AU in $ActualUnmapped) {
                if ($AU.PSObject.Properties['suggested_pov']) { $ActPov = $AU.suggested_pov } else { $ActPov = '' }
                if ($ActPov -eq $ExpPov) {
                    $UnmappedMatches++
                    break
                }
            }
        }

        if ($ExpectedUnmapped.Count -gt 0) { $UnmappedRecall = [Math]::Round($UnmappedMatches / $ExpectedUnmapped.Count * 100, 1) } else { $UnmappedRecall = 0 }

        # ── Display ───────────────────────────────────────────────────────
        Write-Host "`n $GoldDocId`:" -ForegroundColor White
        if ($KPRecall -ge 70) { $KPColor = 'Green' } elseif ($KPRecall -ge 50) { $KPColor = 'Yellow' } else { $KPColor = 'Red' }
        Write-Host " KP Recall: $KPRecall% ($($MatchedExpected.Count)/$($ExpectedNodeIds.Count))" -ForegroundColor $KPColor
        Write-Host " KP Precision: $KPPrecision% ($($MatchedActual.Count)/$($ActualKP.Count))" -ForegroundColor $KPColor
        Write-Host " Mapping Accuracy: $MappingAccuracy%" -ForegroundColor $(if ($MappingAccuracy -ge 70) { 'Green' } else { 'Yellow' })
        Write-Host " Claim Recall: $ClaimRecall% ($ClaimMatches/$($ExpectedClaims.Count))" -ForegroundColor $(if ($ClaimRecall -ge 70) { 'Green' } else { 'Yellow' })
        Write-Host " Unmapped Recall: $UnmappedRecall% ($UnmappedMatches/$($ExpectedUnmapped.Count))" -ForegroundColor $(if ($UnmappedRecall -ge 50) { 'Green' } else { 'Yellow' })

        $AllResults.Add([PSCustomObject][ordered]@{
            DocId            = $GoldDocId
            KPRecall         = $KPRecall
            KPPrecision      = $KPPrecision
            MappingAccuracy  = $MappingAccuracy
            ClaimRecall      = $ClaimRecall
            UnmappedRecall   = $UnmappedRecall
            ExpectedKP       = $ExpectedNodeIds.Count
            ActualKP         = $ActualKP.Count
            ExpectedClaims   = $ExpectedClaims.Count
            ExpectedUnmapped = $ExpectedUnmapped.Count
        })
    }

    # ── Aggregate ─────────────────────────────────────────────────────────
    if ($AllResults.Count -gt 1) {
        Write-Host "`n AGGREGATE ($($AllResults.Count) documents):" -ForegroundColor Cyan
        Write-Host " Avg KP Recall: $([Math]::Round(($AllResults | ForEach-Object { $_.KPRecall } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host " Avg KP Precision: $([Math]::Round(($AllResults | ForEach-Object { $_.KPPrecision } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host " Avg Mapping Acc: $([Math]::Round(($AllResults | ForEach-Object { $_.MappingAccuracy } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host " Avg Claim Recall: $([Math]::Round(($AllResults | ForEach-Object { $_.ClaimRecall } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host " Avg Unmapped Recall: $([Math]::Round(($AllResults | ForEach-Object { $_.UnmappedRecall } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
    }

    Write-Host ""

    if ($PassThru) {
        return $AllResults.ToArray()
    }
}