AITriad

0.4.1

Public/Test-ExtractionQuality.ps1

                                # Copyright (c) 2026 Jeffrey Snover. All rights reserved.

# Licensed under the MIT License. See LICENSE file in the project root.

function Test-ExtractionQuality {
    <#
    .SYNOPSIS
        Measures AI extraction quality against human-annotated gold-standard data.
    .DESCRIPTION
        Compares AI-generated summaries against gold-standard annotations to compute:
        - Key Point Recall: % of expected key_points found in actual
        - Key Point Precision: % of actual key_points that match an expected one
        - Mapping Accuracy: % of actual key_points with correct taxonomy_node_id
        - Factual Claim Recall: % of expected claims found
        - Unmapped Concept Recall: % of expected unmapped concepts detected

        For every <name>.gold.json file the summary <name>.json is loaded from the
        directory returned by Get-SummariesDir. Documents without a summary file are
        reported as skipped and are not included in the results.

        Properties that are absent (or null) in either JSON document are treated as
        empty collections instead of raising a strict-mode error, so one incomplete
        file does not abort a whole -All run.
    .PARAMETER DocId
        Test a single document. Takes precedence over -All when both are given.
    .PARAMETER All
        Test all documents with gold-standard files (_template.gold.json is excluded).
    .PARAMETER GoldDir
        Path to gold-standard directory. Default: tests/gold-standard/
    .PARAMETER PassThru
        Return results object for piping.
    .OUTPUTS
        Nothing by default. With -PassThru, one PSCustomObject per scored document with
        the properties DocId, KPRecall, KPPrecision, MappingAccuracy, ClaimRecall,
        UnmappedRecall, ExpectedKP, ActualKP, ExpectedClaims and ExpectedUnmapped.
    .NOTES
        - Key points are matched purely by taxonomy_node_id.
        - Mapping Accuracy is currently derived from the same counts as Key Point
          Precision, so the two figures are always equal.
        - A metric whose denominator is zero is reported as 0.
    .EXAMPLE
        Test-ExtractionQuality -DocId 'ai-safety-debate-2026'
    .EXAMPLE
        Test-ExtractionQuality -All
    #>
    [CmdletBinding()]
    param(
        [string]$DocId = '',
        [switch]$All,
        [string]$GoldDir = '',
        [switch]$PassThru
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # ── Strict-mode-safe JSON accessors (scoped to this function) ─────────────

    # Returns the value of property $Name, or $null when the object is $null or
    # does not have that property. Plain dot access would throw under strict mode.
    function Get-OptionalValue {
        param($InputObject, [string]$Name)

        if ($null -eq $InputObject) { return $null }
        $Property = $InputObject.PSObject.Properties[$Name]
        if ($null -eq $Property) { return $null }
        return $Property.Value
    }

    # Emits the non-null items of property $Name (scalar values count as one item).
    # Emits nothing when the property is missing, null or empty; wrap calls in @().
    function Get-OptionalItems {
        param($InputObject, [string]$Name)

        $Value = Get-OptionalValue -InputObject $InputObject -Name $Name
        foreach ($Item in @($Value)) {
            if ($null -ne $Item) { $Item }
        }
    }

    # ── Resolve directories ───────────────────────────────────────────────────
    if ([string]::IsNullOrWhiteSpace($GoldDir)) {
        $GoldDir = Join-Path (Join-Path $script:RepoRoot 'tests') 'gold-standard'
    }

    if (-not (Test-Path $GoldDir)) {
        New-ActionableError -Goal 'locate gold-standard directory' `
            -Problem "Directory not found: $GoldDir" `
            -Location 'Test-ExtractionQuality' `
            -NextSteps @('Create tests/gold-standard/ and add annotated .gold.json files') -Throw
    }

    $SummariesDir = Get-SummariesDir

    # ── Collect gold files ────────────────────────────────────────────────────
    if ($DocId) {
        $Path = Join-Path $GoldDir "$DocId.gold.json"
        # -LiteralPath: a DocId containing [, ] or * must not be expanded as a wildcard.
        if (-not (Test-Path -LiteralPath $Path)) {
            New-ActionableError -Goal "load gold standard for $DocId" `
                -Problem "Gold file not found: $Path" `
                -Location 'Test-ExtractionQuality' `
                -NextSteps @("Create $Path from _template.gold.json") -Throw
        }
        $GoldFiles = @(Get-Item -LiteralPath $Path)
    }
    elseif ($All) {
        $GoldFiles = @(Get-ChildItem -Path $GoldDir -Filter '*.gold.json' -File |
            Where-Object { $_.Name -ne '_template.gold.json' } |
            Sort-Object Name)
    }
    else {
        New-ActionableError -Goal 'run extraction quality test' `
            -Problem 'Specify -DocId or -All' `
            -Location 'Test-ExtractionQuality' `
            -NextSteps @('Use -DocId <slug> for one document', 'Use -All for all gold-standard documents') -Throw
        # Only reached if the call above returns instead of throwing; keeps
        # $GoldFiles defined for the strict-mode check below.
        $GoldFiles = @()
    }

    if ($GoldFiles.Count -eq 0) {
        Write-Host "  No gold-standard files found in $GoldDir" -ForegroundColor Yellow
        Write-Host "  Create .gold.json files from _template.gold.json" -ForegroundColor Gray
        return
    }

    Write-Host "`n  EXTRACTION QUALITY TEST ($($GoldFiles.Count) document(s))" -ForegroundColor Cyan
    Write-Host "  $('─' * 50)" -ForegroundColor DarkGray

    $AllResults = [System.Collections.Generic.List[PSObject]]::new()

    foreach ($GoldFile in $GoldFiles) {
        # BaseName of 'x.gold.json' is 'x.gold'; strip the trailing '.gold' to get the doc id.
        $GoldDocId = $GoldFile.BaseName -replace '\.gold$', ''
        $Gold = Get-Content -Raw -LiteralPath $GoldFile.FullName | ConvertFrom-Json

        # Load actual summary
        $SumPath = Join-Path $SummariesDir "$GoldDocId.json"
        if (-not (Test-Path -LiteralPath $SumPath)) {
            Write-Host "  $GoldDocId`: SKIP — no summary file" -ForegroundColor DarkGray
            continue
        }
        $Summary = Get-Content -Raw -LiteralPath $SumPath | ConvertFrom-Json

        # ── Key Point Recall & Precision ──────────────────────────────────
        $ExpectedKP = @(Get-OptionalItems -InputObject $Gold -Name 'expected_key_points')

        # Taxonomy node ids of every key point in the three POV camps (duplicates kept).
        $ActualKP = [System.Collections.Generic.List[string]]::new()
        $PovSummaries = Get-OptionalValue -InputObject $Summary -Name 'pov_summaries'
        foreach ($Camp in @('accelerationist', 'safetyist', 'skeptic')) {
            $CampData = Get-OptionalValue -InputObject $PovSummaries -Name $Camp
            foreach ($KP in @(Get-OptionalItems -InputObject $CampData -Name 'key_points')) {
                $NodeId = Get-OptionalValue -InputObject $KP -Name 'taxonomy_node_id'
                if ($NodeId) {
                    $ActualKP.Add($NodeId)
                }
            }
        }

        $ExpectedNodeIds = @($ExpectedKP |
            ForEach-Object { Get-OptionalValue -InputObject $_ -Name 'taxonomy_node_id' } |
            Where-Object { $_ })
        $MatchedExpected = @($ExpectedNodeIds | Where-Object { $_ -in $ActualKP })
        $MatchedActual = @($ActualKP | Where-Object { $_ -in $ExpectedNodeIds })

        if ($ExpectedNodeIds.Count -gt 0) { $KPRecall = [Math]::Round($MatchedExpected.Count / $ExpectedNodeIds.Count * 100, 1) } else { $KPRecall = 0 }
        if ($ActualKP.Count -gt 0) { $KPPrecision = [Math]::Round($MatchedActual.Count / $ActualKP.Count * 100, 1) } else { $KPPrecision = 0 }

        # ── Mapping Accuracy ──────────────────────────────────────────────
        # NOTE(review): same numerator and denominator as KP Precision above.
        $CorrectMappings = $MatchedActual.Count
        if ($ActualKP.Count -gt 0) { $MappingAccuracy = [Math]::Round($CorrectMappings / $ActualKP.Count * 100, 1) } else { $MappingAccuracy = 0 }

        # ── Factual Claim Recall ──────────────────────────────────────────
        $ExpectedClaims = @(Get-OptionalItems -InputObject $Gold -Name 'expected_factual_claims')
        $ActualClaims = @(Get-OptionalItems -InputObject $Summary -Name 'factual_claims')

        $ClaimMatches = 0
        foreach ($EC in $ExpectedClaims) {
            $ECNodes = @(Get-OptionalItems -InputObject $EC -Name 'linked_taxonomy_nodes')
            # Match if any actual claim shares linked taxonomy nodes
            foreach ($AC in $ActualClaims) {
                $ACNodes = @(Get-OptionalItems -InputObject $AC -Name 'linked_taxonomy_nodes')
                $Overlap = @($ECNodes | Where-Object { $_ -in $ACNodes })
                if ($Overlap.Count -gt 0) {
                    $ClaimMatches++
                    break   # count each expected claim at most once
                }
            }
        }

        if ($ExpectedClaims.Count -gt 0) { $ClaimRecall = [Math]::Round($ClaimMatches / $ExpectedClaims.Count * 100, 1) } else { $ClaimRecall = 0 }

        # ── Unmapped Concept Recall ───────────────────────────────────────
        $ExpectedUnmapped = @(Get-OptionalItems -InputObject $Gold -Name 'expected_unmapped_concepts')
        $ActualUnmapped = @(Get-OptionalItems -InputObject $Summary -Name 'unmapped_concepts')

        $UnmappedMatches = 0
        foreach ($EU in $ExpectedUnmapped) {
            $ExpPov = Get-OptionalValue -InputObject $EU -Name 'suggested_pov'
            # Match if any actual unmapped concept has the same suggested POV
            foreach ($AU in $ActualUnmapped) {
                # An actual concept without the property compares as '' (as before).
                if ($AU.PSObject.Properties['suggested_pov']) { $ActPov = $AU.suggested_pov } else { $ActPov = '' }
                if ($ActPov -eq $ExpPov) {
                    $UnmappedMatches++
                    break   # count each expected concept at most once
                }
            }
        }

        if ($ExpectedUnmapped.Count -gt 0) { $UnmappedRecall = [Math]::Round($UnmappedMatches / $ExpectedUnmapped.Count * 100, 1) } else { $UnmappedRecall = 0 }

        # ── Display ───────────────────────────────────────────────────────
        Write-Host "`n  $GoldDocId`:" -ForegroundColor White

        # The recall-derived colour is used for both the recall and the precision line.
        if ($KPRecall -ge 70) { $KPColor = 'Green' } elseif ($KPRecall -ge 50) { $KPColor = 'Yellow' } else { $KPColor = 'Red' }

        Write-Host "    KP Recall:         $KPRecall% ($($MatchedExpected.Count)/$($ExpectedNodeIds.Count))" -ForegroundColor $KPColor
        Write-Host "    KP Precision:      $KPPrecision% ($($MatchedActual.Count)/$($ActualKP.Count))" -ForegroundColor $KPColor
        Write-Host "    Mapping Accuracy:  $MappingAccuracy%" -ForegroundColor $(if ($MappingAccuracy -ge 70) { 'Green' } else { 'Yellow' })
        Write-Host "    Claim Recall:      $ClaimRecall% ($ClaimMatches/$($ExpectedClaims.Count))" -ForegroundColor $(if ($ClaimRecall -ge 70) { 'Green' } else { 'Yellow' })
        Write-Host "    Unmapped Recall:   $UnmappedRecall% ($UnmappedMatches/$($ExpectedUnmapped.Count))" -ForegroundColor $(if ($UnmappedRecall -ge 50) { 'Green' } else { 'Yellow' })

        $AllResults.Add([PSCustomObject][ordered]@{
            DocId            = $GoldDocId
            KPRecall         = $KPRecall
            KPPrecision      = $KPPrecision
            MappingAccuracy  = $MappingAccuracy
            ClaimRecall      = $ClaimRecall
            UnmappedRecall   = $UnmappedRecall
            ExpectedKP       = $ExpectedNodeIds.Count
            ActualKP         = $ActualKP.Count
            ExpectedClaims   = $ExpectedClaims.Count
            ExpectedUnmapped = $ExpectedUnmapped.Count
        })
    }

    # ── Aggregate ─────────────────────────────────────────────────────────
    # Unweighted mean of the per-document percentages; shown only for 2+ documents.
    if ($AllResults.Count -gt 1) {
        Write-Host "`n  AGGREGATE ($($AllResults.Count) documents):" -ForegroundColor Cyan
        Write-Host "    Avg KP Recall:       $([Math]::Round(($AllResults | ForEach-Object { $_.KPRecall } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host "    Avg KP Precision:    $([Math]::Round(($AllResults | ForEach-Object { $_.KPPrecision } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host "    Avg Mapping Acc:     $([Math]::Round(($AllResults | ForEach-Object { $_.MappingAccuracy } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host "    Avg Claim Recall:    $([Math]::Round(($AllResults | ForEach-Object { $_.ClaimRecall } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
        Write-Host "    Avg Unmapped Recall: $([Math]::Round(($AllResults | ForEach-Object { $_.UnmappedRecall } | Measure-Object -Average).Average, 1))%" -ForegroundColor White
    }

    Write-Host ""

    if ($PassThru) {
        return $AllResults.ToArray()
    }
}