Public/Test-RerankerBaseline.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Test-RerankerBaseline {
    <#
    .SYNOPSIS
        Evaluates cross-encoder reranking on top-K bi-encoder candidates.
    .DESCRIPTION
        Uses the current production bi-encoder embeddings to retrieve top-K
        candidate nodes per golden test claim, then reranks with a cross-encoder
        model and measures MRR lift.

        The work is delegated to scripts/evaluate_embeddings.py (subcommand
        'rerank-baseline'). The script's stderr is echoed to the host, its
        stdout is parsed as JSON, a summary table is printed, and the parsed
        object is returned.

        Gate decision context: if reranking captures >80% of available lift,
        the synthetic corpus investment may not be justified.
    .PARAMETER TopK
        Number of bi-encoder candidates to rerank per claim (default: 10).
    .PARAMETER RerankerModel
        Cross-encoder model name (default: cross-encoder/ms-marco-MiniLM-L-6-v2).
    .PARAMETER GoldenSetPath
        Path to the golden test set JSON. Defaults to research/comp-linguist/_golden_test_set.json.
    .OUTPUTS
        The object parsed from the JSON that evaluate_embeddings.py writes to stdout.
    .EXAMPLE
        Test-RerankerBaseline
        # Evaluates with default settings (top-10, ms-marco reranker).
    .EXAMPLE
        Test-RerankerBaseline -TopK 20 -RerankerModel 'cross-encoder/ms-marco-MiniLM-L-12-v2'
    #>

    [CmdletBinding()]
    param(
        [ValidateRange(1, 100)]
        [int]$TopK = 10,

        [string]$RerankerModel = 'cross-encoder/ms-marco-MiniLM-L-6-v2',

        [string]$GoldenSetPath
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # ── Resolve paths ───────────────────────────────────────────────────
    if (-not $GoldenSetPath) {
        $GoldenSetPath = Join-Path $script:RepoRoot 'research/comp-linguist/_golden_test_set.json'
    }
    # -LiteralPath: a caller-supplied path containing [ or ] must not be treated as a wildcard.
    # -PathType Leaf: a directory is not a usable golden set.
    if (-not (Test-Path -LiteralPath $GoldenSetPath -PathType Leaf)) {
        throw (New-ActionableError `
            -Goal    'Run reranker baseline evaluation' `
            -Problem "Golden test set not found: $GoldenSetPath" `
            -Location 'Test-RerankerBaseline' `
            -NextSteps 'Build the golden test set first (CL prerequisite).')
    }

    $EvalScript = Join-Path $script:RepoRoot 'scripts/evaluate_embeddings.py'
    if (-not (Test-Path -LiteralPath $EvalScript -PathType Leaf)) {
        throw (New-ActionableError `
            -Goal    'Run reranker baseline evaluation' `
            -Problem "evaluate_embeddings.py not found at $EvalScript" `
            -Location 'Test-RerankerBaseline' `
            -NextSteps 'Ensure scripts/evaluate_embeddings.py exists.')
    }

    # Prefer 'python', fall back to 'python3'; fail early if neither resolves so we
    # never reach the exit-code check with a stale $LASTEXITCODE from an earlier command.
    $PythonCmd = @('python', 'python3') |
        Where-Object { Get-Command $_ -ErrorAction SilentlyContinue } |
        Select-Object -First 1
    if (-not $PythonCmd) {
        throw (New-ActionableError `
            -Goal    'Run reranker baseline evaluation' `
            -Problem "No Python interpreter found (tried 'python' and 'python3')" `
            -Location 'Test-RerankerBaseline' `
            -NextSteps 'Install Python and make sure it is on PATH.')
    }

    # ── Invoke Python evaluation ────────────────────────────────────────
    Write-Host "`nReranker Baseline — cross-encoder on top-$TopK bi-encoder candidates" -ForegroundColor Cyan
    Write-Host "Reranker: $RerankerModel" -ForegroundColor DarkGray
    Write-Host "Golden set: $GoldenSetPath`n" -ForegroundColor DarkGray

    $TaxDir = Get-TaxonomyDir
    $PyArgs = @('rerank-baseline', '--golden-set', $GoldenSetPath, '--top-k', $TopK, '--reranker-model', $RerankerModel, '--taxonomy-dir', $TaxDir)

    # With 'Stop' in effect, stderr lines merged via 2>&1 would abort the call;
    # relax the preference for the duration of the native invocation only.
    $PrevEAP = $ErrorActionPreference
    $ErrorActionPreference = 'Continue'
    try {
        $Output = & $PythonCmd $EvalScript @PyArgs 2>&1
    }
    finally { $ErrorActionPreference = $PrevEAP }

    # After 2>&1, stdout lines arrive as strings and stderr lines as ErrorRecords.
    $StdOut = @($Output | Where-Object { $_ -is [string] }) -join "`n"
    $StdErrLines = @(
        $Output |
            Where-Object { $_ -is [System.Management.Automation.ErrorRecord] } |
            ForEach-Object { $_.ToString() }
    )
    foreach ($Line in $StdErrLines) { Write-Host $Line -ForegroundColor DarkGray }
    # Join with newlines so multi-line tracebacks stay readable inside error messages.
    $StdErrText = $StdErrLines -join "`n"

    if ($LASTEXITCODE -ne 0) {
        throw (New-ActionableError `
            -Goal    'Run reranker baseline evaluation' `
            -Problem "evaluate_embeddings.py failed (exit code $LASTEXITCODE)" `
            -Location 'Test-RerankerBaseline' `
            -NextSteps "Check that sentence-transformers is installed: pip install sentence-transformers`nStderr: $StdErrText")
    }

    # ── Parse JSON result ───────────────────────────────────────────────
    if ([string]::IsNullOrWhiteSpace($StdOut)) {
        throw (New-ActionableError `
            -Goal    'Run reranker baseline evaluation' `
            -Problem 'evaluate_embeddings.py produced no output on stdout' `
            -Location 'Test-RerankerBaseline' `
            -NextSteps "Run the script manually to inspect its output.`nStderr: $StdErrText")
    }
    try {
        $Result = $StdOut | ConvertFrom-Json
    }
    catch {
        throw (New-ActionableError `
            -Goal    'Run reranker baseline evaluation' `
            -Problem "evaluate_embeddings.py did not emit valid JSON on stdout: $($_.Exception.Message)" `
            -Location 'Test-RerankerBaseline' `
            -NextSteps "Ensure the script writes only JSON to stdout (logs belong on stderr).`nStderr: $StdErrText")
    }
    if ($null -eq $Result) {
        throw (New-ActionableError `
            -Goal    'Run reranker baseline evaluation' `
            -Problem 'evaluate_embeddings.py output parsed to an empty result' `
            -Location 'Test-RerankerBaseline' `
            -NextSteps "Run the script manually to inspect its output.`nStderr: $StdErrText")
    }

    # Returns the named property's value, or 0 when the property is absent.
    # Going through PSObject.Properties keeps StrictMode from throwing on missing fields.
    $GetMetric = {
        param($Source, [string]$Name)
        $Prop = $Source.PSObject.Properties[$Name]
        if ($Prop) { $Prop.Value } else { 0 }
    }

    # ── Display results ─────────────────────────────────────────────────
    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan
    Write-Host " RERANKER BASELINE RESULTS" -ForegroundColor Cyan
    Write-Host "$('═' * 72)" -ForegroundColor Cyan

    Write-Host "`n $(''.PadRight(20)) $('MRR'.PadLeft(8)) $('R@1'.PadLeft(8)) $('R@3'.PadLeft(8)) $('R@5'.PadLeft(8)) $('R@10'.PadLeft(8))" -ForegroundColor White
    Write-Host " $('─' * 60)" -ForegroundColor DarkGray

    # One entry per table section; Style is splatted onto Write-Host so the
    # bi-encoder row keeps the host's default colour.
    $Sections = @(
        @{ Key = 'baseline_biencoder'; Label = 'Bi-encoder'; Style = @{} }
        @{ Key = 'reranked';           Label = '+ Reranker'; Style = @{ ForegroundColor = 'Green' } }
    )
    $MetricNames = @('global_mrr', 'recall_at_1', 'recall_at_3', 'recall_at_5', 'recall_at_10')

    foreach ($Section in $Sections) {
        $SectionProp = $Result.PSObject.Properties[$Section['Key']]
        if (-not $SectionProp -or -not $SectionProp.Value) { continue }
        $Metrics = $SectionProp.Value

        $Cells = foreach ($MetricName in $MetricNames) {
            "$(& $GetMetric $Metrics $MetricName)".PadLeft(8)
        }
        $Style = $Section['Style']
        Write-Host " $($Section['Label'].PadRight(20)) $($Cells -join ' ')" @Style

        # Optional per-POV breakdown: one dimmed row per property of per_pov.
        $PerPovProp = $Metrics.PSObject.Properties['per_pov']
        if ($PerPovProp -and $PerPovProp.Value) {
            foreach ($Pov in $PerPovProp.Value.PSObject.Properties) {
                $Stats    = $Pov.Value
                $PovMrr   = & $GetMetric $Stats 'mrr'
                $PovR1    = & $GetMetric $Stats 'recall_at_1'
                $PovCount = & $GetMetric $Stats 'count'
                Write-Host " $($Pov.Name.PadRight(18)) $("$PovMrr".PadLeft(8)) $("$PovR1".PadLeft(8)) $("(n=$PovCount)".PadLeft(8))" -ForegroundColor DarkGray
            }
        }
    }

    # ── Lift summary ────────────────────────────────────────────────────
    Write-Host "`n$('─' * 72)" -ForegroundColor DarkGray
    $LiftProp = $Result.PSObject.Properties['lift']
    if ($LiftProp -and $LiftProp.Value) {
        $Lift = $LiftProp.Value
        $MrrDelta = & $GetMetric $Lift 'mrr_delta'
        $R1Delta  = & $GetMetric $Lift 'recall_at_1_delta'
        $R5Delta  = & $GetMetric $Lift 'recall_at_5_delta'
        # All three lines share one colour, driven by the sign of the MRR delta.
        $Color = if ($MrrDelta -gt 0) { 'Green' } else { 'Yellow' }
        Write-Host " MRR lift: $("{0:+0.0000;-0.0000;0.0000}" -f $MrrDelta)" -ForegroundColor $Color
        Write-Host " R@1 lift: $("{0:+0.0000;-0.0000;0.0000}" -f $R1Delta)" -ForegroundColor $Color
        Write-Host " R@5 lift: $("{0:+0.0000;-0.0000;0.0000}" -f $R5Delta)" -ForegroundColor $Color
    }
    if ($Result.PSObject.Properties['elapsed_seconds']) {
        Write-Host " Elapsed: $($Result.elapsed_seconds)s" -ForegroundColor DarkGray
    }
    Write-Host ""

    return $Result
}