Public/Compare-EmbeddingModel.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Compare-EmbeddingModel {
    <#
    .SYNOPSIS
        Encoder ablation: compares MRR across embedding models on the golden test set.
    .DESCRIPTION
        Re-embeds taxonomy nodes and golden test claims with each specified model,
        computes cosine similarity rankings, and reports MRR and Recall@K per model
        with per-POV breakdowns.

        Gate decision context: if an alternative encoder captures >80% of available
        lift, the synthetic corpus investment may not be justified.

        The evaluation itself is delegated to scripts/evaluate_embeddings.py
        (subcommand 'compare-models'). This function validates its inputs, runs the
        script, echoes the script's stderr to the host, parses its stdout as JSON,
        prints a summary table and returns the parsed object.
    .PARAMETER Models
        Embedding model names to compare (sentence-transformers compatible).
    .PARAMETER GoldenSetPath
        Path to the golden test set JSON. Defaults to
        research/comp-linguist/_golden_test_set.json.
    .OUTPUTS
        The object produced by ConvertFrom-Json from the evaluation script's stdout.
    .NOTES
        Relies on names defined outside this function: $script:RepoRoot,
        Get-TaxonomyDir and New-ActionableError.
    .EXAMPLE
        Compare-EmbeddingModel
        # Compares default models (MiniLM, mpnet, BGE) on golden test set.
    .EXAMPLE
        Compare-EmbeddingModel -Models 'all-MiniLM-L6-v2', 'all-mpnet-base-v2'
        # Compares two specific models.
    #>
    [CmdletBinding()]
    param(
        [string[]]$Models = @('all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'BAAI/bge-base-en-v1.5'),
        [string]$GoldenSetPath
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # Reads a property from a parsed-JSON object, or returns a default when the
    # property is absent. Needed because StrictMode 'Latest' turns access to a
    # missing property into a terminating error.
    $ReadProp = {
        param($Object, [string]$PropertyName, $Default)
        $Found = $Object.PSObject.Properties[$PropertyName]
        if ($Found) { $Found.Value } else { $Default }
    }

    # ── Resolve paths ───────────────────────────────────────────────────
    if (-not $GoldenSetPath) {
        $GoldenSetPath = Join-Path $script:RepoRoot 'research/comp-linguist/_golden_test_set.json'
    }
    if (-not (Test-Path $GoldenSetPath)) {
        throw (New-ActionableError `
            -Goal 'Run embedding model comparison' `
            -Problem "Golden test set not found: $GoldenSetPath" `
            -Location 'Compare-EmbeddingModel' `
            -NextSteps 'Build the golden test set first (CL prerequisite).')
    }

    $EvalScript = Join-Path $script:RepoRoot 'scripts/evaluate_embeddings.py'
    if (-not (Test-Path $EvalScript)) {
        throw (New-ActionableError `
            -Goal 'Run embedding model comparison' `
            -Problem "evaluate_embeddings.py not found at $EvalScript" `
            -Location 'Compare-EmbeddingModel' `
            -NextSteps 'Ensure scripts/evaluate_embeddings.py exists.')
    }

    # Prefer 'python', fall back to 'python3'; fail with an actionable message
    # when neither is on PATH instead of a raw CommandNotFound error.
    $PythonCmd = @('python', 'python3') |
        Where-Object { Get-Command $_ -ErrorAction SilentlyContinue } |
        Select-Object -First 1
    if (-not $PythonCmd) {
        throw (New-ActionableError `
            -Goal 'Run embedding model comparison' `
            -Problem 'No Python interpreter found (tried python, python3)' `
            -Location 'Compare-EmbeddingModel' `
            -NextSteps 'Install Python and make sure it is on PATH.')
    }

    $ModelList = $Models -join ','

    # ── Invoke Python evaluation ────────────────────────────────────────
    Write-Host "`nEncoder Ablation — comparing $($Models.Count) models on golden test set" -ForegroundColor Cyan
    Write-Host "Models: $ModelList" -ForegroundColor DarkGray
    Write-Host "Golden set: $GoldenSetPath`n" -ForegroundColor DarkGray

    $TaxDir = Get-TaxonomyDir

    # Deliberately NOT named $Args: that is a PowerShell automatic variable.
    $PyArgs = @(
        'compare-models',
        '--golden-set', $GoldenSetPath,
        '--models', $ModelList,
        '--taxonomy-dir', $TaxDir
    )

    # Relax the error preference only around the native call so that stderr
    # records merged via 2>&1 do not abort the function under 'Stop'.
    $PrevEAP = $ErrorActionPreference
    $ErrorActionPreference = 'Continue'
    try {
        $Output = & $PythonCmd $EvalScript @PyArgs 2>&1
    }
    finally {
        $ErrorActionPreference = $PrevEAP
    }

    # With 2>&1, stdout arrives as strings and stderr as ErrorRecord objects.
    $StdOut = @($Output | Where-Object { $_ -is [string] }) -join "`n"
    $StdErrLines = @(
        $Output |
            Where-Object { $_ -is [System.Management.Automation.ErrorRecord] } |
            ForEach-Object { $_.ToString() }
    )
    foreach ($Line in $StdErrLines) {
        Write-Host $Line -ForegroundColor DarkGray
    }
    # Keep stderr line-structured in error messages (tracebacks are multi-line).
    $StdErr = $StdErrLines -join "`n"

    if ($LASTEXITCODE -ne 0) {
        throw (New-ActionableError `
            -Goal 'Run embedding model comparison' `
            -Problem "evaluate_embeddings.py failed (exit code $LASTEXITCODE)" `
            -Location 'Compare-EmbeddingModel' `
            -NextSteps "Check that sentence-transformers is installed: pip install sentence-transformers`nStderr: $StdErr")
    }

    if ([string]::IsNullOrWhiteSpace($StdOut)) {
        throw (New-ActionableError `
            -Goal 'Run embedding model comparison' `
            -Problem 'evaluate_embeddings.py exited successfully but wrote nothing to stdout' `
            -Location 'Compare-EmbeddingModel' `
            -NextSteps "Run the script manually and inspect its output.`nStderr: $StdErr")
    }

    try {
        $Result = $StdOut | ConvertFrom-Json
    }
    catch {
        throw (New-ActionableError `
            -Goal 'Run embedding model comparison' `
            -Problem "evaluate_embeddings.py output is not valid JSON: $($_.Exception.Message)" `
            -Location 'Compare-EmbeddingModel' `
            -NextSteps "Run the script manually and inspect its stdout.`nStderr: $StdErr")
    }
    if ($null -eq $Result) {
        throw (New-ActionableError `
            -Goal 'Run embedding model comparison' `
            -Problem 'evaluate_embeddings.py output parsed to an empty JSON value' `
            -Location 'Compare-EmbeddingModel' `
            -NextSteps "Run the script manually and inspect its stdout.`nStderr: $StdErr")
    }

    # ── Display results ─────────────────────────────────────────────────
    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan
    Write-Host " ENCODER ABLATION RESULTS" -ForegroundColor Cyan
    Write-Host "$('═' * 72)" -ForegroundColor Cyan

    if ($Result.PSObject.Properties['baseline_reference'] -and $Result.baseline_reference) {
        $bl = $Result.baseline_reference
        if ($bl.PSObject.Properties['global_mrr']) {
            Write-Host "`n Production baseline (weighted multi-field): MRR = $($bl.global_mrr)" -ForegroundColor Yellow
        }
    }

    $HeaderCells = @(
        'Model'.PadRight(35)
        'MRR'.PadLeft(8)
        'R@1'.PadLeft(8)
        'R@3'.PadLeft(8)
        'R@5'.PadLeft(8)
        'Dim'.PadLeft(6)
    )
    Write-Host "`n $($HeaderCells -join ' ')" -ForegroundColor White
    Write-Host " $('─' * 73)" -ForegroundColor DarkGray

    $BestModel = $null
    $BestMrr = 0.0

    $ModelProps = @()
    if ($Result.PSObject.Properties['models'] -and $Result.models) {
        $ModelProps = @($Result.models.PSObject.Properties)
    }

    foreach ($ModelProp in $ModelProps) {
        $Name = $ModelProp.Name
        $m = $ModelProp.Value

        $mrr = & $ReadProp $m 'global_mrr' 0
        $r1 = & $ReadProp $m 'recall_at_1' 0
        $r3 = & $ReadProp $m 'recall_at_3' 0
        $r5 = & $ReadProp $m 'recall_at_5' 0
        $dim = & $ReadProp $m 'dimension' '?'

        $RowCells = @(
            $Name.PadRight(35)
            "$mrr".PadLeft(8)
            "$r1".PadLeft(8)
            "$r3".PadLeft(8)
            "$r5".PadLeft(8)
            "$dim".PadLeft(6)
        )
        Write-Host " $($RowCells -join ' ')"

        # Track the highest global MRR seen so far.
        if ($mrr -gt $BestMrr) {
            $BestMrr = $mrr
            $BestModel = $Name
        }

        if ($m.PSObject.Properties['per_pov'] -and $m.per_pov) {
            foreach ($PovProp in $m.per_pov.PSObject.Properties) {
                $p = $PovProp.Value
                $pMrr = & $ReadProp $p 'mrr' 0
                $pR1 = & $ReadProp $p 'recall_at_1' 0
                $pCt = & $ReadProp $p 'count' 0

                $PovCells = @(
                    $PovProp.Name.PadRight(33)
                    "$pMrr".PadLeft(8)
                    "$pR1".PadLeft(8)
                    "(n=$pCt)".PadLeft(8)
                )
                Write-Host " $($PovCells -join ' ')" -ForegroundColor DarkGray
            }
        }
    }

    # ── Recommendation ──────────────────────────────────────────────────
    Write-Host "`n$('─' * 72)" -ForegroundColor DarkGray

    if ($Result.PSObject.Properties['recommendation'] -and $Result.recommendation) {
        $rec = $Result.recommendation
        $lift = & $ReadProp $rec 'lift_vs_production' 0

        # Guard the baseline lookup like every other JSON property; without it a
        # missing 'production_baseline_mrr' throws under StrictMode.
        $HasBaseline = $null -ne $rec.PSObject.Properties['production_baseline_mrr']
        $liftPct = 0
        if ($BestMrr -gt 0 -and $HasBaseline) {
            # Max(0.001, …) avoids dividing by a zero baseline.
            $Denominator = [Math]::Max(0.001, $rec.production_baseline_mrr)
            $liftPct = [Math]::Round($lift / $Denominator * 100, 1)
        }

        $LiftColor = if ($lift -gt 0) { 'Green' } else { 'Yellow' }

        Write-Host " Best model: $BestModel (MRR $BestMrr)" -ForegroundColor Green
        Write-Host " Lift vs production: $("{0:+0.0000;-0.0000;0.0000}" -f $lift) ($liftPct%)" -ForegroundColor $LiftColor
    }

    Write-Host ""
    return $Result
}