tests/Test-ModelComparison.ps1
|
# Model Comparison Test: 4-model head-to-head
# gpt-5.5-pro vs gpt-5.4-mini | claude-opus-4-8 vs claude-sonnet-4-6
# All on same candidate, results saved to JSON
#
# Flow: build one prompt, send it to every model in $models, store each raw
# response (or error) with its latency, write everything to a JSON file, then
# print a one-row-per-model summary table.

Import-Module PowerCraft.AI -Force

# Get-PCSecret is not defined in this file - presumably exported by PowerCraft.AI.
$oaiKey = Get-PCSecret -Name 'openai'
$antKey = Get-PCSecret -Name 'anthropic'

# System prompt and the JSON shape the models are asked to return.
$sys = 'You are a professional research analyst. Return ONLY a valid JSON object. Do NOT simulate tool calls. If unknown, use "Unknown".'
$schema = '{"current_title":"str","current_organisation":"str","still_in_role":"Yes/No/Unknown","telematics_activity":{"found":bool,"summary":"str","since_email_date":bool},"linkedin_active":"Active/Inactive/Unknown","confidence":"High/Medium/Low","source_urls":[]}'

# The single candidate every model is queried about.
$candidate = @{name = 'Samuel Lavie'; email = 'laviesh@scopetechnology.com'; org = 'Scope Technology'; date = '2017-08-30' }

$outputDir = "$PSScriptRoot\..\..\..\Data\2017 - 2018\LeadForge_2017 - 2018"
$outputFile = Join-Path $outputDir "model-comparison-4way.json"
if (-not (Test-Path $outputDir)) {
    New-Item -Path $outputDir -ItemType Directory -Force | Out-Null
}

# User prompt. The line break inside the string literal is intentional and is
# part of the prompt text sent to the models.
$up = "Research: $($candidate.name), previously at $($candidate.org) ($($candidate.email)). Email date: $($candidate.date). 
Return JSON: $schema"

Write-Host "`n$('=' * 70)"
Write-Host " $($candidate.name) ($($candidate.email))"
Write-Host ('=' * 70)

# --- Helper: null-safe truncation for table cells ---
function Get-TruncatedText {
    <#
    .SYNOPSIS
        Returns at most MaxLength leading characters of Value as a string.
    .PARAMETER Value
        Any value; it is cast to [string] first, so $null becomes ''.
    .PARAMETER MaxLength
        Maximum number of characters to keep.
    .OUTPUTS
        System.String
    #>
    param(
        [AllowNull()][object]$Value,
        [int]$MaxLength
    )
    $s = [string]$Value
    if ($s.Length -le $MaxLength) { return $s }
    return $s.Substring(0, $MaxLength)
}

# --- Helper: Call OpenAI Responses endpoint ---
function Invoke-OAIResponses {
    <#
    .SYNOPSIS
        Sends the script-level prompt to the OpenAI Responses endpoint.
    .DESCRIPTION
        Reads $sys, $up and $oaiKey from the enclosing script scope. Takes the
        text from the first content part of the 'message' output item(s),
        falling back to the response's output_text property, else $null.
    .PARAMETER Model
        Model name placed in the request body.
    .OUTPUTS
        Hashtable with keys 'text' (string or $null) and 'ms' (elapsed
        milliseconds of the HTTP call).
    #>
    param([string]$Model)

    $body = @{
        model             = $Model
        instructions      = $sys
        input             = $up
        max_output_tokens = 8192
        text              = @{ format = @{ type = 'json_object' } }
    } | ConvertTo-Json -Depth 5
    # Send explicit UTF-8 bytes so the payload encoding matches the declared charset.
    $bodyBytes = [System.Text.Encoding]::UTF8.GetBytes($body)

    # Splatting instead of backtick line continuations, which break silently
    # if any whitespace follows the backtick.
    $request = @{
        Uri         = "https://api.openai.com/v1/responses"
        Method      = 'POST'
        Headers     = @{Authorization = "Bearer $oaiKey" }
        ContentType = 'application/json; charset=utf-8'
        Body        = $bodyBytes
        TimeoutSec  = 300
    }

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $resp = Invoke-RestMethod @request
    $sw.Stop()

    $msg = $resp.output | Where-Object { $_.type -eq 'message' }
    $text = if ($msg -and $msg.content) { $msg.content[0].text }
    elseif ($resp.output_text) { $resp.output_text }
    else { $null }

    return @{ text = $text; ms = $sw.ElapsedMilliseconds }
}

# --- Helper: Call Anthropic Messages endpoint ---
function Invoke-AnthropicMessages {
    <#
    .SYNOPSIS
        Sends the script-level prompt to the Anthropic Messages endpoint.
    .DESCRIPTION
        Reads $sys, $up and $antKey from the enclosing script scope and returns
        the text of the first content block of the response.
    .PARAMETER Model
        Model name placed in the request body.
    .OUTPUTS
        Hashtable with keys 'text' and 'ms' (elapsed milliseconds of the HTTP call).
    #>
    param([string]$Model)

    $body = @{
        model      = $Model
        max_tokens = 8192
        system     = $sys
        messages   = @(@{role = 'user'; content = $up })
    } | ConvertTo-Json -Depth 10
    $bodyBytes = [System.Text.Encoding]::UTF8.GetBytes($body)

    $request = @{
        Uri         = "https://api.anthropic.com/v1/messages"
        Method      = 'POST'
        Headers     = @{'x-api-key' = $antKey; 'anthropic-version' = '2023-06-01' }
        ContentType = 'application/json; charset=utf-8'
        Body        = $bodyBytes
        TimeoutSec  = 300
    }

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $resp = Invoke-RestMethod @request
    $sw.Stop()

    return @{ text = $resp.content[0].text; ms = $sw.ElapsedMilliseconds }
}

# --- Run all 4 models ---
$models = @(
    @{name = 'gpt-5.5-pro'; provider = 'openai' }
    @{name = 'gpt-5.4-mini'; provider = 'openai' }
    @{name = 'claude-opus-4-8'; provider = 'anthropic' }
    @{name = 'claude-sonnet-4-6'; provider = 'anthropic' }
)

# Model name -> @{ response; latency_ms; error }
$allResults = @{}

foreach ($m in $models) {
    Write-Host "`n [$($m.name)] calling..."
    try {
        if ($m.provider -eq 'openai') {
            $res = Invoke-OAIResponses -Model $m.name
        }
        else {
            $res = Invoke-AnthropicMessages -Model $m.name
        }
        $allResults[$m.name] = @{ response = $res.text; latency_ms = $res.ms; error = $null }
        Write-Host " [$($m.name)] OK ($($res.ms)ms)"
        Write-Host $res.text
    }
    catch {
        # A failure for one model is recorded and the run continues with the next.
        $errMsg = $_.Exception.Message
        $detail = if ($_.ErrorDetails.Message) { $_.ErrorDetails.Message } else { '' }
        $allResults[$m.name] = @{ response = $null; latency_ms = 0; error = "$errMsg $detail" }
        Write-Host " [$($m.name)] ERROR: $errMsg"
        if ($detail) { Write-Host " $detail" }
    }
}

# Save results
$output = @{
    timestamp = (Get-Date -Format 'o')
    candidate = $candidate
    prompt    = $up
    results   = $allResults
}
$output | ConvertTo-Json -Depth 10 | Set-Content -Path $outputFile -Encoding utf8
Write-Host "`n Results saved to: $outputFile"

# Summary table
$rowFormat = "{0,-22} {1,-30} {2,-25} {3,-8} {4}"
Write-Host "`n$('=' * 70)"
Write-Host " COMPARISON SUMMARY: $($candidate.name)"
Write-Host ('=' * 70)
Write-Host ($rowFormat -f 'Model', 'Title', 'Organisation', 'Conf', 'Latency')
Write-Host ($rowFormat -f '-----', '-----', '------------', '----', '-------')

foreach ($m in $models) {
    $r = $allResults[$m.name]

    if (-not $r.response) {
        # A call can succeed yet carry no text (response and error both $null),
        # so never dereference $r.error directly. The message is passed as a
        # format ARGUMENT: API error bodies are JSON and their braces would
        # otherwise be parsed as format placeholders and throw.
        $reason = if ($r.error) { ([string]$r.error) -replace '\s+', ' ' } else { 'empty response' }
        Write-Host ("{0,-22} [ERROR: {1}]" -f $m.name, (Get-TruncatedText $reason 60))
        continue
    }

    # Strip markdown code fences if present
    $raw = $r.response
    if ($raw -match '(?s)^\s*```(?:json)?\s*\n(.+?)\n\s*```\s*$') {
        $raw = $Matches[1]
    }

    $parsed = try { $raw | ConvertFrom-Json } catch { $null }
    if (-not $parsed) {
        Write-Host ("{0,-22} [PARSE ERROR]" -f $m.name)
        continue
    }

    # Coerce to string so a missing or non-string field cannot break the row;
    # missing values and 'Unknown' are both rendered as '-'.
    $title = [string]$parsed.current_title
    if (-not $title -or $title -eq 'Unknown') { $title = '-' }
    $org = [string]$parsed.current_organisation
    if (-not $org -or $org -eq 'Unknown') { $org = '-' }
    $conf = $parsed.confidence

    Write-Host ("{0,-22} {1,-30} {2,-25} {3,-8} {4}ms" -f $m.name, (Get-TruncatedText $title 28), (Get-TruncatedText $org 23), $conf, $r.latency_ms)
}