# tools/diagnostics/Test-Gpt54DirectResponseMatrix.ps1

<#
.SYNOPSIS
Sends a fixed set of chat-completions request variants to one deployment and
records what each request returned.

.DESCRIPTION
For every variant the script writes the request body, the variant metadata, the
raw response (or error text) and a per-variant summary into its own folder
under the output root. It then writes run-meta.json, matrix-summary.json and
matrix-summary.md to the output root and prints a summary table.
#>
[CmdletBinding()]
param(
    # Base URL of the service; a trailing '/' is trimmed and
    # '/openai/deployments/<deployment>/chat/completions' is appended.
    [string]$Endpoint = 'https://example.openai.azure.com',
    # Value placed in the 'api-version' query parameter of the request URI.
    [string]$ApiVersion = '2025-04-01-preview',
    # Deployment name; it is URL-escaped into the request path.
    [string]$Deployment = 'gpt-5.4',
    # API key sent in the 'api-key' header. When non-blank it wins over the environment variable.
    [string]$ApiKey,
    # Environment variable consulted (Process, then User, then Machine) when -ApiKey is blank.
    [string]$ApiKeyEnvVar = 'PSAOAI_API_AZURE_OPENAI_KEY',
    # Artifact folder. When blank, a timestamped folder under
    # 'temp/gpt54-direct-response-matrix' two levels above this script is used.
    [string]$OutputRoot,
    # Per-request timeout in seconds, forwarded to Invoke-WebRequest.
    [int]$TimeoutSec = 120,
    # Write request bodies and metadata only; each variant is recorded as 'skipped'
    # and no API key is required.
    [switch]$SkipHttp
)

# Strict mode makes reads of missing properties and unset variables terminating
# errors; combined with 'Stop' every error below is terminating unless caught.
Set-StrictMode -Version Latest
$ErrorActionPreference = 'Stop'

function Save-TextArtifact {
    <#
    .SYNOPSIS
    Writes text to a file with UTF8 encoding, creating the parent folder when it is missing.

    .PARAMETER Path
    Destination file path. It is treated literally, so characters such as
    '[' and ']' are not interpreted as wildcards.

    .PARAMETER Content
    Text to write. Null is written as an empty string.
    #>
    param(
        [Parameter(Mandatory)][string]$Path,
        [AllowNull()][AllowEmptyString()][string]$Content
    )

    $parent = Split-Path -Parent $Path
    # -LiteralPath: with -Path a folder named e.g. 'run[1]' is evaluated as a
    # wildcard pattern and the existence check gives the wrong answer.
    if ($parent -and -not (Test-Path -LiteralPath $parent)) {
        [void](New-Item -ItemType Directory -Path $parent -Force)
    }

    if ($null -eq $Content) {
        $Content = ''
    }

    # -LiteralPath for the same reason: Set-Content -Path resolves wildcards and
    # does not reliably create a new file whose path contains brackets.
    Set-Content -LiteralPath $Path -Value $Content -Encoding UTF8
}

function Save-JsonArtifact {
    <#
    .SYNOPSIS
    Serializes a value to JSON and stores it through Save-TextArtifact.

    .PARAMETER Path
    Destination file path.

    .PARAMETER Data
    Value to serialize. It is piped into ConvertTo-Json, so arrays are enumerated by the pipeline.

    .PARAMETER Depth
    Serialization depth handed to ConvertTo-Json (default 20).
    #>
    param(
        [Parameter(Mandatory)][string]$Path,
        [Parameter(Mandatory)]$Data,
        [int]$Depth = 20
    )

    $serialized = $Data | ConvertTo-Json -Depth $Depth

    $writeArguments = @{
        Path    = $Path
        Content = $serialized
    }
    Save-TextArtifact @writeArguments
}

function Resolve-ApiKeyValue {
    <#
    .SYNOPSIS
    Returns the API key to use, taken from an explicit value or from an environment variable.

    .PARAMETER DirectApiKey
    Explicit key. When it is not blank it is trimmed and returned immediately.

    .PARAMETER EnvVarName
    Environment variable looked up in the Process, User and Machine scopes, in
    that order, until a non-blank value is found.

    .OUTPUTS
    The trimmed key, or $null when nothing non-blank was found.
    #>
    param(
        [string]$DirectApiKey,
        [string]$EnvVarName
    )

    $hasDirectKey = -not [string]::IsNullOrWhiteSpace($DirectApiKey)
    if ($hasDirectKey) {
        return $DirectApiKey.Trim()
    }

    $candidate = $null
    foreach ($scope in 'Process', 'User', 'Machine') {
        $candidate = [System.Environment]::GetEnvironmentVariable($EnvVarName, $scope)
        if (-not [string]::IsNullOrWhiteSpace($candidate)) {
            break
        }
    }

    if ([string]::IsNullOrWhiteSpace($candidate)) {
        return $null
    }

    $key = $candidate.Trim()

    # This hex prefix is what ConvertFrom-SecureString output typically starts
    # with on Windows (presumably a DPAPI-protected blob); anything else is
    # taken to be a plain-text key.
    $looksProtected = $key -match '^01000000d08c9ddf'
    if (-not $looksProtected) {
        return $key
    }

    try {
        $secure = ConvertTo-SecureString -String $key -ErrorAction Stop
        $holder = New-Object System.Management.Automation.PSCredential('ignored', $secure)
        return $holder.GetNetworkCredential().Password
    }
    catch {
        # Decryption failed: fall back to the stored text unchanged.
        return $key
    }
}

function Get-ChatUri {
    <#
    .SYNOPSIS
    Builds the chat-completions request URI for a deployment.

    .PARAMETER Endpoint
    Base URL; trailing '/' characters are removed before the path is appended.

    .PARAMETER Deployment
    Deployment name; URL-escaped because it becomes a path segment.

    .PARAMETER ApiVersion
    Value for the 'api-version' query parameter; URL-escaped so characters such
    as '&', '=' or spaces cannot alter the query string.

    .OUTPUTS
    String of the form
    '<endpoint>/openai/deployments/<deployment>/chat/completions?api-version=<version>'.
    #>
    param(
        [Parameter(Mandatory)][string]$Endpoint,
        [Parameter(Mandatory)][string]$Deployment,
        [Parameter(Mandatory)][string]$ApiVersion
    )

    $normalizedEndpoint = $Endpoint.TrimEnd('/')
    $encodedDeployment = [System.Uri]::EscapeDataString($Deployment)
    # Escaped like the deployment; ordinary versions such as
    # '2025-04-01-preview' contain only unreserved characters and are unchanged.
    $encodedApiVersion = [System.Uri]::EscapeDataString($ApiVersion)

    return "$normalizedEndpoint/openai/deployments/$encodedDeployment/chat/completions?api-version=$encodedApiVersion"
}

function New-ReasoningMessages {
    <#
    .SYNOPSIS
    Produces the two-entry message list used by the reasoning request shape.

    .PARAMETER DeveloperPrompt
    Text for the first message, sent with role 'developer'.

    .PARAMETER UserPrompt
    Text for the second message, sent with role 'user'.

    .OUTPUTS
    Two ordered dictionaries, each with the keys 'role' and 'content' in that order.
    #>
    param(
        [Parameter(Mandatory)][string]$DeveloperPrompt,
        [Parameter(Mandatory)][string]$UserPrompt
    )

    $developerTurn = [ordered]@{}
    $developerTurn['role'] = 'developer'
    $developerTurn['content'] = $DeveloperPrompt

    $userTurn = [ordered]@{}
    $userTurn['role'] = 'user'
    $userTurn['content'] = $UserPrompt

    return @($developerTurn, $userTurn)
}

function New-ChatMessages {
    <#
    .SYNOPSIS
    Produces the two-entry message list used by the classic chat request shape.

    .PARAMETER SystemPrompt
    Text for the first message, sent with role 'system'.

    .PARAMETER UserPrompt
    Text for the second message, sent with role 'user'.

    .OUTPUTS
    Two ordered dictionaries, each with the keys 'role' and 'content' in that order.
    #>
    param(
        [Parameter(Mandatory)][string]$SystemPrompt,
        [Parameter(Mandatory)][string]$UserPrompt
    )

    $systemTurn = [ordered]@{}
    $systemTurn['role'] = 'system'
    $systemTurn['content'] = $SystemPrompt

    $userTurn = [ordered]@{}
    $userTurn['role'] = 'user'
    $userTurn['content'] = $UserPrompt

    return @($systemTurn, $userTurn)
}

function Get-VariantDefinitions {
    <#
    .SYNOPSIS
    Returns the request variants exercised by this script.

    .OUTPUTS
    Four ordered dictionaries with the keys Name, RequestShape, Purpose and Body:
    three reasoning-shaped requests (developer/user messages,
    max_completion_tokens, reasoning_effort) followed by one classic chat
    request (system/user messages, max_tokens).
    #>
    $assistantInstruction = 'You are a concise assistant. Answer directly in plain text.'
    $pingPrompt = 'Reply with exactly: pong'
    $helloFunctionPrompt = 'Write one PowerShell function Get-Hello that returns the string hello. Include code only.'
    $reasoningShape = 'reasoning developer/user + max_completion_tokens + reasoning_effort'

    # The reasoning variants differ only in name, purpose, user prompt and token budget.
    $reasoningSpecs = @(
        @{
            Name    = 'reasoning-simple-256'
            Purpose = 'Minimal reasoning-path sanity check with a trivial prompt.'
            Prompt  = $pingPrompt
            Budget  = 256
        },
        @{
            Name    = 'reasoning-code-512'
            Purpose = 'Short coding-style request on the reasoning-path shape PSAOAI currently uses.'
            Prompt  = $helloFunctionPrompt
            Budget  = 512
        },
        @{
            Name    = 'reasoning-code-2048'
            Purpose = 'Same coding-style request with a larger completion budget to detect token-budget gating.'
            Prompt  = $helloFunctionPrompt
            Budget  = 2048
        }
    )

    $definitions = [System.Collections.Generic.List[object]]::new()

    foreach ($spec in $reasoningSpecs) {
        $reasoningBody = [ordered]@{
            messages = (New-ReasoningMessages -DeveloperPrompt $assistantInstruction -UserPrompt $spec['Prompt'])
            max_completion_tokens = $spec['Budget']
            reasoning_effort = 'medium'
        }

        $definitions.Add([ordered]@{
            Name = $spec['Name']
            RequestShape = $reasoningShape
            Purpose = $spec['Purpose']
            Body = $reasoningBody
        })
    }

    $chatBody = [ordered]@{
        messages = (New-ChatMessages -SystemPrompt $assistantInstruction -UserPrompt $pingPrompt)
        temperature = 0.2
        top_p = 1
        stream = $false
        max_tokens = 256
    }

    $definitions.Add([ordered]@{
        Name = 'chat-simple-256'
        RequestShape = 'classic chat system/user + max_tokens'
        Purpose = 'Checks whether the deployment emits normal chat content on a non-reasoning request shape.'
        Body = $chatBody
    })

    return $definitions.ToArray()
}

function Get-ErrorBodyText {
    <#
    .SYNOPSIS
    Extracts the response body text from a failed web request, when one is available.

    .DESCRIPTION
    Sources are tried in order: the record's ErrorDetails message, a 'Content'
    member on the exception's response that offers ReadAsStringAsync, and
    finally a response stream obtained from GetResponseStream. Every lookup is
    guarded so the function is safe under Set-StrictMode for exceptions that
    carry no 'Response' member at all (for example connection failures or
    timeouts).

    .PARAMETER ErrorRecord
    The error record caught from the web request.

    .OUTPUTS
    The body text, or $null when no non-blank body could be obtained.
    #>
    param([Parameter(Mandatory)]$ErrorRecord)

    if ($ErrorRecord.ErrorDetails -and $ErrorRecord.ErrorDetails.Message) {
        return [string]$ErrorRecord.ErrorDetails.Message
    }

    $exception = $ErrorRecord.Exception
    if ($null -eq $exception) {
        return $null
    }

    # Look the member up by name instead of dotting into it: under strict mode
    # '$exception.Response' throws when the exception type has no such property.
    $responseMember = $exception.PSObject.Properties['Response']
    if ($null -eq $responseMember) {
        return $null
    }

    $response = $null
    try {
        $response = $responseMember.Value
    }
    catch {
        # A throwing getter is treated the same as "no response available".
        return $null
    }
    if ($null -eq $response) {
        return $null
    }

    # Response object exposing a 'Content' member with ReadAsStringAsync.
    try {
        $contentMember = $response.PSObject.Properties['Content']
        if ($null -ne $contentMember -and $contentMember.Value) {
            $content = $contentMember.Value.ReadAsStringAsync().GetAwaiter().GetResult()
            if (-not [string]::IsNullOrWhiteSpace($content)) {
                return $content
            }
        }
    }
    catch {
        # Best effort: the body may be disposed or unreadable; try the stream path next.
    }

    # Response object exposing GetResponseStream().
    try {
        if ($null -ne $response.PSObject.Methods['GetResponseStream']) {
            $stream = $response.GetResponseStream()
            if ($stream) {
                $reader = [System.IO.StreamReader]::new($stream)
                try {
                    $content = $reader.ReadToEnd()
                    if (-not [string]::IsNullOrWhiteSpace($content)) {
                        return $content
                    }
                }
                finally {
                    $reader.Close()
                }
            }
        }
    }
    catch {
        # Best effort: an unreadable stream simply means no body text is reported.
    }

    return $null
}

function Get-ParsedSuccessSummary {
    <#
    .SYNOPSIS
    Condenses a parsed chat-completions response into the per-variant summary record.

    .DESCRIPTION
    Reads the first choice's message (content, role, refusal, tool_calls,
    annotations), the finish reason and the usage counters. Every member is
    looked up defensively, so fields the service omitted are reported as
    $null / 0 instead of raising a strict-mode "property not found" error.

    .PARAMETER ParsedResponse
    Object produced by ConvertFrom-Json for the response body; may be $null.

    .PARAMETER VariantName
    Name of the variant the response belongs to.

    .PARAMETER RequestBodyPath
    Path of the saved request body, copied into the summary.

    .PARAMETER RawResponsePath
    Path of the saved raw response, copied into the summary.

    .PARAMETER HttpStatusCode
    HTTP status code of the response.

    .OUTPUTS
    Ordered dictionary with status 'ok' and the keys variant, status,
    requestBodyPath, rawHttpResponsePath, httpStatusCode, finishReason,
    rawFirstMessageContentLength, rawFirstMessageContentType, isEmpty,
    rawChoiceMessagePropertyNames, rawToolCallsCount, rawAnnotationsCount,
    usage and alternateFields.
    #>
    param(
        [AllowNull()]$ParsedResponse,
        [string]$VariantName,
        [string]$RequestBodyPath,
        [string]$RawResponsePath,
        [int]$HttpStatusCode
    )

    # Returns the named member's value, or $null when the source or the member is absent.
    function Get-OptionalMember {
        param(
            [AllowNull()]$Source,
            [Parameter(Mandatory)][string]$Name
        )

        if ($null -eq $Source) {
            return $null
        }
        if ($Source -is [System.Collections.IDictionary]) {
            if ($Source.Contains($Name)) {
                # Leading comma keeps array values intact instead of unrolling them.
                return , $Source[$Name]
            }
            return $null
        }

        $member = $Source.PSObject.Properties[$Name]
        if ($null -eq $member) {
            return $null
        }
        return , $member.Value
    }

    $message = $null
    $content = $null
    $contentLength = 0
    $contentType = $null
    $toolCallsCount = 0
    $annotationsCount = 0
    $messagePropertyNames = @()
    $finishReason = $null
    $reasoningTokens = $null
    $completionTokens = $null
    $promptTokens = $null
    $totalTokens = $null
    $alternateFields = [ordered]@{}

    $choicesValue = Get-OptionalMember -Source $ParsedResponse -Name 'choices'
    $choices = @()
    if ($null -ne $choicesValue) {
        $choices = @($choicesValue)
    }

    if ($choices.Count -gt 0) {
        $firstChoice = $choices[0]
        $finishReason = Get-OptionalMember -Source $firstChoice -Name 'finish_reason'
        $message = Get-OptionalMember -Source $firstChoice -Name 'message'

        if ($null -ne $message) {
            # Enumerate explicitly: member enumeration on an empty property
            # collection fails under strict mode.
            $messagePropertyNames = @($message.PSObject.Properties | ForEach-Object { $_.Name })
            $content = Get-OptionalMember -Source $message -Name 'content'
            $alternateFields['role'] = Get-OptionalMember -Source $message -Name 'role'
            $alternateFields['refusal'] = Get-OptionalMember -Source $message -Name 'refusal'

            $toolCalls = Get-OptionalMember -Source $message -Name 'tool_calls'
            if ($null -ne $toolCalls) {
                $toolCallsCount = @($toolCalls).Count
            }

            $annotations = Get-OptionalMember -Source $message -Name 'annotations'
            if ($null -ne $annotations) {
                $annotationsCount = @($annotations).Count
            }
        }
    }

    if ($null -eq $content) {
        $contentLength = 0
        $contentType = $null
    }
    elseif ($content -is [string]) {
        $contentLength = $content.Length
        $contentType = $content.GetType().FullName
    }
    else {
        # Structured content: measure the length of its compact JSON form.
        $serializedContent = $content | ConvertTo-Json -Depth 20 -Compress
        $contentLength = $serializedContent.Length
        $contentType = $content.GetType().FullName
    }

    $usage = Get-OptionalMember -Source $ParsedResponse -Name 'usage'
    if ($null -ne $usage) {
        $completionTokens = Get-OptionalMember -Source $usage -Name 'completion_tokens'
        $promptTokens = Get-OptionalMember -Source $usage -Name 'prompt_tokens'
        $totalTokens = Get-OptionalMember -Source $usage -Name 'total_tokens'

        $tokenDetails = Get-OptionalMember -Source $usage -Name 'completion_tokens_details'
        $reasoningTokens = Get-OptionalMember -Source $tokenDetails -Name 'reasoning_tokens'
    }

    return [ordered]@{
        variant = $VariantName
        status = 'ok'
        requestBodyPath = $RequestBodyPath
        rawHttpResponsePath = $RawResponsePath
        httpStatusCode = $HttpStatusCode
        finishReason = $finishReason
        rawFirstMessageContentLength = $contentLength
        rawFirstMessageContentType = $contentType
        isEmpty = ($contentLength -eq 0)
        rawChoiceMessagePropertyNames = $messagePropertyNames
        rawToolCallsCount = $toolCallsCount
        rawAnnotationsCount = $annotationsCount
        usage = [ordered]@{
            prompt_tokens = $promptTokens
            completion_tokens = $completionTokens
            total_tokens = $totalTokens
            reasoning_tokens = $reasoningTokens
        }
        alternateFields = $alternateFields
    }
}

function Invoke-Variant {
    <#
    .SYNOPSIS
    Runs one request variant and writes its artifacts and summary to disk.

    .DESCRIPTION
    Creates '<OutputFolder>/<variant name>' and writes request-body.json and
    variant.json there. Unless -SkipHttp is set, the body is POSTed to the URI
    and the outcome is recorded as one of:
      ok          - response saved to http-success-response.json and summarized
      parse-error - response saved but it was not valid JSON
      http-error  - the request (or later processing) raised an error; the
                    error body or error text is saved to http-error.txt
    With -SkipHttp the status is 'skipped'. The summary is always written to
    summary.json and returned.

    .PARAMETER Variant
    Variant definition exposing Name, RequestShape, Purpose and Body.

    .PARAMETER Uri
    Request URI.

    .PARAMETER Headers
    Request headers passed to Invoke-WebRequest.

    .PARAMETER OutputFolder
    Root folder under which the variant folder is created.

    .PARAMETER TimeoutSec
    Request timeout in seconds.

    .PARAMETER SkipHttp
    Write the request artifacts only; do not send the request.

    .OUTPUTS
    Ordered dictionary describing the outcome.
    #>
    param(
        [Parameter(Mandatory)]$Variant,
        [Parameter(Mandatory)][string]$Uri,
        [Parameter(Mandatory)][hashtable]$Headers,
        [Parameter(Mandatory)][string]$OutputFolder,
        [Parameter(Mandatory)][int]$TimeoutSec,
        [switch]$SkipHttp
    )

    # Builds the summary record used for every outcome that has no parsed response.
    function New-BlankSummary {
        param(
            $VariantName,
            $Status,
            $BodyPath,
            $ResponsePath,
            $StatusCode,
            $AlternateFields
        )

        return [ordered]@{
            variant = $VariantName
            status = $Status
            requestBodyPath = $BodyPath
            rawHttpResponsePath = $ResponsePath
            httpStatusCode = $StatusCode
            finishReason = $null
            rawFirstMessageContentLength = $null
            rawFirstMessageContentType = $null
            isEmpty = $null
            rawChoiceMessagePropertyNames = @()
            rawToolCallsCount = $null
            rawAnnotationsCount = $null
            usage = $null
            alternateFields = $AlternateFields
        }
    }

    $variantFolder = Join-Path $OutputFolder $Variant.Name
    if (-not (Test-Path -Path $variantFolder)) {
        [void](New-Item -ItemType Directory -Path $variantFolder -Force)
    }

    $requestBodyPath = Join-Path $variantFolder 'request-body.json'
    $variantMetaPath = Join-Path $variantFolder 'variant.json'
    $rawResponsePath = Join-Path $variantFolder 'http-success-response.json'
    $rawErrorPath = Join-Path $variantFolder 'http-error.txt'
    $summaryPath = Join-Path $variantFolder 'summary.json'

    # The exact JSON text saved here is also what gets sent.
    $bodyJson = $Variant.Body | ConvertTo-Json -Depth 20
    Save-TextArtifact -Path $requestBodyPath -Content $bodyJson
    Save-JsonArtifact -Path $variantMetaPath -Data ([ordered]@{
        name = $Variant.Name
        requestShape = $Variant.RequestShape
        purpose = $Variant.Purpose
        uri = $Uri
    })

    if ($SkipHttp) {
        $summary = New-BlankSummary -VariantName $Variant.Name -Status 'skipped' -BodyPath $requestBodyPath -ResponsePath $null -StatusCode $null -AlternateFields $null
        Save-JsonArtifact -Path $summaryPath -Data $summary
        return $summary
    }

    try {
        # -UseBasicParsing: on Windows PowerShell 5.1 the default parser depends
        # on Internet Explorer and can fail on hosts where it is unavailable;
        # newer PowerShell versions accept the switch and ignore it.
        $response = Invoke-WebRequest -Uri $Uri -Method POST -Headers $Headers -Body $bodyJson -TimeoutSec $TimeoutSec -ContentType 'application/json; charset=utf-8' -UseBasicParsing -ErrorAction Stop
        $rawContent = $response.Content
        Save-TextArtifact -Path $rawResponsePath -Content $rawContent

        $parsed = $null
        try {
            $parsed = $rawContent | ConvertFrom-Json -ErrorAction Stop
        }
        catch {
            $parseDetails = [ordered]@{
                parseError = $_.Exception.Message
            }
            $summary = New-BlankSummary -VariantName $Variant.Name -Status 'parse-error' -BodyPath $requestBodyPath -ResponsePath $rawResponsePath -StatusCode ([int]$response.StatusCode) -AlternateFields $parseDetails
            Save-JsonArtifact -Path $summaryPath -Data $summary
            return $summary
        }

        $summary = Get-ParsedSuccessSummary -ParsedResponse $parsed -VariantName $Variant.Name -RequestBodyPath $requestBodyPath -RawResponsePath $rawResponsePath -HttpStatusCode ([int]$response.StatusCode)
        Save-JsonArtifact -Path $summaryPath -Data $summary
        return $summary
    }
    catch {
        # Capture the record once; nested try/catch blocks below rebind $_.
        $failure = $_

        $errorBody = $null
        try {
            $errorBody = Get-ErrorBodyText -ErrorRecord $failure
        }
        catch {
            # Body extraction is best effort; the error record text is saved instead.
            $errorBody = $null
        }

        $statusCode = $null
        try {
            # Not every exception type carries a Response (e.g. connection failures).
            $responseMember = $failure.Exception.PSObject.Properties['Response']
            if ($null -ne $responseMember -and $null -ne $responseMember.Value) {
                $rawStatus = $responseMember.Value.StatusCode
                if ($rawStatus) {
                    $statusCode = [int]$rawStatus
                }
            }
        }
        catch {
            # No usable status code; it stays $null in the summary.
            $statusCode = $null
        }

        $errorText = if ($errorBody) { $errorBody } else { $failure | Out-String }
        Save-TextArtifact -Path $rawErrorPath -Content $errorText

        $errorDetails = [ordered]@{
            errorMessage = $failure.Exception.Message
        }
        $summary = New-BlankSummary -VariantName $Variant.Name -Status 'http-error' -BodyPath $requestBodyPath -ResponsePath $rawErrorPath -StatusCode $statusCode -AlternateFields $errorDetails
        Save-JsonArtifact -Path $summaryPath -Data $summary
        return $summary
    }
}

function Save-MarkdownSummary {
    <#
    .SYNOPSIS
    Writes the result matrix as a Markdown table through Save-TextArtifact.

    .PARAMETER Path
    Destination file path.

    .PARAMETER Results
    Summary records; each contributes one table row.

    .PARAMETER Uri
    Request URI shown in the document header.
    #>
    param(
        [Parameter(Mandatory)][string]$Path,
        [Parameter(Mandatory)]$Results,
        [Parameter(Mandatory)][string]$Uri
    )

    $document = [System.Collections.Generic.List[string]]::new()

    # Header: title, run details, then the table head and its alignment row.
    $document.Add('# gpt-5.4 direct response matrix')
    $document.Add('')
    $document.Add("- URI: $Uri")
    $document.Add("- Generated: $(Get-Date -Format o)")
    $document.Add('')
    $document.Add('| Variant | Status | HTTP | Finish | Content length | Content type | Empty | Reasoning tokens | Request body | Raw response |')
    $document.Add('|---|---|---:|---|---:|---|---|---:|---|---|')

    foreach ($entry in $Results) {
        # Records without a usage section have no reasoning-token count to show.
        $reasoningTokens = $null
        if ($null -ne $entry.usage) {
            $reasoningTokens = $entry.usage.reasoning_tokens
        }

        $row = "| $($entry.variant) | $($entry.status) | $($entry.httpStatusCode) | $($entry.finishReason) | $($entry.rawFirstMessageContentLength) | $($entry.rawFirstMessageContentType) | $($entry.isEmpty) | $reasoningTokens | $($entry.requestBodyPath) | $($entry.rawHttpResponsePath) |"
        $document.Add($row)
    }

    $markdown = $document.ToArray() -join [Environment]::NewLine
    Save-TextArtifact -Path $Path -Content $markdown
}

# --- Script body: resolve output location and credentials, run every variant, write reports ---

# Repository root is taken to be two levels above this script's folder.
$repoRootInfo = Resolve-Path (Join-Path $PSScriptRoot '../..')
$repoRoot = $repoRootInfo.Path

if ([string]::IsNullOrWhiteSpace($OutputRoot)) {
    $runStamp = Get-Date -Format 'yyyyMMdd_HHmmss'
    $OutputRoot = Join-Path $repoRoot "temp/gpt54-direct-response-matrix/$runStamp"
}

$outputRootExists = Test-Path -Path $OutputRoot
if (-not $outputRootExists) {
    $null = New-Item -ItemType Directory -Path $OutputRoot -Force
}

# A key is only required when requests are actually sent.
$resolvedApiKey = Resolve-ApiKeyValue -DirectApiKey $ApiKey -EnvVarName $ApiKeyEnvVar
$hasApiKey = -not [string]::IsNullOrWhiteSpace($resolvedApiKey)
if (-not ($SkipHttp -or $hasApiKey)) {
    throw "API key not found. Pass -ApiKey explicitly or set $ApiKeyEnvVar."
}

$uri = Get-ChatUri -Endpoint $Endpoint -Deployment $Deployment -ApiVersion $ApiVersion

$headers = @{}
$headers['Content-Type'] = 'application/json; charset=utf-8'
$headers['OpenAI-Debug'] = 'true'
if ($hasApiKey) {
    $headers['api-key'] = $resolvedApiKey
}

$variants = @(Get-VariantDefinitions)

# Run metadata records whether a key was found, never the key itself.
$runMeta = [ordered]@{
    timestamp = (Get-Date).ToString('o')
    endpoint = $Endpoint
    apiVersion = $ApiVersion
    deployment = $Deployment
    uri = $uri
    outputRoot = $OutputRoot
    skipHttp = [bool]$SkipHttp
    apiKeyResolved = $hasApiKey
    variantNames = @($variants.Name)
}
$runMetaPath = Join-Path $OutputRoot 'run-meta.json'
Save-JsonArtifact -Path $runMetaPath -Data $runMeta

$results = @(
    foreach ($variant in $variants) {
        Write-Host "== Running $($variant.Name)" -ForegroundColor Cyan
        $variantSummary = Invoke-Variant -Variant $variant -Uri $uri -Headers $headers -OutputFolder $OutputRoot -TimeoutSec $TimeoutSec -SkipHttp:$SkipHttp
        [pscustomobject]$variantSummary
    }
)

$matrixJsonPath = Join-Path $OutputRoot 'matrix-summary.json'
$matrixMarkdownPath = Join-Path $OutputRoot 'matrix-summary.md'
Save-JsonArtifact -Path $matrixJsonPath -Data $results
Save-MarkdownSummary -Path $matrixMarkdownPath -Results $results -Uri $uri

$results | Format-Table variant, status, httpStatusCode, finishReason, rawFirstMessageContentLength, rawFirstMessageContentType, isEmpty -AutoSize
Write-Host "Artifacts written to: $OutputRoot" -ForegroundColor Green