AITriad

0.8.0

Public/Compare-DebateRuns.ps1

                                # Copyright (c) 2026 Jeffrey Snover. All rights reserved.

# Licensed under the MIT License. See LICENSE file in the project root.

<#

.SYNOPSIS

    Compares two debate session JSON files side-by-side for A/B evaluation.

.DESCRIPTION

    Extracts key quality metrics from two debate sessions and produces a

    structured comparison. Designed for evaluating the impact of parameter

    changes (e.g., truncation limit, model, rounds) on debate quality.

.PARAMETER SessionA

    Path to the first debate session JSON file.

.PARAMETER SessionB

    Path to the second debate session JSON file.

.PARAMETER LabelA

    Display label for session A (default: 'A').

.PARAMETER LabelB

    Display label for session B (default: 'B').

.PARAMETER PassThru

    Return the comparison object instead of printing a table.

.EXAMPLE

    Compare-DebateRuns -SessionA debate-50k.json -SessionB debate-100k.json -LabelA '50K' -LabelB '100K'

#>

function Compare-DebateRuns {
    [CmdletBinding()]
    param(
        # -PathType Leaf: a directory would pass a bare Test-Path and then fail inside Get-Content.
        [Parameter(Mandatory)][ValidateScript({ Test-Path $_ -PathType Leaf })][string]$SessionA,
        [Parameter(Mandatory)][ValidateScript({ Test-Path $_ -PathType Leaf })][string]$SessionB,
        [string]$LabelA = 'A',
        [string]$LabelB = 'B',
        [switch]$PassThru
    )

    Set-StrictMode -Version Latest

    # Walks a chain of property names and returns the final value, or $null as soon as
    # any link is missing. Under strict mode a plain `$obj.missing` throws, so every
    # optional section of the session JSON is read through this helper instead.
    function Get-OptionalProperty {
        param($InputObject, [string[]]$Name)

        $Current = $InputObject
        foreach ($Segment in $Name) {
            if ($null -eq $Current) { return $null }
            $Property = $Current.PSObject.Properties[$Segment]
            if ($null -eq $Property) { return $null }
            $Current = $Property.Value
        }
        # The unary comma keeps arrays intact (no unrolling of one-element or empty arrays).
        return , $Current
    }

    # Reads one debate session file and returns an ordered dictionary of its quality
    # metrics. Any section that is absent from the JSON contributes 0 for its metrics.
    function Get-DebateMetrics([string]$Path) {
        $S = Get-Content $Path -Raw | ConvertFrom-Json
        if ($null -eq $S) {
            throw "Debate session file '$Path' is empty or does not contain a JSON document."
        }

        # Argument network size.
        $Nodes = Get-OptionalProperty $S -Name 'argument_network', 'nodes'
        $Edges = Get-OptionalProperty $S -Name 'argument_network', 'edges'
        $AnNodes = if ($Nodes) { @($Nodes).Count } else { 0 }
        $AnEdges = if ($Edges) { @($Edges).Count } else { 0 }

        # Percentage of claim_coverage entries whose 'discussed' value is truthy.
        $ClaimCoverage = 0
        $Claims = Get-OptionalProperty $S -Name 'claim_coverage'
        if ($Claims) {
            $Total = @($Claims).Count
            $Discussed = @($Claims | Where-Object { Get-OptionalProperty $_ -Name 'discussed' }).Count
            if ($Total -gt 0) { $ClaimCoverage = [Math]::Round($Discussed / $Total * 100, 1) }
        }

        # extraction_summary.acceptance_rate is scaled by 100 to a percentage.
        $AcceptRate = 0
        $Rate = Get-OptionalProperty $S -Name 'extraction_summary', 'acceptance_rate'
        if ($null -ne $Rate) { $AcceptRate = [Math]::Round($Rate * 100, 1) }

        # Distinct taxonomy nodes referenced across the transcript. A reference is
        # either an object carrying node_id or a bare value used as the id itself.
        $AllNodeIds = [System.Collections.Generic.HashSet[string]]::new()
        $Transcript = Get-OptionalProperty $S -Name 'transcript'
        if ($Transcript) {
            foreach ($Entry in $Transcript) {
                $Refs = Get-OptionalProperty $Entry -Name 'taxonomy_refs'
                if (-not $Refs) { continue }
                foreach ($Ref in $Refs) {
                    $NodeId = Get-OptionalProperty $Ref -Name 'node_id'
                    [void]$AllNodeIds.Add($NodeId ?? $Ref)
                }
            }
        }
        $UniqueNodes = $AllNodeIds.Count

        # Mean utilization_rate over the POVs that report one, as a percentage.
        $TaxUtilization = 0
        $PovCoverage = Get-OptionalProperty $S -Name 'taxonomy_gap_analysis', 'pov_coverage'
        if ($PovCoverage) {
            $Rates = @(
                foreach ($Pov in @('accelerationist', 'safetyist', 'skeptic')) {
                    $PovRate = Get-OptionalProperty $PovCoverage -Name $Pov, 'utilization_rate'
                    if ($null -ne $PovRate) { $PovRate }
                }
            )
            if ($Rates.Count -gt 0) {
                $TaxUtilization = [Math]::Round(($Rates | Measure-Object -Average).Average * 100, 1)
            }
        }

        # overall_score of the last neutral evaluation.
        $NeutralScore = 0
        $Evaluations = Get-OptionalProperty $S -Name 'neutral_evaluations'
        if ($Evaluations -and @($Evaluations).Count -gt 0) {
            $Last = @($Evaluations)[-1]
            $Overall = Get-OptionalProperty $Last -Name 'overall_score'
            if ($Overall) { $NeutralScore = [Math]::Round($Overall, 2) }
        }

        # Total response time, converted from milliseconds to seconds.
        $TotalAiTime = 0
        $ResponseMs = Get-OptionalProperty $S -Name 'diagnostics', 'overview', 'total_response_time_ms'
        if ($null -ne $ResponseMs) { $TotalAiTime = [Math]::Round($ResponseMs / 1000, 1) }

        # Truncation flags from the first 'document_truncation' context-rot stage.
        $CharsTruncated = 0
        $SectionsLost = 0
        $Stages = Get-OptionalProperty $S -Name 'context_rot', 'stages'
        if ($Stages) {
            $TruncStage = @($Stages) |
                Where-Object { (Get-OptionalProperty $_ -Name 'stage') -eq 'document_truncation' } |
                Select-Object -First 1
            $Flags = Get-OptionalProperty $TruncStage -Name 'flags'
            if ($Flags) {
                $CharsTruncated = (Get-OptionalProperty $Flags -Name 'chars_truncated') ?? 0
                $SectionsLost = (Get-OptionalProperty $Flags -Name 'sections_lost') ?? 0
            }
        }

        $Retention = (Get-OptionalProperty $S -Name 'context_rot', 'cumulative_retention') ?? 0

        return [ordered]@{
            an_nodes             = $AnNodes
            an_edges             = $AnEdges
            claim_coverage       = $ClaimCoverage
            accept_rate          = $AcceptRate
            unique_nodes         = $UniqueNodes
            tax_utilization      = $TaxUtilization
            neutral_score        = $NeutralScore
            total_ai_time_s      = $TotalAiTime
            chars_truncated      = $CharsTruncated
            sections_lost        = $SectionsLost
            cumulative_retention = $Retention
        }
    }

    $MetricsA = Get-DebateMetrics $SessionA
    $MetricsB = Get-DebateMetrics $SessionB

    $Comparison = [ordered]@{
        label_a = $LabelA
        label_b = $LabelB
        file_a  = $SessionA
        file_b  = $SessionB
        metrics = [ordered]@{}
    }

    foreach ($Key in $MetricsA.Keys) {
        $VA = $MetricsA[$Key]
        $VB = $MetricsB[$Key]
        $Delta = $VB - $VA
        # Subtracting doubles leaves representation noise (66.7 - 33.3 -> 33.400000000000006);
        # trim it so both the table and the PassThru object carry a clean value.
        if ($Delta -is [double]) { $Delta = [Math]::Round($Delta, 6) }
        $Comparison.metrics[$Key] = [ordered]@{ a = $VA; b = $VB; delta = $Delta }
    }

    if ($PassThru) { return $Comparison }

    # Pretty-print table
    $Labels = @{
        an_nodes             = 'AN nodes'
        an_edges             = 'AN edges'
        claim_coverage       = 'Claim coverage %'
        accept_rate          = 'Extraction accept %'
        unique_nodes         = 'Unique taxonomy nodes'
        tax_utilization      = 'Taxonomy utilization %'
        neutral_score        = 'Neutral eval score'
        total_ai_time_s      = 'Total AI time (s)'
        chars_truncated      = 'Chars truncated'
        sections_lost        = 'Sections lost'
        cumulative_retention = 'Context retention'
    }

    # Metrics for which a decrease from A to B is the improvement.
    $LowerIsBetter = @('total_ai_time_s', 'chars_truncated', 'sections_lost')

    $Header = "  A/B COMPARISON: $LabelA vs $LabelB"
    $Sep = '  ' + ('-' * 55)

    Write-Host "`n$Header" -ForegroundColor White
    Write-Host $Sep -ForegroundColor Gray
    Write-Host ('  {0,-25} {1,8} {2,8} {3,8}' -f 'Metric', $LabelA, $LabelB, 'Delta') -ForegroundColor Cyan

    foreach ($Key in $Comparison.metrics.Keys) {
        $Row = $Comparison.metrics[$Key]
        $VA = $Row['a']
        $VB = $Row['b']
        $Delta = $Row['delta']

        $DeltaStr = if ($Delta -gt 0) { "+$Delta" } elseif ($Delta -eq 0) { '=' } else { "$Delta" }

        # Green = B improved on A, Yellow = B regressed, Gray = unchanged.
        $Improved = if ($Key -in $LowerIsBetter) { $Delta -lt 0 } else { $Delta -gt 0 }
        $Color = if ($Delta -eq 0) { 'Gray' } elseif ($Improved) { 'Green' } else { 'Yellow' }

        Write-Host ('  {0,-25} {1,8} {2,8} ' -f ($Labels[$Key] ?? $Key), $VA, $VB) -ForegroundColor Gray -NoNewline
        Write-Host ('{0,8}' -f $DeltaStr) -ForegroundColor $Color
    }

    Write-Host $Sep -ForegroundColor Gray
}