# Public/Get-TaxonomyHealth.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Get-TaxonomyHealth {
    <#
    .SYNOPSIS
        Displays a diagnostic report on taxonomy coverage and usage across all summaries.
    .DESCRIPTION
        Scans every summary JSON against the taxonomy to surface:
        - Orphan nodes (zero citations)
        - Most/least cited nodes
        - Unmapped concept frequency
        - Stance variance (nodes cited with both aligned and opposed stances)
        - Coverage balance across POVs and categories
        - Cross-cutting reference health

        The data itself is produced by Get-TaxonomyHealthData; this function only
        renders it to the host (via Write-Host and the module's Write-* helpers),
        optionally exports it as JSON, and optionally returns it.

        No AI calls are made — this is a purely offline diagnostic.
    .PARAMETER RepoRoot
        Path to the repository root. Defaults to the module-resolved repo root.
    .PARAMETER OutputFile
        Optional path to write the full health data as JSON. A failure to write
        the file is reported as a warning and does not abort the command.
    .PARAMETER Detailed
        Show all unmapped concepts (instead of the top 20) and a per-document breakdown.
    .PARAMETER GraphMode
        Include graph-structural health metrics (echo chambers, cross-POV connectivity, etc.).
    .PARAMETER PassThru
        Return the health data object for piping to other commands.
    .OUTPUTS
        Nothing by default. With -PassThru, the object returned by Get-TaxonomyHealthData.
    .EXAMPLE
        Get-TaxonomyHealth
    .EXAMPLE
        Get-TaxonomyHealth -GraphMode
    .EXAMPLE
        Get-TaxonomyHealth -Detailed -OutputFile health.json
    .EXAMPLE
        $h = Get-TaxonomyHealth -PassThru
        Invoke-TaxonomyProposal -HealthData $h
    #>

    [CmdletBinding()]
    param(
        [string]$RepoRoot   = $script:RepoRoot,
        [string]$OutputFile  = '',
        [switch]$Detailed,
        [switch]$GraphMode,
        [switch]$PassThru
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # ── Compute health data ────────────────────────────────────────────────────
    Write-Step "Computing taxonomy health data"
    $HealthParams = @{ RepoRoot = $RepoRoot }
    if ($GraphMode) { $HealthParams['GraphMode'] = $true }
    $Health = Get-TaxonomyHealthData @HealthParams
    Write-OK "Scanned $($Health.SummaryCount) summaries against taxonomy v$($Health.TaxonomyVersion)"

    # ── 1. Summary Statistics ──────────────────────────────────────────────────
    $Stats = $Health.SummaryStats

    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan
    Write-Host " TAXONOMY HEALTH REPORT" -ForegroundColor White
    Write-Host " Taxonomy v$($Health.TaxonomyVersion) | $($Health.SummaryCount) summaries | $(Get-Date -Format 'yyyy-MM-dd HH:mm')" -ForegroundColor Gray
    Write-Host "$('═' * 72)" -ForegroundColor Cyan

    Write-Host "`n SUMMARY STATISTICS" -ForegroundColor White
    Write-Host " $('─' * 40)" -ForegroundColor DarkGray
    Write-Info "Total key points : $($Stats.TotalKeyPoints)"
    Write-Info "Avg key points/doc : $($Stats.AvgKeyPoints)"
    Write-Info "Total factual claims : $($Stats.TotalClaims)"
    Write-Info "Total unmapped : $($Stats.TotalUnmapped)"
    if ($Stats.MaxKeyPointsDoc) {
        Write-Info "Most points : $($Stats.MaxKeyPointsDoc.KeyPoints) ($($Stats.MaxKeyPointsDoc.DocId))"
    }
    if ($Stats.MinKeyPointsDoc) {
        Write-Info "Fewest points : $($Stats.MinKeyPointsDoc.KeyPoints) ($($Stats.MinKeyPointsDoc.DocId))"
    }

    # ── 2. Coverage Balance ────────────────────────────────────────────────────
    Write-Host "`n COVERAGE BALANCE (nodes per POV x category)" -ForegroundColor White
    Write-Host " $('─' * 40)" -ForegroundColor DarkGray

    $Categories = @('Goals/Values', 'Data/Facts', 'Methods/Arguments')
    $PovKeys    = @('accelerationist', 'safetyist', 'skeptic')

    # Header row: blank POV column followed by one right-aligned column per category
    $Header = ' {0,-16}' -f ''
    foreach ($Cat in $Categories) {
        $Header += '{0,14}' -f $Cat
    }
    Write-Host $Header -ForegroundColor Gray

    # Data rows: one per POV
    foreach ($Pov in $PovKeys) {
        $Row = ' {0,-16}' -f $Pov
        foreach ($Cat in $Categories) {
            $Row += '{0,14}' -f $Health.CoverageBalance[$Pov][$Cat]
        }
        Write-Host $Row -ForegroundColor White
    }

    # Check for imbalances within each category across POVs:
    #   - ratio > 2x between the smallest and largest count, or
    #   - at least one POV has no nodes at all while another does (the ratio is
    #     undefined there, so it is reported separately instead of being skipped).
    foreach ($Cat in $Categories) {
        $Counts = @($PovKeys | ForEach-Object { $Health.CoverageBalance[$_][$Cat] })
        $Min = ($Counts | Measure-Object -Minimum).Minimum
        $Max = ($Counts | Measure-Object -Maximum).Maximum
        if ($Min -gt 0 -and $Max / $Min -gt 2) {
            Write-Warn "$Cat : imbalance detected (range $Min-$Max, ratio $([math]::Round($Max/$Min,1))x)"
        }
        elseif ($Min -eq 0 -and $Max -gt 0) {
            Write-Warn "$Cat : imbalance detected (range $Min-$Max, at least one POV has no nodes)"
        }
    }

    # ── 3. Most-Cited Nodes ────────────────────────────────────────────────────
    Write-Host "`n MOST-CITED NODES (top 10)" -ForegroundColor White
    Write-Host " $('─' * 40)" -ForegroundColor DarkGray

    foreach ($Node in $Health.MostCited) {
        $Tag = "[$($Node.Id)] ($($Node.POV))"
        Write-Host " $($Node.Citations.ToString().PadLeft(3)) citations $Tag" -ForegroundColor Green
        Write-Host " $($Node.Label)" -ForegroundColor Gray
    }

    if ($Health.MostCited.Count -eq 0) {
        Write-Info "(no citations found)"
    }

    # ── 4. Orphan Nodes ───────────────────────────────────────────────────────
    Write-Host "`n ORPHAN NODES (zero citations)" -ForegroundColor White
    Write-Host " $('─' * 40)" -ForegroundColor DarkGray

    # Cross-cutting orphans are reported in their own section below.
    # @() forces an array so .Count is reliable when the filter yields zero or
    # one object (a bare pipeline result would be $null / a scalar, which is
    # fragile under Set-StrictMode).
    $PovOrphans = @($Health.OrphanNodes | Where-Object { $_.POV -ne 'cross-cutting' })
    if ($PovOrphans.Count -gt 0) {
        foreach ($Node in ($PovOrphans | Sort-Object POV, Id)) {
            Write-Host " [$($Node.Id)] ($($Node.POV)) $($Node.Label)" -ForegroundColor Yellow
        }
        Write-Warn "$($PovOrphans.Count) POV node(s) have zero citations"
    } else {
        Write-OK "All POV nodes have at least one citation"
    }

    # ── 5. High Stance Variance ───────────────────────────────────────────────
    Write-Host "`n HIGH STANCE VARIANCE (both aligned-family AND opposed-family)" -ForegroundColor White
    Write-Host " $('─' * 40)" -ForegroundColor DarkGray

    if ($Health.HighVarianceNodes.Count -gt 0) {
        foreach ($HV in $Health.HighVarianceNodes) {
            Write-Host " [$($HV.Id)] ($($HV.POV)) $($HV.Label)" -ForegroundColor Magenta
            # Render the stance distribution as "stance:count" pairs, sorted by stance name
            $DistStr = ($HV.Distribution.GetEnumerator() | Sort-Object Name |
                ForEach-Object { "$($_.Key):$($_.Value)" }) -join ' '
            Write-Host " Stances ($($HV.TotalStances) total): $DistStr" -ForegroundColor DarkGray
        }
    } else {
        Write-OK "No nodes with high stance variance"
    }

    # ── 6. Unmapped Concept Frequency ─────────────────────────────────────────
    # -Detailed shows everything; otherwise cap at 20. The cap is clamped to the
    # number of concepts available so the header never claims more rows than exist.
    $UnmappedTotal = $Health.UnmappedConcepts.Count
    $DisplayLimit  = if ($Detailed) { $UnmappedTotal } else { [math]::Min(20, $UnmappedTotal) }

    Write-Host "`n UNMAPPED CONCEPTS (top $DisplayLimit by frequency)" -ForegroundColor White
    Write-Host " $('─' * 40)" -ForegroundColor DarkGray

    $Shown = 0
    foreach ($UC in $Health.UnmappedConcepts) {
        if ($Shown -ge $DisplayLimit) { break }
        $FreqStr = $UC.Frequency.ToString().PadLeft(2)
        $PovTag  = if ($UC.SuggestedPov) { "[$($UC.SuggestedPov)]" } else { '' }
        # Frequency >= 3 is highlighted in red (same threshold as the strong-candidate warning text)
        $Color   = if ($UC.Frequency -ge 3) { 'Red' } else { 'Yellow' }
        Write-Host " ${FreqStr}x $PovTag $($UC.Concept)" -ForegroundColor $Color
        $Shown++
    }

    if ($Health.StrongCandidates.Count -gt 0) {
        Write-Warn "$($Health.StrongCandidates.Count) concept(s) at frequency >= 3 — strong candidates for new nodes"
    }

    if ($UnmappedTotal -eq 0) {
        Write-OK "No unmapped concepts found"
    }

    # ── 7. Cross-Cutting Reference Health ─────────────────────────────────────
    $CC = $Health.CrossCuttingHealth

    Write-Host "`n CROSS-CUTTING REFERENCE HEALTH" -ForegroundColor White
    Write-Host " $('─' * 40)" -ForegroundColor DarkGray

    Write-Info "Total cc nodes : $($CC.TotalNodes)"
    Write-Info "Referenced : $($CC.ReferencedCount)"
    Write-Info "Orphaned : $($CC.OrphanedCount)"

    if ($CC.OrphanedCount -gt 0) {
        foreach ($OrphanCC in ($CC.Orphaned | Sort-Object Id)) {
            Write-Host " [$($OrphanCC.Id)] $($OrphanCC.Label)" -ForegroundColor Yellow
        }
    }

    # ── 8. Graph Health (GraphMode only) ────────────────────────────────────────
    if ($GraphMode -and $Health.GraphHealth) {
        $GH = $Health.GraphHealth

        # Build an Id -> Label lookup so node ids can be shown with their labels
        $NodeLabelMap = @{}
        foreach ($NC in $Health.NodeCitations) { $NodeLabelMap[$NC.Id] = $NC.Label }

        Write-Host "`n GRAPH STRUCTURAL HEALTH" -ForegroundColor White
        Write-Host " $('─' * 40)" -ForegroundColor DarkGray

        # Echo chamber scores: red at ratio >= 10 (or infinite), yellow at >= 5, else green
        Write-Host "`n Echo Chamber Scores (intra-POV SUPPORTS:CONTRADICTS):" -ForegroundColor Cyan
        foreach ($PovKey in $PovKeys) {
            $EC = $GH.EchoChamberScores[$PovKey]
            $IsInfinite = $EC.Ratio -eq [double]::PositiveInfinity
            $RatioStr = if ($IsInfinite) { 'Inf (no contradicts)' } else { "$($EC.Ratio):1" }
            $Color = if ($EC.Ratio -ge 10 -or $IsInfinite) { 'Red' } elseif ($EC.Ratio -ge 5) { 'Yellow' } else { 'Green' }
            Write-Host " $($PovKey.PadRight(18)) $($EC.SamePovSupports) supports / $($EC.SamePovContradicts) contradicts = $RatioStr" -ForegroundColor $Color
        }

        # Cross-POV connectivity: green at >= 50%, yellow at >= 30%, else red
        Write-Host "`n Cross-POV Connectivity:" -ForegroundColor Cyan
        $CPov = $GH.CrossPovConnectivity
        $ConnColor = if ($CPov.Percentage -ge 50) { 'Green' } elseif ($CPov.Percentage -ge 30) { 'Yellow' } else { 'Red' }
        Write-Host " $($CPov.CrossPovEdges) / $($CPov.TotalEdges) edges cross POV boundaries ($($CPov.Percentage)%)" -ForegroundColor $ConnColor

        # Edge orphans (first 10 listed, remainder summarized)
        if ($GH.EdgeOrphanCount -gt 0) {
            Write-Host "`n Edge Orphans ($($GH.EdgeOrphanCount) nodes with zero edges):" -ForegroundColor Yellow
            foreach ($OId in $GH.EdgeOrphans | Select-Object -First 10) {
                # Fall back to the raw id when the node has no known label
                $OLabel = if ($NodeLabelMap.ContainsKey($OId)) { $NodeLabelMap[$OId] } else { $OId }
                Write-Host " $OId — $OLabel" -ForegroundColor DarkGray
            }
            if ($GH.EdgeOrphanCount -gt 10) {
                Write-Host " ... and $($GH.EdgeOrphanCount - 10) more" -ForegroundColor DarkGray
            }
        }
        else {
            Write-OK "No edge orphans — all nodes have at least one edge"
        }

        # Hub concentration: a Gini coefficient of 0.5 or more is flagged yellow
        $HC = $GH.HubConcentration
        $GiniColor = if ($HC.GiniCoefficient -ge 0.5) { 'Yellow' } else { 'Green' }
        Write-Host "`n Hub Concentration:" -ForegroundColor Cyan
        Write-Host " Gini coefficient : $($HC.GiniCoefficient)" -ForegroundColor $GiniColor
        Write-Info "Max degree: $($HC.MaxDegree) | Median degree: $($HC.MedianDegree)"

        # Missing edge type pairs (first 10 listed, remainder summarized)
        # NOTE(review): .Count is read from the MissingEdgeTypePairs container while the
        # listed items come from its SupportsNoContradicts member — confirm that Count is
        # the number of pairs and not, e.g., the number of keys on the container.
        if ($GH.MissingEdgeTypePairs.Count -gt 0) {
            Write-Host "`n Missing Edge Types ($($GH.MissingEdgeTypePairs.Count) cross-POV pairs with SUPPORTS but no CONTRADICTS):" -ForegroundColor Yellow
            foreach ($Pair in $GH.MissingEdgeTypePairs.SupportsNoContradicts | Select-Object -First 10) {
                Write-Host " $Pair" -ForegroundColor DarkGray
            }
            if ($GH.MissingEdgeTypePairs.Count -gt 10) {
                Write-Host " ... and $($GH.MissingEdgeTypePairs.Count - 10) more" -ForegroundColor DarkGray
            }
        }

        # Echo chamber nodes (first 10 listed, remainder summarized)
        if ($GH.EchoChamberNodeCount -gt 0) {
            Write-Host "`n Echo Chamber Nodes ($($GH.EchoChamberNodeCount) with 3+ SUPPORTS, 0 cross-POV CONTRADICTS):" -ForegroundColor Yellow
            foreach ($ECId in $GH.EchoChamberNodes | Select-Object -First 10) {
                $ECLabel = if ($NodeLabelMap.ContainsKey($ECId)) { $NodeLabelMap[$ECId] } else { $ECId }
                Write-Host " $ECId — $ECLabel" -ForegroundColor DarkGray
            }
            if ($GH.EchoChamberNodeCount -gt 10) {
                Write-Host " ... and $($GH.EchoChamberNodeCount - 10) more" -ForegroundColor DarkGray
            }
        }
        else {
            Write-OK "No echo chamber nodes detected"
        }
    }

    # ── 9. Per-Document Breakdown (Detailed only) ──────────────────────────────
    if ($Detailed) {
        Write-Host "`n PER-DOCUMENT BREAKDOWN" -ForegroundColor White
        Write-Host " $('─' * 40)" -ForegroundColor DarkGray

        # Documents with the most key points first
        $PerDoc = $Stats.PerDoc | Sort-Object { $_.KeyPoints } -Descending
        foreach ($Doc in $PerDoc) {
            $TitleStr = if ($Doc.Title) { $Doc.Title } else { $Doc.DocId }
            Write-Host " $($Doc.KeyPoints.ToString().PadLeft(3)) pts $($Doc.FactualClaims.ToString().PadLeft(2)) claims $($Doc.UnmappedCount.ToString().PadLeft(2)) unmapped $TitleStr" -ForegroundColor Gray
        }
    }

    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan

    # ── Optional JSON export ───────────────────────────────────────────────────
    if ($OutputFile) {
        # Project the health data onto a snake_case structure for serialization
        $ExportData = @{
            taxonomy_version    = $Health.TaxonomyVersion
            summary_count       = $Health.SummaryCount
            generated_at        = $Health.GeneratedAt
            node_citations      = @($Health.NodeCitations | ForEach-Object {
                @{ id = $_.Id; pov = $_.POV; category = $_.Category; label = $_.Label; citations = $_.Citations; doc_ids = $_.DocIds }
            })
            orphan_nodes        = @($Health.OrphanNodes | ForEach-Object { $_.Id })
            most_cited          = @($Health.MostCited | ForEach-Object {
                @{ id = $_.Id; label = $_.Label; citations = $_.Citations }
            })
            unmapped_concepts   = @($Health.UnmappedConcepts | ForEach-Object {
                @{ concept = $_.Concept; frequency = $_.Frequency; suggested_pov = $_.SuggestedPov; suggested_category = $_.SuggestedCategory; contributing_docs = $_.ContributingDocs }
            })
            strong_candidates   = @($Health.StrongCandidates | ForEach-Object { $_.Concept })
            high_variance_nodes = @($Health.HighVarianceNodes | ForEach-Object {
                @{ id = $_.Id; pov = $_.POV; label = $_.Label; distribution = $_.Distribution }
            })
            coverage_balance    = $Health.CoverageBalance
            cross_cutting_health = @{
                total      = $CC.TotalNodes
                referenced = $CC.ReferencedCount
                orphaned   = @($CC.Orphaned | ForEach-Object { $_.Id })
            }
            summary_stats       = @{
                total_docs       = $Stats.TotalDocs
                total_key_points = $Stats.TotalKeyPoints
                avg_key_points   = $Stats.AvgKeyPoints
                total_claims     = $Stats.TotalClaims
                total_unmapped   = $Stats.TotalUnmapped
            }
        }

        # Graph metrics are exported as-is whenever they were computed
        if ($Health.GraphHealth) {
            $ExportData['graph_health'] = $Health.GraphHealth
        }

        # Export is best-effort: a write failure is downgraded to a warning so the
        # on-screen report and -PassThru output are still delivered.
        try {
            $JsonOutput = $ExportData | ConvertTo-Json -Depth 20
            Set-Content -Path $OutputFile -Value $JsonOutput -Encoding UTF8
            Write-OK "Health data exported to: $OutputFile"
        }
        catch {
            Write-Warn "Failed to write $OutputFile — $($_.Exception.Message)"
        }
    }

    # ── PassThru ───────────────────────────────────────────────────────────────
    if ($PassThru) {
        return $Health
    }
}