Public/Invoke-HierarchyProposal.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Invoke-HierarchyProposal {
    <#
    .SYNOPSIS
        Proposes parent-child hierarchy for flat taxonomy nodes using embeddings, edges, and AI.
    .DESCRIPTION
        Processes each POV/category bucket: clusters nodes via embeddings, enriches clusters
        with edge and graph-attribute evidence, then sends each bucket to an AI model to
        propose parent nodes and child assignments. Outputs a proposal JSON file and a
        Markdown review document for human review.
    .PARAMETER POV
        Restricts processing to a single point of view. When omitted, all four POVs are processed.
    .PARAMETER Category
        Restricts processing to a single category. Cross-cutting nodes have no categories, so
        the cross-cutting bucket is skipped whenever a category is specified.
    .PARAMETER Model
        AI model identifier. The backend (gemini / claude / groq) is inferred from the model
        name prefix and falls back to gemini when no prefix matches.
    .PARAMETER ApiKey
        Explicit API key handed to Resolve-AIApiKey. Not required when -DryRun is used.
    .PARAMETER Temperature
        Sampling temperature passed to Invoke-AIApi.
    .PARAMETER MinSimilarity
        Minimum similarity passed to Get-EmbeddingClusters.
    .PARAMETER OutputDir
        Directory for the proposal JSON and review Markdown. Defaults to
        <DataRoot>/taxonomy/hierarchy-proposals and is created when missing.
    .PARAMETER DryRun
        Prints (up to the first 3000 characters of) the prompt built for the first bucket and
        returns without calling the AI or writing any files.
    .PARAMETER Force
        Declared for interface compatibility; not consulted by this function.
    .OUTPUTS
        PSCustomObject with ProposalFile, ReviewFile, BucketCount and TotalParents.
        Nothing is returned on -DryRun, when no API key is found, or when no proposal
        could be generated.
    .EXAMPLE
        Invoke-HierarchyProposal
        Invoke-HierarchyProposal -POV accelerationist -Category 'Methods/Arguments'
        Invoke-HierarchyProposal -DryRun
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param(
        [ValidateSet('accelerationist', 'safetyist', 'skeptic', 'cross-cutting')]
        [string]$POV = '',

        [ValidateSet('Goals/Values', 'Data/Facts', 'Methods/Arguments')]
        [string]$Category = '',

        [ValidateScript({ Test-AIModelId $_ })]
        [ArgumentCompleter({ Get-AIModelCompletion @args })]
        [string]$Model = 'gemini-2.5-flash',

        [string]$ApiKey = '',

        [ValidateRange(0.0, 1.0)]
        [double]$Temperature = 0.3,

        [ValidateRange(0.20, 0.80)]
        [double]$MinSimilarity = 0.40,

        [string]$OutputDir = '',

        [switch]$DryRun,
        [switch]$Force
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # Reads a property that may be absent from an AI-generated object. Under strict mode a
    # direct `$Obj.missing` reference throws, which would abort the whole run because the
    # model left out an optional field. Emits nothing when the property is absent or null,
    # so `@(Get-ProposalValue ...)` yields an empty array in that case.
    function Get-ProposalValue {
        param($InputObject, [string]$Name)
        if ($null -eq $InputObject) { return }
        $Prop = $InputObject.PSObject.Properties[$Name]
        if ($null -ne $Prop -and $null -ne $Prop.Value) { return $Prop.Value }
    }

    # ── Resolve paths ────────────────────────────────────────────────────────
    $TaxDir = Get-TaxonomyDir

    if ([string]::IsNullOrWhiteSpace($OutputDir)) {
        $OutputDir = Join-Path (Get-DataRoot) 'taxonomy' 'hierarchy-proposals'
    }
    if (-not (Test-Path $OutputDir)) {
        $null = New-Item -Path $OutputDir -ItemType Directory -Force
    }

    # ── Resolve API key ──────────────────────────────────────────────────────
    $Backend = if ($Model -match '^gemini') { 'gemini' }
               elseif ($Model -match '^claude') { 'claude' }
               elseif ($Model -match '^groq')   { 'groq' }
               else { 'gemini' }

    $ResolvedKey = Resolve-AIApiKey -ExplicitKey $ApiKey -Backend $Backend
    # A dry run never contacts the AI backend, so a missing key must not block it.
    if (-not $ResolvedKey -and -not $DryRun) {
        Write-Fail "No API key found for backend '$Backend'. Set the appropriate environment variable."
        return
    }

    # ── Load taxonomy files ──────────────────────────────────────────────────
    Write-Step 'Loading taxonomy data'

    $PovFileMap = @{
        accelerationist = 'accelerationist.json'
        safetyist       = 'safetyist.json'
        skeptic         = 'skeptic.json'
        'cross-cutting' = 'cross-cutting.json'
    }

    $AllTaxData = @{}
    foreach ($PovKey in $PovFileMap.Keys) {
        $FilePath = Join-Path $TaxDir $PovFileMap[$PovKey]
        if (Test-Path $FilePath) {
            $AllTaxData[$PovKey] = Get-Content -Raw -Path $FilePath | ConvertFrom-Json
            Write-OK "$PovKey`: $($AllTaxData[$PovKey].nodes.Count) nodes"
        }
    }

    # ── Load embeddings ──────────────────────────────────────────────────────
    Write-Step 'Loading embeddings'
    $Embeddings = @{}
    $EmbeddingsPath = Join-Path $TaxDir 'embeddings.json'
    if (Test-Path $EmbeddingsPath) {
        try {
            $EmbJson = Get-Content -Raw -Path $EmbeddingsPath | ConvertFrom-Json
            foreach ($Prop in $EmbJson.nodes.PSObject.Properties) {
                $Embeddings[$Prop.Name] = [double[]]@($Prop.Value.vector)
            }
            Write-OK "Loaded embeddings for $($Embeddings.Count) nodes"
        }
        catch {
            Write-Warn "Could not load embeddings: $($_.Exception.Message)"
        }
    }
    else {
        Write-Warn 'embeddings.json not found — clustering will be skipped'
    }

    # ── Load edges ───────────────────────────────────────────────────────────
    Write-Step 'Loading edges'
    $EdgesPath = Join-Path $TaxDir 'edges.json'
    $AllEdges  = @()
    if (Test-Path $EdgesPath) {
        try {
            $EdgesData = Get-Content -Raw -Path $EdgesPath | ConvertFrom-Json
            # Only approved edges count as evidence.
            $AllEdges  = @($EdgesData.edges | Where-Object { $_.status -eq 'approved' })
            Write-OK "Loaded $($AllEdges.Count) approved edges"
        }
        catch {
            Write-Warn "Could not load edges: $($_.Exception.Message)"
        }
    }

    # ── Build processing buckets ─────────────────────────────────────────────
    Write-Step 'Building processing buckets'

    $Buckets = [System.Collections.Generic.List[PSObject]]::new()

    $PovList = if ($POV) { @($POV) } else { @('accelerationist', 'safetyist', 'skeptic', 'cross-cutting') }

    foreach ($PovKey in $PovList) {
        if (-not $AllTaxData.ContainsKey($PovKey)) { continue }
        $Nodes = @($AllTaxData[$PovKey].nodes)

        if ($PovKey -eq 'cross-cutting') {
            # Cross-cutting has no categories — one bucket
            if (-not $Category) {
                $Buckets.Add([PSCustomObject]@{
                    POV      = $PovKey
                    Category = $null
                    Nodes    = $Nodes
                })
            }
        }
        else {
            $Categories = if ($Category) { @($Category) } else { @('Goals/Values', 'Data/Facts', 'Methods/Arguments') }
            foreach ($Cat in $Categories) {
                $CatNodes = @($Nodes | Where-Object { $_.category -eq $Cat })
                # A hierarchy needs at least two nodes to relate.
                if ($CatNodes.Count -ge 2) {
                    $Buckets.Add([PSCustomObject]@{
                        POV      = $PovKey
                        Category = $Cat
                        Nodes    = $CatNodes
                    })
                }
            }
        }
    }

    Write-OK "$($Buckets.Count) buckets to process"
    foreach ($B in $Buckets) {
        $CatLabel = if ($B.Category) { $B.Category } else { '(all)' }
        Write-Info "$($B.POV) / $CatLabel`: $($B.Nodes.Count) nodes"
    }

    # ── Load prompts ─────────────────────────────────────────────────────────
    $SystemPrompt = Get-Prompt -Name 'hierarchy-proposal'
    $SchemaPrompt = Get-Prompt -Name 'hierarchy-proposal-schema'

    # ── Process each bucket ──────────────────────────────────────────────────
    $AllProposals = [System.Collections.Generic.List[PSObject]]::new()
    $BucketNum    = 0

    foreach ($Bucket in $Buckets) {
        $BucketNum++
        $CatLabel = if ($Bucket.Category) { $Bucket.Category } else { '(all)' }
        Write-Step "Bucket $BucketNum/$($Buckets.Count): $($Bucket.POV) / $CatLabel ($($Bucket.Nodes.Count) nodes)"

        # ── Phase 1.1: Cluster ───────────────────────────────────────────────
        $NodeIds = @($Bucket.Nodes | ForEach-Object { $_.id })
        # @() guarantees an array: with zero or one match the pipeline yields $null or a
        # scalar, and reading .Count on those is unreliable under strict mode.
        $HasEmbeddings = @($NodeIds | Where-Object { $Embeddings.ContainsKey($_) }).Count

        $Clusters = @()
        if ($HasEmbeddings -ge 2) {
            # Scale MaxClusters by bucket size
            $MaxClusters = if ($Bucket.Nodes.Count -lt 10) { 2 }
                           elseif ($Bucket.Nodes.Count -lt 20) { 4 }
                           elseif ($Bucket.Nodes.Count -lt 40) { 6 }
                           else { 8 }

            $Clusters = @(Get-EmbeddingClusters `
                -NodeIds       $NodeIds `
                -Embeddings    $Embeddings `
                -MaxClusters   $MaxClusters `
                -MinSimilarity $MinSimilarity)

            Write-OK "Clustering produced $($Clusters.Count) clusters"
        }
        else {
            Write-Warn "Only $HasEmbeddings nodes have embeddings — skipping clustering"
            # Fallback: each node is its own cluster
            $Clusters = @($NodeIds | ForEach-Object { ,@($_) })
        }

        # ── Phase 1.2: Enrich with edge evidence ────────────────────────────
        $ClusterData = [System.Collections.Generic.List[PSObject]]::new()

        foreach ($ClusterIds in $Clusters) {
            # Normalise so a one-member cluster is still an array with a usable .Count.
            $ClusterIds = @($ClusterIds)

            $IdSet = [System.Collections.Generic.HashSet[string]]::new(
                [string[]]$ClusterIds,
                [System.StringComparer]::OrdinalIgnoreCase
            )

            # Count intra-cluster edges by type
            $IntraEdges = @{}
            foreach ($E in $AllEdges) {
                if ($IdSet.Contains($E.source) -and $IdSet.Contains($E.target)) {
                    $Type = $E.type
                    if (-not $IntraEdges.ContainsKey($Type)) { $IntraEdges[$Type] = 0 }
                    $IntraEdges[$Type]++
                }
            }

            # Cohesion score: (supportive edges) / (possible ordered pairs)
            $SupportiveCount = ($IntraEdges['SUPPORTS'] ?? 0) +
                               ($IntraEdges['ASSUMES'] ?? 0) +
                               ($IntraEdges['SUPPORTED_BY'] ?? 0)
            $PossiblePairs   = $ClusterIds.Count * ($ClusterIds.Count - 1)
            $Cohesion        = if ($PossiblePairs -gt 0) {
                [Math]::Round($SupportiveCount / $PossiblePairs, 2)
            } else { 0.0 }

            # ── Phase 1.3: Enrich with graph attribute patterns ──────────────
            $SharedEpistemicType   = $null
            $SharedRhetorical      = @()
            $AttributeCoherence    = 0.0

            $ClusterNodes = @($Bucket.Nodes | Where-Object { $IdSet.Contains($_.id) })
            $NodesWithGA  = @($ClusterNodes | Where-Object {
                $_.PSObject.Properties['graph_attributes'] -and $null -ne $_.graph_attributes
            })

            if ($NodesWithGA.Count -ge 2) {
                # Check shared epistemic_type
                $EpTypes = @($NodesWithGA | ForEach-Object {
                    if ($_.graph_attributes.PSObject.Properties['epistemic_type']) {
                        $_.graph_attributes.epistemic_type
                    }
                } | Where-Object { $_ })
                # @() matters here: a single group would otherwise be a scalar GroupInfo whose
                # own .Count (members in the group) shadows the intended "number of groups".
                $TypeGroups = @($EpTypes | Group-Object | Sort-Object Count -Descending)
                if ($TypeGroups.Count -gt 0 -and $TypeGroups[0].Count -ge ($NodesWithGA.Count * 0.5)) {
                    $SharedEpistemicType = $TypeGroups[0].Name
                }

                # Check shared rhetorical strategies
                $AllStrategies = @($NodesWithGA | ForEach-Object {
                    if ($_.graph_attributes.PSObject.Properties['rhetorical_strategy']) {
                        $S = $_.graph_attributes.rhetorical_strategy
                        if ($S) { $S -split ',\s*' }
                    }
                } | Where-Object { $_ })
                $StratGroups = @($AllStrategies | Group-Object | Sort-Object Count -Descending)
                $SharedRhetorical = @($StratGroups |
                    Where-Object { $_.Count -ge ($NodesWithGA.Count * 0.4) } |
                    ForEach-Object { $_.Name })

                # Attribute coherence: fraction of nodes whose epistemic_type matches the
                # dominant one. ($Matches is avoided as a name: it is the automatic variable
                # populated by the -match operator.)
                $MatchCount = 0
                $Total      = 0
                foreach ($N in $NodesWithGA) {
                    $Total++
                    if ($SharedEpistemicType -and
                        $N.graph_attributes.PSObject.Properties['epistemic_type'] -and
                        $N.graph_attributes.epistemic_type -eq $SharedEpistemicType) {
                        $MatchCount++
                    }
                }
                $AttributeCoherence = if ($Total -gt 0) {
                    [Math]::Round($MatchCount / $Total, 2)
                } else { 0.0 }
            }

            $ClusterData.Add([PSCustomObject]@{
                cluster_id              = $ClusterData.Count
                node_ids                = @($ClusterIds)
                size                    = $ClusterIds.Count
                intra_edges             = $IntraEdges
                cohesion_score          = $Cohesion
                shared_epistemic_type   = $SharedEpistemicType
                shared_rhetorical       = $SharedRhetorical
                attribute_coherence     = $AttributeCoherence
            })
        }

        # ── Build AI prompt ──────────────────────────────────────────────────
        # Node context: id, label, description, graph_attributes summary
        $NodeContext = @(foreach ($Node in $Bucket.Nodes) {
            $Entry = [ordered]@{
                id          = $Node.id
                label       = $Node.label
                description = $Node.description
            }
            if ($Node.PSObject.Properties['graph_attributes'] -and $null -ne $Node.graph_attributes) {
                $GA = $Node.graph_attributes
                foreach ($AttrName in @('epistemic_type', 'rhetorical_strategy',
                                        'intellectual_lineage', 'audience', 'emotional_register')) {
                    if ($GA.PSObject.Properties[$AttrName] -and $null -ne $GA.$AttrName) {
                        $Entry[$AttrName] = $GA.$AttrName
                    }
                }
            }
            if ($Bucket.POV -eq 'cross-cutting' -and
                $Node.PSObject.Properties['interpretations']) {
                $Entry['interpretations'] = $Node.interpretations
            }
            $Entry
        })

        # NOTE(review): shared_rhetorical is computed above but not forwarded to the model —
        # confirm whether that omission is intentional.
        $ClusterContext = @(foreach ($C in $ClusterData) {
            [ordered]@{
                cluster_id            = $C.cluster_id
                node_ids              = $C.node_ids
                size                  = $C.size
                cohesion_score        = $C.cohesion_score
                intra_edges           = $C.intra_edges
                shared_epistemic_type = $C.shared_epistemic_type
                attribute_coherence   = $C.attribute_coherence
            }
        })

        # -InputObject (rather than piping) keeps a one-element collection serialised as a
        # JSON array, so the prompt has the same shape regardless of bucket size.
        $NodeJson    = ConvertTo-Json -InputObject $NodeContext    -Depth 10 -Compress:$false
        $ClusterJson = ConvertTo-Json -InputObject $ClusterContext -Depth 10 -Compress:$false

        $CatLine = if ($Bucket.Category) { "Category: $($Bucket.Category)" } else { 'Category: (none — cross-cutting)' }

        $UserPrompt = @"
POV: $($Bucket.POV)
$CatLine
Node count: $($Bucket.Nodes.Count)

--- NODES ---
$NodeJson

--- PRE-COMPUTED CLUSTERS ---
$ClusterJson

$SchemaPrompt
"@


        $FullPrompt = "$SystemPrompt`n`n$UserPrompt"

        if ($DryRun) {
            # Only the first bucket is previewed; the function ends here.
            Write-Info 'DryRun — showing prompt for first bucket only'
            Write-Host ''
            $PreviewLength = [Math]::Min(3000, $FullPrompt.Length)
            Write-Host ($FullPrompt.Substring(0, $PreviewLength))
            if ($PreviewLength -lt $FullPrompt.Length) {
                Write-Host "`n... (truncated, total $($FullPrompt.Length) chars)"
            }
            else {
                Write-Host "`n(total $($FullPrompt.Length) chars)"
            }
            return
        }

        # ── Call AI ──────────────────────────────────────────────────────────
        Write-Info "Calling $Model ..."
        $Stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
        try {
            $Result = Invoke-AIApi `
                -Prompt      $FullPrompt `
                -Model       $Model `
                -ApiKey      $ResolvedKey `
                -Temperature $Temperature `
                -MaxTokens   16384 `
                -JsonMode `
                -TimeoutSec  180
        }
        catch {
            # One failed bucket must not discard the others.
            Write-Fail "API call failed for $($Bucket.POV)/$CatLabel`: $_"
            continue
        }
        $Stopwatch.Stop()
        Write-OK "Response in $([Math]::Round($Stopwatch.Elapsed.TotalSeconds, 1))s"

        # ── Parse response ───────────────────────────────────────────────────
        # Strip a Markdown code fence if the model wrapped its JSON in one.
        $ResponseText = $Result.Text -replace '^\s*```json\s*', '' -replace '\s*```\s*$', ''
        $Proposal = $null
        try {
            $Proposal = $ResponseText | ConvertFrom-Json -Depth 20
        }
        catch {
            Write-Warn 'JSON parse failed, attempting repair...'
            $Repaired = Repair-TruncatedJson -Text $ResponseText
            try {
                $Proposal = $Repaired | ConvertFrom-Json -Depth 20
            }
            catch {
                Write-Fail "Could not parse response for $($Bucket.POV)/$CatLabel"
                continue
            }
        }

        # An empty response, a bare scalar or a top-level array parses without error but is
        # not a usable proposal object.
        if ($Proposal -isnot [System.Management.Automation.PSCustomObject]) {
            Write-Fail "Could not parse response for $($Bucket.POV)/$CatLabel"
            continue
        }

        # ── Validate proposal ────────────────────────────────────────────────
        $AssignedIds  = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
        $ParentCount  = 0
        $ChildCount   = 0
        $OutlierCount = 0

        foreach ($Parent in @(Get-ProposalValue $Proposal 'parents')) {
            $ParentCount++

            # Track promoted nodes
            $PromotedFrom = Get-ProposalValue $Parent 'promoted_from'
            if ($PromotedFrom) {
                [void]$AssignedIds.Add([string]$PromotedFrom)
            }

            foreach ($Child in @(Get-ProposalValue $Parent 'children')) {
                $ChildId = Get-ProposalValue $Child 'node_id'
                if (-not $ChildId) {
                    Write-Warn 'Child entry without node_id ignored'
                    continue
                }
                if ($AssignedIds.Contains([string]$ChildId)) {
                    Write-Warn "Duplicate assignment: $ChildId"
                }
                [void]$AssignedIds.Add([string]$ChildId)
                $ChildCount++
            }
        }

        foreach ($Outlier in @(Get-ProposalValue $Proposal 'outliers')) {
            $OutlierId = Get-ProposalValue $Outlier 'node_id'
            if (-not $OutlierId) {
                Write-Warn 'Outlier entry without node_id ignored'
                continue
            }
            [void]$AssignedIds.Add([string]$OutlierId)
            $OutlierCount++
        }

        # Check coverage
        $Missing = @($NodeIds | Where-Object { -not $AssignedIds.Contains($_) })
        if ($Missing.Count -gt 0) {
            # Show at most the first five ids to keep the warning readable.
            Write-Warn "$($Missing.Count) nodes not assigned: $($Missing[0..([Math]::Min(4, $Missing.Count - 1))] -join ', ')"
        }

        Write-OK "Proposed $ParentCount parents, $ChildCount children, $OutlierCount outliers"

        # Attach metadata. pov/category record the bucket this proposal was generated for,
        # so the review document can be labelled even if the model omitted those fields.
        $Proposal | Add-Member -NotePropertyName '_metadata' -NotePropertyValue ([ordered]@{
            generated_at  = (Get-Date).ToString('o')
            model         = $Model
            temperature   = $Temperature
            min_similarity = $MinSimilarity
            pov           = $Bucket.POV
            category      = $Bucket.Category
            node_count    = $Bucket.Nodes.Count
            cluster_count = $ClusterData.Count
            missing_nodes = $Missing
        }) -Force

        $AllProposals.Add($Proposal)
    }

    # ── Write output ─────────────────────────────────────────────────────────
    if ($AllProposals.Count -eq 0) {
        Write-Warn 'No proposals generated'
        return
    }

    $Timestamp  = (Get-Date).ToString('yyyy-MM-dd-HHmmss')
    $OutputFile = Join-Path $OutputDir "hierarchy-proposal-$Timestamp.json"

    $OutputObj = [ordered]@{
        generated_at = (Get-Date).ToString('o')
        model        = $Model
        buckets      = $AllProposals.ToArray()
    }

    $Json = $OutputObj | ConvertTo-Json -Depth 30
    if ($PSCmdlet.ShouldProcess($OutputFile, 'Write hierarchy proposal')) {
        Set-Content -Path $OutputFile -Value $Json -Encoding UTF8
        Write-Step 'Done'
        Write-OK "Proposal saved to $OutputFile"
    }

    # ── Generate review Markdown ─────────────────────────────────────────────
    # Build the id → label index once instead of rescanning every taxonomy file per row.
    # The default hashtable is case-insensitive, matching the -eq comparison it replaces.
    $LabelById = @{}
    foreach ($PovKey in $PovFileMap.Keys) {
        if (-not $AllTaxData.ContainsKey($PovKey)) { continue }
        foreach ($TaxNode in @($AllTaxData[$PovKey].nodes)) {
            if ($null -eq $TaxNode -or $null -eq $TaxNode.id) { continue }
            $Key = [string]$TaxNode.id
            if (-not $LabelById.ContainsKey($Key)) { $LabelById[$Key] = $TaxNode.label }
        }
    }

    $ReviewFile = Join-Path $OutputDir "hierarchy-review-$Timestamp.md"
    $Md = [System.Text.StringBuilder]::new()
    [void]$Md.AppendLine("# Hierarchy Proposal Review — $Timestamp")
    [void]$Md.AppendLine('')
    [void]$Md.AppendLine("**Model:** $Model | **Generated:** $(Get-Date -Format 'yyyy-MM-dd HH:mm')")
    [void]$Md.AppendLine('')

    foreach ($Proposal in $AllProposals) {
        # Prefer what the model reported; fall back to the bucket recorded in _metadata.
        $PovLabel = Get-ProposalValue $Proposal 'pov'
        if (-not $PovLabel) { $PovLabel = $Proposal._metadata['pov'] }

        $CatLabel = Get-ProposalValue $Proposal 'category'
        if (-not $CatLabel) { $CatLabel = $Proposal._metadata['category'] }
        if (-not $CatLabel) { $CatLabel = '(cross-cutting)' }

        [void]$Md.AppendLine("---")
        [void]$Md.AppendLine('')
        [void]$Md.AppendLine("## $PovLabel / $CatLabel")
        [void]$Md.AppendLine('')

        $ParentIdx = 0
        foreach ($Parent in @(Get-ProposalValue $Proposal 'parents')) {
            $ParentIdx++
            $PromotedFrom = Get-ProposalValue $Parent 'promoted_from'

            $ParentLabel = if ($PromotedFrom) {
                if ($LabelById.ContainsKey([string]$PromotedFrom)) {
                    "$($LabelById[[string]$PromotedFrom]) ($PromotedFrom)"
                }
                else { $PromotedFrom }
            }
            else { Get-ProposalValue $Parent 'label' }

            $StatusTag = if ($PromotedFrom) { 'PROMOTED' } else { 'NEW' }

            [void]$Md.AppendLine("### Parent $ParentIdx`: $ParentLabel [$StatusTag]")
            [void]$Md.AppendLine('')
            $ParentDescription = Get-ProposalValue $Parent 'description'
            if ($ParentDescription) {
                [void]$Md.AppendLine("> $ParentDescription")
                [void]$Md.AppendLine('')
            }

            [void]$Md.AppendLine('| Child ID | Label | Relationship | Rationale |')
            [void]$Md.AppendLine('|----------|-------|-------------|-----------|')
            foreach ($Child in @(Get-ProposalValue $Parent 'children')) {
                $ChildId = Get-ProposalValue $Child 'node_id'
                # Fall back to the id itself when the node is not in the taxonomy.
                $ChildLabel = if ($ChildId -and $LabelById.ContainsKey([string]$ChildId)) {
                    $LabelById[[string]$ChildId]
                } else { $ChildId }
                $Relationship = Get-ProposalValue $Child 'relationship'
                # Pipes and newlines would break the Markdown table row.
                $Rationale = ((Get-ProposalValue $Child 'rationale') -replace '\|', '/') -replace '\n', ' '
                [void]$Md.AppendLine("| $ChildId | $ChildLabel | $Relationship | $Rationale |")
            }
            [void]$Md.AppendLine('')
            [void]$Md.AppendLine('**Verdict:** [ ] Accept [ ] Modify [ ] Reject')
            [void]$Md.AppendLine('')
        }

        $Outliers = @(Get-ProposalValue $Proposal 'outliers')
        if ($Outliers.Count -gt 0) {
            [void]$Md.AppendLine('### Outliers (no parent assigned)')
            [void]$Md.AppendLine('')
            [void]$Md.AppendLine('| Node ID | Label | Reason |')
            [void]$Md.AppendLine('|---------|-------|--------|')
            foreach ($Outlier in $Outliers) {
                $OutlierId = Get-ProposalValue $Outlier 'node_id'
                $OLabel = if ($OutlierId -and $LabelById.ContainsKey([string]$OutlierId)) {
                    $LabelById[[string]$OutlierId]
                } else { $OutlierId }
                $Reason = ((Get-ProposalValue $Outlier 'reason') -replace '\|', '/') -replace '\n', ' '
                [void]$Md.AppendLine("| $OutlierId | $OLabel | $Reason |")
            }
            [void]$Md.AppendLine('')
        }

        $MissingNodes = @($Proposal._metadata['missing_nodes'])
        if ($MissingNodes.Count -gt 0) {
            # Wrap each id in single backticks so it renders as inline code.
            $MissingList = ($MissingNodes | ForEach-Object { "``$_``" }) -join ', '
            [void]$Md.AppendLine("**Warning:** $($MissingNodes.Count) nodes not assigned by AI: $MissingList")
            [void]$Md.AppendLine('')
        }
    }

    if ($PSCmdlet.ShouldProcess($ReviewFile, 'Write review Markdown')) {
        Set-Content -Path $ReviewFile -Value $Md.ToString() -Encoding UTF8
        Write-OK "Review document saved to $ReviewFile"
    }

    # Under -WhatIf the paths below are the intended targets; the files are not written.
    return [PSCustomObject]@{
        ProposalFile = $OutputFile
        ReviewFile   = $ReviewFile
        BucketCount  = $AllProposals.Count
        TotalParents = ($AllProposals | ForEach-Object {
            @(Get-ProposalValue $_ 'parents').Count
        } | Measure-Object -Sum).Sum
    }
}