Public/Invoke-TaxonomyProposal.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Invoke-TaxonomyProposal {
    <#
    .SYNOPSIS
        Uses AI to generate structured taxonomy improvement proposals based on health data.
    .DESCRIPTION
        Feeds taxonomy health metrics (orphan nodes, unmapped concepts, stance variance,
        coverage imbalances) to an AI model which returns structured NEW/SPLIT/MERGE/RELABEL
        proposals in JSON format.

        Proposals are written to taxonomy/proposals/proposal-{timestamp}.json.
    .PARAMETER Model
        AI model to use. Defaults to env default or 'gemini-3.1-flash-lite-preview'.
    .PARAMETER ApiKey
        AI API key. If omitted, resolved via backend-specific env var or AI_API_KEY.
    .PARAMETER Temperature
        Sampling temperature (0.0-1.0). Default: 0.3 (slightly creative).
    .PARAMETER RepoRoot
        Path to the repository root. Defaults to the module-resolved repo root.
    .PARAMETER DryRun
        Build and display the prompt preview, but do NOT call the API or write files.
    .PARAMETER OutputFile
        Path for the proposal JSON. Defaults to taxonomy/proposals/proposal-{timestamp}.json.
    .PARAMETER HealthData
        Pre-computed health data hashtable from Get-TaxonomyHealth -PassThru.
        If omitted, health data is computed fresh.
    .EXAMPLE
        Invoke-TaxonomyProposal -DryRun
    .EXAMPLE
        Invoke-TaxonomyProposal -Model 'gemini-2.5-flash'
    .EXAMPLE
        $h = Get-TaxonomyHealth -PassThru
        Invoke-TaxonomyProposal -HealthData $h
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param(
        [ValidateScript({ Test-AIModelId $_ })]
        [ArgumentCompleter({ param($cmd, $param, $word) $script:ValidModelIds | Where-Object { $_ -like "$word*" } })]
        [string]$Model       = 'gemini-3.1-flash-lite-preview',

        [string]$ApiKey      = '',

        [ValidateRange(0.0, 1.0)]
        [double]$Temperature = 0.3,

        [string]$RepoRoot    = $script:RepoRoot,
        [switch]$DryRun,
        [string]$OutputFile  = '',
        [hashtable]$HealthData = $null
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # ── 1. Validate environment ────────────────────────────────────────────────
    Write-Step "Validating environment"

    if (-not (Test-Path $RepoRoot)) {
        Write-Fail "Repo root not found: $RepoRoot"
        throw "Repo root not found: $RepoRoot"
    }

    if (-not $DryRun) {
        $Backend = if     ($Model -match '^gemini') { 'gemini' }
                   elseif ($Model -match '^claude') { 'claude' }
                   elseif ($Model -match '^groq')   { 'groq'   }
                   else                             { 'gemini'  }
        $ResolvedKey = Resolve-AIApiKey -ExplicitKey $ApiKey -Backend $Backend
        if ([string]::IsNullOrWhiteSpace($ResolvedKey)) {
            $EnvHint = switch ($Backend) {
                'gemini' { 'GEMINI_API_KEY' }
                'claude' { 'ANTHROPIC_API_KEY' }
                'groq'   { 'GROQ_API_KEY' }
                default  { 'AI_API_KEY' }
            }
            Write-Fail "No API key found for $Backend backend."
            Write-Info "Set $EnvHint or AI_API_KEY, or pass -ApiKey."
            throw "No API key found for $Backend backend."
        }
        $ApiKey = $ResolvedKey
    }

    Write-OK "Model : $Model"
    Write-OK "Temperature : $Temperature"
    if ($DryRun) { Write-Warn "DRY RUN — no API call, no file writes" }

    # ── 2. Compute or accept health data ───────────────────────────────────────
    Write-Step "Preparing health data"

    if ($HealthData) {
        Write-OK "Using pre-computed health data ($($HealthData.SummaryCount) summaries)"
    } else {
        $HealthData = Get-TaxonomyHealthData -RepoRoot $RepoRoot
        Write-OK "Computed fresh health data ($($HealthData.SummaryCount) summaries)"
    }

    # ── 3. Build compact data representations ──────────────────────────────────
    Write-Step "Building prompt context"

    # Taxonomy nodes (compact: id, pov, category, label, description only)
    $CompactNodes = @()
    foreach ($PovKey in @('accelerationist', 'safetyist', 'skeptic', 'cross-cutting')) {
        $Entry = $script:TaxonomyData[$PovKey]
        if (-not $Entry) { continue }
        foreach ($Node in $Entry.nodes) {
            $CompactNodes += @{
                id          = $Node.id
                pov         = $PovKey
                category    = if ($PovKey -eq 'cross-cutting') { 'Cross-Cutting' } else { $Node.category }
                label       = $Node.label
                description = $Node.description
            }
        }
    }
    $TaxonomyNodesJson = $CompactNodes | ConvertTo-Json -Depth 10 -Compress

    # Unmapped concepts (freq >= 2, or top 30)
    $UnmappedForPrompt = @($HealthData.UnmappedConcepts |
        Where-Object { $_.Frequency -ge 2 } |
        ForEach-Object {
            @{
                concept           = $_.Concept
                frequency         = $_.Frequency
                suggested_pov     = $_.SuggestedPov
                suggested_category = $_.SuggestedCategory
                contributing_docs = $_.ContributingDocs
                reasons           = $_.Reasons
            }
        })
    if ($UnmappedForPrompt.Count -eq 0) {
        $UnmappedForPrompt = @($HealthData.UnmappedConcepts | Select-Object -First 30 |
            ForEach-Object {
                @{
                    concept           = $_.Concept
                    frequency         = $_.Frequency
                    suggested_pov     = $_.SuggestedPov
                    suggested_category = $_.SuggestedCategory
                    contributing_docs = $_.ContributingDocs
                    reasons           = $_.Reasons
                }
            })
    }
    $UnmappedJson = $UnmappedForPrompt | ConvertTo-Json -Depth 10 -Compress

    # Citation stats: orphans, most-cited, high-variance
    $CitationStats = @{
        orphan_nodes = @($HealthData.OrphanNodes | ForEach-Object {
            @{ id = $_.Id; pov = $_.POV; category = $_.Category; label = $_.Label }
        })
        most_cited = @($HealthData.MostCited | ForEach-Object {
            @{ id = $_.Id; pov = $_.POV; label = $_.Label; citations = $_.Citations; doc_count = $_.DocIds.Count }
        })
        high_variance = @($HealthData.HighVarianceNodes | ForEach-Object {
            @{ id = $_.Id; pov = $_.POV; label = $_.Label; total_stances = $_.TotalStances; distribution = $_.Distribution }
        })
    }
    $CitationStatsJson = $CitationStats | ConvertTo-Json -Depth 10 -Compress

    # Coverage balance
    $CoverageBalanceJson = $HealthData.CoverageBalance | ConvertTo-Json -Depth 10 -Compress

    Write-OK "Compact nodes : $($CompactNodes.Count)"
    Write-OK "Unmapped for prompt : $($UnmappedForPrompt.Count)"
    Write-OK "Orphan nodes : $($CitationStats.orphan_nodes.Count)"
    Write-OK "High-variance nodes : $($CitationStats.high_variance.Count)"

    # ── 4. Load prompt template ────────────────────────────────────────────────
    $SystemPrompt = Get-Prompt -Name 'taxonomy-proposal' -Replacements @{
        TAXONOMY_VERSION = $HealthData.TaxonomyVersion
        SUMMARY_COUNT    = $HealthData.SummaryCount.ToString()
    }

    # ── 5. Assemble full prompt ────────────────────────────────────────────────
    $FullPrompt = @"
$SystemPrompt

=== HEALTH DATA ===

--- EXISTING TAXONOMY NODES ---
$TaxonomyNodesJson

--- UNMAPPED CONCEPTS (sorted by frequency) ---
$UnmappedJson

--- CITATION STATISTICS (orphans, most-cited, high-variance) ---
$CitationStatsJson

--- COVERAGE BALANCE (nodes per POV per category) ---
$CoverageBalanceJson
"@


    $PromptLength = $FullPrompt.Length
    $EstTokens    = [int]($PromptLength / 4)
    Write-OK "Prompt assembled: $PromptLength chars (~$EstTokens tokens est.)"

    # ── 6. DRY RUN — print and return ─────────────────────────────────────────
    if ($DryRun) {
        Write-Host "`n$('─' * 72)" -ForegroundColor DarkGray
        Write-Host " DRY RUN: PROMPT PREVIEW" -ForegroundColor Yellow
        Write-Host "$('─' * 72)" -ForegroundColor DarkGray

        Write-Host "`n[SYSTEM PROMPT — first 800 chars]" -ForegroundColor Cyan
        Write-Host $SystemPrompt.Substring(0, [Math]::Min(800, $SystemPrompt.Length)) -ForegroundColor Gray
        Write-Host "... (truncated for display)" -ForegroundColor DarkGray

        Write-Host "`n[TAXONOMY NODES — $($CompactNodes.Count) nodes, first 400 chars]" -ForegroundColor Cyan
        Write-Host $TaxonomyNodesJson.Substring(0, [Math]::Min(400, $TaxonomyNodesJson.Length)) -ForegroundColor Gray
        Write-Host "..." -ForegroundColor DarkGray

        Write-Host "`n[UNMAPPED CONCEPTS — $($UnmappedForPrompt.Count) entries]" -ForegroundColor Cyan
        $UnmappedPreview = $UnmappedJson.Substring(0, [Math]::Min(400, $UnmappedJson.Length))
        Write-Host $UnmappedPreview -ForegroundColor Gray
        Write-Host "..." -ForegroundColor DarkGray

        Write-Host "`n[CITATION STATISTICS]" -ForegroundColor Cyan
        Write-Host $CitationStatsJson.Substring(0, [Math]::Min(400, $CitationStatsJson.Length)) -ForegroundColor Gray
        Write-Host "..." -ForegroundColor DarkGray

        Write-Host "`n[COVERAGE BALANCE]" -ForegroundColor Cyan
        Write-Host $CoverageBalanceJson -ForegroundColor Gray

        Write-Host "`n$('─' * 72)" -ForegroundColor DarkGray
        Write-Host " DRY RUN complete. No API call made. No files written." -ForegroundColor Yellow
        Write-Host "$('─' * 72)`n" -ForegroundColor DarkGray
        return
    }

    # ── 7. Call Invoke-AIApi ───────────────────────────────────────────────────
    Write-Step "Calling AI API ($Model)"

    $StartTime = Get-Date
    Write-Info "Sending request..."

    $AiResult = Invoke-AIApi `
        -Prompt      $FullPrompt `
        -Model       $Model `
        -ApiKey      $ApiKey `
        -Temperature $Temperature `
        -MaxTokens   16384 `
        -JsonMode `
        -TimeoutSec  120

    if ($null -eq $AiResult) {
        throw "AI API call returned null"
    }

    $Elapsed = (Get-Date) - $StartTime
    Write-OK "Response received from $($AiResult.Backend) in $([int]$Elapsed.TotalSeconds)s"

    # ── 8. Parse and validate response ─────────────────────────────────────────
    Write-Step "Parsing AI response"

    $RawText     = $AiResult.Text
    $CleanedText = $RawText -replace '(?s)^```json\s*', '' -replace '(?s)\s*```$', ''
    $CleanedText = $CleanedText.Trim()

    try {
        $ProposalObject = $CleanedText | ConvertFrom-Json -Depth 20
        Write-OK "Valid JSON received"
    }
    catch {
        Write-Warn "JSON parse failed — attempting repair"
        $Repaired = Repair-TruncatedJson -Text $RawText
        if ($Repaired) {
            try {
                $ProposalObject = $Repaired | ConvertFrom-Json -Depth 20
                Write-OK "JSON repaired successfully"
            }
            catch {
                $ProposalObject = $null
            }
        }
        if ($null -eq $ProposalObject) {
            $DebugPath = Join-Path $RepoRoot 'taxonomy' 'proposals' "proposal-debug-$(Get-Date -Format 'yyyyMMdd-HHmmss').txt"
            $ProposalsDir = Join-Path $RepoRoot 'taxonomy' 'proposals'
            if (-not (Test-Path $ProposalsDir)) { New-Item -ItemType Directory -Path $ProposalsDir -Force | Out-Null }
            Set-Content -Path $DebugPath -Value $RawText -Encoding UTF8
            Write-Fail "AI returned invalid JSON. Raw response saved: $DebugPath"
            throw "AI returned invalid JSON for taxonomy proposal"
        }
    }

    # Validate presence of proposals array
    if (-not $ProposalObject.proposals) {
        Write-Warn "Response missing 'proposals' array — may be empty or malformed"
        $ProposalObject | Add-Member -NotePropertyName 'proposals' -NotePropertyValue @() -ErrorAction SilentlyContinue
    }

    $ProposalCount = $ProposalObject.proposals.Count
    Write-OK "$ProposalCount proposal(s) generated"

    # ── 9. Write proposal file ─────────────────────────────────────────────────
    Write-Step "Writing proposal file"

    $ProposalsDir = Join-Path $RepoRoot 'taxonomy' 'proposals'
    if (-not (Test-Path $ProposalsDir)) {
        New-Item -ItemType Directory -Path $ProposalsDir -Force | Out-Null
    }

    $Timestamp = Get-Date -Format 'yyyyMMdd-HHmmss'
    if (-not $OutputFile) {
        $OutputFile = Join-Path $ProposalsDir "proposal-$Timestamp.json"
    }

    # Enrich with metadata
    $FinalProposal = [ordered]@{
        generated_at     = (Get-Date -Format 'yyyy-MM-ddTHH:mm:ssZ')
        model            = $Model
        taxonomy_version = $HealthData.TaxonomyVersion
        summary_count    = $HealthData.SummaryCount
        proposals        = $ProposalObject.proposals
    }

    $ProposalJson = $FinalProposal | ConvertTo-Json -Depth 20
    try {
        Set-Content -Path $OutputFile -Value $ProposalJson -Encoding UTF8
        Write-OK "Proposal written to: $OutputFile"
    }
    catch {
        Write-Fail "Failed to write proposal file — $($_.Exception.Message)"
        Write-Info "Proposal data was generated but NOT saved. Check path and permissions."
        throw
    }

    # ── 10. Print human-readable summary ───────────────────────────────────────
    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan
    Write-Host " TAXONOMY PROPOSALS" -ForegroundColor White
    Write-Host " Model: $Model | Taxonomy v$($HealthData.TaxonomyVersion) | $ProposalCount proposal(s)" -ForegroundColor Gray
    Write-Host "$('═' * 72)" -ForegroundColor Cyan

    $ActionTypes = @('NEW', 'SPLIT', 'MERGE', 'RELABEL')
    foreach ($Action in $ActionTypes) {
        $Group = @($ProposalObject.proposals | Where-Object { $_.action -eq $Action })
        if ($Group.Count -eq 0) { continue }

        $ActionColor = switch ($Action) {
            'NEW'     { 'Green'   }
            'SPLIT'   { 'Cyan'    }
            'MERGE'   { 'Yellow'  }
            'RELABEL' { 'Magenta' }
        }

        Write-Host "`n [$Action] ($($Group.Count))" -ForegroundColor $ActionColor

        foreach ($P in $Group) {
            $IdStr = if ($P.suggested_id) { "[$($P.suggested_id)]" } else { '' }
            $TargetStr = if ($P.target_node_id) { " (target: $($P.target_node_id))" } else { '' }
            Write-Host " $IdStr $($P.label)$TargetStr" -ForegroundColor White
            Write-Host " POV: $($P.pov) | Category: $($P.category)" -ForegroundColor Gray
            if ($P.rationale) {
                $RatSnippet = if ($P.rationale.Length -gt 120) {
                    $P.rationale.Substring(0, 120) + '...'
                } else { $P.rationale }
                Write-Host " Rationale: $RatSnippet" -ForegroundColor DarkGray
            }
            if ($P.PSObject.Properties['children'] -and $P.children.Count -gt 0) {
                Write-Host " Children:" -ForegroundColor Gray
                foreach ($Child in $P.children) {
                    Write-Host " [$($Child.suggested_id)] $($Child.label)" -ForegroundColor Gray
                }
            }
            if ($P.PSObject.Properties['merge_node_ids'] -and $P.merge_node_ids.Count -gt 0) {
                Write-Host " Merging: $($P.merge_node_ids -join ', ') → $($P.surviving_node_id)" -ForegroundColor Gray
            }
        }
    }

    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan
    Write-Host " Output: $OutputFile" -ForegroundColor Green
    Write-Host "$('═' * 72)`n" -ForegroundColor Cyan
}