Public/Get-IngestionPriority.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Get-IngestionPriority {
    <#
    .SYNOPSIS
        Identifies and ranks research gaps to guide source ingestion priorities.
    .DESCRIPTION
        Scores taxonomy gaps by type (orphan nodes, one-sided conflicts, echo chambers,
        coverage imbalance, etc.) and ranks them to suggest which sources should be
        ingested next. Optionally calls an LLM to generate search queries per gap.

        Fixed scores per gap type: orphan node 10, one-sided conflict 8, unmapped
        concept 7, echo-chamber node 6, coverage imbalance 5, single-POV citations 4.
        Gaps with equal scores keep the order in which they were detected.
    .PARAMETER TopN
        Number of top gaps to return (1-50, default 10).
    .PARAMETER POV
        Filter to a single POV or 'all' (default). The filter is applied to orphan
        nodes, echo-chamber nodes, coverage imbalance and single-POV citations.
    .PARAMETER OutputFile
        Optional path to write results as JSON.
    .PARAMETER NoAI
        Skip LLM-generated search queries; return raw ranked gaps only.
    .PARAMETER Model
        AI model override. When omitted, $env:AI_MODEL is used if set, otherwise
        'gemini-3.1-flash-lite-preview'.
    .PARAMETER ApiKey
        AI API key override.
    .PARAMETER RepoRoot
        Path to the repository root.
    .OUTPUTS
        Ordered dictionary with the keys generated_at, total_gaps, shown, ai_enhanced
        and gaps. Each gap has gap_id, type, score, pov, node_id, label, description
        and, when a matching AI recommendation exists, search_query and rationale.
    .EXAMPLE
        Get-IngestionPriority -NoAI
    .EXAMPLE
        Get-IngestionPriority -TopN 5 -OutputFile priority.json
    #>
    [CmdletBinding()]
    param(
        [ValidateRange(1, 50)]
        [int]$TopN = 10,

        [ValidateSet('accelerationist', 'safetyist', 'skeptic', 'all')]
        [string]$POV = 'all',

        [string]$OutputFile,

        [switch]$NoAI,

        [string]$Model,

        [string]$ApiKey,

        [string]$RepoRoot = $script:RepoRoot
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    if (-not $Model) {
        $Model = if ($env:AI_MODEL) { $env:AI_MODEL } else { 'gemini-3.1-flash-lite-preview' }
    }

    # ── Step 1: Gather health data ────────────────────────────────────────────
    Write-Step 'Computing taxonomy health data with graph metrics'
    $Health = Get-TaxonomyHealthData -RepoRoot $RepoRoot -GraphMode

    # Node id -> label / POV lookups, used by the echo-chamber section below.
    $NodeLabelMap = @{}
    $NodePovMap   = @{}
    foreach ($NC in $Health.NodeCitations) {
        $NodeLabelMap[$NC.Id] = $NC.Label
        $NodePovMap[$NC.Id]   = $NC.POV
    }

    Write-OK "Scanned $($Health.SummaryCount) summaries, $($Health.NodeCitations.Count) nodes"

    # ── Step 2: Score gaps ────────────────────────────────────────────────────
    Write-Step 'Scoring research gaps'

    $Gaps       = [System.Collections.Generic.List[PSObject]]::new()
    $GapCounter = 0

    # --- Orphan nodes (score 10) ---
    foreach ($Orphan in $Health.OrphanNodes) {
        if ($Orphan.POV -eq 'cross-cutting') { continue }
        if ($POV -ne 'all' -and $Orphan.POV -ne $POV) { continue }

        $GapCounter++
        $Gaps.Add([PSCustomObject][ordered]@{
            gap_id      = "gap-$GapCounter"
            type        = 'orphan_node'
            score       = 10
            pov         = $Orphan.POV
            node_id     = $Orphan.Id
            label       = $Orphan.Label
            description = "Node [$($Orphan.Id)] '$($Orphan.Label)' has zero citations across all summaries."
        })
    }

    # --- One-sided conflicts (score 8) ---
    # NOTE(review): the -POV filter is not applied to conflicts (they are tagged 'all').
    $ConflictDir = Get-ConflictsDir
    if (Test-Path $ConflictDir) {
        foreach ($File in Get-ChildItem -Path $ConflictDir -Filter '*.json' -File) {
            # Every field read sits inside the try: under strict mode a file that parses
            # as JSON but lacks an expected property throws, and such a file should be
            # skipped like an unparsable one instead of aborting the whole run.
            try {
                $Conflict   = Get-Content -Raw -Path $File.FullName | ConvertFrom-Json
                $Instances  = @($Conflict.instances)
                $Stances    = @($Instances | ForEach-Object { $_.stance } | Select-Object -Unique)
                $ClaimId    = $Conflict.claim_id
                $ClaimLabel = $Conflict.claim_label
            }
            catch {
                Write-Verbose "Skipping conflict file '$($File.FullName)': $($_.Exception.Message)"
                continue
            }

            if ($Instances.Count -lt 2) { continue }
            if ($Stances.Count -ne 1) { continue }

            $GapCounter++
            $Gaps.Add([PSCustomObject][ordered]@{
                gap_id      = "gap-$GapCounter"
                type        = 'one_sided_conflict'
                score       = 8
                pov         = 'all'
                node_id     = $ClaimId
                label       = $ClaimLabel
                description = "Conflict '$ClaimLabel' has $($Instances.Count) instances all with stance '$($Stances[0])' — missing opposing viewpoint."
            })
        }
    }

    # --- High-frequency unmapped concepts (score 7) ---
    # NOTE(review): the -POV filter is not applied here even though a suggested POV
    # may be present — confirm whether that is intended.
    foreach ($UC in $Health.StrongCandidates) {
        $GapCounter++
        $Gaps.Add([PSCustomObject][ordered]@{
            gap_id      = "gap-$GapCounter"
            type        = 'unmapped_concept'
            score       = 7
            pov         = if ($UC.SuggestedPov) { $UC.SuggestedPov } else { 'unknown' }
            node_id     = $null
            label       = $UC.Concept
            description = "Concept '$($UC.Concept)' appeared $($UC.Frequency) times across $($UC.ContributingDocs.Count) docs but is not mapped to any taxonomy node."
        })
    }

    # --- Echo chamber nodes (score 6) ---
    if ($Health.GraphHealth -and $Health.GraphHealth.EchoChamberNodes) {
        foreach ($ECId in $Health.GraphHealth.EchoChamberNodes) {
            $ECPov = $NodePovMap[$ECId]
            if ($POV -ne 'all' -and $ECPov -ne $POV) { continue }

            $GapCounter++
            $Gaps.Add([PSCustomObject][ordered]@{
                gap_id      = "gap-$GapCounter"
                type        = 'echo_chamber_node'
                score       = 6
                pov         = $ECPov
                node_id     = $ECId
                label       = if ($NodeLabelMap.ContainsKey($ECId)) { $NodeLabelMap[$ECId] } else { $ECId }
                description = "Node [$ECId] has many SUPPORTS edges but zero cross-POV CONTRADICTS — needs challenging sources."
            })
        }
    }

    # --- Coverage imbalance (score 5) ---
    # A category is flagged when the best-covered POV has more than twice the nodes of
    # the least-covered one.
    # NOTE(review): the `$Min -gt 0` guard prevents division by zero but also means a
    # POV with zero nodes in a category is never reported — confirm this is intended.
    $Categories = @('Goals/Values', 'Data/Facts', 'Methods/Arguments')
    $PovNames   = @('accelerationist', 'safetyist', 'skeptic')
    foreach ($Cat in $Categories) {
        $Counts = @($PovNames | ForEach-Object { $Health.CoverageBalance[$_][$Cat] })
        $Min    = ($Counts | Measure-Object -Minimum).Minimum
        $Max    = ($Counts | Measure-Object -Maximum).Maximum

        if ($Min -gt 0 -and $Max / $Min -gt 2) {
            # First POV (in $PovNames order) whose count equals the minimum.
            $WeakPov = $PovNames |
                Where-Object { $Health.CoverageBalance[$_][$Cat] -eq $Min } |
                Select-Object -First 1
            if ($POV -ne 'all' -and $WeakPov -ne $POV) { continue }

            $GapCounter++
            $Gaps.Add([PSCustomObject][ordered]@{
                gap_id      = "gap-$GapCounter"
                type        = 'coverage_imbalance'
                score       = 5
                pov         = $WeakPov
                node_id     = $null
                label       = "$Cat coverage gap ($WeakPov)"
                description = "$WeakPov has only $Min nodes in $Cat vs max $Max — $([Math]::Round($Max/$Min, 1))x imbalance."
            })
        }
    }

    # --- Single-POV citations (score 4) ---
    # The sources directory is resolved once, and each document's pov_tags are cached so
    # a metadata.json cited by many nodes is read and parsed only once.
    $SourcesDir  = Get-SourcesDir
    $DocPovCache = [System.Collections.Generic.Dictionary[string, object]]::new()
    foreach ($NC in $Health.NodeCitations) {
        if ($NC.POV -eq 'cross-cutting') { continue }
        if ($NC.Citations -eq 0) { continue }
        if ($POV -ne 'all' -and $NC.POV -ne $POV) { continue }

        # Collect the distinct POV tags of every document that cites this node.
        $DocPovs = [System.Collections.Generic.HashSet[string]]::new()
        foreach ($DId in $NC.DocIds) {
            $CacheKey = [string]$DId
            if (-not $DocPovCache.ContainsKey($CacheKey)) {
                $Tags     = @()
                $MetaPath = Join-Path $SourcesDir $DId 'metadata.json'
                if (Test-Path $MetaPath) {
                    try {
                        $Meta = Get-Content -Raw -Path $MetaPath | ConvertFrom-Json
                        if ($Meta.pov_tags) { $Tags = @($Meta.pov_tags) }
                    }
                    catch {
                        # Best effort: unreadable metadata or a missing pov_tags field
                        # contributes no tags.
                        Write-Verbose "Ignoring metadata '$MetaPath': $($_.Exception.Message)"
                    }
                }
                $DocPovCache[$CacheKey] = $Tags
            }

            foreach ($PT in $DocPovCache[$CacheKey]) { [void]$DocPovs.Add($PT) }
        }

        if ($DocPovs.Count -eq 1) {
            $GapCounter++
            $Gaps.Add([PSCustomObject][ordered]@{
                gap_id      = "gap-$GapCounter"
                type        = 'single_pov_citations'
                score       = 4
                pov         = $NC.POV
                node_id     = $NC.Id
                label       = $NC.Label
                description = "Node [$($NC.Id)] '$($NC.Label)' is only cited by docs tagged as '$($DocPovs | Select-Object -First 1)' — needs diverse sources."
            })
        }
    }

    # Sort by score descending, take TopN. -Stable keeps detection order among equal
    # scores so the TopN cut is deterministic.
    $RankedGaps = @($Gaps | Sort-Object -Property score -Descending -Stable | Select-Object -First $TopN)

    Write-OK "Found $($Gaps.Count) total gaps, showing top $($RankedGaps.Count)"

    # ── Step 3: Optional AI search query generation ───────────────────────────
    $AIRecommendations = $null

    if (-not $NoAI -and $RankedGaps.Count -gt 0) {
        Write-Step 'Generating search queries with AI'

        try {
            # Backend is inferred from the model name prefix; unknown prefixes use gemini.
            $Backend = if ($Model -match '^gemini') { 'gemini' }
                       elseif ($Model -match '^claude') { 'claude' }
                       elseif ($Model -match '^groq') { 'groq' }
                       else { 'gemini' }

            $ResolvedKey = Resolve-AIApiKey -ExplicitKey $ApiKey -Backend $Backend
            if ([string]::IsNullOrWhiteSpace($ResolvedKey)) {
                Write-Warn "No API key found for $Backend — falling back to -NoAI mode"
                $NoAI = $true
            }
            else {
                $GapsText = ($RankedGaps | ForEach-Object {
                    "- $($_.gap_id) [score=$($_.score), type=$($_.type), pov=$($_.pov)]: $($_.description)"
                }) -join "`n"

                $PromptBody = Get-Prompt -Name 'ingestion-priority' -Replacements @{ GAPS = $GapsText }

                $AIArgs = @{
                    Prompt      = $PromptBody
                    Model       = $Model
                    ApiKey      = $ResolvedKey
                    Temperature = 0.2
                    MaxTokens   = 4096
                    JsonMode    = $true
                    TimeoutSec  = 120
                    MaxRetries  = 3
                    RetryDelays = @(5, 15, 45)
                }
                $AIResult = Invoke-AIApi @AIArgs

                if ($AIResult -and $AIResult.Text) {
                    # Strip an optional Markdown ```json fence around the payload.
                    $ResponseText = $AIResult.Text -replace '(?s)^```json\s*', '' -replace '(?s)\s*```$', ''
                    $ParsedRecs   = ($ResponseText | ConvertFrom-Json).recommendations

                    if ($null -ne $ParsedRecs) {
                        # Normalise to an array so .Count is valid under strict mode even
                        # when the model returns a single object instead of a list.
                        $Recs = @($ParsedRecs)
                        Write-OK "AI generated $($Recs.Count) search recommendations ($($AIResult.Backend))"
                        # Committed last so a failure anywhere above leaves the result
                        # un-enhanced, matching the warning emitted by the catch below.
                        $AIRecommendations = $Recs
                    }
                    else {
                        Write-Warn "AI returned no result"
                    }
                }
                else {
                    Write-Warn "AI returned no result"
                }
            }
        }
        catch {
            Write-Warn "AI query generation failed: $_"
        }
    }

    # ── Step 4: Build result ──────────────────────────────────────────────────
    # Index recommendations by gap_id (first occurrence wins). Model output is read
    # through PSObject.Properties so an entry missing a key cannot throw under strict mode.
    $RecByGapId = @{}
    if ($AIRecommendations) {
        foreach ($Rec in $AIRecommendations) {
            if ($null -eq $Rec) { continue }
            $IdProp = $Rec.PSObject.Properties['gap_id']
            if ($null -eq $IdProp -or $null -eq $IdProp.Value) { continue }
            $RecKey = [string]$IdProp.Value
            if (-not $RecByGapId.ContainsKey($RecKey)) { $RecByGapId[$RecKey] = $Rec }
        }
    }

    $ResultGaps = @(foreach ($Gap in $RankedGaps) {
        $Entry = [ordered]@{
            gap_id      = $Gap.gap_id
            type        = $Gap.type
            score       = $Gap.score
            pov         = $Gap.pov
            node_id     = $Gap.node_id
            label       = $Gap.label
            description = $Gap.description
        }

        if ($RecByGapId.ContainsKey($Gap.gap_id)) {
            $Rec = $RecByGapId[$Gap.gap_id]
            # Both keys are always added for a matched gap; a missing field becomes $null.
            foreach ($Field in 'search_query', 'rationale') {
                $FieldProp = $Rec.PSObject.Properties[$Field]
                $Entry[$Field] = if ($null -ne $FieldProp) { $FieldProp.Value } else { $null }
            }
        }

        [PSCustomObject]$Entry
    })

    $Result = [ordered]@{
        generated_at = (Get-Date -Format 'o')
        total_gaps   = $Gaps.Count
        shown        = $ResultGaps.Count
        ai_enhanced  = ($null -ne $AIRecommendations)
        gaps         = $ResultGaps
    }

    # ── Step 5: Console output ────────────────────────────────────────────────
    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan
    Write-Host " INGESTION PRIORITY — $($Gaps.Count) gaps found, top $($ResultGaps.Count) shown" -ForegroundColor White
    Write-Host "$('═' * 72)" -ForegroundColor Cyan

    foreach ($G in $ResultGaps) {
        $ScoreColor = if ($G.score -ge 8) { 'Red' } elseif ($G.score -ge 6) { 'Yellow' } else { 'Gray' }

        Write-Host "`n [$($G.score.ToString().PadLeft(2))] $($G.type)" -ForegroundColor $ScoreColor -NoNewline
        Write-Host " ($($G.pov))" -ForegroundColor DarkGray
        Write-Host " $($G.label)" -ForegroundColor White
        Write-Host " $($G.description)" -ForegroundColor DarkGray

        # search_query / rationale exist only on gaps that matched an AI recommendation.
        if ($G.PSObject.Properties['search_query'] -and $G.search_query) {
            Write-Host " Search: $($G.search_query)" -ForegroundColor Cyan
        }
        if ($G.PSObject.Properties['rationale'] -and $G.rationale) {
            Write-Host " Why: $($G.rationale)" -ForegroundColor Gray
        }
    }

    Write-Host "`n$('═' * 72)" -ForegroundColor Cyan

    # ── JSON export ───────────────────────────────────────────────────────────
    if ($OutputFile) {
        try {
            $Json = $Result | ConvertTo-Json -Depth 20
            Set-Content -Path $OutputFile -Value $Json -Encoding UTF8
            Write-OK "Exported to $OutputFile"
        }
        catch {
            # Export failure is reported but does not prevent returning the result.
            Write-Warn "Failed to write $OutputFile — $($_.Exception.Message)"
        }
    }

    return $Result
}