Private/Get-FilteredCandidates.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

# Embedding-based candidate pre-filtering for edge discovery.
# Dot-sourced by AITriad.psm1 — do NOT export.

function Get-CosineSimilarity {
    <#
    .SYNOPSIS
        Computes the cosine similarity of two numeric vectors.
    .PARAMETER A
        First vector.
    .PARAMETER B
        Second vector. Must have the same number of elements as A to be comparable.
    .OUTPUTS
        [double] Dot(A, B) / (|A| * |B|).
        Returns 0.0 when either vector is null, when the vectors differ in length,
        or when either vector has zero magnitude (including empty vectors).
    #>
    param([double[]]$A, [double[]]$B)

    # Vectors of different dimensionality are not comparable. Without this guard,
    # a shorter B is indexed out of range, which throws when a caller has enabled
    # Set-StrictMode -Version Latest and silently yields a meaningless value otherwise.
    if ($null -eq $A -or $null -eq $B -or $A.Length -ne $B.Length) { return 0.0 }

    $Dot = 0.0; $NormA = 0.0; $NormB = 0.0
    for ($i = 0; $i -lt $A.Length; $i++) {
        $Dot   += $A[$i] * $B[$i]
        $NormA += $A[$i] * $A[$i]
        $NormB += $B[$i] * $B[$i]
    }

    $Denom = [Math]::Sqrt($NormA) * [Math]::Sqrt($NormB)
    if ($Denom -eq 0) { return 0.0 }   # zero-magnitude vector: similarity is undefined, report 0
    return $Dot / $Denom
}

function Get-FilteredCandidates {
    <#
    .SYNOPSIS
        Returns the top-K candidate nodes for edge discovery, ranked by embedding
        cosine similarity to the source node, with a cross-POV diversity floor.
    .PARAMETER SourceId
        ID of the source node being processed.
    .PARAMETER Embeddings
        Hashtable of node ID → [double[]] embedding vectors.
    .PARAMETER AllNodes
        All taxonomy nodes (PSObject[]). Each node is read through its 'id' property.
    .PARAMETER NodePovMap
        Hashtable of node ID → POV string.
    .PARAMETER TopK
        Number of candidates taken by similarity rank. Default: 40.
        The diversity floor may append further nodes, so the result can be larger.
    .PARAMETER MinPerOtherPov
        Minimum candidates from each non-source POV, regardless of similarity rank.
        Ensures cross-POV coverage. Default: 4.
    .NOTES
        If the source node has no embedding, returns all non-source nodes (fallback).
        Nodes without embeddings are included last (similarity = -1.0) to fill gaps.
        Nodes that are missing from NodePovMap (or mapped to $null) are treated as
        having the empty-string POV.
        POV labels are compared case-insensitively; labels that differ only in case
        share a single diversity floor.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)][string]$SourceId,
        [Parameter(Mandatory)][hashtable]$Embeddings,
        [Parameter(Mandatory)][PSObject[]]$AllNodes,
        [Parameter(Mandatory)][hashtable]$NodePovMap,
        [int]$TopK = 40,
        [int]$MinPerOtherPov = 4
    )

    Set-StrictMode -Version Latest

    # No embeddings available or source has none — return all non-source nodes unchanged
    if ($Embeddings.Count -eq 0 -or -not $Embeddings.ContainsKey($SourceId)) {
        return @($AllNodes | Where-Object { $_.id -ne $SourceId })
    }

    $SrcVec = $Embeddings[$SourceId]
    # [string] cast turns a $null map value into '' so it can be used as a hashtable key below.
    $SrcPov = if ($NodePovMap.ContainsKey($SourceId)) { [string]$NodePovMap[$SourceId] } else { '' }

    # Score every candidate
    $Scored = [System.Collections.Generic.List[PSObject]]::new()
    foreach ($Node in $AllNodes) {
        if ($Node.id -eq $SourceId) { continue }

        $NodePov = if ($NodePovMap.ContainsKey($Node.id)) { [string]$NodePovMap[$Node.id] } else { '' }
        $Sim = if ($Embeddings.ContainsKey($Node.id)) {
            Get-CosineSimilarity -A $SrcVec -B $Embeddings[$Node.id]
        }
        else {
            -1.0   # no embedding — lowest priority but not excluded
        }

        [void]$Scored.Add([PSCustomObject]@{ Node = $Node; Sim = $Sim; Pov = $NodePov })
    }

    # Sort descending by similarity
    $Sorted = @($Scored | Sort-Object -Property Sim -Descending)

    # Greedy top-K selection
    $Selected    = [System.Collections.Generic.List[PSObject]]::new()
    $SelectedIds = [System.Collections.Generic.HashSet[string]]::new()
    $PovCounts   = @{}   # POV → number of selected nodes (hashtable literal keys are case-insensitive)
    foreach ($Entry in $Sorted) {
        if ($Selected.Count -ge $TopK) { break }

        [void]$Selected.Add($Entry.Node)
        [void]$SelectedIds.Add($Entry.Node.id)

        $Prior = if ($PovCounts.ContainsKey($Entry.Pov)) { $PovCounts[$Entry.Pov] } else { 0 }
        $PovCounts[$Entry.Pov] = $Prior + 1
    }

    # Cross-POV diversity floor: guarantee MinPerOtherPov from every non-source POV.
    # $null map values are skipped because they cannot be used as hashtable keys.
    $OtherPovs = @(
        $NodePovMap.Values |
            Where-Object { $null -ne $_ -and $_ -ne $SrcPov } |
            Select-Object -Unique
    )
    foreach ($Pov in $OtherPovs) {
        $Have = if ($PovCounts.ContainsKey($Pov)) { $PovCounts[$Pov] } else { 0 }
        if ($Have -ge $MinPerOtherPov) { continue }

        $Need  = $MinPerOtherPov - $Have
        $Extra = @(
            $Sorted |
                Where-Object { $_.Pov -eq $Pov -and -not $SelectedIds.Contains($_.Node.id) } |
                Select-Object -First $Need
        )
        foreach ($Entry in $Extra) {
            [void]$Selected.Add($Entry.Node)
            [void]$SelectedIds.Add($Entry.Node.id)
        }

        # Record what was appended. Select-Object -Unique is case-sensitive while -eq and
        # the $PovCounts keys are not, so labels differing only in case would otherwise
        # each see a stale count and fill the same floor again.
        $PovCounts[$Pov] = $Have + $Extra.Count
    }

    return $Selected.ToArray()
}