Public/Get-Tax.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved. # Licensed under the MIT License. See LICENSE file in the project root. function Get-Tax { <# .SYNOPSIS Returns taxonomy nodes filtered by POV, ID, label, description, or semantic similarity. .DESCRIPTION Queries the in-memory taxonomy loaded at module import time. Text filtering (default): -POV narrows the file scope, then any node whose ID matches ANY -Id pattern, OR whose label matches ANY -Label pattern, OR whose description matches ANY -Description pattern is returned. Semantic search (-Similar): Calls the Python embedding script to rank all nodes by cosine similarity to the query text. Returns results sorted by score. Requires embeddings.json (run Update-TaxEmbeddings first). .PARAMETER POV Name of the POV file without the .json extension (case-insensitive). Supports wildcards. Default: "*" (all POVs). .PARAMETER Id One or more wildcard patterns matched against node IDs. .PARAMETER Label One or more wildcard patterns matched against node labels. .PARAMETER Description One or more wildcard patterns matched against node descriptions. .PARAMETER Similar A text query for semantic similarity search. Mutually exclusive with -Id, -Label, and -Description. .PARAMETER Top Maximum number of results to return (only with -Similar or -Overlaps). Default: 20. .PARAMETER Overlaps Find node pairs with high embedding similarity (potential merge/consolidation candidates). Returns pairs sorted by similarity score descending. .PARAMETER Threshold Minimum cosine similarity to report (only with -Overlaps). Default: 0.80. .PARAMETER CrossPOV Only report pairs where nodes are from different POVs (only with -Overlaps). .EXAMPLE Get-Tax # Returns all nodes from every loaded POV. .EXAMPLE Get-Tax -POV skeptic # Returns only skeptic nodes. .EXAMPLE Get-Tax -Label "*bias*","*displacement*" # Returns nodes whose label matches either pattern. .EXAMPLE Get-Tax -Similar "alignment safety" # Ranked semantic search across all POVs. .EXAMPLE Get-Tax -POV safetyist -Similar "labor displacement" # Semantic search scoped to safetyist POV. .EXAMPLE Get-Tax -Similar "governance" -Top 5 # Top 5 semantically similar nodes. .EXAMPLE Get-Tax -Overlaps # All node pairs with cosine similarity > 0.80. .EXAMPLE Get-Tax -Overlaps -Threshold 0.90 -Top 10 # Top 10 most similar pairs above 0.90. .EXAMPLE Get-Tax -Overlaps -CrossPOV # Cross-POV overlaps only (most interesting for consolidation). .EXAMPLE 'acc-goals-001','saf-goals-001' | Get-Tax # Pipeline by value — accepts bare ID strings. .EXAMPLE Get-Tax -Id 'acc-goals-*' | Get-Tax # Pipeline by property name — objects with an Id property. #> [CmdletBinding(DefaultParameterSetName = 'Text')] param( [Parameter(Position = 0)] [string]$POV = '*', [Parameter(ParameterSetName = 'Text', ValueFromPipeline, ValueFromPipelineByPropertyName)] [string[]]$Id, [Parameter(ParameterSetName = 'Text')] [string[]]$Label, [Parameter(ParameterSetName = 'Text')] [string[]]$Description, [Parameter(Mandatory, ParameterSetName = 'Similar')] [string]$Similar, [Parameter(ParameterSetName = 'Similar')] [Parameter(ParameterSetName = 'Overlaps')] [ValidateRange(1, 1000)] [int]$Top = 20, [Parameter(Mandatory, ParameterSetName = 'Overlaps')] [switch]$Overlaps, [Parameter(ParameterSetName = 'Overlaps')] [ValidateRange(0.0, 1.0)] [double]$Threshold = 0.80, [Parameter(ParameterSetName = 'Overlaps')] [switch]$CrossPOV ) begin { Set-StrictMode -Version Latest $CollectedIds = [System.Collections.Generic.List[string]]::new() } process { # Accumulate pipeline-bound -Id values if ($Id) { foreach ($i in $Id) { if (-not [string]::IsNullOrWhiteSpace($i)) { $CollectedIds.Add($i) } } } } end { # Merge collected pipeline IDs with any directly specified if ($CollectedIds.Count -gt 0) { $Id = @($CollectedIds | Select-Object -Unique) } # -- Overlaps (pairwise similarity) code path ------------------------------ if ($PSCmdlet.ParameterSetName -eq 'Overlaps') { $EmbedScript = Join-Path $script:ModuleRoot '..' 'embed_taxonomy.py' if (-not (Test-Path $EmbedScript)) { Write-Error "embed_taxonomy.py not found at $EmbedScript" return } $EmbeddingsFile = Get-TaxonomyDir 'embeddings.json' if (-not (Test-Path $EmbeddingsFile)) { Write-Error "embeddings.json not found. Run Update-TaxEmbeddings first." return } $PythonCmd = if (Get-Command python -ErrorAction SilentlyContinue) { 'python' } else { 'python3' } $PyArgs = @('find-overlaps', '--threshold', $Threshold) if ($POV -ne '*') { $PyArgs += @('--pov', $POV) } if ($CrossPOV) { $PyArgs += '--cross-pov' } if ($Top -and $Top -gt 0) { $PyArgs += @('--top', $Top) } $PyResult = & $PythonCmd $EmbedScript @PyArgs 2>$null if ($LASTEXITCODE -ne 0) { Write-Error "embed_taxonomy.py find-overlaps failed (exit code $LASTEXITCODE)." return } $Results = $PyResult | ConvertFrom-Json if (-not $Results -or $Results.Count -eq 0) { Write-Host "No overlapping node pairs found above threshold $Threshold." -ForegroundColor Yellow return } # Build lookup for full node data $NodeLookup = @{} foreach ($Key in $script:TaxonomyData.Keys) { $Entry = $script:TaxonomyData[$Key] foreach ($Node in $Entry.nodes) { $NodeLookup[$Node.id] = @{ POV = $Key; Node = $Node } } } foreach ($Pair in $Results) { $InfoA = $NodeLookup[$Pair.node_a] $InfoB = $NodeLookup[$Pair.node_b] $LabelA = if ($InfoA) { $InfoA.Node.label } else { $Pair.node_a } $LabelB = if ($InfoB) { $InfoB.Node.label } else { $Pair.node_b } [PSCustomObject]@{ PSTypeName = 'TaxonomyNode.Overlap' Similarity = [math]::Round($Pair.similarity, 4) NodeA = $Pair.node_a PovA = $Pair.pov_a LabelA = $LabelA NodeB = $Pair.node_b PovB = $Pair.pov_b LabelB = $LabelB } } return } # -- Similar (semantic search) code path ---------------------------------- if ($PSCmdlet.ParameterSetName -eq 'Similar') { $EmbedScript = Join-Path $script:ModuleRoot '..' 'embed_taxonomy.py' if (-not (Test-Path $EmbedScript)) { Write-Error "embed_taxonomy.py not found at $EmbedScript" return } $EmbeddingsFile = Get-TaxonomyDir 'embeddings.json' if (-not (Test-Path $EmbeddingsFile)) { Write-Error "embeddings.json not found. Run Update-TaxEmbeddings first." return } # Build Python arguments $PyArgs = @('query', $Similar, '--top', $Top) if ($POV -ne '*') { $PyArgs += @('--pov', $POV) } $PythonCmd2 = if (Get-Command python -ErrorAction SilentlyContinue) { 'python' } else { 'python3' } $PyResult = & $PythonCmd2 $EmbedScript @PyArgs 2>$null if ($LASTEXITCODE -ne 0) { Write-Error "embed_taxonomy.py query failed (exit code $LASTEXITCODE). Is sentence-transformers installed?" return } $Results = $PyResult | ConvertFrom-Json if (-not $Results -or $Results.Count -eq 0) { Write-Warning "No similar nodes found." return } # Build a lookup from in-memory taxonomy for full node data $NodeLookup = @{} foreach ($Key in $script:TaxonomyData.Keys) { $Entry = $script:TaxonomyData[$Key] foreach ($Node in $Entry.nodes) { $NodeLookup[$Node.id] = @{ POV = $Key; Node = $Node } } } foreach ($Hit in $Results) { $Info = $NodeLookup[$Hit.id] if (-not $Info) { continue } $Obj = ConvertTo-TaxonomyNode -PovKey $Info.POV -Node $Info.Node -Score $Hit.score $Obj.PSObject.TypeNames.Insert(0, 'TaxonomyNode.Similar') $Obj } return } # -- Text filtering (default) code path ----------------------------------- $MatchingKeys = $script:TaxonomyData.Keys | Where-Object { $_ -like $POV.ToLower() } if (-not $MatchingKeys) { $Available = ($script:TaxonomyData.Keys | Sort-Object) -join ', ' Write-Warning "No POV matching '$POV'. Available: $Available" return } $HasId = ($null -ne $Id) -and ($Id.Length -gt 0) $HasLabel = ($null -ne $Label) -and ($Label.Length -gt 0) $HasDesc = ($null -ne $Description) -and ($Description.Length -gt 0) $HasTextFilter = $HasId -or $HasLabel -or $HasDesc foreach ($Key in $MatchingKeys | Sort-Object) { $Entry = $script:TaxonomyData[$Key] foreach ($Node in $Entry.nodes) { if ($HasTextFilter) { $Match = $false foreach ($Pat in $Id) { if ($Node.id -like $Pat) { $Match = $true; break } } if (-not $Match) { foreach ($Pat in $Label) { if ($Node.label -like $Pat) { $Match = $true; break } } } if (-not $Match) { foreach ($Pat in $Description) { if ($Node.description -like $Pat) { $Match = $true; break } } } if (-not $Match) { continue } } ConvertTo-TaxonomyNode -PovKey $Key -Node $Node } } } # end } |