Public/Test-TaxonomyIntegrity.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved. # Licensed under the MIT License. See LICENSE file in the project root. function Test-TaxonomyIntegrity { <# .SYNOPSIS Validate taxonomy data integrity across all files. .DESCRIPTION Checks: - All policy_id references resolve to registry entries - All registry entries are referenced by at least one node - member_count and source_povs are accurate - No duplicate policy_id references within a single node - Edge source/target IDs resolve to existing nodes or policies - Embeddings exist for all nodes and policies .PARAMETER Detailed Show per-issue details instead of just counts. .PARAMETER PassThru Return a summary object. .EXAMPLE Test-TaxonomyIntegrity .EXAMPLE Test-TaxonomyIntegrity -Detailed #> [CmdletBinding()] param( [switch]$Detailed, [switch]$PassThru ) Set-StrictMode -Version Latest $TaxDir = Get-TaxonomyDir $Issues = [System.Collections.Generic.List[PSCustomObject]]::new() $Checks = 0 $Passed = 0 # ── Load all data ── $PovFiles = @('accelerationist', 'safetyist', 'skeptic', 'cross-cutting') $AllNodeIds = [System.Collections.Generic.HashSet[string]]::new() $PolicyRefs = @{} # policy_id -> list of node_ids $DuplicateRefs = @() # nodes with duplicate policy_id refs $MissingPolicyId = @() # policy_actions without policy_id $ActualPovs = @{} # policy_id -> set of povs $ActualCounts = @{} # policy_id -> count foreach ($PovKey in $PovFiles) { $FilePath = Join-Path $TaxDir "$PovKey.json" if (-not (Test-Path $FilePath)) { continue } $FileData = Get-Content -Raw -Path $FilePath | ConvertFrom-Json foreach ($Node in $FileData.nodes) { [void]$AllNodeIds.Add($Node.id) if (-not $Node.PSObject.Properties['graph_attributes'] -or $null -eq $Node.graph_attributes) { continue } if (-not $Node.graph_attributes.PSObject.Properties['policy_actions']) { continue } $SeenIds = [System.Collections.Generic.HashSet[string]]::new() foreach ($PA in $Node.graph_attributes.policy_actions) { $Pid = if ($PA.PSObject.Properties['policy_id']) { $PA.policy_id } else { $null } if (-not $Pid) { $MissingPolicyId += [PSCustomObject]@{ NodeId = $Node.id; POV = $PovKey; Action = $PA.action } continue } if (-not $SeenIds.Add($Pid)) { $DuplicateRefs += [PSCustomObject]@{ NodeId = $Node.id; PolicyId = $Pid } } if (-not $PolicyRefs.ContainsKey($Pid)) { $PolicyRefs[$Pid] = [System.Collections.Generic.List[string]]::new() $ActualPovs[$Pid] = [System.Collections.Generic.HashSet[string]]::new() $ActualCounts[$Pid] = 0 } $PolicyRefs[$Pid].Add($Node.id) [void]$ActualPovs[$Pid].Add($PovKey) $ActualCounts[$Pid]++ } } } # ── Check 1: Policy registry ── $Checks++ $RegistryPath = Join-Path $TaxDir 'policy_actions.json' if (Test-Path $RegistryPath) { $Registry = Get-Content -Raw -Path $RegistryPath | ConvertFrom-Json $RegistryIds = [System.Collections.Generic.HashSet[string]]::new() foreach ($Pol in $Registry.policies) { [void]$RegistryIds.Add($Pol.id) } # Unresolved refs $Unresolved = @($PolicyRefs.Keys | Where-Object { -not $RegistryIds.Contains($_) }) if ($Unresolved.Count -gt 0) { $Issues.Add([PSCustomObject]@{ Check = 'PolicyRef'; Severity = 'Error'; Count = $Unresolved.Count; Detail = "policy_id refs not in registry: $($Unresolved -join ', ')" }) } else { $Passed++ } # Orphaned $Checks++ $Orphaned = @($RegistryIds | Where-Object { -not $PolicyRefs.ContainsKey($_) }) if ($Orphaned.Count -gt 0) { $Issues.Add([PSCustomObject]@{ Check = 'Orphaned'; Severity = 'Warning'; Count = $Orphaned.Count; Detail = "registry entries with no node refs: $($Orphaned[0..([Math]::Min(4, $Orphaned.Count-1))] -join ', ')$(if ($Orphaned.Count -gt 5) { ' ...' })" }) } else { $Passed++ } # member_count accuracy $Checks++ $CountMismatches = 0 foreach ($Pol in $Registry.policies) { $Actual = if ($ActualCounts.ContainsKey($Pol.id)) { $ActualCounts[$Pol.id] } else { 0 } if ($Pol.member_count -ne $Actual) { $CountMismatches++ } } if ($CountMismatches -gt 0) { $Issues.Add([PSCustomObject]@{ Check = 'MemberCount'; Severity = 'Warning'; Count = $CountMismatches; Detail = "$CountMismatches policies have inaccurate member_count" }) } else { $Passed++ } } else { $Issues.Add([PSCustomObject]@{ Check = 'Registry'; Severity = 'Error'; Count = 1; Detail = 'policy_actions.json not found' }) } # ── Check 2: Missing policy_id ── $Checks++ if ($MissingPolicyId.Count -gt 0) { $Issues.Add([PSCustomObject]@{ Check = 'MissingPolicyId'; Severity = 'Warning'; Count = $MissingPolicyId.Count; Detail = "$($MissingPolicyId.Count) policy_actions without policy_id" }) } else { $Passed++ } # ── Check 3: Duplicate refs ── $Checks++ if ($DuplicateRefs.Count -gt 0) { $Issues.Add([PSCustomObject]@{ Check = 'DuplicateRef'; Severity = 'Warning'; Count = $DuplicateRefs.Count; Detail = "$($DuplicateRefs.Count) duplicate policy_id refs within nodes" }) } else { $Passed++ } # ── Check 4: Edge integrity ── $Checks++ $EdgesPath = Join-Path $TaxDir 'edges.json' $BadEdges = 0 if (Test-Path $EdgesPath) { $EdgesData = Get-Content -Raw -Path $EdgesPath | ConvertFrom-Json $ValidIds = [System.Collections.Generic.HashSet[string]]::new($AllNodeIds) if ($Registry) { foreach ($Pol in $Registry.policies) { [void]$ValidIds.Add($Pol.id) } } foreach ($Edge in $EdgesData.edges) { if (-not $ValidIds.Contains($Edge.source) -or -not $ValidIds.Contains($Edge.target)) { $BadEdges++ } } } if ($BadEdges -gt 0) { $Issues.Add([PSCustomObject]@{ Check = 'EdgeRef'; Severity = 'Error'; Count = $BadEdges; Detail = "$BadEdges edges reference non-existent nodes/policies" }) } else { $Passed++ } # ── Check 5: Embedding coverage ── $Checks++ $EmbPath = Join-Path $TaxDir 'embeddings.json' $MissingEmb = 0 if (Test-Path $EmbPath) { $EmbData = Get-Content -Raw -Path $EmbPath | ConvertFrom-Json $EmbIds = [System.Collections.Generic.HashSet[string]]::new() foreach ($Prop in $EmbData.nodes.PSObject.Properties) { [void]$EmbIds.Add($Prop.Name) } foreach ($Nid in $AllNodeIds) { if (-not $EmbIds.Contains($Nid)) { $MissingEmb++ } } if ($Registry) { foreach ($Pol in $Registry.policies) { if (-not $EmbIds.Contains($Pol.id)) { $MissingEmb++ } } } } else { $MissingEmb = $AllNodeIds.Count } if ($MissingEmb -gt 0) { $Issues.Add([PSCustomObject]@{ Check = 'Embeddings'; Severity = 'Warning'; Count = $MissingEmb; Detail = "$MissingEmb nodes/policies missing embeddings" }) } else { $Passed++ } # ── Report ── Write-Host '' Write-Host '=== Taxonomy Integrity Check ===' -ForegroundColor Cyan Write-Host " Nodes: $($AllNodeIds.Count)" -ForegroundColor White Write-Host " Policies: $(if ($Registry) { $Registry.policies.Count } else { '?' })" -ForegroundColor White Write-Host " Checks: $Checks" -ForegroundColor White Write-Host " Passed: $Passed" -ForegroundColor Green Write-Host " Issues: $($Issues.Count)" -ForegroundColor $(if ($Issues.Count -gt 0) { 'Yellow' } else { 'Green' }) if ($Issues.Count -gt 0) { Write-Host '' foreach ($Issue in $Issues) { $Color = if ($Issue.Severity -eq 'Error') { 'Red' } else { 'Yellow' } Write-Host " [$($Issue.Severity)] $($Issue.Check): $($Issue.Detail)" -ForegroundColor $Color } } else { Write-Host '' Write-Host ' All checks passed!' -ForegroundColor Green } Write-Host '' if ($PassThru) { [PSCustomObject]@{ Nodes = $AllNodeIds.Count Policies = if ($Registry) { $Registry.policies.Count } else { 0 } Checks = $Checks Passed = $Passed Issues = $Issues.Count Details = @($Issues) } } } |