Public/Repair-PovLineage.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Repair-PovLineage {
    <#
    .SYNOPSIS
        Enriches bare-string intellectual_lineage entries with descriptions, validated URLs, and categories.
    .DESCRIPTION
        Scans all taxonomy nodes' graph_attributes.intellectual_lineage arrays.
        Bare string entries (e.g., "Effective Altruism") are enriched with:
          - description: 1-2 sentence definition
          - url: Wikipedia or authoritative URL (validated via HEAD request)
          - category: philosophical_movement, economic_theory, etc.

        Processes unique values in batch (not per-node) to minimize AI calls.
        Caches results in a lineage-enrichments.json file for incremental re-runs.

        Before enrichment, near-duplicate values are clustered by embedding cosine
        similarity and references to non-canonical members are rewritten to the
        canonical (most frequent) member.

        URL validation records failures (name, url, status) in lineage-url-errors.json
        next to the cache; -FixUrls consumes that file.
    .PARAMETER POV
        Filter to a specific POV file.
    .PARAMETER Model
        AI model for enrichment. Default: gemini-3.1-flash-lite-preview.
    .PARAMETER ApiKey
        AI API key. Resolved from env if omitted.
    .PARAMETER BatchSize
        Number of lineage values per AI call. Default: 25.
    .PARAMETER SkipUrlValidation
        Skip HTTP HEAD URL validation (faster for testing).
    .PARAMETER FixUrls
        Repair mode. Reads lineage-url-errors.json, retries rate-limited (429) URLs,
        asks the AI for replacements for 404 URLs, then updates the cache and any
        already-enriched taxonomy entries. No enrichment is performed in this mode.
    .EXAMPLE
        Repair-PovLineage -WhatIf
    .EXAMPLE
        Repair-PovLineage -POV accelerationist -BatchSize 10
    .EXAMPLE
        Repair-PovLineage -SkipUrlValidation
    .EXAMPLE
        Repair-PovLineage -FixUrls
    #>
    [CmdletBinding(SupportsShouldProcess)]
    param(
        [ValidateSet('accelerationist', 'safetyist', 'skeptic', 'situations')]
        [string]$POV,

        [ValidateScript({ Test-AIModelId $_ })]
        [ArgumentCompleter({ param($cmd, $param, $word) $script:ValidModelIds | Where-Object { $_ -like "$word*" } })]
        [string]$Model = 'gemini-3.1-flash-lite-preview',

        [string]$ApiKey,

        [ValidateRange(5, 50)]
        [int]$BatchSize = 25,

        [switch]$SkipUrlValidation,

        [switch]$FixUrls
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    $AllPovs = @('accelerationist', 'safetyist', 'skeptic', 'situations')

    $TaxDir   = Get-TaxonomyDir
    $CacheDir = Join-Path (Get-DataRoot) 'calibration'
    if (-not (Test-Path $CacheDir)) {
        $null = New-Item -ItemType Directory -Path $CacheDir -Force
    }
    $CachePath     = Join-Path $CacheDir 'lineage-enrichments.json'
    $ErrorFilePath = Join-Path $CacheDir 'lineage-url-errors.json'

    # ── Local helpers ─────────────────────────────────────────────────────────
    # Reads a property from a deserialized object without tripping StrictMode
    # when the property is absent (AI output and taxonomy entries are not
    # guaranteed to carry every field).
    $GetProp = {
        param($Object, [string]$Name)
        if ($null -eq $Object) { return $null }
        $Prop = $Object.PSObject.Properties[$Name]
        if ($Prop) { return $Prop.Value }
        return $null
    }

    # Maps a model id to the backend name handed to Resolve-AIApiKey.
    # Shared by both modes so they cannot drift apart.
    $ResolveBackend = {
        param([string]$ModelId)
        if ($ModelId -match '^claude') { return 'claude' }
        if ($ModelId -match '^openai') { return 'openai' }
        return 'gemini'
    }

    # ── Load cache ────────────────────────────────────────────────────────────
    $Cache = @{}
    if (Test-Path $CachePath) {
        $CacheData = Get-Content $CachePath -Raw | ConvertFrom-Json -AsHashtable
        if ($CacheData) { $Cache = $CacheData }
        Write-Verbose "Loaded $($Cache.Count) cached enrichments"
    }

    # ── FixUrls mode: repair broken URLs from error file ──────────────────────
    if ($FixUrls) {
        if (-not (Test-Path $ErrorFilePath)) {
            Write-Warning "No URL error file found at $ErrorFilePath — run without -FixUrls first to generate it"
            return
        }

        $UrlErrors = Get-Content $ErrorFilePath -Raw | ConvertFrom-Json
        $Err404 = @($UrlErrors | Where-Object { $_.status -eq 404 })
        $Err429 = @($UrlErrors | Where-Object { $_.status -eq 429 })

        Write-Host "=== Fix Broken URLs ===" -ForegroundColor Cyan
        Write-Host " 404 (need new URL): $($Err404.Count)"
        Write-Host " 429 (retry): $($Err429.Count)"

        # Retry 429s with longer delay
        if ($Err429.Count -gt 0 -and -not $WhatIfPreference) {
            Write-Host "`nRetrying $($Err429.Count) rate-limited URLs..." -ForegroundColor Gray
            $Still429 = [System.Collections.Generic.List[object]]::new()
            foreach ($Err in $Err429) {
                Start-Sleep -Seconds 3
                try {
                    $Resp = Invoke-WebRequest -Uri $Err.url -Method Head -TimeoutSec 10 -ErrorAction Stop -UseBasicParsing
                    if ($Resp.StatusCode -eq 200) {
                        Write-Host " OK: $($Err.name)" -ForegroundColor Green
                    }
                    else {
                        $Still429.Add($Err)
                    }
                }
                catch {
                    $Still429.Add($Err)
                }
            }
            if ($Still429.Count -gt 0) {
                Write-Host " Still failing: $($Still429.Count)" -ForegroundColor Yellow
            }
        }

        # Fix 404s via AI
        if ($Err404.Count -gt 0) {
            if ($WhatIfPreference) {
                Write-Host "`nWhatIf: Would fix $($Err404.Count) broken URLs in $([Math]::Ceiling($Err404.Count / $BatchSize)) AI batches"
                Write-Host "Sample broken URLs:"
                $Err404 | Select-Object -First 10 | ForEach-Object {
                    Write-Host " $($_.name): $($_.url)" -ForegroundColor DarkGray
                }
                return
            }

            $Backend     = & $ResolveBackend $Model
            $ResolvedKey = Resolve-AIApiKey -ExplicitKey $ApiKey -Backend $Backend
            if ([string]::IsNullOrWhiteSpace($ResolvedKey)) {
                Write-Warning "No API key — cannot fix URLs"
                return
            }

            $FixedCount = 0
            $UrlBatches = [Math]::Ceiling($Err404.Count / $BatchSize)
            for ($bi = 0; $bi -lt $Err404.Count; $bi += $BatchSize) {
                $Batch    = @($Err404[$bi..[Math]::Min($bi + $BatchSize - 1, $Err404.Count - 1)])
                $BatchNum = [Math]::Floor($bi / $BatchSize) + 1
                Write-Host " URL batch $BatchNum/$UrlBatches ($($Batch.Count) URLs)..." -ForegroundColor Gray -NoNewline

                $UrlList = ($Batch | ForEach-Object { "- $($_.name) (broken: $($_.url))" }) -join "`n"
                $Prompt = @"
For each intellectual lineage entry, find the correct URL. Prefer Wikipedia.
If no Wikipedia article exists, suggest Stanford Encyclopedia of Philosophy, official project page, or seminal paper DOI.

Broken URLs to fix:
$UrlList

Return JSON array: [{"name": "...", "url": "https://..."}]
No markdown, no explanation.
"@
                try {
                    $AiArgs = @{
                        Prompt      = $Prompt
                        Model       = $Model
                        ApiKey      = $ResolvedKey
                        Temperature = 0.1
                        MaxTokens   = 4096
                        JsonMode    = $true
                        TimeoutSec  = 30
                    }
                    $Result = Invoke-AIApi @AiArgs
                    if ($Result -and $Result.Text) {
                        # Strip markdown fences in case the model ignored the instruction.
                        $Fixed = ($Result.Text -replace '^\s*```json\s*', '' -replace '\s*```\s*$', '') | ConvertFrom-Json
                        foreach ($F in @($Fixed)) {
                            $FName = & $GetProp $F 'name'
                            $FUrl  = & $GetProp $F 'url'
                            if ($FName -and $FUrl -and $Cache.ContainsKey($FName)) {
                                $Cache[$FName]['url'] = $FUrl
                                # The old status described the old URL; drop it so the
                                # next validation pass re-checks the replacement.
                                if ($Cache[$FName].ContainsKey('url_status')) {
                                    $Cache[$FName].Remove('url_status')
                                }
                                $FixedCount++
                            }
                        }
                        Write-Host " $(@($Fixed).Count) fixed" -ForegroundColor Green
                    }
                }
                catch {
                    Write-Host " failed: $($_.Exception.Message)" -ForegroundColor Red
                }

                if ($BatchNum -lt $UrlBatches) { Start-Sleep -Seconds 2 }
            }

            # Save updated cache
            $Cache | ConvertTo-Json -Depth 5 | Set-Content -Path $CachePath -Encoding UTF8
            Write-Host "`nFixed $FixedCount URLs in cache" -ForegroundColor Green

            # Update taxonomy files with fixed URLs. Only already-enriched (object)
            # entries carry a url; they are mutated in place.
            $TaxUpdated = 0
            foreach ($PovName in $AllPovs) {
                $FilePath = Join-Path $TaxDir "$PovName.json"
                if (-not (Test-Path $FilePath)) { continue }

                $Data   = Get-Content $FilePath -Raw | ConvertFrom-Json
                $PovMod = $false
                foreach ($Node in $Data.nodes) {
                    if (-not $Node.PSObject.Properties['graph_attributes'] -or -not $Node.graph_attributes) { continue }
                    $GA = $Node.graph_attributes
                    if (-not $GA.PSObject.Properties['intellectual_lineage']) { continue }

                    $Changed = $false
                    foreach ($Entry in @($GA.intellectual_lineage)) {
                        if ($Entry -isnot [PSCustomObject]) { continue }
                        $EntryName = & $GetProp $Entry 'name'
                        if (-not $EntryName -or -not $Cache.ContainsKey($EntryName)) { continue }

                        $CachedUrl = $Cache[$EntryName]['url']
                        if (-not $CachedUrl) { continue }

                        if ($Entry.PSObject.Properties['url']) {
                            if ($Entry.url -ne $CachedUrl) {
                                $Entry.url = $CachedUrl
                                $Changed = $true
                            }
                        }
                        else {
                            # Entry was enriched without a url; attach the repaired one.
                            $Entry | Add-Member -NotePropertyName 'url' -NotePropertyValue $CachedUrl
                            $Changed = $true
                        }
                    }
                    if ($Changed) { $PovMod = $true; $TaxUpdated++ }
                }

                if ($PovMod) {
                    $Data | ConvertTo-Json -Depth 20 | Set-Content -Path $FilePath -Encoding UTF8
                }
            }
            Write-Host "Updated $TaxUpdated node lineage entries in taxonomy files" -ForegroundColor Green
        }
        return
    }

    # ── Collect unique bare-string lineage values (and their frequencies) ─────
    $PovFiles = $AllPovs
    if ($POV) { $PovFiles = @($POV) }

    $UniqueValues = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
    $TaxData = @{}
    $FreqMap = @{}   # value → number of occurrences across all loaded nodes

    foreach ($PovName in $PovFiles) {
        $FilePath = Join-Path $TaxDir "$PovName.json"
        if (-not (Test-Path $FilePath)) { continue }

        $Data = Get-Content $FilePath -Raw | ConvertFrom-Json
        $TaxData[$PovName] = $Data

        foreach ($Node in $Data.nodes) {
            if (-not $Node.PSObject.Properties['graph_attributes'] -or -not $Node.graph_attributes) { continue }
            $GA = $Node.graph_attributes
            if (-not $GA.PSObject.Properties['intellectual_lineage']) { continue }

            foreach ($Entry in @($GA.intellectual_lineage)) {
                if ($Entry -isnot [string]) { continue }
                $FreqMap[$Entry] = ($FreqMap[$Entry] ?? 0) + 1
                if (-not [string]::IsNullOrWhiteSpace($Entry)) {
                    [void]$UniqueValues.Add($Entry)
                }
            }
        }
    }

    Write-Host "Unique lineage values: $($UniqueValues.Count)" -ForegroundColor Cyan

    # ── Phase 0: Dedup via embedding similarity ───────────────────────────────
    # Cluster near-duplicates (cosine ≥ 0.85), pick canonical representative,
    # replace all references to non-canonical members.
    $DedupThreshold = 0.85
    $UniqueList     = @($UniqueValues)
    $DedupMap       = @{}   # non-canonical → canonical
    $ClustersMerged = 0

    $Embeddings = $null
    if ($UniqueList.Count -gt 0) {
        Write-Host "Computing embeddings for dedup..." -ForegroundColor Gray
        $Embeddings = Get-TextEmbedding -Texts $UniqueList -Ids $UniqueList
    }

    if ($null -ne $Embeddings -and $Embeddings.Count -gt 0) {
        Write-Host "Clustering at cosine >= $DedupThreshold..." -ForegroundColor Gray

        # Simple greedy clustering: for each value, check if it's similar to an
        # existing canonical. $Canonicals and $CanonicalVecs are parallel lists and
        # must always have the same length.
        $Canonicals    = [System.Collections.Generic.List[string]]::new()
        $CanonicalVecs = [System.Collections.Generic.List[double[]]]::new()

        foreach ($Val in $UniqueList) {
            if (-not $Embeddings.ContainsKey($Val)) {
                # No vector: the value is its own canonical. A null placeholder keeps
                # the parallel lists index-aligned.
                $Canonicals.Add($Val)
                $CanonicalVecs.Add($null)
                continue
            }

            $Vec    = $Embeddings[$Val]
            $Merged = $false

            for ($j = 0; $j -lt $Canonicals.Count; $j++) {
                $CanVec = $CanonicalVecs[$j]
                if ($null -eq $CanVec) { continue }   # canonical without an embedding

                # Cosine similarity (vectors are normalized)
                $Dot = 0.0
                for ($k = 0; $k -lt $Vec.Count; $k++) { $Dot += $Vec[$k] * $CanVec[$k] }
                if ($Dot -lt $DedupThreshold) { continue }

                # Guard: don't merge parenthetical-qualified variants — qualifiers
                # are semantically meaningful (e.g., "AI alignment research (positive vision)"
                # vs "AI alignment research" represent different intellectual traditions)
                $CanName = $Canonicals[$j]

                # Case 1: one is qualified, other is the bare base
                $IsParenVariant =
                    ($Val -match '^(.+?)\s*\(' -and $CanName -eq $Matches[1].Trim()) -or
                    ($CanName -match '^(.+?)\s*\(' -and $Val -eq $Matches[1].Trim())

                # Case 2: both are qualified variants of the same base (different qualifiers)
                if (-not $IsParenVariant -and $Val -match '^(.+?)\s*\(' -and $CanName -match '^(.+?)\s*\(') {
                    $ValBase = ($Val -replace '\s*\([^)]+\)\s*$', '').Trim()
                    $CanBase = ($CanName -replace '\s*\([^)]+\)\s*$', '').Trim()
                    if ($ValBase -eq $CanBase) { $IsParenVariant = $true }
                }
                if ($IsParenVariant) { continue }

                # Merge: pick the one with higher frequency as canonical
                $CanFreq = $FreqMap[$CanName] ?? 0
                $ValFreq = $FreqMap[$Val] ?? 0
                if ($ValFreq -gt $CanFreq) {
                    # New value is more popular — swap canonical. Values merged into
                    # the old canonical earlier must follow it to the new one,
                    # otherwise they would be rewritten to a retired name.
                    foreach ($Alias in @($DedupMap.Keys)) {
                        if ($DedupMap[$Alias] -eq $CanName) { $DedupMap[$Alias] = $Val }
                    }
                    $DedupMap[$CanName] = $Val
                    $Canonicals[$j]     = $Val
                    $CanonicalVecs[$j]  = $Vec
                }
                else {
                    $DedupMap[$Val] = $CanName
                }

                $ClustersMerged++
                $Merged = $true
                break
            }

            if (-not $Merged) {
                $Canonicals.Add($Val)
                $CanonicalVecs.Add($Vec)
            }
        }

        Write-Host "Dedup: $($UniqueList.Count) → $($Canonicals.Count) canonical values ($ClustersMerged merged)" -ForegroundColor Green

        if ($ClustersMerged -gt 0) {
            # Show sample merges
            $SampleMerges = @($DedupMap.GetEnumerator() | Select-Object -First 10)
            Write-Host " Sample merges:" -ForegroundColor Gray
            foreach ($M in $SampleMerges) {
                Write-Host " '$($M.Key)' → '$($M.Value)'" -ForegroundColor DarkGray
            }
            if ($DedupMap.Count -gt 10) {
                Write-Host " ... and $($DedupMap.Count - 10) more" -ForegroundColor DarkGray
            }

            # Apply dedup to taxonomy files (replace non-canonical references)
            if (-not $WhatIfPreference) {
                foreach ($PovName in $TaxData.Keys) {
                    $Data = $TaxData[$PovName]
                    $PovModified = $false

                    foreach ($Node in $Data.nodes) {
                        if (-not $Node.PSObject.Properties['graph_attributes'] -or -not $Node.graph_attributes) { continue }
                        $GA = $Node.graph_attributes
                        if (-not $GA.PSObject.Properties['intellectual_lineage']) { continue }

                        $Changed = $false
                        $NewLin = @(foreach ($Entry in @($GA.intellectual_lineage)) {
                            if ($Entry -is [string] -and $DedupMap.ContainsKey($Entry)) {
                                $Changed = $true
                                $DedupMap[$Entry]
                            }
                            else {
                                $Entry
                            }
                        })
                        if ($Changed) {
                            $GA.intellectual_lineage = $NewLin
                            $PovModified = $true
                        }
                    }

                    if ($PovModified) {
                        $FilePath = Join-Path $TaxDir "$PovName.json"
                        $Data | ConvertTo-Json -Depth 20 | Set-Content -Path $FilePath -Encoding UTF8
                    }
                }
                Write-Host " Dedup references updated in taxonomy files" -ForegroundColor Green
            }

            # Update UniqueValues to canonicals only
            $UniqueValues = [System.Collections.Generic.HashSet[string]]::new(
                [string[]]@($Canonicals), [System.StringComparer]::OrdinalIgnoreCase)
        }
    }
    else {
        Write-Host " Embedding unavailable — skipping dedup" -ForegroundColor Yellow
    }

    $NeedEnrichment = @($UniqueValues | Where-Object { -not $Cache.ContainsKey($_) })
    $AlreadyCached  = $UniqueValues.Count - $NeedEnrichment.Count

    Write-Host "Post-dedup unique: $($UniqueValues.Count)"
    Write-Host "Already cached: $AlreadyCached"
    Write-Host "Need enrichment: $($NeedEnrichment.Count)"

    # ── WhatIf: print the plan and stop before any AI call or file write ──────
    if ($WhatIfPreference) {
        $Batches = [Math]::Ceiling($NeedEnrichment.Count / $BatchSize)

        Write-Host "`n── Plan ────────────────────────────────────────" -ForegroundColor Yellow
        if ($ClustersMerged -gt 0) {
            Write-Host " Dedup: $ClustersMerged near-duplicates merged (cosine >= $DedupThreshold)"
        }
        Write-Host " Enrich: $($NeedEnrichment.Count) values in $Batches AI batches ($BatchSize/batch)"
        Write-Host " Validate: $($UniqueValues.Count) URLs via HTTP HEAD"
        Write-Host " Update: $($PovFiles.Count) taxonomy files"
        Write-Host " Cache: $CachePath"
        Write-Host " Model: $Model | Temperature: 0.2"
        Write-Host " Est. cost: ~`$$([Math]::Round($Batches * 0.02, 2)) (Gemini free tier)"

        # Per-POV breakdown (taxonomy data is untouched in WhatIf mode, so the
        # already-loaded copies are equivalent to re-reading the files)
        Write-Host "`n── Per-POV Breakdown ───────────────────────────" -ForegroundColor Yellow
        foreach ($PovName in $PovFiles) {
            if (-not $TaxData.ContainsKey($PovName)) { continue }

            $NodesWithLin = 0
            $EntryCount   = 0
            foreach ($N in $TaxData[$PovName].nodes) {
                if (-not $N.PSObject.Properties['graph_attributes'] -or -not $N.graph_attributes) { continue }
                $GA = $N.graph_attributes
                if (-not $GA.PSObject.Properties['intellectual_lineage']) { continue }

                $Bare = @(@($GA.intellectual_lineage) | Where-Object { $_ -is [string] })
                if ($Bare.Count -gt 0) { $NodesWithLin++; $EntryCount += $Bare.Count }
            }
            Write-Host " $PovName`: $NodesWithLin nodes, $EntryCount bare entries"
        }

        # Sample values
        Write-Host "`n── Sample Values (first 15) ────────────────────" -ForegroundColor Yellow
        $NeedEnrichment | Select-Object -First 15 | ForEach-Object {
            Write-Host " $_" -ForegroundColor Gray
        }
        if ($NeedEnrichment.Count -gt 15) {
            Write-Host " ... and $($NeedEnrichment.Count - 15) more" -ForegroundColor DarkGray
        }

        # Target format example
        Write-Host "`n── Target Format ───────────────────────────────" -ForegroundColor Yellow
        Write-Host ' "Effective Altruism (long-termism)" →' -ForegroundColor DarkGray
        Write-Host ' {' -ForegroundColor Gray
        Write-Host ' "name": "Effective Altruism (long-termism)",' -ForegroundColor Gray
        Write-Host ' "description": "A philosophical movement applying evidence-based...",' -ForegroundColor Gray
        Write-Host ' "url": "https://en.wikipedia.org/wiki/Effective_altruism",' -ForegroundColor Gray
        Write-Host ' "category": "philosophical_movement"' -ForegroundColor Gray
        Write-Host ' }' -ForegroundColor Gray
        return
    }

    # ── Resolve API key ───────────────────────────────────────────────────────
    if ($NeedEnrichment.Count -gt 0) {
        $Backend     = & $ResolveBackend $Model
        $ResolvedKey = Resolve-AIApiKey -ExplicitKey $ApiKey -Backend $Backend
        if ([string]::IsNullOrWhiteSpace($ResolvedKey)) {
            Write-Warning "No API key — can only apply cached enrichments"
            $NeedEnrichment = @()
        }
    }

    # ── Batch AI enrichment ───────────────────────────────────────────────────
    $BatchNum     = 0
    $EnrichedNew  = 0   # entries actually added to the cache in this run
    $TotalBatches = [Math]::Ceiling($NeedEnrichment.Count / $BatchSize)

    for ($i = 0; $i -lt $NeedEnrichment.Count; $i += $BatchSize) {
        $BatchNum++
        $Batch = @($NeedEnrichment[$i..[Math]::Min($i + $BatchSize - 1, $NeedEnrichment.Count - 1)])
        Write-Host " Batch $BatchNum/$TotalBatches ($($Batch.Count) values)..." -ForegroundColor Gray -NoNewline

        $BatchList = ($Batch | ForEach-Object { "- $_" }) -join "`n"
        $Prompt = @"
Enrich each intellectual lineage entry with a description, URL, and category.

For each entry, provide:
- name: the original name (verbatim)
- description: 1-2 sentence definition accessible to a policy audience
- url: Wikipedia or authoritative URL (prefer Wikipedia when available)
- category: one of: philosophical_movement, economic_theory, political_philosophy, social_theory, scientific_paradigm, legal_framework, technology_movement, ethical_framework, academic_discipline, cultural_movement, other

Entries to enrich:
$BatchList

Return a JSON array of objects. No markdown fences, no explanation.
Example: [{"name":"Effective Altruism","description":"A philosophical movement...","url":"https://en.wikipedia.org/wiki/Effective_altruism","category":"philosophical_movement"}]
"@

        try {
            $AiArgs = @{
                Prompt      = $Prompt
                Model       = $Model
                ApiKey      = $ResolvedKey
                Temperature = 0.2
                MaxTokens   = 8192
                JsonMode    = $true
                TimeoutSec  = 60
            }
            $Result = Invoke-AIApi @AiArgs

            if ($Result -and $Result.Text) {
                $CleanText = $Result.Text -replace '^\s*```json\s*', '' -replace '\s*```\s*$', ''
                $Enriched  = $CleanText | ConvertFrom-Json

                foreach ($E in @($Enriched)) {
                    $EName = & $GetProp $E 'name'
                    if (-not $EName) { continue }
                    $EDescription = & $GetProp $E 'description'
                    $EUrl         = & $GetProp $E 'url'
                    $ECategory    = & $GetProp $E 'category'

                    # Dedup guard: if enriched name is a near-duplicate of an existing
                    # cache key (same category), reuse the existing key instead of creating
                    # a new entry (prevents duplicate lineage entries — t/330)
                    $ExistingMatch = $null
                    # Word set of the new name is loop-invariant; compute it once.
                    $W1 = @($EName.ToLower() -split '\W+' | Where-Object { $_.Length -gt 2 })
                    if ($W1.Count -gt 0) {
                        foreach ($CKey in @($Cache.Keys)) {
                            if ($Cache[$CKey]['category'] -ne $ECategory) { continue }

                            # Quick string similarity check (Jaccard on words)
                            $W2 = @($CKey.ToLower() -split '\W+' | Where-Object { $_.Length -gt 2 })
                            if ($W2.Count -eq 0) { continue }

                            $Inter = @($W1 | Where-Object { $_ -in $W2 }).Count
                            $Union = @($W1 + $W2 | Select-Object -Unique).Count
                            if ($Union -gt 0 -and ($Inter / $Union) -gt 0.75) {
                                $ExistingMatch = $CKey
                                break
                            }
                        }
                    }

                    if ($ExistingMatch) {
                        # Map enriched name to existing canonical; the apply phase
                        # resolves bare strings through this map.
                        if ($EName -ne $ExistingMatch) {
                            $DedupMap[$EName] = $ExistingMatch
                            Write-Verbose " Dedup guard: '$EName' → existing '$ExistingMatch'"
                        }
                    }
                    else {
                        $Cache[$EName] = @{
                            description = $EDescription
                            url         = $EUrl
                            category    = $ECategory
                        }
                        $EnrichedNew++
                    }
                }
                Write-Host " $(@($Enriched).Count) enriched" -ForegroundColor Green
            }
            else {
                Write-Host " no response" -ForegroundColor Red
            }
        }
        catch {
            # Best effort: a failed batch is reported and skipped; its values stay
            # uncached and are retried on the next run.
            Write-Host " failed: $($_.Exception.Message)" -ForegroundColor Red
        }

        # Brief pause between batches to avoid rate limits
        if ($BatchNum -lt $TotalBatches) { Start-Sleep -Seconds 2 }
    }

    # ── Save cache ────────────────────────────────────────────────────────────
    if ($Cache.Count -gt 0) {
        $Cache | ConvertTo-Json -Depth 5 | Set-Content -Path $CachePath -Encoding UTF8
        Write-Host "Cache saved: $($Cache.Count) entries → $CachePath" -ForegroundColor Green
    }

    # ── URL validation ────────────────────────────────────────────────────────
    $UrlValid = 0; $UrlInvalid = 0; $UrlSkipped = 0
    if (-not $SkipUrlValidation) {
        Write-Host "`nValidating URLs..." -ForegroundColor Cyan
        $UrlErrorList = [System.Collections.Generic.List[object]]::new()

        foreach ($Key in @($Cache.Keys)) {
            $Entry = $Cache[$Key]
            $Url   = $Entry['url']
            if (-not $Url -or $Url -notmatch '^https?://') {
                $UrlSkipped++
                continue
            }

            $Status = $null
            try {
                $Resp = Invoke-WebRequest -Uri $Url -Method Head -TimeoutSec 5 -ErrorAction Stop -UseBasicParsing
                if ($Resp.StatusCode -ne 200) { $Status = $Resp.StatusCode }
            }
            catch {
                # Keep the numeric HTTP status when the failure carries a response
                # (e.g. 404, 429) so -FixUrls can tell them apart; anything else
                # (DNS failure, timeout) is recorded as 'error'.
                $Status = 'error'
                $RespProp = $_.Exception.PSObject.Properties['Response']
                if ($RespProp -and $null -ne $RespProp.Value) {
                    $Status = [int]$RespProp.Value.StatusCode
                }
            }

            if ($null -eq $Status) {
                $UrlValid++
                # Clear a failure recorded by an earlier run.
                if ($Entry.ContainsKey('url_status')) { $Entry.Remove('url_status') }
            }
            else {
                $UrlInvalid++
                $Entry['url_status'] = $Status
                $UrlErrorList.Add([PSCustomObject]@{ name = $Key; url = $Url; status = $Status })
            }
        }
        Write-Host " Valid: $UrlValid | Invalid: $UrlInvalid | Skipped: $UrlSkipped"

        # Re-save cache with url_status
        $Cache | ConvertTo-Json -Depth 5 | Set-Content -Path $CachePath -Encoding UTF8

        # Persist failures in the shape -FixUrls reads (name, url, status).
        # -InputObject keeps a single failure serialized as a one-element array.
        ConvertTo-Json -InputObject @($UrlErrorList) -Depth 3 |
            Set-Content -Path $ErrorFilePath -Encoding UTF8
    }

    # ── Apply enrichments to taxonomy files ───────────────────────────────────
    # Resolves a bare lineage string to its cache key: a direct hit, or a hit via
    # the dedup map (near-duplicates folded onto an existing entry). Returns $null
    # when there is nothing to apply.
    $ResolveCacheKey = {
        param([string]$Name)
        if ($Cache.ContainsKey($Name)) { return $Name }
        if ($DedupMap.ContainsKey($Name) -and $Cache.ContainsKey($DedupMap[$Name])) {
            return $DedupMap[$Name]
        }
        return $null
    }

    Write-Host "`nApplying enrichments to taxonomy files..." -ForegroundColor Cyan
    $TotalUpdated = 0

    foreach ($PovName in $TaxData.Keys) {
        $Data = $TaxData[$PovName]
        $Modified = $false

        foreach ($Node in $Data.nodes) {
            if (-not $Node.PSObject.Properties['graph_attributes'] -or -not $Node.graph_attributes) { continue }
            $GA = $Node.graph_attributes
            if (-not $GA.PSObject.Properties['intellectual_lineage']) { continue }

            $NeedUpdate = $false
            # Replace bare strings with rich objects; everything else passes through.
            $NewLin = @(foreach ($Entry in @($GA.intellectual_lineage)) {
                $CacheKey = $null
                if ($Entry -is [string]) { $CacheKey = & $ResolveCacheKey $Entry }

                if ($CacheKey) {
                    $Cached = $Cache[$CacheKey]
                    $NeedUpdate = $true
                    [ordered]@{
                        name        = $CacheKey
                        description = $Cached['description']
                        url         = $Cached['url']
                        category    = $Cached['category']
                    }
                }
                else {
                    # Bare string with no cache hit, or already a rich object
                    $Entry
                }
            })
            if (-not $NeedUpdate) { continue }

            if ($PSCmdlet.ShouldProcess("$($Node.id) ($($NewLin.Count) lineage entries)", 'Enrich lineage')) {
                $GA.intellectual_lineage = $NewLin
                $Modified = $true
                $TotalUpdated++
            }
        }

        if ($Modified) {
            $FilePath = Join-Path $TaxDir "$PovName.json"
            $Data | ConvertTo-Json -Depth 20 | Set-Content -Path $FilePath -Encoding UTF8
            Write-Host " Saved $PovName.json" -ForegroundColor Green
        }
    }

    Write-Host "`n=== SUMMARY ===" -ForegroundColor Cyan
    Write-Host " Unique values: $($UniqueValues.Count)"
    Write-Host " Enriched (new): $EnrichedNew"
    Write-Host " From cache: $AlreadyCached"
    Write-Host " Nodes updated: $TotalUpdated"
    if (-not $SkipUrlValidation) {
        Write-Host " URLs valid: $UrlValid | invalid: $UrlInvalid"
    }
}