Public/Get-AITSource.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Get-AITSource {
    <#
    .SYNOPSIS
        Lists and filters source documents in the repository.

    .DESCRIPTION
        Emits one AITSource object for every source under the directory returned by
        Get-SourcesDir that passes all supplied filters. Supports filtering by document
        ID (wildcard), title (wildcard), POV tag, topic tag, summary status, source type,
        and ingestion date (today). With no parameters, all sources are listed.

        Two code paths produce the results:

        * Index fast path - when '_index.json' exists in the sources directory and its
          LastWriteTimeUtc is not older than the newest metadata.json one level below,
          the sources are read from that single file. Objects built this way carry only
          the fields present in the index; Url, ImportTime, SourceTime, ArchiveStatus,
          SummaryVersion, SummaryUpdated, ProvenanceStatus, ResolvedUrl, MDPath and
          ModelInfo are $null, and Authors, RolodexAuthorIds and Provenance are empty.
        * Full scan - otherwise (index missing, stale, unparsable, or lacking a
          'sources' property) each source folder's metadata.json is read, together with
          '<id>.json' from the directory returned by Get-SummariesDir when it exists.

        Output is sorted by DatePublished descending; sources whose DatePublished is
        missing or cannot be parsed as a date sort last.

    .PARAMETER DocId
        Wildcard pattern matched against the source document ID.

    .PARAMETER Title
        One or more wildcard patterns matched against the source title. A source
        matches if its title matches any of the supplied patterns.

    .PARAMETER Pov
        Filter to sources whose pov_tags contain this value.

    .PARAMETER Topic
        Filter to sources whose topic_tags contain this value.

    .PARAMETER Status
        Filter to sources with this exact summary_status.

    .PARAMETER SourceType
        Filter to sources with this exact source_type.

    .PARAMETER Today
        Return only sources whose date_ingested is today (compared as 'yyyy-MM-dd').

    .OUTPUTS
        AITSource (PSCustomObject with PSTypeName 'AITSource').

    .EXAMPLE
        Get-AITSource
        # Lists all sources sorted by date.

    .EXAMPLE
        Get-AITSource '*china*'
        # Sources whose ID matches *china*.

    .EXAMPLE
        Get-AITSource -Pov safetyist
        # Sources tagged with the safetyist POV.

    .EXAMPLE
        Get-AITSource -Title '*alignment*'
        # Sources whose title matches *alignment*.

    .EXAMPLE
        Get-AITSource -Title '*safety*', '*risk*'
        # Sources whose title matches either pattern.

    .EXAMPLE
        Get-AITSource -Status pending
        # Sources whose summary is pending.

    .EXAMPLE
        Get-AITSource -Today
        # Sources ingested today.
    #>
    [CmdletBinding()]
    [OutputType('AITSource')]
    param(
        [Parameter(Position = 0)]
        [string]$DocId,

        [string[]]$Title,

        [string]$Pov,

        [string]$Topic,

        [string]$Status,

        [string]$SourceType,

        [switch]$Today
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    $SourcesDir = Get-SourcesDir
    if (-not (Test-Path $SourcesDir)) {
        Write-Warning "Sources directory not found: $SourcesDir"
        return
    }

    # Computed once so both code paths compare against the same day, even if the
    # command happens to run across midnight.
    $TodayStr = if ($Today) { Get-Date -Format 'yyyy-MM-dd' } else { $null }

    # ── Local helpers ────────────────────────────────────────────────────────
    # All helpers go through PSObject.Properties because StrictMode turns a read of a
    # missing property into a terminating error, and most JSON fields are optional.

    # Emits the elements of an optional list-valued property; emits nothing when the
    # property is absent or null. Callers wrap the call in @() to always get an array.
    $ReadList = {
        param($Record, [string]$Name)
        $Member = $Record.PSObject.Properties[$Name]
        if ($Member -and $null -ne $Member.Value) { $Member.Value }
    }

    # Returns $true when a record (index entry or parsed metadata.json) passes every
    # filter the caller supplied. The record must have an 'id' property.
    $TestSourceFilter = {
        param($Record)

        $Fields = $Record.PSObject.Properties

        if ($DocId -and $Record.id -notlike $DocId) { return $false }

        if ($Title) {
            $SrcTitle = if ($Fields['title']) { $Record.title } else { $null }
            if (-not $SrcTitle) { return $false }
            $TitleMatch = $false
            foreach ($Pattern in $Title) {
                if ($SrcTitle -like $Pattern) { $TitleMatch = $true; break }
            }
            if (-not $TitleMatch) { return $false }
        }

        if ($Pov -and (@(& $ReadList $Record 'pov_tags') -notcontains $Pov)) { return $false }
        if ($Topic -and (@(& $ReadList $Record 'topic_tags') -notcontains $Topic)) { return $false }

        if ($Status) {
            $SumStatus = if ($Fields['summary_status']) { $Record.summary_status } else { $null }
            if ($SumStatus -ne $Status) { return $false }
        }

        if ($SourceType) {
            $SrcType = if ($Fields['source_type']) { $Record.source_type } else { $null }
            if ($SrcType -ne $SourceType) { return $false }
        }

        if ($Today) {
            $Ingested = if ($Fields['date_ingested']) { $Record.date_ingested } else { $null }
            if ($Ingested -ne $TodayStr) { return $false }
        }

        return $true
    }

    # Builds the per-POV claim counters from a record's optional 'claims_by_pov' object;
    # any counter that is not present stays 0.
    $ReadClaimsByPov = {
        param($Record)

        $Counts = [PSCustomObject]@{ Accelerationist = 0; Safetyist = 0; Skeptic = 0; Situations = 0 }
        $Fields = $Record.PSObject.Properties
        if ($Fields['claims_by_pov'] -and $Record.claims_by_pov) {
            $Cbp = $Record.claims_by_pov
            $CbpProps = $Cbp.PSObject.Properties
            if ($CbpProps['accelerationist']) { $Counts.Accelerationist = [int]$Cbp.accelerationist }
            if ($CbpProps['safetyist'])       { $Counts.Safetyist       = [int]$Cbp.safetyist }
            if ($CbpProps['skeptic'])         { $Counts.Skeptic         = [int]$Cbp.skeptic }
            if ($CbpProps['situations'])      { $Counts.Situations      = [int]$Cbp.situations }
        }
        $Counts
    }

    # ModelInfo property name -> key inside the summary's 'model_info' object.
    $ModelInfoFields = [ordered]@{
        Model                   = 'model'
        Temperature             = 'temperature'
        MaxTokens               = 'max_tokens'
        ExtractionMode          = 'extraction_mode'
        TaxonomyFilter          = 'taxonomy_filter'
        TaxonomyNodes           = 'taxonomy_nodes'
        FireConfidenceThreshold = 'fire_confidence_threshold'
        Chunked                 = 'chunked'
        ChunkCount              = 'chunk_count'
        FireStats               = 'fire_stats'
    }

    # ── Fast path: read from _index.json when fresh ──────────────────────────
    $IndexPath = Join-Path $SourcesDir '_index.json'
    $UseIndex = $false
    if (Test-Path $IndexPath) {
        $IndexMTime = (Get-Item $IndexPath).LastWriteTimeUtc
        $NewestMeta = Get-ChildItem -Path $SourcesDir -Filter 'metadata.json' -Recurse -Depth 1 -ErrorAction SilentlyContinue |
            Sort-Object LastWriteTimeUtc -Descending |
            Select-Object -First 1
        if ($null -eq $NewestMeta -or $IndexMTime -ge $NewestMeta.LastWriteTimeUtc) {
            $UseIndex = $true
        }
        else {
            Write-Verbose "Index is stale — falling back to folder scan"
        }
    }

    $Index = $null
    if ($UseIndex) {
        # Single file read. An index that parses but has no 'sources' property is
        # treated like a parse failure so the folder scan can still answer the query.
        try {
            $Index = Get-Content -Raw -Path $IndexPath | ConvertFrom-Json
            if ($null -eq $Index -or -not $Index.PSObject.Properties['sources']) {
                throw "index has no 'sources' property"
            }
        }
        catch {
            Write-Verbose "Failed to parse index — falling back to folder scan: $_"
            $UseIndex = $false
        }
    }

    $Results = [System.Collections.Generic.List[object]]::new()

    if ($UseIndex) {
        foreach ($Entry in $Index.sources) {
            if ($null -eq $Entry -or -not $Entry.PSObject.Properties['id']) {
                Write-Warning "Skipping index entry without an id in $IndexPath"
                continue
            }
            if (-not (& $TestSourceFilter $Entry)) { continue }

            $Props = $Entry.PSObject.Properties

            $Results.Add([PSCustomObject]@{
                PSTypeName       = 'AITSource'
                Id               = $Entry.id
                Title            = if ($Props['title']) { $Entry.title } else { $null }
                Url              = $null
                Authors          = @()
                DatePublished    = if ($Props['date_published']) { $Entry.date_published } else { $null }
                DateIngested     = if ($Props['date_ingested']) { $Entry.date_ingested } else { $null }
                ImportTime       = $null
                SourceTime       = $null
                SourceType       = if ($Props['source_type']) { $Entry.source_type } else { $null }
                # @(...) around the helper call keeps these arrays even for 0 or 1 tags.
                PovTags          = @(& $ReadList $Entry 'pov_tags')
                TopicTags        = @(& $ReadList $Entry 'topic_tags')
                RolodexAuthorIds = @()
                ArchiveStatus    = $null
                SummaryVersion   = $null
                SummaryStatus    = if ($Props['summary_status']) { $Entry.summary_status } else { $null }
                SummaryUpdated   = $null
                OneLiner         = if ($Props['one_liner']) { $Entry.one_liner } else { $null }
                Provenance       = @()
                ProvenanceStatus = $null
                ResolvedUrl      = $null
                MDPath           = $null
                Directory        = Join-Path $SourcesDir $Entry.id
                TotalClaims      = if ($Props['total_claims']) { [int]$Entry.total_claims } else { 0 }
                ClaimsByPov      = & $ReadClaimsByPov $Entry
                TotalFacts       = if ($Props['total_facts']) { [int]$Entry.total_facts } else { 0 }
                UnmappedConcepts = if ($Props['unmapped_concepts']) { [int]$Entry.unmapped_concepts } else { 0 }
                ModelInfo        = $null
            })
        }
    }
    else {
        # ── Full scan fallback: metadata.json + summary for each source ──────
        # @() guarantees an array so .Count is valid for zero or one folder.
        $Folders = @(Get-ChildItem -Path $SourcesDir -Directory)
        if ($Folders.Count -eq 0) {
            Write-Warning "No source folders found in $SourcesDir"
            return
        }

        $SummariesDir = Get-SummariesDir

        foreach ($Folder in $Folders) {
            $MetaPath = Join-Path $Folder.FullName 'metadata.json'
            if (-not (Test-Path $MetaPath)) { continue }

            try {
                $Meta = Get-Content -Raw -Path $MetaPath | ConvertFrom-Json
            }
            catch {
                Write-Warning "Failed to parse ${MetaPath}: $_"
                continue
            }

            # Same treatment as an unparsable file: warn and move on rather than
            # letting one malformed source abort the whole listing.
            if ($null -eq $Meta -or -not $Meta.PSObject.Properties['id']) {
                Write-Warning "Skipping ${MetaPath}: no id property"
                continue
            }

            if (-not (& $TestSourceFilter $Meta)) { continue }

            $Props = $Meta.PSObject.Properties

            # snapshot.md path, when the file exists
            $SnapshotPath = Join-Path $Folder.FullName 'snapshot.md'
            $MDPath = if (Test-Path $SnapshotPath) { $SnapshotPath } else { $null }

            # Load summary file (needed for ModelInfo and fallback stats)
            $Summary = $null
            $SummaryPath = Join-Path $SummariesDir "$($Meta.id).json"
            if (Test-Path $SummaryPath) {
                try {
                    $Summary = Get-Content -Raw -Path $SummaryPath | ConvertFrom-Json
                }
                catch {
                    Write-Verbose "Could not parse summary for $($Meta.id): $($_.Exception.Message)"
                }
            }

            # Summary statistics: prefer values cached in metadata, fall back to
            # computing them from the summary file.
            $TotalClaims = 0
            $TotalFacts = 0
            $UnmappedConcepts = 0
            $ClaimsPov = [PSCustomObject]@{ Accelerationist = 0; Safetyist = 0; Skeptic = 0; Situations = 0 }

            if ($Props['total_claims']) {
                $TotalClaims = [int]$Meta.total_claims
                if ($Props['total_facts']) { $TotalFacts = [int]$Meta.total_facts }
                # Only a numeric count is accepted here. ConvertFrom-Json yields Int32 on
                # Windows PowerShell and Int64 on PowerShell 6+, so both are checked.
                if ($Props['unmapped_concepts'] -and
                    ($Meta.unmapped_concepts -is [int] -or $Meta.unmapped_concepts -is [long])) {
                    $UnmappedConcepts = [int]$Meta.unmapped_concepts
                }
                $ClaimsPov = & $ReadClaimsByPov $Meta
            }
            elseif ($null -ne $Summary) {
                $Claims = @(& $ReadList $Summary 'factual_claims')
                $TotalClaims = $Claims.Count

                # A claim counts once per linked taxonomy node, bucketed by node-id prefix.
                foreach ($Claim in $Claims) {
                    if ($null -eq $Claim) { continue }
                    foreach ($NodeId in @(& $ReadList $Claim 'linked_taxonomy_nodes')) {
                        if     ($NodeId -like 'acc-*') { $ClaimsPov.Accelerationist++ }
                        elseif ($NodeId -like 'saf-*') { $ClaimsPov.Safetyist++ }
                        elseif ($NodeId -like 'skp-*') { $ClaimsPov.Skeptic++ }
                        elseif ($NodeId -like 'sit-*') { $ClaimsPov.Situations++ }
                    }
                }

                # Total facts = number of key_points across the three POV summaries.
                $PovSummaries = $Summary.PSObject.Properties['pov_summaries']
                if ($PovSummaries -and $null -ne $PovSummaries.Value) {
                    foreach ($PovName in 'accelerationist', 'safetyist', 'skeptic') {
                        $PovData = $PovSummaries.Value.PSObject.Properties[$PovName]
                        if ($PovData -and $null -ne $PovData.Value) {
                            $TotalFacts += @(& $ReadList $PovData.Value 'key_points').Count
                        }
                    }
                }

                $UnmappedConcepts = @(& $ReadList $Summary 'unmapped_concepts').Count
            }

            # Hydrate ModelInfo from the summary's model_info or the legacy ai_model field
            $MInfo = $null
            if ($null -ne $Summary) {
                $MInfo = [PSCustomObject]@{
                    Model = $null; Temperature = 0; MaxTokens = 0; ExtractionMode = $null
                    TaxonomyFilter = $null; TaxonomyNodes = 0; FireConfidenceThreshold = 0
                    Chunked = $false; ChunkCount = 0; FireStats = $null
                }
                $SP = $Summary.PSObject.Properties
                if ($SP['model_info'] -and $null -ne $Summary.model_info) {
                    $Mp = $Summary.model_info.PSObject.Properties
                    # Fields missing from model_info keep the defaults set above.
                    foreach ($Field in $ModelInfoFields.GetEnumerator()) {
                        $Member = $Mp[$Field.Value]
                        if ($Member) { $MInfo.($Field.Key) = $Member.Value }
                    }
                }
                elseif ($SP['ai_model']) {
                    # Legacy format
                    $MInfo.Model = $Summary.ai_model
                    if ($SP['temperature']) { $MInfo.Temperature = $Summary.temperature }
                }
            }

            $Results.Add([PSCustomObject]@{
                PSTypeName       = 'AITSource'
                Id               = $Meta.id
                Title            = if ($Props['title']) { $Meta.title } else { $null }
                Url              = if ($Props['url']) { $Meta.url } else { $null }
                Authors          = @(& $ReadList $Meta 'authors')
                DatePublished    = if ($Props['date_published']) { $Meta.date_published } else { $null }
                DateIngested     = if ($Props['date_ingested']) { $Meta.date_ingested } else { $null }
                ImportTime       = if ($Props['import_time']) { $Meta.import_time } else { $null }
                SourceTime       = if ($Props['source_time']) { $Meta.source_time } else { $null }
                SourceType       = if ($Props['source_type']) { $Meta.source_type } else { $null }
                PovTags          = @(& $ReadList $Meta 'pov_tags')
                TopicTags        = @(& $ReadList $Meta 'topic_tags')
                RolodexAuthorIds = @(& $ReadList $Meta 'rolodex_author_ids')
                ArchiveStatus    = if ($Props['archive_status']) { $Meta.archive_status } else { $null }
                SummaryVersion   = if ($Props['summary_version']) { $Meta.summary_version } else { $null }
                SummaryStatus    = if ($Props['summary_status']) { $Meta.summary_status } else { $null }
                SummaryUpdated   = if ($Props['summary_updated']) { $Meta.summary_updated } else { $null }
                OneLiner         = if ($Props['one_liner']) { $Meta.one_liner } else { $null }
                Provenance       = @(& $ReadList $Meta 'provenance')
                ProvenanceStatus = if ($Props['provenance_status']) { $Meta.provenance_status } else { $null }
                ResolvedUrl      = if ($Props['resolved_url']) { $Meta.resolved_url } else { $null }
                MDPath           = $MDPath
                Directory        = $Folder.FullName
                TotalClaims      = $TotalClaims
                ClaimsByPov      = $ClaimsPov
                TotalFacts       = $TotalFacts
                UnmappedConcepts = $UnmappedConcepts
                ModelInfo        = $MInfo
            })
        }
    }

    if ($Results.Count -eq 0) {
        Write-Warning 'No sources matched the specified filters.'
        return
    }

    # Newest first. Anything without a parsable DatePublished gets MinValue and so
    # lands at the end of the descending sort.
    $Results | Sort-Object -Descending -Property {
        [datetime]$Parsed = [datetime]::MinValue
        if ($_.DatePublished -is [datetime]) {
            # Already a date (ConvertFrom-Json may convert ISO timestamps); use it as-is
            # rather than round-tripping through a culture-sensitive string parse.
            $_.DatePublished
        }
        elseif ($_.DatePublished -and [datetime]::TryParse([string]$_.DatePublished, [ref]$Parsed)) {
            $Parsed
        }
        else {
            [datetime]::MinValue
        }
    }
}
|