Public/Get-AITSource.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Get-AITSource {
    <#
    .SYNOPSIS
        Lists and filters source documents in the repository.
    .DESCRIPTION
        Enumerates every source folder under the sources directory by reading
        its metadata.json file and emits one AITSource object per folder.
        Supports filtering by document ID (wildcard), title (wildcard),
        POV tag, topic tag, summary status, source type, and ingestion date.

        Summary statistics are taken from values cached in metadata.json when
        a total_claims property is present; otherwise they are computed from
        the matching summary JSON file, if one exists.

        Folders without metadata.json are skipped silently. Folders whose
        metadata cannot be parsed, or that lack an id, are skipped with a
        warning. A warning is also written when nothing matches.

        Default output (no parameters) lists all sources sorted by
        DatePublished descending; sources without a usable publication date
        sort last.
    .PARAMETER DocId
        Wildcard pattern matched against the source document ID.
    .PARAMETER Title
        One or more wildcard patterns matched against the source title.
        A source matches if its title matches any of the supplied patterns.
    .PARAMETER Pov
        Filter to sources whose pov_tags contain this value.
    .PARAMETER Topic
        Filter to sources whose topic_tags contain this value.
    .PARAMETER Status
        Filter to sources with this exact summary_status.
    .PARAMETER SourceType
        Filter to sources with this exact source_type.
    .PARAMETER Today
        Return only sources whose date_ingested is today.
    .EXAMPLE
        Get-AITSource
        # Lists all sources sorted by date.
    .EXAMPLE
        Get-AITSource '*china*'
        # Sources whose ID matches *china*.
    .EXAMPLE
        Get-AITSource -Pov safetyist
        # Sources tagged with the safetyist POV.
    .EXAMPLE
        Get-AITSource -Title '*alignment*'
        # Sources whose title matches *alignment*.
    .EXAMPLE
        Get-AITSource -Title '*safety*', '*risk*'
        # Sources whose title matches either pattern.
    .EXAMPLE
        Get-AITSource -Status pending
        # Sources whose summary is pending.
    .EXAMPLE
        Get-AITSource -Today
        # Sources ingested today.
    #>

    [CmdletBinding()]
    [OutputType('AITSource')]
    param(
        [Parameter(Position = 0)]
        [string]$DocId,

        [string[]]$Title,

        [string]$Pov,

        [string]$Topic,

        [string]$Status,

        [string]$SourceType,

        [switch]$Today
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    $SourcesDir = Get-SourcesDir

    if (-not (Test-Path $SourcesDir)) {
        Write-Warning "Sources directory not found: $SourcesDir"
        return
    }

    # Get-ChildItem yields nothing for an empty directory and a scalar for a
    # single folder; @() guarantees an array so .Count is always safe to read
    # under strict mode.
    $Folders = @(Get-ChildItem -Path $SourcesDir -Directory)
    if ($Folders.Count -eq 0) {
        Write-Warning "No source folders found in $SourcesDir"
        return
    }

    $SummariesDir = Get-SummariesDir
    $Results = [System.Collections.Generic.List[AITSource]]::new()

    # Evaluate "today" once so every source in this run is compared against
    # the same date, even if the run straddles midnight.
    $TodayStamp = Get-Date -Format 'yyyy-MM-dd'

    foreach ($Folder in $Folders) {
        $MetaPath = Join-Path $Folder.FullName 'metadata.json'
        if (-not (Test-Path $MetaPath)) { continue }

        try {
            $Meta = Get-Content -Raw -Path $MetaPath | ConvertFrom-Json
        }
        catch {
            Write-Warning "Failed to parse ${MetaPath}: $_"
            continue
        }

        # An empty file can deserialize to nothing; treat it like a parse failure
        # rather than letting a null dereference abort the whole listing.
        if ($null -eq $Meta) {
            Write-Warning "Metadata file is empty: $MetaPath"
            continue
        }

        # Safe property accessor for metadata that may lack optional fields
        $Props = $Meta.PSObject.Properties

        # The id is used for filtering and to locate the summary file, so a
        # source without one cannot be processed. Skip it instead of throwing
        # under strict mode.
        if (-not $Props['id']) {
            Write-Warning "Metadata has no 'id' property, skipping: $MetaPath"
            continue
        }

        # --- Filters ---
        if ($DocId -and $Meta.id -notlike $DocId) { continue }
        if ($Title) {
            if ($Props['title']) { $SrcTitle = $Meta.title } else { $SrcTitle = $null }
            if (-not $SrcTitle) { continue }
            $TitleMatch = $false
            foreach ($Pattern in $Title) {
                if ($SrcTitle -like $Pattern) { $TitleMatch = $true; break }
            }
            if (-not $TitleMatch) { continue }
        }
        if ($Pov) {
            if ($Props['pov_tags']) { $PovArr = $Meta.pov_tags } else { $PovArr = @() }
            if ($PovArr -notcontains $Pov) { continue }
        }
        if ($Topic) {
            if ($Props['topic_tags']) { $TopicArr = $Meta.topic_tags } else { $TopicArr = @() }
            if ($TopicArr -notcontains $Topic) { continue }
        }
        if ($Status) {
            if ($Props['summary_status']) { $SumStatus = $Meta.summary_status } else { $SumStatus = $null }
            if ($SumStatus -ne $Status) { continue }
        }
        if ($SourceType) {
            if ($Props['source_type']) { $SrcType = $Meta.source_type } else { $SrcType = $null }
            if ($SrcType -ne $SourceType) { continue }
        }
        if ($Today) {
            if ($Props['date_ingested']) { $Ingested = $Meta.date_ingested } else { $Ingested = $null }
            if ($Ingested -ne $TodayStamp) { continue }
        }

        # Build snapshot.md path
        $SnapshotPath = Join-Path $Folder.FullName 'snapshot.md'
        if (Test-Path $SnapshotPath) { $MDPath = $SnapshotPath } else { $MDPath = $null }

        # Load summary file (needed for ModelInfo and fallback stats)
        $Summary     = $null
        $SummaryPath = Join-Path $SummariesDir "$($Meta.id).json"
        if (Test-Path $SummaryPath) {
            try {
                $Summary = Get-Content -Raw -Path $SummaryPath | ConvertFrom-Json
            }
            catch {
                # Best effort: an unreadable summary only costs the derived stats.
                Write-Verbose "Could not parse summary for $($Meta.id): $($_.Exception.Message)"
            }
        }

        # Load summary statistics — prefer cached values in metadata, fall back to summary file
        $TotalClaims      = 0
        $ClaimsPov        = [ClaimsByPov]::new()
        $TotalFacts       = 0
        $UnmappedConcepts = 0

        if ($Props['total_claims']) {
            # Stats cached in metadata (written by Invoke-POVSummary)
            $TotalClaims = [int]$Meta.total_claims
            if ($Props['total_facts']) { $TotalFacts = [int]$Meta.total_facts }

            # Only a numeric count is accepted here. JSON integers may
            # deserialize as [long] rather than [int] depending on the
            # PowerShell edition, so both integral types are allowed.
            if ($Props['unmapped_concepts'] -and
                ($Meta.unmapped_concepts -is [int] -or $Meta.unmapped_concepts -is [long])) {
                $UnmappedConcepts = [int]$Meta.unmapped_concepts
            }

            if ($Props['claims_by_pov'] -and $Meta.claims_by_pov) {
                $Cbp = $Meta.claims_by_pov
                $CbpProps = $Cbp.PSObject.Properties
                $ClaimsPov.Accelerationist = if ($CbpProps['accelerationist']) { [int]$Cbp.accelerationist } else { 0 }
                $ClaimsPov.Safetyist       = if ($CbpProps['safetyist'])       { [int]$Cbp.safetyist }       else { 0 }
                $ClaimsPov.Skeptic         = if ($CbpProps['skeptic'])         { [int]$Cbp.skeptic }         else { 0 }
                $ClaimsPov.Situations      = if ($CbpProps['situations'])      { [int]$Cbp.situations }      else { 0 }
            }
        }
        elseif ($null -ne $Summary) {
            # Fall back to computing from summary file. Every property is
            # probed before it is read because strict mode turns a missing
            # property into a terminating error.
            $SumProps = $Summary.PSObject.Properties

            $Claims = @()
            if ($SumProps['factual_claims'] -and $Summary.factual_claims) {
                $Claims = @($Summary.factual_claims)
            }
            $TotalClaims = $Claims.Count

            foreach ($Claim in $Claims) {
                if ($null -eq $Claim) { continue }
                if (-not $Claim.PSObject.Properties['linked_taxonomy_nodes']) { continue }
                $Nodes = @($Claim.linked_taxonomy_nodes)
                if ($Nodes.Count -eq 0) { continue }
                # Node ID prefixes identify which POV bucket a linked node counts toward.
                foreach ($NodeId in $Nodes) {
                    if     ($NodeId -like 'acc-*') { $ClaimsPov.Accelerationist++ }
                    elseif ($NodeId -like 'saf-*') { $ClaimsPov.Safetyist++ }
                    elseif ($NodeId -like 'skp-*') { $ClaimsPov.Skeptic++ }
                    elseif ($NodeId -like 'sit-*') { $ClaimsPov.Situations++ }
                }
            }

            $PovSummaries = if ($SumProps['pov_summaries']) { $Summary.pov_summaries } else { $null }
            if ($null -ne $PovSummaries) {
                $PovSummaryProps = $PovSummaries.PSObject.Properties
                foreach ($PovName in @('accelerationist', 'safetyist', 'skeptic')) {
                    if (-not $PovSummaryProps[$PovName]) { continue }
                    $PovData = $PovSummaries.$PovName
                    if ($null -eq $PovData) { continue }
                    if ($PovData.PSObject.Properties['key_points'] -and $PovData.key_points) {
                        $TotalFacts += @($PovData.key_points).Count
                    }
                }
            }

            if ($SumProps['unmapped_concepts'] -and $Summary.unmapped_concepts) {
                $UnmappedConcepts = @($Summary.unmapped_concepts).Count
            }
        }

        # Hydrate ModelInfo from summary's model_info or legacy ai_model field
        $MInfo = $null
        if ($null -ne $Summary) {
            $MInfo = [AITModelInfo]::new()
            $SP = $Summary.PSObject.Properties
            # A model_info property that is present but null is treated as absent.
            $Mi = if ($SP['model_info']) { $Summary.model_info } else { $null }
            if ($null -ne $Mi) {
                $Mp = $Mi.PSObject.Properties
                $MInfo.Model                  = if ($Mp['model'])                    { $Mi.model }                    else { $null }
                $MInfo.Temperature            = if ($Mp['temperature'])              { $Mi.temperature }              else { 0 }
                $MInfo.MaxTokens              = if ($Mp['max_tokens'])               { $Mi.max_tokens }               else { 0 }
                $MInfo.ExtractionMode         = if ($Mp['extraction_mode'])          { $Mi.extraction_mode }          else { $null }
                $MInfo.TaxonomyFilter         = if ($Mp['taxonomy_filter'])          { $Mi.taxonomy_filter }          else { $null }
                $MInfo.TaxonomyNodes          = if ($Mp['taxonomy_nodes'])           { $Mi.taxonomy_nodes }           else { 0 }
                $MInfo.FireConfidenceThreshold = if ($Mp['fire_confidence_threshold']) { $Mi.fire_confidence_threshold } else { 0 }
                $MInfo.Chunked                = if ($Mp['chunked'])                  { $Mi.chunked }                  else { $false }
                $MInfo.ChunkCount             = if ($Mp['chunk_count'])              { $Mi.chunk_count }              else { 0 }
                $MInfo.FireStats              = if ($Mp['fire_stats'])               { $Mi.fire_stats }               else { $null }
            }
            elseif ($SP['ai_model']) {
                # Legacy format
                $MInfo.Model       = $Summary.ai_model
                $MInfo.Temperature = if ($SP['temperature']) { $Summary.temperature } else { 0 }
            }
        }

        $Src                   = [AITSource]::new()
        $Src.Id                = $Meta.id
        $Src.Title             = if ($Props['title'])            { $Meta.title }            else { $null }
        $Src.Url               = if ($Props['url'])              { $Meta.url }              else { $null }
        $Src.Authors           = if ($Props['authors'])          { $Meta.authors }          else { @() }
        $Src.DatePublished     = if ($Props['date_published'])   { $Meta.date_published }   else { $null }
        $Src.DateIngested      = if ($Props['date_ingested'])    { $Meta.date_ingested }    else { $null }
        $Src.ImportTime        = if ($Props['import_time'])      { $Meta.import_time }      else { $null }
        $Src.SourceTime        = if ($Props['source_time'])      { $Meta.source_time }      else { $null }
        $Src.SourceType        = if ($Props['source_type'])      { $Meta.source_type }      else { $null }
        $Src.PovTags           = if ($Props['pov_tags'])         { $Meta.pov_tags }         else { @() }
        $Src.TopicTags         = if ($Props['topic_tags'])       { $Meta.topic_tags }       else { @() }
        $Src.RolodexAuthorIds  = if ($Props['rolodex_author_ids']) { $Meta.rolodex_author_ids } else { @() }
        $Src.ArchiveStatus     = if ($Props['archive_status'])   { $Meta.archive_status }   else { $null }
        $Src.SummaryVersion    = if ($Props['summary_version'])  { $Meta.summary_version }  else { $null }
        $Src.SummaryStatus     = if ($Props['summary_status'])   { $Meta.summary_status }   else { $null }
        $Src.SummaryUpdated    = if ($Props['summary_updated'])  { $Meta.summary_updated }  else { $null }
        $Src.OneLiner          = if ($Props['one_liner'])        { $Meta.one_liner }        else { $null }
        $Src.MDPath            = $MDPath
        $Src.Directory         = $Folder.FullName
        $Src.TotalClaims       = $TotalClaims
        $Src.ClaimsByPov       = $ClaimsPov
        $Src.TotalFacts        = $TotalFacts
        $Src.UnmappedConcepts  = $UnmappedConcepts
        $Src.ModelInfo         = $MInfo

        $Results.Add($Src)
    }

    if ($Results.Count -eq 0) {
        Write-Warning 'No sources matched the specified filters.'
        return
    }

    # Newest first. A missing or unparseable publication date sorts as the
    # minimum date (i.e. last) instead of aborting the whole listing.
    $Results | Sort-Object -Descending -Property {
        if ($_.DatePublished) {
            try { [datetime]$_.DatePublished } catch { [datetime]::MinValue }
        }
        else {
            [datetime]::MinValue
        }
    }
}