AITriad

0.1.0

Public/Invoke-AttributeExtraction.ps1

                                # Copyright (c) 2026 Jeffrey Snover. All rights reserved.

# Licensed under the MIT License. See LICENSE file in the project root.

function Invoke-AttributeExtraction {
    <#
    .SYNOPSIS
        Uses AI to generate rich graph attributes for taxonomy nodes (Phase 1 of LAG proposal).

    .DESCRIPTION
        Reads taxonomy JSON files, sends nodes in batches to an LLM, and writes
        graph_attributes back to each node. Each returned attribute object is checked
        for these fields: epistemic_type, rhetorical_strategy, assumes, falsifiability,
        audience, emotional_register, policy_actions, intellectual_lineage,
        steelman_vulnerability, and possible_fallacies. A missing field produces a
        warning; the attributes are still stored.

        Attributes are stored as a new "graph_attributes" key on each node object,
        which is backwards-compatible with all existing tooling.

        Nodes that already have graph_attributes are skipped unless -Force is specified.

        A taxonomy file is rewritten (and its last_modified stamp updated) only when at
        least one of its own nodes received new attributes, or when -Force is set.
        A failed batch is reported and counted; processing continues with the next batch.

    .PARAMETER POV
        Process only this POV file. If omitted, processes all POV files and cross-cutting.
        Valid values: accelerationist, safetyist, skeptic, cross-cutting.

    .PARAMETER BatchSize
        Number of nodes to process per API call. Default: 8.

    .PARAMETER Model
        AI model to use. Defaults to 'gemini-2.5-flash'.

    .PARAMETER ApiKey
        AI API key. If omitted, resolved via backend-specific env var or AI_API_KEY.

    .PARAMETER Temperature
        Sampling temperature (0.0-1.0). Default: 0.2 (precise analytical output).

    .PARAMETER DryRun
        Build and display the prompt for the first batch, but do NOT call the API.

    .PARAMETER Force
        Re-generate attributes even for nodes that already have them.

    .PARAMETER RepoRoot
        Path to the repository root. Defaults to the module-resolved repo root.

    .EXAMPLE
        Invoke-AttributeExtraction -DryRun

    .EXAMPLE
        Invoke-AttributeExtraction -POV accelerationist

    .EXAMPLE
        Invoke-AttributeExtraction -Force -Model 'gemini-2.5-pro'
    #>
    [CmdletBinding(SupportsShouldProcess)]
    param(
        [ValidateSet('accelerationist', 'safetyist', 'skeptic', 'cross-cutting')]
        [string]$POV = '',

        [ValidateRange(1, 20)]
        [int]$BatchSize = 8,

        [ValidateScript({ Test-AIModelId $_ })]
        [ArgumentCompleter({ param($cmd, $param, $word) $script:ValidModelIds | Where-Object { $_ -like "$word*" } })]
        [string]$Model = 'gemini-2.5-flash',

        [string]$ApiKey = '',

        [ValidateRange(0.0, 1.0)]
        [double]$Temperature = 0.2,

        [switch]$DryRun,

        [switch]$Force,

        [string]$RepoRoot = $script:RepoRoot
    )

    Set-StrictMode -Version Latest

    # ── Step 1: Validate environment ──
    Write-Step 'Validating environment'

    if (-not (Test-Path $RepoRoot)) {
        Write-Fail "Repo root not found: $RepoRoot"
        throw "Repo root not found"
    }

    $TaxDir = Get-TaxonomyDir
    if (-not (Test-Path $TaxDir)) {
        Write-Fail "Taxonomy directory not found: $TaxDir"
        throw "Taxonomy directory not found"
    }

    # The API key is only needed when the API will actually be called.
    $ResolvedKey = $null
    if (-not $DryRun) {
        # Backend is inferred from the model-name prefix; unknown prefixes fall back to gemini.
        $Backend = if     ($Model -match '^gemini') { 'gemini' }
                   elseif ($Model -match '^claude') { 'claude' }
                   elseif ($Model -match '^groq')   { 'groq'   }
                   else                             { 'gemini'  }

        $ResolvedKey = Resolve-AIApiKey -ExplicitKey $ApiKey -Backend $Backend
        if ([string]::IsNullOrWhiteSpace($ResolvedKey)) {
            Write-Fail 'No API key found. Set GEMINI_API_KEY, ANTHROPIC_API_KEY, or AI_API_KEY.'
            throw 'No API key configured'
        }
    }

    # ── Step 2: Determine which files to process ──
    $PovFiles = @('accelerationist', 'safetyist', 'skeptic', 'cross-cutting')
    if ($POV) {
        $PovFiles = @($POV)
    }
    Write-OK "Processing: $($PovFiles -join ', ')"

    # ── Step 3: Load prompts ──
    $SystemPrompt = Get-Prompt -Name 'attribute-extraction'
    $SchemaPrompt = Get-Prompt -Name 'attribute-extraction-schema'

    # Fields every returned attribute object is expected to carry (loop-invariant).
    $RequiredFields = @(
        'epistemic_type', 'rhetorical_strategy', 'assumes',
        'falsifiability', 'audience', 'emotional_register',
        'policy_actions', 'intellectual_lineage',
        'steelman_vulnerability', 'possible_fallacies'
    )

    # ── Step 4: Process each taxonomy file ──
    $TotalProcessed = 0
    $TotalSkipped   = 0
    $TotalFailed    = 0

    foreach ($PovKey in $PovFiles) {
        $FilePath = Join-Path $TaxDir "$PovKey.json"
        if (-not (Test-Path $FilePath)) {
            Write-Warn "File not found, skipping: $FilePath"
            continue
        }

        Write-Step "Loading $PovKey"
        $FileData = Get-Content -Raw -Path $FilePath | ConvertFrom-Json

        # Identify nodes needing attributes
        $AllNodes = @($FileData.nodes)
        if ($Force) {
            $NodesToProcess = $AllNodes
        } else {
            $NodesToProcess = @($AllNodes | Where-Object {
                -not $_.PSObject.Properties['graph_attributes'] -or
                $null -eq $_.graph_attributes
            })
        }

        # Count every already-attributed node as skipped, including those in files
        # that still have other nodes to process.
        $AlreadyDone = $AllNodes.Count - $NodesToProcess.Count
        $TotalSkipped += $AlreadyDone
        if ($AlreadyDone -gt 0) {
            Write-Info "$AlreadyDone nodes already have attributes (use -Force to regenerate)"
        }

        if ($NodesToProcess.Count -eq 0) {
            Write-OK "$PovKey — nothing to process"
            continue
        }

        Write-Info "$($NodesToProcess.Count) nodes to process in $PovKey"

        # ── Step 5: Process in batches ──
        $Batches = [System.Collections.Generic.List[object[]]]::new()
        for ($i = 0; $i -lt $NodesToProcess.Count; $i += $BatchSize) {
            $End = [Math]::Min($i + $BatchSize, $NodesToProcess.Count)
            $Batch = @($NodesToProcess[$i..($End - 1)])
            $Batches.Add($Batch)
        }

        Write-Info "$($Batches.Count) batch(es) of up to $BatchSize nodes"

        # Nodes updated in THIS file; decides whether the file needs to be rewritten.
        $FileProcessed = 0
        $BatchNum = 0

        foreach ($Batch in $Batches) {
            $BatchNum++
            $NodeIds = ($Batch | ForEach-Object { $_.id }) -join ', '
            Write-Step "Batch $BatchNum/$($Batches.Count): $NodeIds"

            # Build node context for the prompt
            $NodeContext = foreach ($Node in $Batch) {
                $Entry = [ordered]@{
                    id          = $Node.id
                    pov         = $PovKey
                    label       = $Node.label
                    description = $Node.description
                }

                if ($Node.PSObject.Properties['category']) {
                    $Entry['category'] = $Node.category
                }

                # Include interpretations for cross-cutting nodes
                if ($PovKey -eq 'cross-cutting' -and $Node.PSObject.Properties['interpretations']) {
                    $Entry['interpretations'] = $Node.interpretations
                }

                $Entry
            }

            # -InputObject with an explicit array keeps the payload a JSON array even
            # when the batch holds a single node (piping one item would emit a bare object).
            $NodeJson = ConvertTo-Json -InputObject @($NodeContext) -Depth 10

            $FullPrompt = @"

$SystemPrompt

--- INPUT NODES ---

$NodeJson

$SchemaPrompt

"@

            # ── DryRun: show first batch prompt and exit ──
            if ($DryRun) {
                Write-Host ''
                Write-Host '=== PROMPT PREVIEW (first batch) ===' -ForegroundColor Cyan
                Write-Host ''

                # Show system prompt (truncated)
                $Lines = $SystemPrompt -split "`n"
                if ($Lines.Count -gt 15) {
                    Write-Host ($Lines[0..14] -join "`n") -ForegroundColor DarkGray
                    Write-Host "  ... ($($Lines.Count) total lines)" -ForegroundColor DarkGray
                } else {
                    Write-Host $SystemPrompt -ForegroundColor DarkGray
                }

                Write-Host ''
                Write-Host '--- INPUT NODES ---' -ForegroundColor Yellow
                Write-Host $NodeJson -ForegroundColor White
                Write-Host ''
                Write-Host '--- SCHEMA ---' -ForegroundColor Yellow
                Write-Host ($SchemaPrompt.Substring(0, [Math]::Min(500, $SchemaPrompt.Length))) -ForegroundColor DarkGray
                Write-Host ''
                Write-Host "Total prompt length: ~$($FullPrompt.Length) chars (~$([Math]::Round($FullPrompt.Length / 4)) tokens est.)" -ForegroundColor Cyan
                Write-Host "Nodes in this batch: $($Batch.Count)" -ForegroundColor Cyan
                Write-Host "Total batches needed: $($Batches.Count) across $($PovFiles.Count) file(s)" -ForegroundColor Cyan
                return
            }

            # ── Call AI API ──
            $ApiArgs = @{
                Prompt      = $FullPrompt
                Model       = $Model
                ApiKey      = $ResolvedKey
                Temperature = $Temperature
                MaxTokens   = 16384
                JsonMode    = $true
                TimeoutSec  = 120
            }

            $Result = $null
            $Stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
            try {
                $Result = Invoke-AIApi @ApiArgs
            } catch {
                Write-Fail "API call failed for batch $BatchNum`: $_"
                $TotalFailed += $Batch.Count
                continue
            } finally {
                $Stopwatch.Stop()
            }

            Write-Info "API response in $([Math]::Round($Stopwatch.Elapsed.TotalSeconds, 1))s"

            # ── Parse response ──
            # A null result or blank text fails this batch only; under StrictMode the
            # unguarded property access would otherwise raise an error here.
            $RawText = $null
            if ($null -ne $Result) {
                $RawText = $Result.Text
            }

            if ([string]::IsNullOrWhiteSpace($RawText)) {
                Write-Fail "Empty response for batch $BatchNum"
                $TotalFailed += $Batch.Count
                continue
            }

            # Strip a surrounding ```json ... ``` fence if the model added one.
            $ResponseText = $RawText -replace '^\s*```json\s*', '' -replace '\s*```\s*$', ''

            $Attributes = $null
            try {
                $Attributes = $ResponseText | ConvertFrom-Json -Depth 20
            } catch {
                Write-Warn "JSON parse failed, attempting repair..."
                $Repaired = Repair-TruncatedJson -Text $ResponseText
                try {
                    $Attributes = $Repaired | ConvertFrom-Json -Depth 20
                } catch {
                    Write-Fail "Could not parse response for batch $BatchNum"
                    $TotalFailed += $Batch.Count
                    continue
                }
            }

            # Valid JSON can still deserialize to nothing (e.g. the literal null).
            if ($null -eq $Attributes) {
                Write-Fail "Could not parse response for batch $BatchNum"
                $TotalFailed += $Batch.Count
                continue
            }

            # ── Apply attributes to nodes ──
            foreach ($Node in $Batch) {
                $NodeId = $Node.id

                # The response is expected to be an object keyed by node id.
                $AttrProp = $Attributes.PSObject.Properties[$NodeId]
                if (-not $AttrProp -or $null -eq $AttrProp.Value) {
                    Write-Warn "$NodeId`: not found in API response"
                    $TotalFailed++
                    continue
                }

                $AttrObj = $AttrProp.Value

                # Validate required fields (warn only; attributes are stored regardless)
                $Missing = @($RequiredFields | Where-Object {
                    -not $AttrObj.PSObject.Properties[$_]
                })

                if ($Missing.Count -gt 0) {
                    Write-Warn "$NodeId`: missing fields: $($Missing -join ', ')"
                }

                # $Node is the same object instance held by $FileData.nodes, so updating
                # it here updates the data that gets serialized back to disk.
                if ($Node.PSObject.Properties['graph_attributes']) {
                    $Node.graph_attributes = $AttrObj
                } else {
                    $Node | Add-Member -NotePropertyName 'graph_attributes' -NotePropertyValue $AttrObj
                }

                $FileProcessed++
                $TotalProcessed++
                Write-OK "$NodeId"
            }
        }

        # ── Step 6: Write updated file ──
        # Decide per file: successes in an earlier file must not trigger a rewrite
        # (and a new last_modified stamp) of a file whose nodes did not change.
        if ($FileProcessed -gt 0 -or $Force) {
            if ($PSCmdlet.ShouldProcess($FilePath, 'Write updated taxonomy file')) {
                # Update last_modified, adding the property when the file lacks it.
                $Today = (Get-Date).ToString('yyyy-MM-dd')
                if ($FileData.PSObject.Properties['last_modified']) {
                    $FileData.last_modified = $Today
                } else {
                    $FileData | Add-Member -NotePropertyName 'last_modified' -NotePropertyValue $Today
                }

                $Json = $FileData | ConvertTo-Json -Depth 20

                try {
                    Set-Content -Path $FilePath -Value $Json -Encoding UTF8
                    Write-OK "Saved $PovKey ($FilePath)"
                }
                catch {
                    Write-Fail "Failed to write $PovKey taxonomy file — $($_.Exception.Message)"
                    Write-Info "Attributes were extracted but NOT saved to disk."
                    throw
                }
            }
        }
    }

    # ── Summary ──
    Write-Host ''
    Write-Host '=== Attribute Extraction Complete ===' -ForegroundColor Cyan
    Write-Host "  Processed:  $TotalProcessed nodes" -ForegroundColor Green
    Write-Host "  Skipped:    $TotalSkipped nodes (already had attributes)" -ForegroundColor Yellow
    Write-Host "  Failed:     $TotalFailed nodes" -ForegroundColor $(if ($TotalFailed -gt 0) { 'Red' } else { 'Green' })
    Write-Host ''
}