Public/Export-TaxonomyToGraph.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Export-TaxonomyToGraph {
    <#
    .SYNOPSIS
        Exports the taxonomy graph to a Neo4j database for visualization and Cypher queries.
    .DESCRIPTION
        Reads all taxonomy JSON files, edges.json, summaries, and conflicts,
        then creates/updates nodes and relationships in a Neo4j instance.

        The Neo4j database is a read-only derived view — all edits happen in
        the JSON files, and the database is rebuilt on each export.

        Requires a running Neo4j instance (see Install-GraphDatabase).
    .PARAMETER Full
        Rebuild the entire graph from scratch (clears existing data first).
    .PARAMETER IncludeEmbeddings
        Include embedding vectors as node properties for graph-native similarity queries.
    .PARAMETER Uri
        Neo4j Bolt URI. Default: bolt://localhost:7687.
    .PARAMETER Credential
        PSCredential for Neo4j authentication. If omitted, uses neo4j/neo4j default.
    .PARAMETER RepoRoot
        Path to the repository root.
    .EXAMPLE
        Export-TaxonomyToGraph -Full
    .EXAMPLE
        Export-TaxonomyToGraph -Full -IncludeEmbeddings
    .EXAMPLE
        Export-TaxonomyToGraph -Uri "bolt://localhost:7687" -Credential (Get-Credential)
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param(
        [switch]$Full,

        [switch]$IncludeEmbeddings,

        [string]$Uri = 'bolt://localhost:7687',

        [PSCredential]$Credential,

        [string]$RepoRoot = $script:RepoRoot
    )

    Set-StrictMode -Version Latest

    # ── Step 1: Check Neo4j connectivity ──
    Write-Step 'Checking Neo4j connection'

    # Derive HTTP API endpoint from bolt URI
    $HttpUri = $Uri -replace 'bolt://', 'http://' -replace ':7687', ':7474'
    $AuthHeader = @{}

    if ($Credential) {
        $Pair = "$($Credential.UserName):$($Credential.GetNetworkCredential().Password)"
    } else {
        $Neo4jPwd = if ($env:NEO4J_PASSWORD) { $env:NEO4J_PASSWORD } else { 'aitriad2026' }
        $Pair = "neo4j:$Neo4jPwd"
    }
    $Bytes = [System.Text.Encoding]::ASCII.GetBytes($Pair)
    $AuthHeader['Authorization'] = "Basic $([Convert]::ToBase64String($Bytes))"

    # Helper to run Cypher via HTTP API
    function Invoke-Cypher {
        param([string]$Query, [hashtable]$Parameters = @{})

        $Body = @{
            statements = @(
                @{
                    statement  = $Query
                    parameters = $Parameters
                }
            )
        } | ConvertTo-Json -Depth 10

        $Response = Invoke-RestMethod `
            -Uri "$HttpUri/db/neo4j/tx/commit" `
            -Method POST `
            -ContentType 'application/json' `
            -Headers $AuthHeader `
            -Body $Body `
            -ErrorAction Stop

        if ($Response.errors -and $Response.errors.Count -gt 0) {
            $ErrMsg = ($Response.errors | ForEach-Object { $_.message }) -join '; '
            throw "Cypher error: $ErrMsg"
        }

        return $Response
    }

    try {
        $null = Invoke-Cypher -Query 'RETURN 1 AS test'
        Write-OK "Connected to Neo4j at $Uri"
    } catch {
        Write-Fail "Cannot connect to Neo4j at $Uri — $_"
        Write-Info 'Run Install-GraphDatabase to set up Neo4j, or ensure it is running.'
        return
    }

    # ── Step 2: Clear database if Full ──
    if ($Full) {
        if ($PSCmdlet.ShouldProcess('Neo4j database', 'Clear all nodes and relationships')) {
            Write-Step 'Clearing existing graph data'
            $null = Invoke-Cypher -Query 'MATCH (n) DETACH DELETE n'
            Write-OK 'Database cleared'
        }
    }

    # ── Step 3: Create constraints and indexes ──
    Write-Step 'Creating indexes'
    $IndexQueries = @(
        'CREATE CONSTRAINT IF NOT EXISTS FOR (n:TaxonomyNode) REQUIRE n.id IS UNIQUE'
        'CREATE CONSTRAINT IF NOT EXISTS FOR (c:Conflict) REQUIRE c.claim_id IS UNIQUE'
        'CREATE CONSTRAINT IF NOT EXISTS FOR (s:Source) REQUIRE s.doc_id IS UNIQUE'
        'CREATE INDEX IF NOT EXISTS FOR (n:TaxonomyNode) ON (n.pov)'
        'CREATE INDEX IF NOT EXISTS FOR (n:TaxonomyNode) ON (n.category)'
    )
    foreach ($Q in $IndexQueries) {
        try { $null = Invoke-Cypher -Query $Q } catch { Write-Warn "Index: $_" }
    }
    Write-OK 'Indexes ready'

    # ── Step 4: Load and export taxonomy nodes ──
    Write-Step 'Exporting taxonomy nodes'
    $TaxDir = Get-TaxonomyDir
    $PovFiles = @('accelerationist', 'safetyist', 'skeptic', 'cross-cutting')
    $NodeCount = 0

    foreach ($PovKey in $PovFiles) {
        $FilePath = Join-Path $TaxDir "$PovKey.json"
        if (-not (Test-Path $FilePath)) { continue }

        $FileData = Get-Content -Raw -Path $FilePath | ConvertFrom-Json
        foreach ($Node in $FileData.nodes) {
            $Props = @{
                id          = $Node.id
                pov         = $PovKey
                label       = $Node.label
                description = $Node.description
            }
            if ($Node.PSObject.Properties['category']) {
                $Props['category'] = $Node.category
            }

            # Flatten graph_attributes into properties
            if ($Node.PSObject.Properties['graph_attributes']) {
                $Attrs = $Node.graph_attributes
                foreach ($Prop in $Attrs.PSObject.Properties) {
                    $Val = $Prop.Value
                    # Convert arrays to JSON strings for Neo4j compatibility
                    if ($Val -is [System.Array] -or $Val -is [System.Collections.IEnumerable] -and $Val -isnot [string]) {
                        $Val = ($Val | ConvertTo-Json -Compress)
                    }
                    $Props["attr_$($Prop.Name)"] = "$Val"
                }
            }

            $SetClauses = ($Props.Keys | ForEach-Object { "n.$_ = `$$_" }) -join ', '
            $Query = "MERGE (n:TaxonomyNode {id: `$id}) SET $SetClauses"

            # Add POV label
            $PovLabel = switch ($PovKey) {
                'accelerationist' { 'Accelerationist' }
                'safetyist'       { 'Safetyist' }
                'skeptic'         { 'Skeptic' }
                'cross-cutting'   { 'CrossCutting' }
            }
            $Query += ", n:$PovLabel"

            $null = Invoke-Cypher -Query $Query -Parameters $Props
            $NodeCount++
        }
    }
    Write-OK "Exported $NodeCount taxonomy nodes"

    # ── Step 5: Export edges ──
    Write-Step 'Exporting edges'
    $EdgesPath = Join-Path $TaxDir 'edges.json'
    $EdgeCount = 0
    $EdgeFailCount = 0

    if (Test-Path $EdgesPath) {
        $EdgesData = Get-Content -Raw -Path $EdgesPath | ConvertFrom-Json

        foreach ($Edge in $EdgesData.edges) {
            $EdgeProps = @{
                source_id   = $Edge.source
                target_id   = $Edge.target
                confidence  = [double]$Edge.confidence
                status      = $Edge.status
            }
            if ($Edge.PSObject.Properties['rationale'] -and $Edge.rationale) {
                $EdgeProps['rationale'] = $Edge.rationale
            }
            if ($Edge.PSObject.Properties['strength'] -and $Edge.strength) {
                $EdgeProps['strength'] = "$($Edge.strength)"
            }
            if ($Edge.PSObject.Properties['bidirectional']) {
                $EdgeProps['bidirectional'] = [bool]$Edge.bidirectional
            }
            if ($Edge.PSObject.Properties['discovered_at']) {
                $EdgeProps['discovered_at'] = $Edge.discovered_at
            }

            $SetParts = ($EdgeProps.Keys | Where-Object { $_ -notin 'source_id', 'target_id' } |
                ForEach-Object { "r.$_ = `$$_" }) -join ', '

            $Query = @"
MATCH (a:TaxonomyNode {id: `$source_id})
MATCH (b:TaxonomyNode {id: `$target_id})
MERGE (a)-[r:$($Edge.type)]->(b)
SET $SetParts
"@

            try {
                $null = Invoke-Cypher -Query $Query -Parameters $EdgeProps
                $EdgeCount++
            } catch {
                $EdgeFailCount++
                Write-Warn "Edge $($Edge.source) → $($Edge.target): $_"
            }
        }
    }
    if ($EdgeFailCount -gt 0) {
        Write-Warn "Exported $EdgeCount edges ($EdgeFailCount failed)"
    } else {
        Write-OK "Exported $EdgeCount edges"
    }

    # ── Step 6: Export conflicts ──
    Write-Step 'Exporting conflicts'
    $ConflictDir = Get-ConflictsDir
    $ConflictCount = 0
    $ConflictFailCount = 0

    if (Test-Path $ConflictDir) {
        foreach ($File in Get-ChildItem -Path $ConflictDir -Filter '*.json' -File) {
            try {
                $Conflict = Get-Content -Raw -Path $File.FullName | ConvertFrom-Json

                $ConflictProps = @{
                    claim_id       = $Conflict.claim_id
                    claim_label    = $Conflict.claim_label
                    description    = $Conflict.description
                    status         = $Conflict.status
                    instance_count = @($Conflict.instances).Count
                }

                $SetClauses = ($ConflictProps.Keys | ForEach-Object { "c.$_ = `$$_" }) -join ', '
                $Query = "MERGE (c:Conflict {claim_id: `$claim_id}) SET $SetClauses"
                $null = Invoke-Cypher -Query $Query -Parameters $ConflictProps

                # Link to taxonomy nodes
                if ($Conflict.PSObject.Properties['linked_taxonomy_nodes']) {
                    foreach ($NId in $Conflict.linked_taxonomy_nodes) {
                        $LinkQuery = @"
MATCH (c:Conflict {claim_id: `$claim_id})
MATCH (n:TaxonomyNode {id: `$node_id})
MERGE (c)-[:LINKED_TO]->(n)
"@

                        $null = Invoke-Cypher -Query $LinkQuery -Parameters @{
                            claim_id = $Conflict.claim_id
                            node_id  = $NId
                        }
                    }
                }

                $ConflictCount++
            } catch {
                $ConflictFailCount++
                Write-Warn "Conflict $($File.Name): $_"
            }
        }
    }
    if ($ConflictFailCount -gt 0) {
        Write-Warn "Exported $ConflictCount conflicts ($ConflictFailCount failed)"
    } else {
        Write-OK "Exported $ConflictCount conflicts"
    }

    # ── Step 7: Export embeddings (optional) ──
    if ($IncludeEmbeddings) {
        Write-Step 'Exporting embeddings'
        $EmbPath = Join-Path $TaxDir 'embeddings.json'
        $EmbCount = 0

        if (Test-Path $EmbPath) {
            $EmbData = Get-Content -Raw -Path $EmbPath | ConvertFrom-Json
            foreach ($Entry in $EmbData.PSObject.Properties) {
                $NodeId = $Entry.Name
                $Vector = @($Entry.Value)
                if ($Vector.Count -gt 0) {
                    $Query = 'MATCH (n:TaxonomyNode {id: $id}) SET n.embedding = $vector'
                    try {
                        $null = Invoke-Cypher -Query $Query -Parameters @{ id = $NodeId; vector = $Vector }
                        $EmbCount++
                    } catch {
                        Write-Warn "Embedding for $NodeId : $_"
                    }
                }
            }
        }
        Write-OK "Exported $EmbCount embeddings"
    }

    # ── Summary ──
    Write-Host ''
    Write-Host '=== Graph Export Complete ===' -ForegroundColor Cyan
    Write-Host " Taxonomy nodes: $NodeCount" -ForegroundColor Green
    Write-Host " Edges: $EdgeCount" -ForegroundColor Green
    Write-Host " Conflicts: $ConflictCount" -ForegroundColor Green
    Write-Host " Neo4j URI: $Uri" -ForegroundColor Cyan
    Write-Host ''
    Write-Host "Open Neo4j Browser at $($HttpUri -replace ':7474', ':7474/browser/') to explore." -ForegroundColor DarkGray
    Write-Host ''
}