Public/Extract-EnvironmentFacts.ps1
|
function Extract-EnvironmentFacts {
    <#
    .SYNOPSIS
        Sends document text to an AI LLM and receives structured, verifiable facts.

    .DESCRIPTION
        Uses the extraction prompt template to instruct an AI to analyze IT
        documentation and extract every verifiable claim about the environment.
        Returns structured fact objects that can be verified with
        Test-EnvironmentFacts.

        Facts from every pipeline input are accumulated, merged with any facts
        already stored at -FactsPath, de-duplicated, and written back to
        -FactsPath once, in the end block.

    .PARAMETER DocumentText
        Raw text content from Import-Documentation. Empty or whitespace-only
        text is skipped with a warning.

    .PARAMETER SourceDocument
        Filename for tracking which document facts came from.

    .PARAMETER Provider
        AI provider to use.

    .PARAMETER ApiKey
        API key for the provider. Only forwarded to Invoke-AICompletion when
        supplied. Also reads from $env:LIVINGDOC_API_KEY (that lookup is not
        performed in this function).

    .PARAMETER Model
        Model to use. Defaults vary by provider.

    .PARAMETER Endpoint
        Custom API endpoint URL. Required for Custom provider.

    .PARAMETER FactsPath
        Path to save/append facts JSON. Default is .\facts.json.

    .OUTPUTS
        Array of fact objects: the facts already present in the facts file
        followed by the newly extracted, non-duplicate facts.

    .EXAMPLE
        $docs = Import-Documentation -Path ".\docs\*.md"
        $docs | ForEach-Object { Extract-EnvironmentFacts -DocumentText $_.Content -SourceDocument $_.FileName }
    #>
    [CmdletBinding()]
    param(
        # AllowEmptyString lets an empty document reach the skip-with-warning
        # guard in the process block instead of failing parameter binding.
        [Parameter(Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName)]
        [AllowEmptyString()]
        [Alias('Content')]
        [string]$DocumentText,

        [Parameter(ValueFromPipelineByPropertyName)]
        [Alias('FileName')]
        [string]$SourceDocument = 'unknown',

        [Parameter()]
        [ValidateSet('Anthropic', 'OpenAI', 'Ollama', 'Custom')]
        [string]$Provider = 'Anthropic',

        [Parameter()]
        [string]$ApiKey,

        [Parameter()]
        [string]$Model,

        [Parameter()]
        [string]$Endpoint,

        [Parameter()]
        [string]$FactsPath = '.\facts.json'
    )

    begin {
        $allFacts = [System.Collections.ArrayList]::new()

        # Running fact number for the whole invocation. Using one sequence for
        # all pipeline inputs keeps IDs unique even when several documents are
        # processed within the same second.
        $factSequence = 0

        # Locate the extraction prompt template. Check the module root BEFORE
        # calling Join-Path with it: Join-Path rejects a null/empty path.
        if ($script:ModuleRoot) {
            $templatePath = Join-Path (Join-Path $script:ModuleRoot 'Templates') 'extraction-prompt.txt'
        }
        else {
            # Fallback if module root not set
            $templatePath = Join-Path (Join-Path $PSScriptRoot '..') (Join-Path 'Templates' 'extraction-prompt.txt')
        }

        if (Test-Path $templatePath) {
            $promptTemplate = Get-Content -Path $templatePath -Raw -Encoding UTF8
        }
        else {
            Write-Warning "Prompt template not found at $templatePath. Using built-in template."
            $promptTemplate = @'
You are an IT documentation analyst. Given the following IT documentation text, extract every verifiable claim about the IT environment.

For each claim, provide:
- category: server, network, service, user_group, dns, certificate, software, policy, backup, other
- subject: the primary object (server name, IP, user, etc.)
- claim_type: server_exists, server_ip, server_role, server_os, service_running, dns_record, user_exists, group_members, network_subnet, certificate_binding, software_version, gpo_exists, dhcp_scope, file_share, domain_info, backup_config, other
- expected_value: what the document claims
- verification_method: ad_computer, ad_user, ad_group, dns_resolve, dns_record, cim_os, cim_service, cim_disk, network_test, file_share, certificate, gpo, dhcp_scope, ad_domain, registry, unverifiable
- confidence: 0.0-1.0 how confident you are in your extraction

Return ONLY valid JSON in this format:
{
  "facts": [
    {
      "source_text": "the exact text this fact was extracted from",
      "category": "server",
      "claims": [
        {
          "claim_type": "server_ip",
          "subject": "SQL01",
          "expected_value": "10.1.5.20",
          "verification_method": "dns_resolve"
        }
      ],
      "confidence": 0.95
    }
  ]
}

Extract ALL verifiable facts. Be thorough. Do not skip anything that could be checked against a live environment.

DOCUMENT TEXT:
{document_text}
'@
        }

        # Load existing facts database if it exists
        $existingDb = $null
        if (Test-Path $FactsPath) {
            try {
                $existingDb = Get-Content -Path $FactsPath -Raw -Encoding UTF8 | ConvertFrom-Json
                Write-Verbose "Loaded existing facts database with $($existingDb.facts.Count) facts."
            }
            catch {
                Write-Warning "Could not parse existing facts file at $FactsPath. Starting fresh."
                $existingDb = $null
            }
        }
    }

    process {
        if ([string]::IsNullOrWhiteSpace($DocumentText)) {
            Write-Warning "Empty document text provided for '$SourceDocument'. Skipping."
            return
        }

        Write-Verbose "Extracting facts from '$SourceDocument' ($($DocumentText.Length) characters)..."

        # Build the prompt. The right-hand side of -replace is a regex
        # substitution pattern, so '$' in the document (e.g. '$_', '$1', '$&')
        # must be escaped as '$$' or the document text would be rewritten.
        $userPrompt = $promptTemplate -replace '\{document_text\}', $DocumentText.Replace('$', '$$')

        # Call the AI; optional settings are only forwarded when supplied.
        $aiParams = @{
            Provider     = $Provider
            UserPrompt   = $userPrompt
            SystemPrompt = 'You are an IT documentation analyst. Extract verifiable infrastructure facts and return structured JSON only.'
            Temperature  = 0.1
            MaxTokens    = 4096
        }
        if ($ApiKey) { $aiParams['ApiKey'] = $ApiKey }
        if ($Model) { $aiParams['Model'] = $Model }
        if ($Endpoint) { $aiParams['Endpoint'] = $Endpoint }

        try {
            $aiResponse = Invoke-AICompletion @aiParams
        }
        catch {
            Write-Error "AI extraction failed for '$SourceDocument': $($_.Exception.Message)"
            return
        }

        # Normalize the response to a single string so the string methods below
        # are safe. NOTE(review): assumes Invoke-AICompletion returns the
        # completion text (one string or several lines) - confirm.
        $responseText = @($aiResponse) -join [Environment]::NewLine
        if ([string]::IsNullOrWhiteSpace($responseText)) {
            Write-Warning "AI response did not contain a 'facts' array for '$SourceDocument'."
            return
        }

        # Parse the JSON response
        $parsedFacts = $null
        try {
            # Try to extract JSON from the response (AI might wrap it in markdown code blocks)
            $jsonText = $responseText
            if ($jsonText -match '```json\s*([\s\S]*?)\s*```') {
                $jsonText = $Matches[1]
            }
            elseif ($jsonText -match '```\s*([\s\S]*?)\s*```') {
                $jsonText = $Matches[1]
            }

            # Trim any leading/trailing non-JSON text: keep the span from the
            # first '{' through the last '}'.
            $jsonText = $jsonText.Trim()
            $startIdx = $jsonText.IndexOf('{')
            $endIdx = $jsonText.LastIndexOf('}')
            if ($startIdx -ge 0 -and $endIdx -gt $startIdx) {
                $jsonText = $jsonText.Substring($startIdx, $endIdx - $startIdx + 1)
            }

            $parsedFacts = $jsonText | ConvertFrom-Json
        }
        catch {
            Write-Error "Failed to parse AI response as JSON for '$SourceDocument': $($_.Exception.Message)"
            Write-Verbose "Raw AI response: $($responseText.Substring(0, [Math]::Min(500, $responseText.Length)))"
            return
        }

        if (-not $parsedFacts.facts) {
            Write-Warning "AI response did not contain a 'facts' array for '$SourceDocument'."
            return
        }

        # Process each extracted fact
        $factCounter = 0    # facts extracted from this document
        $claimCounter = 0   # claims extracted from this document
        $idStamp = Get-Date -Format 'yyyyMMddHHmmss'

        foreach ($rawFact in $parsedFacts.facts) {
            $factCounter++
            $factSequence++
            $factId = "fact-$idStamp-$factSequence"

            # Only fall back to 0.8 when confidence is absent; a reported
            # confidence of 0 is a real value and must be kept.
            $confidence = if ($null -ne $rawFact.confidence) { $rawFact.confidence } else { 0.8 }

            $fact = [PSCustomObject]@{
                id              = $factId
                source_document = $SourceDocument
                source_text     = $rawFact.source_text
                category        = $rawFact.category
                claims          = [System.Collections.ArrayList]::new()
                confidence      = $confidence
                last_verified   = $null
                overall_status  = 'pending'
            }

            foreach ($rawClaim in $rawFact.claims) {
                $claim = [PSCustomObject]@{
                    claim_type          = $rawClaim.claim_type
                    subject             = $rawClaim.subject
                    expected_value      = $rawClaim.expected_value
                    verification_method = $rawClaim.verification_method
                    actual_value        = $null
                    status              = 'pending'
                    last_checked        = $null
                }
                [void]$fact.claims.Add($claim)
                $claimCounter++
            }

            [void]$allFacts.Add($fact)
        }

        Write-Verbose "Extracted $factCounter facts with $claimCounter total claims from '$SourceDocument'."
    }

    end {
        # Merge with existing database: existing facts come first, unchanged.
        $mergedFacts = [System.Collections.ArrayList]::new()

        if ($existingDb -and $existingDb.facts) {
            foreach ($existing in $existingDb.facts) {
                [void]$mergedFacts.Add($existing)
            }
        }

        # Deduplicate: a new fact is skipped entirely when ANY of its claims
        # matches an already-merged claim on subject + claim_type +
        # expected_value. Because accepted facts are appended to $mergedFacts,
        # new facts are also de-duplicated against each other.
        foreach ($newFact in $allFacts) {
            $isDuplicate = $false

            :claimSearch foreach ($newClaim in $newFact.claims) {
                foreach ($existingFact in $mergedFacts) {
                    foreach ($existingClaim in $existingFact.claims) {
                        if ($existingClaim.subject -eq $newClaim.subject -and
                            $existingClaim.claim_type -eq $newClaim.claim_type -and
                            $existingClaim.expected_value -eq $newClaim.expected_value) {
                            $isDuplicate = $true
                            break claimSearch
                        }
                    }
                }
            }

            if (-not $isDuplicate) {
                [void]$mergedFacts.Add($newFact)
            }
            else {
                Write-Verbose "Skipping duplicate fact: $($newFact.source_text)"
            }
        }

        # Collect source documents
        $sourceDocNames = @($mergedFacts | Select-Object -ExpandProperty source_document -Unique)

        # Keep the original creation time when the existing file recorded one;
        # otherwise stamp the database with the current time.
        $createdStamp = if ($existingDb -and $existingDb.metadata -and $existingDb.metadata.created) {
            $existingDb.metadata.created
        }
        else {
            (Get-Date).ToString('o')
        }

        # Build the facts database. Verification counters are written as
        # zero / null here.
        $factsDatabase = [PSCustomObject]@{
            metadata = [PSCustomObject]@{
                created          = $createdStamp
                last_verified    = $null
                source_documents = $sourceDocNames
                total_facts      = $mergedFacts.Count
                verified         = 0
                drift_detected   = 0
                unverifiable     = 0
            }
            facts    = $mergedFacts.ToArray()
        }

        # Save to file (best effort: a failure is reported as a warning and the
        # merged facts are still returned).
        try {
            $parentDir = Split-Path $FactsPath -Parent
            if ($parentDir -and -not (Test-Path $parentDir)) {
                New-Item -ItemType Directory -Path $parentDir -Force | Out-Null
            }
            $factsDatabase | ConvertTo-Json -Depth 10 | Out-File -FilePath $FactsPath -Encoding UTF8 -Force
            Write-Verbose "Facts database saved to $FactsPath ($($mergedFacts.Count) facts)."
        }
        catch {
            Write-Warning "Could not save facts database to $FactsPath : $($_.Exception.Message)"
        }

        return $mergedFacts.ToArray()
    }
}