Private/Test-FireRequired.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved. # Licensed under the MIT License. See LICENSE file in the project root. # Two-stage FIRE sniff: determines whether iterative extraction is worth the cost. # Stage 1: Pre-extraction (zero API cost) — checks document characteristics. # Stage 2: Post-extraction — evaluates single-shot output quality signals. # Design: CL recommendation (p/41#7), 2-signal decision rule. # Dot-sourced by AITriad.psm1 — do NOT export. function Test-FireRequired { <# .SYNOPSIS Determines whether FIRE iterative extraction should be used for a document. .DESCRIPTION Two-stage evaluation: Stage 1 (pre-extraction, zero API cost): Checks document-level signals that predict single-shot will fail. If ANY signal fires → recommend FIRE immediately (skip single-shot). Stage 2 (post-extraction, evaluates single-shot output): Checks 5 quality signals on the AI's output. If 2+ signals fire → recommend re-running with FIRE. The 2-signal decision rule is fixed. Individual thresholds are configurable via the $Thresholds parameter. .PARAMETER WordCount Estimated word count of the document (for Stage 1). .PARAMETER IsChunked Whether the document was split into multiple chunks (for Stage 1). .PARAMETER SourceType Source format: 'pdf', 'html', 'docx', etc. (for Stage 1). .PARAMETER SummaryObject The single-shot summary result (for Stage 2). Pass $null for Stage 1 only. .PARAMETER Thresholds Hashtable of configurable thresholds. Defaults provided for all signals. .OUTPUTS PSCustomObject with: ShouldFire (bool), Stage (1 or 2), Signals (which fired), Reason (human-readable explanation). #> [CmdletBinding()] param( [int]$WordCount = 0, [switch]$IsChunked, [string]$SourceType = 'unknown', [object]$SummaryObject = $null, [hashtable]$Thresholds = @{} ) Set-StrictMode -Version Latest # ── Default thresholds (configurable) ───────────────────────────────────── $T = @{ # Stage 1 word_count_min = 8000 # Documents above this are likely complex complex_source_types = @('pdf') # Source types with layout complexity # Stage 2 low_confidence_rate = 0.30 # >30% of claims have fire_confidence < 0.5 specificity_collapse = 0.40 # >40% of claims rated "vague" warrant_deficit = 0.50 # >50% of claims have has_warrant=false unmapped_concept_rate = 0.40 # >40% of key_points are unmapped claim_clustering = 0.60 # >60% of claims map to same 3 nodes # Decision min_signals_required = 2 # Stage 2: how many signals must fire } # Override with caller-provided thresholds foreach ($Key in $Thresholds.Keys) { if ($T.ContainsKey($Key)) { $T[$Key] = $Thresholds[$Key] } } $FiredSignals = [System.Collections.Generic.List[string]]::new() # ── Stage 1: Pre-extraction (zero API cost) ─────────────────────────────── $WordCountMin = $T['word_count_min'] if ($WordCount -gt $WordCountMin) { $FiredSignals.Add("word_count=$WordCount (>$WordCountMin)") } if ($IsChunked) { $FiredSignals.Add('document_chunked') } if ($SourceType -in $T['complex_source_types']) { $FiredSignals.Add("complex_source_type=$SourceType") } # Stage 1 decision: ANY signal → go directly to FIRE if ($FiredSignals.Count -gt 0) { Write-Verbose "FIRE sniff Stage 1: $($FiredSignals.Count) signal(s) fired — $($FiredSignals -join ', ')" return [PSCustomObject][ordered]@{ ShouldFire = $true Stage = 1 Signals = @($FiredSignals) Reason = "Stage 1 pre-extraction: $($FiredSignals -join '; ')" } } # ── Stage 2: Post-extraction (requires SummaryObject) ───────────────────── if ($null -eq $SummaryObject) { # No summary to evaluate — Stage 1 didn't fire, can't do Stage 2 return [PSCustomObject][ordered]@{ ShouldFire = $false Stage = 1 Signals = @() Reason = 'Stage 1: no pre-extraction signals fired; no summary to evaluate for Stage 2' } } $Stage2Signals = [System.Collections.Generic.List[string]]::new() # Collect all claims across POV camps $AllKeyPoints = [System.Collections.Generic.List[object]]::new() $Camps = @('accelerationist', 'safetyist', 'skeptic') foreach ($Camp in $Camps) { $CampData = $SummaryObject.pov_summaries.$Camp if ($CampData -and $CampData.key_points) { foreach ($KP in @($CampData.key_points)) { $AllKeyPoints.Add($KP) } } } if ($SummaryObject.factual_claims) { $AllClaims = @($SummaryObject.factual_claims) } else { $AllClaims = @() } $TotalKP = $AllKeyPoints.Count $TotalClaims = $AllClaims.Count # Signal 1: Low-confidence rate if ($TotalClaims -gt 0) { $LowConf = @($AllClaims | Where-Object { if ($_.PSObject.Properties['fire_confidence']) { $FC = $_.fire_confidence } else { $FC = 0.5 } $FC -lt 0.5 }).Count $LowConfRate = $LowConf / $TotalClaims if ($LowConfRate -gt $T['low_confidence_rate']) { $Stage2Signals.Add("low_confidence_rate=$([Math]::Round($LowConfRate * 100))% ($LowConf/$TotalClaims claims)") } } # Signal 2: Specificity collapse if ($TotalClaims -gt 0) { $VagueClaims = @($AllClaims | Where-Object { $_.PSObject.Properties['evidence_criteria'] -and $_.evidence_criteria.PSObject.Properties['specificity'] -and $_.evidence_criteria.specificity -eq 'vague' }).Count $VagueRate = $VagueClaims / $TotalClaims if ($VagueRate -gt $T['specificity_collapse']) { $Stage2Signals.Add("specificity_collapse=$([Math]::Round($VagueRate * 100))% ($VagueClaims/$TotalClaims claims)") } } # Signal 3: Warrant deficit if ($TotalClaims -gt 0) { $NoWarrant = @($AllClaims | Where-Object { $_.PSObject.Properties['evidence_criteria'] -and $_.evidence_criteria.PSObject.Properties['has_warrant'] -and -not $_.evidence_criteria.has_warrant }).Count $NoWarrantRate = $NoWarrant / $TotalClaims if ($NoWarrantRate -gt $T['warrant_deficit']) { $Stage2Signals.Add("warrant_deficit=$([Math]::Round($NoWarrantRate * 100))% ($NoWarrant/$TotalClaims claims)") } } # Signal 4: Unmapped concept rate if ($TotalKP -gt 0) { $Unmapped = @($AllKeyPoints | Where-Object { -not $_.taxonomy_node_id }).Count $UnmappedRate = $Unmapped / $TotalKP if ($UnmappedRate -gt $T['unmapped_concept_rate']) { $Stage2Signals.Add("unmapped_concept_rate=$([Math]::Round($UnmappedRate * 100))% ($Unmapped/$TotalKP key_points)") } } # Signal 5: Claim clustering (>60% map to same 3 nodes) if ($TotalClaims -gt 3) { $NodeCounts = @{} foreach ($Claim in $AllClaims) { if ($Claim.PSObject.Properties['linked_taxonomy_nodes']) { foreach ($NodeId in @($Claim.linked_taxonomy_nodes)) { if (-not $NodeCounts.ContainsKey($NodeId)) { $NodeCounts[$NodeId] = 0 } $NodeCounts[$NodeId]++ } } } $Top3Count = ($NodeCounts.Values | Sort-Object -Descending | Select-Object -First 3 | Measure-Object -Sum).Sum $TotalMappings = ($NodeCounts.Values | Measure-Object -Sum).Sum if ($TotalMappings -gt 0) { $ClusterRate = $Top3Count / $TotalMappings if ($ClusterRate -gt $T['claim_clustering']) { $Stage2Signals.Add("claim_clustering=$([Math]::Round($ClusterRate * 100))% (top 3 nodes cover $Top3Count/$TotalMappings mappings)") } } } # Stage 2 decision: 2+ signals → recommend FIRE $ShouldFire = $Stage2Signals.Count -ge $T['min_signals_required'] if ($Stage2Signals.Count -gt 0) { Write-Verbose "FIRE sniff Stage 2: $($Stage2Signals.Count) signal(s) fired — $($Stage2Signals -join ', ')" } else { Write-Verbose 'FIRE sniff Stage 2: no signals fired — single-shot quality is adequate' } if ($ShouldFire) { $ReasonMsg = "Stage 2: $($Stage2Signals.Count) signals fired (>=$($T['min_signals_required']) required) — $($Stage2Signals -join '; ')" } else { $ReasonMsg = "Stage 2: $($Stage2Signals.Count) signal(s) fired (<$($T['min_signals_required']) required) — single-shot adequate" } return [PSCustomObject][ordered]@{ ShouldFire = $ShouldFire Stage = 2 Signals = @($Stage2Signals) Reason = $ReasonMsg } } |