# Private/Test-FireRequired.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

# Two-stage FIRE sniff: determines whether iterative extraction is worth the cost.
# Stage 1: Pre-extraction (zero API cost) — checks document characteristics.
# Stage 2: Post-extraction — evaluates single-shot output quality signals.
# Design: CL recommendation (p/41#7), 2-signal decision rule.
# Dot-sourced by AITriad.psm1 — do NOT export.

function Test-FireRequired {
    <#
    .SYNOPSIS
        Determines whether FIRE iterative extraction should be used for a document.
    .DESCRIPTION
        Two-stage evaluation:

        Stage 1 (pre-extraction, zero API cost):
          Checks document-level signals that predict single-shot will fail.
          If ANY signal fires → recommend FIRE immediately (skip single-shot).
          Stage 1 always runs first, so when a Stage 1 signal fires the function
          returns before looking at -SummaryObject, even if one was supplied.

        Stage 2 (post-extraction, evaluates single-shot output):
          Checks 5 quality signals on the AI's output. If 2+ signals fire →
          recommend re-running with FIRE.

        The decision rule defaults to 2 signals (threshold key
        'min_signals_required'). Individual thresholds are configurable via the
        $Thresholds parameter.

        Summary fields that are absent or $null are tolerated: a missing camp,
        key_points list, factual_claims list, evidence_criteria object or
        linked_taxonomy_nodes list simply contributes nothing to the relevant
        signal, and a key point without taxonomy_node_id counts as unmapped.
    .PARAMETER WordCount
        Estimated word count of the document (for Stage 1).
    .PARAMETER IsChunked
        Whether the document was split into multiple chunks (for Stage 1).
    .PARAMETER SourceType
        Source format: 'pdf', 'html', 'docx', etc. (for Stage 1).
    .PARAMETER SummaryObject
        The single-shot summary result (for Stage 2). Pass $null for Stage 1 only.
        Fields read: pov_summaries.<camp>.key_points[].taxonomy_node_id and
        factual_claims[] with fire_confidence, evidence_criteria.specificity,
        evidence_criteria.has_warrant and linked_taxonomy_nodes.
    .PARAMETER Thresholds
        Hashtable of configurable thresholds. Defaults provided for all signals.
        Recognised keys: word_count_min, complex_source_types,
        low_confidence_rate, specificity_collapse, warrant_deficit,
        unmapped_concept_rate, claim_clustering, min_signals_required.
        Unrecognised keys are ignored (reported via Write-Verbose).
    .OUTPUTS
        PSCustomObject with: ShouldFire (bool), Stage (1 or 2), Signals (which fired),
        Reason (human-readable explanation).
    #>

    [CmdletBinding()]
    param(
        [int]$WordCount = 0,

        [switch]$IsChunked,

        [string]$SourceType = 'unknown',

        [object]$SummaryObject = $null,

        [hashtable]$Thresholds = @{}
    )

    Set-StrictMode -Version Latest

    # Reads a member without tripping strict mode. Returns $Default when the
    # container is $null or the member is absent; returns the stored value
    # (which may itself be $null) when the member exists. Works for both
    # PSCustomObject-style objects and dictionaries.
    function Get-FireSniffMember {
        param([object]$InputObject, [string]$Name, [object]$Default = $null)

        if ($null -eq $InputObject) { return $Default }
        if ($InputObject -is [System.Collections.IDictionary]) {
            if ($InputObject.Contains($Name)) { return $InputObject[$Name] }
            return $Default
        }
        $Member = $InputObject.PSObject.Properties[$Name]
        if ($null -ne $Member) { return $Member.Value }
        return $Default
    }

    # ── Default thresholds (configurable) ─────────────────────────────────────
    $T = @{
        # Stage 1
        word_count_min          = 8000    # Documents above this are likely complex
        complex_source_types    = @('pdf') # Source types with layout complexity

        # Stage 2
        low_confidence_rate     = 0.30    # >30% of claims have fire_confidence < 0.5
        specificity_collapse    = 0.40    # >40% of claims rated "vague"
        warrant_deficit         = 0.50    # >50% of claims have has_warrant=false
        unmapped_concept_rate   = 0.40    # >40% of key_points are unmapped
        claim_clustering        = 0.60    # >60% of node mappings land on the top 3 nodes

        # Decision
        min_signals_required    = 2       # Stage 2: how many signals must fire
    }

    # Override with caller-provided thresholds. The null guard matters because
    # an explicit -Thresholds $null would otherwise throw under strict mode.
    if ($null -ne $Thresholds) {
        foreach ($Key in $Thresholds.Keys) {
            if ($T.ContainsKey($Key)) {
                $T[$Key] = $Thresholds[$Key]
            }
            else {
                Write-Verbose "FIRE sniff: ignoring unknown threshold key '$Key'"
            }
        }
    }

    $FiredSignals = [System.Collections.Generic.List[string]]::new()

    # ── Stage 1: Pre-extraction (zero API cost) ───────────────────────────────
    $WordCountMin = $T['word_count_min']
    if ($WordCount -gt $WordCountMin) {
        $FiredSignals.Add("word_count=$WordCount (>$WordCountMin)")
    }

    if ($IsChunked) {
        $FiredSignals.Add('document_chunked')
    }

    if ($SourceType -in $T['complex_source_types']) {
        $FiredSignals.Add("complex_source_type=$SourceType")
    }

    # Stage 1 decision: ANY signal → go directly to FIRE
    if ($FiredSignals.Count -gt 0) {
        Write-Verbose "FIRE sniff Stage 1: $($FiredSignals.Count) signal(s) fired — $($FiredSignals -join ', ')"
        return [PSCustomObject][ordered]@{
            ShouldFire = $true
            Stage      = 1
            Signals    = @($FiredSignals)
            Reason     = "Stage 1 pre-extraction: $($FiredSignals -join '; ')"
        }
    }

    # ── Stage 2: Post-extraction (requires SummaryObject) ─────────────────────
    if ($null -eq $SummaryObject) {
        # No summary to evaluate — Stage 1 didn't fire, can't do Stage 2
        return [PSCustomObject][ordered]@{
            ShouldFire = $false
            Stage      = 1
            Signals    = @()
            Reason     = 'Stage 1: no pre-extraction signals fired; no summary to evaluate for Stage 2'
        }
    }

    $Stage2Signals = [System.Collections.Generic.List[string]]::new()

    # Collect key points across POV camps. Missing camps / key_points lists are
    # skipped, as are $null entries inside a list.
    $AllKeyPoints = [System.Collections.Generic.List[object]]::new()
    $PovSummaries = Get-FireSniffMember $SummaryObject 'pov_summaries'
    foreach ($Camp in @('accelerationist', 'safetyist', 'skeptic')) {
        $CampData = Get-FireSniffMember $PovSummaries $Camp
        foreach ($KP in @(Get-FireSniffMember $CampData 'key_points')) {
            if ($null -ne $KP) { $AllKeyPoints.Add($KP) }
        }
    }

    # @(...) normalises "absent", "$null", "single object" and "array" to an array.
    $AllClaims = @(Get-FireSniffMember $SummaryObject 'factual_claims' | Where-Object { $null -ne $_ })
    $TotalKP = $AllKeyPoints.Count
    $TotalClaims = $AllClaims.Count

    # Signal 1: Low-confidence rate
    if ($TotalClaims -gt 0) {
        $LowConf = @($AllClaims | Where-Object {
            # A claim with no fire_confidence member is treated as 0.5 (not low).
            (Get-FireSniffMember $_ 'fire_confidence' 0.5) -lt 0.5
        }).Count
        $LowConfRate = $LowConf / $TotalClaims
        if ($LowConfRate -gt $T['low_confidence_rate']) {
            $Stage2Signals.Add("low_confidence_rate=$([Math]::Round($LowConfRate * 100))% ($LowConf/$TotalClaims claims)")
        }
    }

    # Signal 2: Specificity collapse
    if ($TotalClaims -gt 0) {
        $VagueClaims = @($AllClaims | Where-Object {
            $Criteria = Get-FireSniffMember $_ 'evidence_criteria'
            (Get-FireSniffMember $Criteria 'specificity') -eq 'vague'
        }).Count
        $VagueRate = $VagueClaims / $TotalClaims
        if ($VagueRate -gt $T['specificity_collapse']) {
            $Stage2Signals.Add("specificity_collapse=$([Math]::Round($VagueRate * 100))% ($VagueClaims/$TotalClaims claims)")
        }
    }

    # Signal 3: Warrant deficit
    if ($TotalClaims -gt 0) {
        $NoWarrant = @($AllClaims | Where-Object {
            $Criteria = Get-FireSniffMember $_ 'evidence_criteria'
            # Default of $true: only claims that explicitly carry a falsy
            # has_warrant count; claims without the member are not penalised.
            -not (Get-FireSniffMember $Criteria 'has_warrant' $true)
        }).Count
        $NoWarrantRate = $NoWarrant / $TotalClaims
        if ($NoWarrantRate -gt $T['warrant_deficit']) {
            $Stage2Signals.Add("warrant_deficit=$([Math]::Round($NoWarrantRate * 100))% ($NoWarrant/$TotalClaims claims)")
        }
    }

    # Signal 4: Unmapped concept rate
    if ($TotalKP -gt 0) {
        # Absent, $null and empty taxonomy_node_id all count as unmapped.
        $Unmapped = @($AllKeyPoints | Where-Object { -not (Get-FireSniffMember $_ 'taxonomy_node_id') }).Count
        $UnmappedRate = $Unmapped / $TotalKP
        if ($UnmappedRate -gt $T['unmapped_concept_rate']) {
            $Stage2Signals.Add("unmapped_concept_rate=$([Math]::Round($UnmappedRate * 100))% ($Unmapped/$TotalKP key_points)")
        }
    }

    # Signal 5: Claim clustering (share of all claim→node mappings held by the
    # three most-referenced nodes)
    if ($TotalClaims -gt 3) {
        $NodeCounts = @{}
        foreach ($Claim in $AllClaims) {
            foreach ($NodeId in @(Get-FireSniffMember $Claim 'linked_taxonomy_nodes')) {
                # A $null id cannot be used as a hashtable key; skip it.
                if ($null -eq $NodeId) { continue }
                $NodeCounts[$NodeId] = [int]$NodeCounts[$NodeId] + 1
            }
        }

        $SortedCounts = @($NodeCounts.Values | Sort-Object -Descending)
        $TotalMappings = 0
        foreach ($Count in $SortedCounts) { $TotalMappings += $Count }
        $Top3Count = 0
        foreach ($Count in @($SortedCounts | Select-Object -First 3)) { $Top3Count += $Count }

        if ($TotalMappings -gt 0) {
            $ClusterRate = $Top3Count / $TotalMappings
            if ($ClusterRate -gt $T['claim_clustering']) {
                $Stage2Signals.Add("claim_clustering=$([Math]::Round($ClusterRate * 100))% (top 3 nodes cover $Top3Count/$TotalMappings mappings)")
            }
        }
    }

    # Stage 2 decision: min_signals_required (default 2) or more → recommend FIRE
    $ShouldFire = $Stage2Signals.Count -ge $T['min_signals_required']

    if ($Stage2Signals.Count -gt 0) {
        Write-Verbose "FIRE sniff Stage 2: $($Stage2Signals.Count) signal(s) fired — $($Stage2Signals -join ', ')"
    }
    else {
        Write-Verbose 'FIRE sniff Stage 2: no signals fired — single-shot quality is adequate'
    }

    if ($ShouldFire) {
        $ReasonMsg = "Stage 2: $($Stage2Signals.Count) signals fired (>=$($T['min_signals_required']) required) — $($Stage2Signals -join '; ')"
    }
    else {
        $ReasonMsg = "Stage 2: $($Stage2Signals.Count) signal(s) fired (<$($T['min_signals_required']) required) — single-shot adequate"
    }
    return [PSCustomObject][ordered]@{
        ShouldFire = $ShouldFire
        Stage      = 2
        Signals    = @($Stage2Signals)
        Reason     = $ReasonMsg
    }
}