Private/Get-AzLocalUpdateRunHealthEvidence.ps1

function Get-AzLocalUpdateRunHealthEvidence {
    <#
    .SYNOPSIS
        Returns same-cluster failed healthCheckResult entries that occurred
        within a time window around a failed update run (by default 2 hours
        before the run started to 1 hour after it ended), used to enrich
        HealthCheck-category failures with the actual blocking checks.

    .DESCRIPTION
        When Get-AzLocalUpdateRunFailures categorises a failed run as
        ErrorCategory='HealthCheck' the operator typically wants to know
        WHICH health check fired. That information is NOT on the update run
        itself - it lives on the cluster's `updateSummaries/default` child
        resource as `properties.healthCheckResult[]`. This helper queries
        Azure Resource Graph for that array (using the same anti-mv-expand
        pattern as Get-AzLocalFleetHealthFailures) and filters down to:

          * the supplied ClusterResourceId
          * status == 'Failed'
          * severity == 'Critical' (Warning is only included when
            -MinSeverity Warning is passed; Informational is always
            excluded to keep the evidence list short and triage-focused)
          * timestamp within [RunStartTime - WindowBefore,
                              RunEndTime + WindowAfter] (inclusive)

        Entries without a parseable timestamp are skipped because they
        cannot be placed inside the window.

        Each returned row is a slim projection (Timestamp, Severity, Title,
        FailureReason, Description, Remediation, TargetResourceName,
        TargetResourceType, TargetResourceID) so the Step.09 renderer can
        fold it into a collapsible <details> block without pulling in
        heavy fields. FailureReason is populated from the entry's
        `displayName` property.

    .PARAMETER ClusterResourceId
        Full ARM resource id of the cluster the failed run belongs to.
        Matched case-insensitively (lower-cased with the invariant culture
        on this side, `tolower()` on the KQL side).

    .PARAMETER RunStartTime
        The run's StartedAt (UTC DateTime). Pre-window evidence is
        captured back to RunStartTime - WindowBefore. The value is passed
        through ToUniversalTime(), so a DateTime whose Kind is not Utc is
        converted as if it were local time.

    .PARAMETER RunEndTime
        The run's EndedAt (UTC DateTime). If the run is still in progress
        pass [datetime]::UtcNow. Converted the same way as RunStartTime.

    .PARAMETER WindowBefore
        TimeSpan to look back before RunStartTime. Default 2 hours -
        covers the readiness check that immediately preceded the run.

    .PARAMETER WindowAfter
        TimeSpan to look forward past RunEndTime. Default 1 hour -
        catches the post-failure health refresh that the orchestrator
        triggers on a failed run.

    .PARAMETER MinSeverity
        'Critical' (default) or 'Warning' to include Warning-severity
        entries as well. Informational is never returned.

    .PARAMETER SubscriptionId
        Optional subscription scope for the ARG query. When omitted the
        query is issued without a -SubscriptionId argument.

    .OUTPUTS
        PSCustomObject - one row per matching healthCheckResult entry,
        sorted Timestamp descending (most recent first). Nothing is
        emitted when the ARG query fails, the cluster doc cannot be found,
        or no rows match the window/severity filter; because PowerShell
        unrolls arrays on output, callers that need an array (including
        the zero- and one-row cases) should wrap the call in @( ... ).

    .NOTES
        Author: Neil Bird, Microsoft.
        Added: v0.8.80 (HealthCheck failure enrichment)
        Module: AzLocal.UpdateManagement (private helper)
        Caller: Get-AzLocalUpdateRunFailures -EnrichWithHealthEvidence

        Reuses the same ARG query shape as Get-AzLocalFleetHealthFailures
        to dodge the documented mv-expand 128-row cap. Differences:
        scoped to a single cluster id, scoped to a time window, returns
        a slim projection.

        ARG query failures are treated as best-effort: the error message
        is written to the verbose stream and no evidence is returned.
    #>

    [CmdletBinding()]
    [OutputType([PSCustomObject])]
    param(
        [Parameter(Mandatory = $true)]
        [ValidateNotNullOrEmpty()]
        [string]$ClusterResourceId,

        [Parameter(Mandatory = $true)]
        [datetime]$RunStartTime,

        [Parameter(Mandatory = $true)]
        [datetime]$RunEndTime,

        [Parameter(Mandatory = $false)]
        [timespan]$WindowBefore = ([timespan]::FromHours(2)),

        [Parameter(Mandatory = $false)]
        [timespan]$WindowAfter  = ([timespan]::FromHours(1)),

        [Parameter(Mandatory = $false)]
        [ValidateSet('Critical','Warning')]
        [string]$MinSeverity = 'Critical',

        [Parameter(Mandatory = $false)]
        [string]$SubscriptionId
    )

    # Invariant-culture lower-casing: the KQL side normalises with tolower(),
    # so a culture-sensitive ToLower() (e.g. tr-TR mapping 'I' to dotless
    # 'i') would produce an id that can never match.
    $clusterId = $ClusterResourceId.ToLowerInvariant()

    # The id is embedded in a single-quoted KQL string literal below; escape
    # backslashes first, then single quotes, so the value cannot terminate
    # the literal early or alter the query.
    $clusterIdLiteral = $clusterId.Replace('\', '\\').Replace("'", "\'")

    $windowFrom = $RunStartTime.ToUniversalTime() - $WindowBefore
    $windowTo   = $RunEndTime.ToUniversalTime()   + $WindowAfter

    # KQL: project the raw healthCheckResult array per cluster, scoped to
    # the supplied cluster id. The parent cluster id is rebuilt from the
    # id segments and compared with equality rather than using a
    # `tolower(id) startswith` so the query stays selective.
    $kql = @"
extensibilityresources
| where type =~ 'microsoft.azurestackhci/clusters/updatesummaries'
| extend segments = split(id, '/')
| extend
    SubscriptionId = tostring(segments[2]),
    ResourceGroup = tostring(segments[4]),
    ClusterName = tostring(segments[8])
| extend ClusterResourceId = tolower(strcat('/subscriptions/', SubscriptionId, '/resourceGroups/', ResourceGroup, '/providers/Microsoft.AzureStackHCI/clusters/', ClusterName))
| where ClusterResourceId == '$clusterIdLiteral'
| project
    ClusterResourceId,
    HealthCheckResult = properties.healthCheckResult
"@

    Write-Verbose "Get-AzLocalUpdateRunHealthEvidence: ARG query for cluster '$clusterId' window [$windowFrom .. $windowTo] severity>=$MinSeverity"

    # Only pass -SubscriptionId when the caller supplied one.
    $queryArgs = @{ Query = $kql }
    if ($SubscriptionId) { $queryArgs['SubscriptionId'] = $SubscriptionId }

    try {
        $rows = Invoke-AzResourceGraphQuery @queryArgs
    }
    catch {
        # Evidence enrichment is best-effort: a failed lookup must not break
        # the caller's failure report, so log and return nothing.
        Write-Verbose "Get-AzLocalUpdateRunHealthEvidence ARG query failed: $($_.Exception.Message)"
        return @()
    }

    if (-not $rows -or $rows.Count -eq 0) { return @() }

    $allowedSeverities = if ($MinSeverity -eq 'Warning') { @('Critical','Warning') } else { @('Critical') }

    $evidence = New-Object System.Collections.ArrayList
    foreach ($cluster in @($rows)) {
        $hcr = $cluster.HealthCheckResult
        if (-not $hcr) { continue }

        foreach ($hc in @($hcr)) {
            # Status and severity comparisons are case-insensitive
            # (PowerShell -ne / -notcontains defaults).
            $status = "$($hc.status)"
            if ($status -ne 'Failed') { continue }

            $sev = "$($hc.severity)"
            if ($allowedSeverities -notcontains $sev) { continue }

            # Normalise the entry timestamp to UTC; entries with a missing
            # or unparseable timestamp cannot be windowed and are skipped.
            # NOTE(review): a timestamp string without 'Z' or an offset is
            # interpreted as local time by ToUniversalTime() - confirm ARG
            # always returns offset-qualified values.
            $ts = $null
            if ($hc.timestamp) {
                try { $ts = ([datetime]$hc.timestamp).ToUniversalTime() } catch { $ts = $null }
            }
            if (-not $ts) { continue }
            if ($ts -lt $windowFrom -or $ts -gt $windowTo) { continue }

            [void]$evidence.Add([PSCustomObject]@{
                Timestamp          = $ts
                Severity           = $sev
                Title              = "$($hc.title)"
                FailureReason      = "$($hc.displayName)"
                Description        = "$($hc.description)"
                Remediation        = "$($hc.remediation)"
                TargetResourceName = "$($hc.targetResourceName)"
                TargetResourceType = "$($hc.targetResourceType)"
                TargetResourceID   = "$($hc.targetResourceID)"
            })
        }
    }

    if ($evidence.Count -eq 0) { return @() }

    # Most recent first - operators want to see the latest evidence at the
    # top of the collapsed <details> block.
    return @($evidence | Sort-Object -Property Timestamp -Descending)
}