modules/Invoke-AksKarpenterCost.ps1

#Requires -Version 7.4
<#
.SYNOPSIS
    AKS Karpenter cost wrapper. Cluster-level node cost rollup + idle node
    detection (Reader-only) plus opt-in Karpenter Provisioner inspection
    (Azure Kubernetes Service Cluster User Role on the cluster).
 
.DESCRIPTION
    Two finding tiers are emitted:
 
      * Reader-only (always enabled):
          - aks.node-cost-rollup Info KubeNodeInventory node-hours over LookbackDays
          - aks.idle-node Medium avg node CPU < 10% over LookbackDays
 
      * Elevated (Azure Kubernetes Service Cluster User Role; gated by
        -EnableElevatedRbac, OFF by default):
          - karpenter.consolidation-disabled Medium consolidation off: spec.consolidation.enabled not true, or spec.disruption.consolidationPolicy=WhenEmpty
          - karpenter.over-provisioned Medium avg node util <50% over LookbackDays
          - karpenter.no-node-limit High spec.limits is missing
 
    Karpenter findings require a kubeconfig to query the
    `provisioners.karpenter.sh` CRD via kubectl. When -EnableElevatedRbac is
    NOT set the wrapper SKIPS the kubectl branch entirely; no kubeconfig
    fetch and no kubectl process is launched.
 
    Reuses shared modules:
      * AksDiscovery (cluster discovery via Az.ResourceGraph)
      * KqlQuery (Container Insights queries)
      * KubeAuth (KubeAuthMode handling, identical to Invoke-Kubescape et al.)
      * Retry, Sanitize, Installer (Invoke-WithTimeout)
      * RbacTier (the per-wrapper opt-in mechanism shipped in this PR)
#>

[CmdletBinding(SupportsShouldProcess=$true, ConfirmImpact='Medium')]
param (
    # Subscription to scan. Passed to Get-AksClustersInScope and copied into
    # the result envelope's Subscription field.
    [Parameter(Mandatory)]
    [ValidateNotNullOrEmpty()]
    [string] $SubscriptionId,

    # Optional scope filters, forwarded as-is to Get-AksClustersInScope.
    [string] $ResourceGroup,
    [string] $ClusterName,
    # When supplied, this workspace is used for every cluster instead of
    # resolving one per cluster via Resolve-WorkspaceIdFromCluster.
    [string] $LogAnalyticsWorkspaceId,

    # Size of the KQL time window, in days (ago(<LookbackDays>d)).
    [ValidateRange(1, 30)]
    [int] $LookbackDays = 7,

    # kubectl inputs; only consulted on the elevated (-EnableElevatedRbac)
    # path. An empty -Namespace makes the kubectl query use -A.
    [string] $KubeconfigPath,
    [string] $KubeContext,
    [string] $Namespace = '',

    # Authentication settings forwarded unchanged to Initialize-KubeAuth.
    [ValidateSet('Default', 'Kubelogin', 'WorkloadIdentity')]
    [string] $KubeAuthMode = 'Default',
    [string] $KubeloginServerId,
    [string] $KubeloginClientId,
    [string] $KubeloginTenantId,
    [string] $WorkloadIdentityClientId,
    [string] $WorkloadIdentityTenantId,
    [string] $WorkloadIdentityServiceAccountToken,

    # Opt-in for the kubectl-based Karpenter inspection. Ignored while
    # -WhatIf is in effect.
    [switch] $EnableElevatedRbac,

    # NOTE(review): not referenced in the portion of the script visible to
    # this review - confirm where it is consumed.
    [string] $OutputPath
)

# Strict mode + terminating errors: reads of undefined variables/properties
# and non-terminating cmdlet errors surface as exceptions in this script.
Set-StrictMode -Version Latest
$ErrorActionPreference = 'Stop'

# ---------------------------------------------------------------------------
# Shared module dot-source with no-op shims for unit-test isolation. Mirrors
# the pattern used by Invoke-AksRightsizing.
# ---------------------------------------------------------------------------
# Retry helper: dot-source when the file exists, otherwise fall back to a shim.
$retryPath = Join-Path $PSScriptRoot 'shared' 'Retry.ps1'
if (Test-Path $retryPath) { . $retryPath }
if (-not (Get-Command Invoke-WithRetry -ErrorAction SilentlyContinue)) {
    # Shim declares only -ScriptBlock and invokes it exactly once (no retry,
    # no backoff).
    function Invoke-WithRetry { param([scriptblock]$ScriptBlock) & $ScriptBlock }
}

# Credential scrubber used on every exception message before it is recorded.
$sanitizePath = Join-Path $PSScriptRoot 'shared' 'Sanitize.ps1'
if (Test-Path $sanitizePath) { . $sanitizePath }
if (-not (Get-Command Remove-Credentials -ErrorAction SilentlyContinue)) {
    # Shim returns the text unchanged, i.e. performs no scrubbing.
    function Remove-Credentials { param([string]$Text) return $Text }
}

# Structured-error helpers and the wrapper envelope factory. Each command gets
# a minimal fallback below when the shared file did not define it.
$errorsPath = Join-Path $PSScriptRoot 'shared' 'Errors.ps1'
if (Test-Path $errorsPath) { . $errorsPath }

$envelopePath = Join-Path $PSScriptRoot 'shared' 'New-WrapperEnvelope.ps1'
if (Test-Path $envelopePath) { . $envelopePath }
# Shim: returns an envelope object with an empty Findings array and the supplied errors.
if (-not (Get-Command New-WrapperEnvelope -ErrorAction SilentlyContinue)) { function New-WrapperEnvelope { param([string]$Source,[string]$Status='Failed',[string]$Message='',[object[]]$FindingErrors=@()) return [PSCustomObject]@{ Source=$Source; SchemaVersion='1.0'; Status=$Status; Message=$Message; Findings=@(); Errors=@($FindingErrors) } } }
if (-not (Get-Command New-FindingError -ErrorAction SilentlyContinue)) {
    # Shim: packs its five string arguments into a plain object.
    function New-FindingError { param([string]$Source,[string]$Category,[string]$Reason,[string]$Remediation,[string]$Details) return [pscustomobject]@{ Source=$Source; Category=$Category; Reason=$Reason; Remediation=$Remediation; Details=$Details } }
}
if (-not (Get-Command Format-FindingErrorMessage -ErrorAction SilentlyContinue)) {
    # Shim: renders "[Source] Category: Reason" and appends " Action: <Remediation>"
    # when a remediation is present. The Details field is not part of the line.
    function Format-FindingErrorMessage {
        param([Parameter(Mandatory)]$FindingError)
        $line = "[{0}] {1}: {2}" -f $FindingError.Source, $FindingError.Category, $FindingError.Reason
        if ($FindingError.Remediation) { $line += " Action: $($FindingError.Remediation)" }
        return $line
    }
}

# From here on, a shared file is dot-sourced only when its command is not
# already defined (e.g. pre-defined by a test harness) and the file exists.
$installerPath = Join-Path $PSScriptRoot 'shared' 'Installer.ps1'
if (-not (Get-Command Invoke-WithTimeout -ErrorAction SilentlyContinue) -and (Test-Path $installerPath)) { . $installerPath }
if (-not (Get-Command Invoke-WithTimeout -ErrorAction SilentlyContinue)) {
    # Shim: runs the command synchronously and returns ExitCode + trimmed
    # output. -TimeoutSec is accepted but not enforced here.
    function Invoke-WithTimeout {
        param (
            [Parameter(Mandatory)][string]$Command,
            [Parameter(Mandatory)][string[]]$Arguments,
            [int]$TimeoutSec = 300
        )
        # stderr is merged into stdout, so Output contains both streams.
        $output = & $Command @Arguments 2>&1 | Out-String
        return [PSCustomObject]@{ ExitCode = $LASTEXITCODE; Output = $output.Trim() }
    }
}

# The remaining helpers have no shim in this section: when neither the command
# nor the shared file is present, the command stays undefined.
$aksDiscoveryPath = Join-Path $PSScriptRoot 'shared' 'AksDiscovery.ps1'
if (-not (Get-Command Get-AksClustersInScope -ErrorAction SilentlyContinue) -and (Test-Path $aksDiscoveryPath)) { . $aksDiscoveryPath }

$kqlPath = Join-Path $PSScriptRoot 'shared' 'KqlQuery.ps1'
if (-not (Get-Command Invoke-LogAnalyticsQuery -ErrorAction SilentlyContinue) -and (Test-Path $kqlPath)) { . $kqlPath }

$kubeAuthPath = Join-Path $PSScriptRoot 'shared' 'KubeAuth.ps1'
if (-not (Get-Command Initialize-KubeAuth -ErrorAction SilentlyContinue) -and (Test-Path $kubeAuthPath)) { . $kubeAuthPath }

$rbacTierPath = Join-Path $PSScriptRoot 'shared' 'RbacTier.ps1'
if (-not (Get-Command Get-RbacTier -ErrorAction SilentlyContinue) -and (Test-Path $rbacTierPath)) { . $rbacTierPath }

# ---------------------------------------------------------------------------
# Result envelope (v1)
# ---------------------------------------------------------------------------
# Envelope returned to the caller. Starts as a Reader-tier success with no
# findings; later sections update Status/Message/RbacTier in place. Keys are
# inserted in the order they should appear on the emitted object.
$result = [ordered]@{}
$result['SchemaVersion'] = '1.0'
$result['Source']        = 'aks-karpenter-cost'
$result['Status']        = 'Success'
$result['Message']       = ''
$result['Findings']      = @()
$result['Subscription']  = $SubscriptionId
$result['Timestamp']     = (Get-Date).ToUniversalTime().ToString('o')
$result['RbacTier']      = 'Reader'

# ---------------------------------------------------------------------------
# Opt-in elevated RBAC tier
# ---------------------------------------------------------------------------
# Elevated operations run only when the caller opted in AND -WhatIf is not in
# effect. The tier is raised (in the shared RbacTier helper, when loaded, and
# on the result envelope) only in that case.
$allowElevatedOps = $EnableElevatedRbac.IsPresent -and (-not $WhatIfPreference)
if ($allowElevatedOps) {
    if (Get-Command Set-RbacTier -ErrorAction SilentlyContinue) {
        Set-RbacTier -Tier 'ClusterUser'
    }
    $result.RbacTier = 'ClusterUser'
}

# ---------------------------------------------------------------------------
# Module preflight
# ---------------------------------------------------------------------------
# Both Az modules must be installed. On the first missing one, undo any RBAC
# tier elevation and return a 'Skipped' envelope carrying an install hint.
foreach ($requiredModule in 'Az.Accounts', 'Az.OperationalInsights') {
    if (Get-Module -ListAvailable -Name $requiredModule) { continue }

    if (Get-Command Reset-RbacTier -ErrorAction SilentlyContinue) { Reset-RbacTier }
    $result.Status  = 'Skipped'
    $result.Message = "$requiredModule module not installed. Run: Install-Module $requiredModule -Scope CurrentUser"
    return [PSCustomObject]$result
}

# Best-effort context probe: a failure here is only logged verbosely so that
# the later Azure calls report the concrete authentication problem.
try {
    Import-Module Az.Accounts -ErrorAction SilentlyContinue -WarningAction SilentlyContinue
    $null = Get-AzContext -ErrorAction Stop
}
catch {
    Write-Verbose 'Az context probe failed; downstream calls will surface concrete auth errors.'
}

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
function Resolve-WorkspaceIdFromCluster {
    <#
    .SYNOPSIS
        Resolve the Log Analytics workspace ID associated with an AKS cluster.
    .DESCRIPTION
        Returns the cluster object's 'workspaceResourceId' when that property
        exists and is non-empty. Otherwise lists the cluster's
        Microsoft.Insights diagnostic settings through Invoke-AzRestMethod and
        returns the first properties.workspaceId that is set.

        Diagnostic settings without a workspaceId are skipped using explicit
        property probes. Under Set-StrictMode -Version Latest a direct read of
        a missing property throws, which would end the scan at the first such
        entry and hide a workspace configured on a later one.
    .PARAMETER Cluster
        Cluster object. 'id' and 'name' are read; 'workspaceResourceId' is
        optional.
    .OUTPUTS
        System.String. The workspace ID, or '' when none is found or the
        lookup fails (the failure reason is written to the verbose stream).
    #>
    [CmdletBinding()]
    param ([Parameter(Mandatory)][pscustomobject] $Cluster)

    if ($Cluster.PSObject.Properties['workspaceResourceId'] -and $Cluster.workspaceResourceId) {
        return [string]$Cluster.workspaceResourceId
    }
    $clusterId = [string]$Cluster.id
    if ([string]::IsNullOrWhiteSpace($clusterId)) { return '' }

    $diagUri = "https://management.azure.com$clusterId/providers/Microsoft.Insights/diagnosticSettings?api-version=2021-05-01-preview"
    try {
        $resp = Invoke-WithRetry -MaxAttempts 3 -InitialDelaySeconds 2 -MaxDelaySeconds 20 -ScriptBlock {
            Invoke-AzRestMethod -Method GET -Uri $diagUri -ErrorAction Stop
        }
        if (-not $resp -or $resp.StatusCode -ge 400 -or -not $resp.Content) { return '' }

        $payload = $resp.Content | ConvertFrom-Json -Depth 20
        $entries = if ($payload -and $payload.PSObject.Properties['value']) { @($payload.value) } else { @() }
        foreach ($entry in $entries) {
            if ($null -eq $entry) { continue }

            # Probe each level through PSObject.Properties so a setting that
            # targets only storage / event hub is skipped instead of throwing.
            $propertiesMember = $entry.PSObject.Properties['properties']
            if (-not $propertiesMember -or $null -eq $propertiesMember.Value) { continue }

            $workspaceMember = $propertiesMember.Value.PSObject.Properties['workspaceId']
            if ($workspaceMember -and $workspaceMember.Value) {
                return [string]$workspaceMember.Value
            }
        }
    } catch {
        Write-Verbose ("Diagnostic settings lookup failed for {0}: {1}" -f $Cluster.name, (Remove-Credentials -Text ([string]$_.Exception.Message)))
    }
    return ''
}

function Get-ClusterInsightsUrl {
    # Builds the Azure portal "insights" link for a cluster resource ID.
    # Returns '' when no ID is supplied.
    param([string]$ClusterId)

    if ([string]::IsNullOrEmpty($ClusterId)) {
        return ''
    }
    return ('https://portal.azure.com/#@/resource{0}/insights' -f $ClusterId)
}

function Get-AksClusterPortalDeepLink {
    # Builds a portal deep link into the managed-cluster blade: the karpenter
    # view for a named provisioner, otherwise the cluster's nodes view.
    # Both IDs are URI-escaped. Returns '' when no cluster ID is supplied.
    param(
        [string]$ClusterId,
        [string]$ProvisionerName = ''
    )

    if ([string]::IsNullOrEmpty($ClusterId)) { return '' }

    $bladeRoot      = 'https://portal.azure.com/#view/Microsoft_Azure_ContainerService/ManagedClusterMenuBlade/~'
    $clusterSegment = [System.Uri]::EscapeDataString($ClusterId)

    if ([string]::IsNullOrWhiteSpace($ProvisionerName)) {
        return "$bladeRoot/nodes/resourceId/$clusterSegment"
    }

    $provisionerSegment = [System.Uri]::EscapeDataString($ProvisionerName)
    return "$bladeRoot/karpenter/resourceId/$clusterSegment/provisioner/$provisionerSegment"
}

function Get-LogsQueryEvidenceUrl {
    # Builds a portal Logs-blade link that carries a workspace ID and a KQL
    # query, both URI-escaped. Returns '' when either input is blank.
    param(
        [string]$WorkspaceId,
        [string]$Query
    )

    foreach ($requiredValue in $WorkspaceId, $Query) {
        if ([string]::IsNullOrWhiteSpace($requiredValue)) { return '' }
    }

    $escapedParts = @(
        [System.Uri]::EscapeDataString($WorkspaceId)
        [System.Uri]::EscapeDataString($Query)
    )
    return ('https://portal.azure.com/#blade/Microsoft_Azure_Monitoring_Logs/LogsBlade/resourceId/{0}/source/LogsBlade.AnalyticsShareLinkToQuery/q/{1}' -f $escapedParts)
}

function Get-KarpenterManifestEvidenceUrl {
    <#
    .SYNOPSIS
        Build the in-cluster API URL of a Karpenter provisioner manifest.
    .DESCRIPTION
        Produces https://kubernetes.default.svc/apis/karpenter.sh/<version>/provisioners/<name>.
        -ApiVersion may be a bare version ('v1beta1') or a manifest-style
        'group/version' value ('karpenter.sh/v1beta1'); only the version
        segment is used, because the API group is already part of the fixed
        path. Passing the manifest value through verbatim would duplicate the
        group in the URL.
    .PARAMETER ProvisionerName
        Provisioner name; URI-escaped before use.
    .PARAMETER ApiVersion
        Optional API version. Defaults to 'v1beta1' when blank.
    .OUTPUTS
        System.String. The URL, or '' when the provisioner name is blank.
    #>
    param(
        [string]$ProvisionerName,
        [string]$ApiVersion = ''
    )
    if ([string]::IsNullOrWhiteSpace($ProvisionerName)) { return '' }
    $safeName = [System.Uri]::EscapeDataString($ProvisionerName)

    $resolvedApiVersion = 'v1beta1'
    if (-not [string]::IsNullOrWhiteSpace($ApiVersion)) {
        # Keep only the text after the last '/', i.e. drop any API group prefix.
        $versionSegment = ($ApiVersion.Trim() -split '/')[-1]
        if (-not [string]::IsNullOrWhiteSpace($versionSegment)) {
            $resolvedApiVersion = $versionSegment
        }
    }
    return "https://kubernetes.default.svc/apis/karpenter.sh/$resolvedApiVersion/provisioners/$safeName"
}

function Resolve-KarpenterPillar {
    # Maps a rule ID to its pillar label. Only the consolidation rule also
    # carries Reliability; everything else is Cost Optimization.
    param([string]$RuleId)

    switch ($RuleId) {
        'karpenter.consolidation-disabled' { return 'Cost Optimization; Reliability' }
        default                            { return 'Cost Optimization' }
    }
}

function Resolve-KarpenterImpact {
    # Derives the Impact label for a finding.
    #   * Two rules have a fixed impact regardless of measurements.
    #   * Otherwise node-hours decide when supplied (>= 500 High, >= 150 Medium).
    #   * Otherwise the observed utilisation percent decides (<= 10 High,
    #     <= 35 Medium).
    #   * With no measurement at all the impact is Low.
    param(
        [string]$RuleId,
        [Nullable[double]]$NodeHours = $null,
        [Nullable[double]]$ObservedPercent = $null
    )

    switch ($RuleId) {
        'karpenter.no-node-limit'          { return 'High' }
        'karpenter.consolidation-disabled' { return 'Medium' }
    }

    if ($null -ne $NodeHours) {
        $byHours = if ($NodeHours -ge 500.0) { 'High' } elseif ($NodeHours -ge 150.0) { 'Medium' } else { 'Low' }
        return $byHours
    }

    if ($null -ne $ObservedPercent) {
        $byPercent = if ($ObservedPercent -le 10.0) { 'High' } elseif ($ObservedPercent -le 35.0) { 'Medium' } else { 'Low' }
        return $byPercent
    }

    return 'Low'
}

function Resolve-KarpenterEffort {
    # Effort label: Medium for any rule ID matching 'karpenter.*', Low otherwise.
    param([string]$RuleId)

    $isKarpenterRule = $RuleId -like 'karpenter.*'
    return ($isKarpenterRule ? 'Medium' : 'Low')
}

function Get-KarpenterBaselineTags {
    # Returns two tags for a finding: one derived from the rule ID and one
    # derived from the RBAC tier the finding was produced under.
    param(
        [string]$RuleId,
        [string]$RbacTier
    )

    $ruleTag = 'Karpenter-NodeHours'
    if ($RuleId -in 'aks.idle-node', 'karpenter.over-provisioned') {
        $ruleTag = 'Karpenter-IdleNodes'
    }
    elseif ($RuleId -eq 'karpenter.consolidation-disabled') {
        $ruleTag = 'Karpenter-Consolidation'
    }
    elseif ($RuleId -eq 'karpenter.no-node-limit') {
        $ruleTag = 'Karpenter-ProvisionerLimits'
    }

    # NOTE(review): every non-Reader tier is tagged 'RBAC-ClusterAdmin', while
    # the script itself sets the elevated tier to 'ClusterUser' - confirm the
    # tag name is intentional.
    $rbacTag = ($RbacTier -eq 'Reader') ? 'RBAC-Reader' : 'RBAC-ClusterAdmin'

    return @($ruleTag, $rbacTag)
}

function Get-KarpenterRemediationSnippets {
    <#
    .SYNOPSIS
        Return before/after YAML remediation snippets for a Karpenter rule.
    .DESCRIPTION
        Emits one hashtable (keys: language, before, after) for
        'karpenter.no-node-limit' and 'karpenter.consolidation-disabled', and
        nothing for any other rule.

        The YAML uses two-space nesting so that 'name' sits under 'metadata',
        'limits' / 'disruption' sit under 'spec', and the cpu/memory and
        consolidationPolicy keys sit under their parent. With identical
        indentation on every nested line those keys parse as siblings of
        their intended parent.
    .PARAMETER RuleId
        Rule identifier selecting the snippet.
    .PARAMETER ProvisionerName
        Name written into metadata.name; '<provisioner>' is used when blank.
    #>
    param(
        [string]$RuleId,
        [string]$ProvisionerName
    )
    $target = if ([string]::IsNullOrWhiteSpace($ProvisionerName)) { '<provisioner>' } else { $ProvisionerName }

    # Lines shared by every snippet, up to and including the 'spec:' key.
    $manifestHead = @(
        'apiVersion: karpenter.sh/v1beta1'
        'kind: NodePool'
        'metadata:'
        "  name: $target"
        'spec:'
    )

    switch ($RuleId) {
        'karpenter.no-node-limit' {
            return @(@{
                    language = 'yaml'
                    before   = (@($manifestHead) + @('  limits: null')) -join "`n"
                    after    = (@($manifestHead) + @('  limits:', "    cpu: '200'", '    memory: 400Gi')) -join "`n"
                })
        }
        'karpenter.consolidation-disabled' {
            return @(@{
                    language = 'yaml'
                    before   = (@($manifestHead) + @('  disruption:', '    consolidationPolicy: WhenEmpty')) -join "`n"
                    after    = (@($manifestHead) + @('  disruption:', '    consolidationPolicy: WhenUnderutilized')) -join "`n"
                })
        }
        default { return @() }
    }
}

function Get-KubectlClientVersion {
    # Reports the kubectl client version as 'v<major>.<minor>.<patch>...',
    # parsed from the gitVersion line of `kubectl version --client --output=yaml`.
    # Any failure (non-zero exit, no match, exception) yields 'unknown'.
    $clientVersion = 'unknown'
    try {
        $run = Invoke-WithTimeout -Command 'kubectl' -Arguments @('version', '--client', '--output=yaml') -TimeoutSec 300
        if ($run.ExitCode -eq 0) {
            $found = [regex]::Match([string]$run.Output, '(?im)^\s*gitVersion:\s*v?([0-9]+\.[0-9]+\.[0-9]+[^\s]*)')
            if ($found.Success) {
                $clientVersion = "v$($found.Groups[1].Value)"
            }
        }
    }
    catch {
        $clientVersion = 'unknown'
    }
    return $clientVersion
}

function Get-KarpenterDocsUrl {
    # Returns the Karpenter documentation page for a rule ID, falling back to
    # the documentation root for rules without a dedicated page.
    param([string]$RuleId)

    if ($RuleId -eq 'karpenter.consolidation-disabled') {
        return 'https://karpenter.sh/docs/concepts/disruption/#consolidation'
    }
    if ($RuleId -eq 'karpenter.over-provisioned') {
        return 'https://karpenter.sh/docs/concepts/nodepools/'
    }
    if ($RuleId -eq 'karpenter.no-node-limit') {
        return 'https://karpenter.sh/docs/concepts/nodepools/#speclimits'
    }
    return 'https://karpenter.sh/docs/'
}

function New-ProvisionerEntityId {
    # Composes '<clusterId>/karpenter/provisioners/<name>'. The cluster ID is
    # trimmed and stripped of trailing slashes; the name is trimmed, has every
    # character outside [A-Za-z0-9._-] replaced by '-', and is lower-cased.
    param (
        [Parameter(Mandatory)][string] $ClusterId,
        [Parameter(Mandatory)][string] $ProvisionerName
    )

    $clusterPart = $ClusterId.Trim().TrimEnd('/')

    $sanitized = $ProvisionerName.Trim() -replace '[^A-Za-z0-9._-]', '-'
    $namePart  = $sanitized.ToLowerInvariant()

    return ('{0}/karpenter/provisioners/{1}' -f $clusterPart, $namePart)
}

function Add-Finding {
    <#
    .SYNOPSIS
        Build one finding row and append it to the script-level findings list.
    .DESCRIPTION
        Combines the caller-supplied rule data with derived fields (entity ID,
        links, pillar/impact/effort, baseline tags, evidence URIs, remediation
        snippets) into an ordered row, merges -Extra into it, and adds the
        resulting object to $script:findings. Nothing is written to the
        pipeline.

        Reads $result.RbacTier from the enclosing scope, so the envelope must
        exist before this function is called.
    .PARAMETER Cluster
        Cluster object; 'id', 'name' and 'resourceGroup' are read.
    .PARAMETER EntityType
        'AzureResource' for cluster-level findings, 'KarpenterProvisioner' for
        provisioner-level findings.
    .PARAMETER ProvisionerName
        Provisioner the finding refers to; used for the entity ID, deep link,
        manifest evidence URL and remediation snippets.
    .PARAMETER LearnMoreUrl
        Optional override. When empty, a Karpenter docs URL (provisioner
        findings) or the cluster insights URL (cluster findings) is used.
    .PARAMETER WorkspaceId
        Workspace used, together with -EvidenceQuery, for the Logs evidence link.
    .PARAMETER EvidenceQuery
        KQL text embedded in the Logs evidence link.
    .PARAMETER KarpenterApiVersion
        Passed to Get-KarpenterManifestEvidenceUrl for provisioner findings.
    .PARAMETER Extra
        Additional fields. 'NodeHours' and 'ObservedPercent' also drive Impact
        and ScoreDelta. Every key is copied onto the row last, so a key that
        matches a built-in field name replaces that field's value.
    #>
    param (
        [Parameter(Mandatory)][pscustomobject] $Cluster,
        [Parameter(Mandatory)][string] $RuleId,
        [Parameter(Mandatory)][string] $Severity,
        [Parameter(Mandatory)][string] $Title,
        [Parameter(Mandatory)][string] $Detail,
        [Parameter(Mandatory)][string] $Remediation,
        [Parameter(Mandatory)][bool]   $Compliant,
        [Parameter(Mandatory)][ValidateSet('AzureResource', 'KarpenterProvisioner')]
        [string] $EntityType,
        [string] $ProvisionerName = '',
        [string] $LearnMoreUrl    = '',
        [string] $WorkspaceId     = '',
        [string] $EvidenceQuery   = '',
        [string] $KarpenterApiVersion = '',
        [hashtable] $Extra        = @{}
    )

    $clusterId = [string]$Cluster.id
    # Provisioner findings get a synthetic ID below the cluster; everything
    # else is attributed to the cluster resource itself.
    $entityRawId = if ($EntityType -eq 'KarpenterProvisioner' -and $ProvisionerName) {
        New-ProvisionerEntityId -ClusterId $clusterId -ProvisionerName $ProvisionerName
    } else {
        $clusterId
    }

    # Default the learn-more link when the caller did not supply one.
    if (-not $LearnMoreUrl) {
        $LearnMoreUrl = if ($EntityType -eq 'KarpenterProvisioner') {
            Get-KarpenterDocsUrl -RuleId $RuleId
        } else {
            Get-ClusterInsightsUrl -ClusterId $clusterId
        }
    }

    # Optional measurements carried in -Extra; they stay $null when absent.
    $nodeHours = $null
    if ($Extra.ContainsKey('NodeHours') -and $null -ne $Extra['NodeHours']) {
        $nodeHours = [double]$Extra['NodeHours']
    }
    $observedPercent = $null
    if ($Extra.ContainsKey('ObservedPercent') -and $null -ne $Extra['ObservedPercent']) {
        $observedPercent = [double]$Extra['ObservedPercent']
    }
    # ScoreDelta prefers node-hours, then the observed percent, else $null.
    $scoreDelta = if ($nodeHours -ne $null) { $nodeHours } elseif ($observedPercent -ne $null) { $observedPercent } else { $null }
    # Evidence: the Logs query link (when workspace and query are both given)
    # plus the manifest URL for provisioner findings.
    $evidenceUris = [System.Collections.Generic.List[string]]::new()
    $logEvidence = Get-LogsQueryEvidenceUrl -WorkspaceId $WorkspaceId -Query $EvidenceQuery
    if ($logEvidence) { $evidenceUris.Add($logEvidence) | Out-Null }
    if ($EntityType -eq 'KarpenterProvisioner' -and -not [string]::IsNullOrWhiteSpace($ProvisionerName)) {
        $manifestUri = Get-KarpenterManifestEvidenceUrl -ProvisionerName $ProvisionerName -ApiVersion $KarpenterApiVersion
        if ($manifestUri) { $evidenceUris.Add($manifestUri) | Out-Null }
    }

    # Entity references: the cluster ID and, when present, the provisioner name.
    $entityRefs = [System.Collections.Generic.List[string]]::new()
    if (-not [string]::IsNullOrWhiteSpace($clusterId)) { $entityRefs.Add($clusterId) | Out-Null }
    if (-not [string]::IsNullOrWhiteSpace($ProvisionerName)) { $entityRefs.Add($ProvisionerName) | Out-Null }

    # The trailing GUID makes the ID unique per call; it is not stable across runs.
    $findingId = "aks-karpenter-cost/$RuleId/$($Cluster.name)/$([guid]::NewGuid().ToString('N'))"
    $row = [ordered]@{
        Id                   = $findingId
        Source               = 'aks-karpenter-cost'
        RuleId               = $RuleId
        Category             = 'Cost'
        Severity             = $Severity
        Compliant            = $Compliant
        Title                = $Title
        Detail               = $Detail
        Remediation          = $Remediation
        ResourceId           = $clusterId
        EntityRawId          = $entityRawId
        EntityType           = $EntityType
        LearnMoreUrl         = $LearnMoreUrl
        DeepLinkUrl          = Get-AksClusterPortalDeepLink -ClusterId $clusterId -ProvisionerName $ProvisionerName
        ClusterName          = [string]$Cluster.name
        ClusterResourceGroup = [string]$Cluster.resourceGroup
        ProvisionerName      = $ProvisionerName
        RbacTier             = $result.RbacTier
        Pillar               = Resolve-KarpenterPillar -RuleId $RuleId
        Impact               = Resolve-KarpenterImpact -RuleId $RuleId -NodeHours $nodeHours -ObservedPercent $observedPercent
        Effort               = Resolve-KarpenterEffort -RuleId $RuleId
        BaselineTags         = @(Get-KarpenterBaselineTags -RuleId $RuleId -RbacTier $result.RbacTier)
        EvidenceUris         = @($evidenceUris)
        ScoreDelta           = $scoreDelta
        EntityRefs           = @($entityRefs)
        RemediationSnippets  = @(Get-KarpenterRemediationSnippets -RuleId $RuleId -ProvisionerName $ProvisionerName)
    }
    # Merge caller extras last: new keys are appended, matching keys overwrite.
    foreach ($k in $Extra.Keys) {
        $row[$k] = $Extra[$k]
    }
    $script:findings.Add([PSCustomObject]$row) | Out-Null
}

function Invoke-KarpenterKubectl {
    <#
    .SYNOPSIS
        List Karpenter provisioners with
        `kubectl get provisioners.karpenter.sh -o json` and return the parsed
        items.
    .DESCRIPTION
        Runs kubectl through Invoke-WithTimeout against the supplied
        kubeconfig. Adds --context when -KubeContext is given, and either
        -n <Namespace> or -A depending on whether -Namespace is set.

        Returns @() when kubectl prints nothing or the JSON document has no
        'items' property. Throws a formatted finding-error message when
        kubectl exits non-zero or its output is not valid JSON; the caller is
        responsible for catching and recording that error.
    .PARAMETER KubeconfigPath
        Path passed to kubectl --kubeconfig.
    .PARAMETER KubeContext
        Optional kubeconfig context name.
    .PARAMETER Namespace
        Optional namespace; empty means all namespaces (-A).
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory)][string] $KubeconfigPath,
        [string] $KubeContext,
        [string] $Namespace = ''
    )

    # Assemble the argument vector in the order kubectl should receive it.
    $kubectlArguments = [System.Collections.Generic.List[string]]::new()
    foreach ($fixedArgument in '--kubeconfig', $KubeconfigPath, 'get', 'provisioners.karpenter.sh', '-o', 'json') {
        $kubectlArguments.Add($fixedArgument)
    }
    if ($KubeContext) {
        $kubectlArguments.Add('--context')
        $kubectlArguments.Add($KubeContext)
    }
    if ($Namespace) {
        $kubectlArguments.Add('-n')
        $kubectlArguments.Add($Namespace)
    }
    else {
        $kubectlArguments.Add('-A')
    }

    $run = Invoke-WithTimeout -Command 'kubectl' -Arguments $kubectlArguments.ToArray() -TimeoutSec 300

    if ($run.ExitCode -ne 0) {
        $exitFailure = New-FindingError -Source 'wrapper:aks-karpenter-cost' -Category 'UnexpectedFailure' -Reason "kubectl get provisioners.karpenter.sh failed with exit code $($run.ExitCode)." -Remediation 'Verify cluster access, kubeconfig context, and kubectl permissions for provisioners.karpenter.sh.' -Details ([string]$run.Output)
        throw (Format-FindingErrorMessage $exitFailure)
    }

    if ([string]::IsNullOrWhiteSpace($run.Output)) { return @() }

    try {
        $document = $run.Output | ConvertFrom-Json -Depth 20
    }
    catch {
        $parseFailure = New-FindingError -Source 'wrapper:aks-karpenter-cost' -Category 'ConfigurationError' -Reason 'kubectl returned non-JSON output.' -Remediation 'Ensure kubectl returns JSON for provisioners.karpenter.sh and that no shell wrappers modify output.' -Details ([string]$_.Exception.Message)
        throw (Format-FindingErrorMessage $parseFailure)
    }

    if (-not $document -or -not $document.PSObject.Properties['items']) { return @() }
    return @($document.items)
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
$findings = [System.Collections.Generic.List[object]]::new()
$workspaceErrors = [System.Collections.Generic.List[string]]::new()
$kubectlVersion = if ($EnableElevatedRbac.IsPresent) { 'unknown' } else { 'not-run' }
$karpenterVersion = if ($EnableElevatedRbac.IsPresent) { 'unknown' } else { 'not-run' }

try {
    try {
        $clusters = @(Get-AksClustersInScope -SubscriptionId $SubscriptionId -ResourceGroup $ResourceGroup -ClusterName $ClusterName)
    } catch {
        $result.Status  = 'Failed'
        $result.Message = "AKS discovery failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))"
        return [PSCustomObject]$result
    }

    if (-not $clusters -or $clusters.Count -eq 0) {
        $result.Status  = 'Skipped'
        $result.Message = 'No AKS managed clusters in scope.'
        return [PSCustomObject]$result
    }

    foreach ($cluster in $clusters) {
        # ----- Reader-tier KQL findings -----
        $workspaceId = if ($LogAnalyticsWorkspaceId) { $LogAnalyticsWorkspaceId } else { Resolve-WorkspaceIdFromCluster -Cluster $cluster }
        if ([string]::IsNullOrWhiteSpace($workspaceId)) {
            $workspaceErrors.Add("Cluster $($cluster.name): Container Insights workspace not found.") | Out-Null
        } else {
            $timeFilter = "ago(${LookbackDays}d)"

            $nodeCostQuery = @"
let lookback = $timeFilter;
KubeNodeInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| summarize firstSeen = min(TimeGenerated), lastSeen = max(TimeGenerated) by Computer
| extend nodeHours = round(datetime_diff('hour', lastSeen, firstSeen), 2)
| summarize nodes = dcount(Computer), totalNodeHours = sum(nodeHours)
"@


            $idleNodeQuery = @"
let lookback = $timeFilter;
Perf
| where TimeGenerated >= lookback
| where ObjectName == 'K8SNode'
| where CounterName == 'cpuUsageNanoCores'
| summarize avg_cpu = avg(CounterValue), capacityNano = max(CounterValue) by Computer
| extend pct = iff(capacityNano > 0, (avg_cpu / capacityNano) * 100.0, 0.0)
| where pct < 10.0
| project Computer, observedPct = round(pct, 2), avg_cpu
"@


            try {
                $costRows = @()
                $resp = Invoke-WithRetry -MaxAttempts 4 -InitialDelaySeconds 2 -MaxDelaySeconds 20 -ScriptBlock {
                    Invoke-LogAnalyticsQuery -WorkspaceId $workspaceId -Query $nodeCostQuery -TimeoutSeconds 300
                }
                if ($resp -and $resp.PSObject.Properties['Results']) { $costRows = @($resp.Results) }
                foreach ($row in $costRows) {
                    $nodes = [int]($row.nodes ?? 0)
                    $hours = [double]($row.totalNodeHours ?? 0)
                    if ($nodes -le 0) { continue }
                    Add-Finding -Cluster $cluster -RuleId 'aks.node-cost-rollup' -Severity 'Info' -Compliant $true `
                        -EntityType 'AzureResource' `
                        -Title "AKS node cost rollup for $($cluster.name): $nodes node(s), $([math]::Round($hours,1)) node-hours over ${LookbackDays}d" `
                        -Detail  "Container Insights observed $nodes distinct node(s) totalling $([math]::Round($hours,1)) node-hour(s) in cluster '$($cluster.name)' over the last ${LookbackDays} day(s). Multiply by your VM SKU rate to obtain a cost estimate." `
                        -Remediation 'Review node hours in Cost Management; consider Karpenter consolidation or smaller VM SKUs if utilization is low.' `
                        -WorkspaceId $workspaceId -EvidenceQuery $nodeCostQuery `
                        -Extra @{ NodeCount = $nodes; NodeHours = [math]::Round($hours, 2) }
                }
            } catch {
                $workspaceErrors.Add("Cluster $($cluster.name) node-cost-rollup query failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))") | Out-Null
            }

            try {
                $idleRows = @()
                $resp = Invoke-WithRetry -MaxAttempts 4 -InitialDelaySeconds 2 -MaxDelaySeconds 20 -ScriptBlock {
                    Invoke-LogAnalyticsQuery -WorkspaceId $workspaceId -Query $idleNodeQuery -TimeoutSeconds 300
                }
                if ($resp -and $resp.PSObject.Properties['Results']) { $idleRows = @($resp.Results) }
                foreach ($row in $idleRows) {
                    $pct = [double]($row.observedPct ?? 0)
                    Add-Finding -Cluster $cluster -RuleId 'aks.idle-node' -Severity 'Medium' -Compliant $false `
                        -EntityType 'AzureResource' `
                        -Title "Idle node $($row.Computer) in $($cluster.name): avg CPU $([math]::Round($pct,1))% over ${LookbackDays}d" `
                        -Detail  "Node '$($row.Computer)' averaged $([math]::Round($pct,2))% CPU utilization over the last ${LookbackDays} day(s)." `
                        -Remediation 'Cordon and drain the node, or enable Karpenter consolidation / cluster autoscaler scale-down to remove idle capacity.' `
                        -WorkspaceId $workspaceId -EvidenceQuery $idleNodeQuery `
                        -Extra @{ NodeName = [string]$row.Computer; ObservedPercent = [math]::Round($pct, 2) }
                }
            } catch {
                $workspaceErrors.Add("Cluster $($cluster.name) idle-node query failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))") | Out-Null
            }
        }

        # ----- Elevated-tier Karpenter findings (gated) -----
        if (-not $allowElevatedOps) { continue }

        # Honor -WhatIf / ShouldProcess: skip ALL side-effecting calls
        # (kubectl invocations + Initialize-KubeAuth) when running in WhatIf
        # mode. The reader-tier KQL findings above are already accumulated.
        $shouldProcessTarget = "AKS cluster '$($cluster.name)' (elevated Karpenter inspection: kubectl + kube-auth)"
        if (-not $PSCmdlet.ShouldProcess($shouldProcessTarget, 'Invoke kubectl + Initialize-KubeAuth')) {
            continue
        }

        if (Get-Command Assert-RbacTier -ErrorAction SilentlyContinue) {
            try { Assert-RbacTier -Required 'ClusterUser' -Capability 'Karpenter Provisioner inspection' -OptInFlag '-EnableElevatedRbac' }
            catch {
                $workspaceErrors.Add("Cluster $($cluster.name) karpenter inspection skipped: $(Remove-Credentials -Text ([string]$_.Exception.Message))") | Out-Null
                continue
            }
        }

        if (-not (Get-Command kubectl -ErrorAction SilentlyContinue)) {
            $kubectlVersion = 'missing'
            $workspaceErrors.Add("Cluster $($cluster.name): kubectl not on PATH; install via 'az aks install-cli'.") | Out-Null
            continue
        }

        if ([string]::IsNullOrWhiteSpace($KubeconfigPath)) {
            $workspaceErrors.Add("Cluster $($cluster.name): -KubeconfigPath required when -EnableElevatedRbac is set.") | Out-Null
            continue
        }
        if ($kubectlVersion -eq 'unknown') {
            $kubectlVersion = Get-KubectlClientVersion
        }

        $kubeAuth = $null
        try {
            $kubeAuth = Initialize-KubeAuth `
                -Mode $KubeAuthMode `
                -KubeconfigPath $KubeconfigPath `
                -KubeContext $KubeContext `
                -KubeloginServerId $KubeloginServerId `
                -KubeloginClientId $KubeloginClientId `
                -KubeloginTenantId $KubeloginTenantId `
                -WorkloadIdentityClientId $WorkloadIdentityClientId `
                -WorkloadIdentityTenantId $WorkloadIdentityTenantId `
                -WorkloadIdentityServiceAccountToken $WorkloadIdentityServiceAccountToken
        } catch {
            $workspaceErrors.Add("Cluster $($cluster.name) kube-auth init failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))") | Out-Null
            continue
        }

        try {
            $items = @()
            try {
                $items = @(
                    Invoke-WithRetry -MaxAttempts 3 -InitialDelaySeconds 2 -MaxDelaySeconds 20 -ScriptBlock {
                        Invoke-KarpenterKubectl -KubeconfigPath $kubeAuth.KubeconfigPath -KubeContext $KubeContext -Namespace $Namespace
                    }
                )
            } catch {
                $workspaceErrors.Add("Cluster $($cluster.name) karpenter list failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))") | Out-Null
                continue
            }

            foreach ($prov in $items) {
                if (-not $prov) { continue }
                $name = if ($prov.PSObject.Properties['metadata'] -and $prov.metadata.name) { [string]$prov.metadata.name } else { 'unknown' }
                $spec = if ($prov.PSObject.Properties['spec'])     { $prov.spec }     else { $null }
                $provApiVersion = if ($prov.PSObject.Properties['apiVersion']) { [string]$prov.apiVersion } else { '' }
                if ($karpenterVersion -eq 'unknown' -and $provApiVersion -match '/(v[0-9a-zA-Z]+)$') {
                    $karpenterVersion = $Matches[1]
                }

                # consolidation-disabled
                $consolidationEnabled = $false
                if ($spec -and $spec.PSObject.Properties['consolidation'] -and $spec.consolidation.PSObject.Properties['enabled']) {
                    $consolidationEnabled = [bool]$spec.consolidation.enabled
                } elseif ($spec -and $spec.PSObject.Properties['disruption'] -and $spec.disruption.PSObject.Properties['consolidationPolicy']) {
                    $consolidationEnabled = ([string]$spec.disruption.consolidationPolicy -ne 'WhenEmpty')
                }
                if (-not $consolidationEnabled) {
                    Add-Finding -Cluster $cluster -RuleId 'karpenter.consolidation-disabled' -Severity 'Medium' -Compliant $false `
                        -EntityType 'KarpenterProvisioner' -ProvisionerName $name `
                        -Title "Karpenter Provisioner '$name' has consolidation disabled" `
                        -Detail  "Provisioner '$name' in cluster '$($cluster.name)' is not configured for consolidation. Karpenter will not bin-pack workloads onto fewer nodes." `
                        -Remediation 'Set spec.consolidation.enabled=true (or spec.disruption.consolidationPolicy=WhenUnderutilized) on the Provisioner / NodePool.' `
                        -WorkspaceId $workspaceId -KarpenterApiVersion $provApiVersion
                }

                # no-node-limit
                $hasLimits = $false
                if ($spec -and $spec.PSObject.Properties['limits']) {
                    $hasLimits = ($null -ne $spec.limits)
                }
                if (-not $hasLimits) {
                    Add-Finding -Cluster $cluster -RuleId 'karpenter.no-node-limit' -Severity 'High' -Compliant $false `
                        -EntityType 'KarpenterProvisioner' -ProvisionerName $name `
                        -Title "Karpenter Provisioner '$name' has no spec.limits set" `
                        -Detail  "Provisioner '$name' in cluster '$($cluster.name)' has no resource limits configured. A pod scheduling burst can scale node count without bound, creating runaway cost risk." `
                        -Remediation 'Add spec.limits.resources (e.g. cpu, memory) to cap how much capacity Karpenter may provision for this NodePool.' `
                        -WorkspaceId $workspaceId -KarpenterApiVersion $provApiVersion
                }

                # over-provisioned (Container Insights)
                if (-not [string]::IsNullOrWhiteSpace($workspaceId)) {
                    $overQuery = @"
let lookback = ago(${LookbackDays}d);
let provLabel = '$name';
KubeNodeInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| where Labels has provLabel or Labels has 'karpenter.sh/provisioner-name'
| summarize nodes = dcount(Computer) by Computer
| join kind=inner (
    Perf
    | where TimeGenerated >= lookback
    | where ObjectName == 'K8SNode'
    | where CounterName == 'cpuUsageNanoCores'
    | summarize avg_cpu = avg(CounterValue), capacityNano = max(CounterValue) by Computer
) on Computer
| extend pct = iff(capacityNano > 0, (avg_cpu / capacityNano) * 100.0, 0.0)
| summarize avgPct = avg(pct), nodeCount = count()
| where avgPct < 50.0
"@

                    try {
                        $overRows = @()
                        $resp = Invoke-WithRetry -MaxAttempts 3 -InitialDelaySeconds 2 -MaxDelaySeconds 20 -ScriptBlock {
                            Invoke-LogAnalyticsQuery -WorkspaceId $workspaceId -Query $overQuery -TimeoutSeconds 300
                        }
                        if ($resp -and $resp.PSObject.Properties['Results']) { $overRows = @($resp.Results) }
                        foreach ($row in $overRows) {
                            $pct = [double]($row.avgPct ?? 0)
                            $nc  = [int]($row.nodeCount ?? 0)
                            Add-Finding -Cluster $cluster -RuleId 'karpenter.over-provisioned' -Severity 'Medium' -Compliant $false `
                                -EntityType 'KarpenterProvisioner' -ProvisionerName $name `
                                -Title "Karpenter Provisioner '$name' over-provisioned: avg $([math]::Round($pct,1))% CPU across $nc node(s)" `
                                -Detail  "Average node CPU utilization for nodes managed by Provisioner '$name' was $([math]::Round($pct,2))% over ${LookbackDays}d (threshold 50%)." `
                                -Remediation 'Lower spec.limits, enable consolidation, or pick a smaller default instance type.' `
                                -WorkspaceId $workspaceId -EvidenceQuery $overQuery -KarpenterApiVersion $provApiVersion `
                                -Extra @{ ObservedPercent = [math]::Round($pct, 2); NodeCount = $nc }
                        }
                    } catch {
                        $workspaceErrors.Add("Cluster $($cluster.name) over-provisioned query for '$name' failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))") | Out-Null
                    }
                }
            }
        } finally {
            if ($kubeAuth -and $kubeAuth.PSObject.Properties['Cleanup']) {
                try { & $kubeAuth.Cleanup } catch { Write-Verbose ("KubeAuth cleanup failed: {0}" -f (Remove-Credentials -Text ([string]$_.Exception.Message))) }
            }
        }
    }

    if ($OutputPath) {
        try {
            if (-not (Test-Path $OutputPath)) { New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null }
            $rawOut = Join-Path $OutputPath "aks-karpenter-cost-$SubscriptionId.json"
            Set-Content -Path $rawOut -Value (Remove-Credentials (($findings | ConvertTo-Json -Depth 20))) -Encoding UTF8
        } catch {
            Write-Verbose "Failed writing karpenter cost raw output: $(Remove-Credentials -Text ([string]$_.Exception.Message))"
        }
    }
}
finally {
    if ($EnableElevatedRbac.IsPresent -and (Get-Command Reset-RbacTier -ErrorAction SilentlyContinue)) {
        Reset-RbacTier
    }
}

# Record the kubectl / Karpenter version string on the result and copy it onto
# every finding, creating the ToolVersion property where it does not exist yet.
$result.ToolVersion = "kubectl=$kubectlVersion; karpenter=$karpenterVersion"
foreach ($entry in $findings) {
    $versionMember = $entry.PSObject.Properties['ToolVersion']
    if (-not $versionMember) {
        Add-Member -InputObject $entry -NotePropertyName ToolVersion -NotePropertyValue $result.ToolVersion -Force
    } else {
        $entry.ToolVersion = $result.ToolVersion
    }
}

# Attach the findings and downgrade the status only when workspace errors were
# recorded: some findings -> PartialSuccess, none -> Failed. Without errors the
# status set earlier is left untouched.
$result.Findings = @($findings)
if ($workspaceErrors.Count -gt 0) {
    if ($findings.Count -gt 0) {
        $result.Status = 'PartialSuccess'
    } elseif ($findings.Count -eq 0) {
        $result.Status = 'Failed'
    }
}

# Build the human-readable summary, append any collected workspace errors
# (pipe-separated), and return the result as a PSCustomObject.
$summaryText = "Scanned $($clusters.Count) AKS cluster(s) over ${LookbackDays} day(s); emitted $($findings.Count) cost finding(s); RBAC tier '$($result.RbacTier)'."
$result.Message = ($workspaceErrors.Count -gt 0) ?
    "$summaryText Errors: $($workspaceErrors -join ' | ')" :
    $summaryText

return [PSCustomObject]$result