modules/Invoke-AksRightsizing.ps1

#Requires -Version 7.4
<#
.SYNOPSIS
    AKS rightsizing signals from Container Insights (Log Analytics KQL).
.DESCRIPTION
    Discovers AKS clusters in the requested scope, resolves each cluster's
    Container Insights workspace, and executes KQL checks for:
      - over-provisioned pods (CPU and memory)
      - under-provisioned pods (CPU and memory)
      - missing HPA candidates (static replicas with variability)
      - OOMKilled pods
#>

[CmdletBinding()]
param (
    [Parameter(Mandatory)]
    [ValidateNotNullOrEmpty()]
    [string] $SubscriptionId,

    [string] $ResourceGroup,
    [string] $ClusterName,
    [string] $LogAnalyticsWorkspaceId,

    [ValidateRange(1, 30)]
    [int] $LookbackDays = 7,

    [string] $OutputPath
)

Set-StrictMode -Version Latest
$ErrorActionPreference = 'Stop'

$retryPath = Join-Path $PSScriptRoot 'shared' 'Retry.ps1'
if (Test-Path $retryPath) { . $retryPath }
if (-not (Get-Command Invoke-WithRetry -ErrorAction SilentlyContinue)) {
    function Invoke-WithRetry { param([scriptblock]$ScriptBlock) & $ScriptBlock }
}

$sanitizePath = Join-Path $PSScriptRoot 'shared' 'Sanitize.ps1'
if (Test-Path $sanitizePath) { . $sanitizePath }
if (-not (Get-Command Remove-Credentials -ErrorAction SilentlyContinue)) {
    function Remove-Credentials { param([string]$Text) return $Text }
}

$errorsPath = Join-Path $PSScriptRoot 'shared' 'Errors.ps1'
if (Test-Path $errorsPath) { . $errorsPath }
if (-not (Get-Command New-FindingError -ErrorAction SilentlyContinue)) {
    function New-FindingError { param([string]$Source,[string]$Category,[string]$Reason,[string]$Remediation,[string]$Details) return [pscustomobject]@{ Source=$Source; Category=$Category; Reason=$Reason; Remediation=$Remediation; Details=$Details } }
}
if (-not (Get-Command Format-FindingErrorMessage -ErrorAction SilentlyContinue)) {
    function Format-FindingErrorMessage {
        param([Parameter(Mandatory)]$FindingError)
        $line = "[{0}] {1}: {2}" -f $FindingError.Source, $FindingError.Category, $FindingError.Reason
        if ($FindingError.Remediation) { $line += " Action: $($FindingError.Remediation)" }
        return $line
    }
}

$installerPath = Join-Path $PSScriptRoot 'shared' 'Installer.ps1'
if (Test-Path $installerPath) { . $installerPath }

$envelopePath = Join-Path $PSScriptRoot 'shared' 'New-WrapperEnvelope.ps1'
if (Test-Path $envelopePath) { . $envelopePath }
if (-not (Get-Command New-WrapperEnvelope -ErrorAction SilentlyContinue)) { function New-WrapperEnvelope { param([string]$Source,[string]$Status='Failed',[string]$Message='',[object[]]$FindingErrors=@()) return [PSCustomObject]@{ Source=$Source; SchemaVersion='1.0'; Status=$Status; Message=$Message; Findings=@(); Errors=@($FindingErrors) } } }
if (-not (Get-Command Invoke-WithTimeout -ErrorAction SilentlyContinue)) {
    function Invoke-WithTimeout {
        param (
            [Parameter(Mandatory)][string]$Command,
            [Parameter(Mandatory)][string[]]$Arguments,
            [int]$TimeoutSec = 300
        )
        $output = & $Command @Arguments 2>&1 | Out-String
        return [PSCustomObject]@{ ExitCode = $LASTEXITCODE; Output = $output.Trim() }
    }
}

$aksDiscoveryPath = Join-Path $PSScriptRoot 'shared' 'AksDiscovery.ps1'
if (-not (Get-Command Get-AksClustersInScope -ErrorAction SilentlyContinue) -and (Test-Path $aksDiscoveryPath)) { . $aksDiscoveryPath }

$kqlPath = Join-Path $PSScriptRoot 'shared' 'KqlQuery.ps1'
if (-not (Get-Command Invoke-LogAnalyticsQuery -ErrorAction SilentlyContinue) -and (Test-Path $kqlPath)) { . $kqlPath }

$result = [ordered]@{
    SchemaVersion = '1.0'
    Source        = 'aks-rightsizing'
    Status        = 'Success'
    Message       = ''
    Findings      = @()
    Subscription  = $SubscriptionId
    Timestamp     = (Get-Date).ToUniversalTime().ToString('o')
}

if (-not (Get-Module -ListAvailable -Name Az.Accounts)) {
    $result.Status  = 'Skipped'
    $result.Message = 'Az.Accounts module not installed. Run: Install-Module Az.Accounts -Scope CurrentUser'
    return [PSCustomObject]$result
}

if (-not (Get-Module -ListAvailable -Name Az.OperationalInsights)) {
    $result.Status  = 'Skipped'
    $result.Message = 'Az.OperationalInsights module not installed. Run: Install-Module Az.OperationalInsights -Scope CurrentUser'
    return [PSCustomObject]$result
}

try {
    Import-Module Az.Accounts -ErrorAction SilentlyContinue -WarningAction SilentlyContinue
    $null = Get-AzContext -ErrorAction Stop
} catch {
    Write-Verbose "Az context probe failed; continuing and letting downstream calls report concrete auth errors."
}

function Resolve-WorkspaceIdFromCluster {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [pscustomobject] $Cluster
    )

    if ($Cluster.PSObject.Properties['workspaceResourceId'] -and $Cluster.workspaceResourceId) {
        return [string]$Cluster.workspaceResourceId
    }

    $clusterId = [string]$Cluster.id
    if ([string]::IsNullOrWhiteSpace($clusterId)) { return '' }

    $diagUri = "https://management.azure.com$clusterId/providers/Microsoft.Insights/diagnosticSettings?api-version=2021-05-01-preview"
    try {
        $resp = Invoke-WithRetry -MaxAttempts 3 -InitialDelaySeconds 2 -MaxDelaySeconds 20 -ScriptBlock {
            Invoke-AzRestMethod -Method GET -Uri $using:diagUri -ErrorAction Stop
        }
        if (-not $resp -or $resp.StatusCode -ge 400 -or -not $resp.Content) { return '' }
        $payload = $resp.Content | ConvertFrom-Json -Depth 20
        $entries = if ($payload.PSObject.Properties['value']) { @($payload.value) } else { @() }
        foreach ($entry in $entries) {
            if ($entry.PSObject.Properties['properties'] -and $entry.properties.workspaceId) {
                return [string]$entry.properties.workspaceId
            }
        }
    } catch {
        Write-Verbose ("Diagnostic settings lookup failed for {0}: {1}" -f $Cluster.name, (Remove-Credentials -Text ([string]$_.Exception.Message)))
    }

    return ''
}

function Get-WorkspaceNameFromArmId {
    param([string]$WorkspaceArmId)
    if (-not $WorkspaceArmId) { return '' }
    if ($WorkspaceArmId -match '/workspaces/([^/]+)$') { return [string]$Matches[1] }
    return ''
}

function Get-ClusterInsightsUrl {
    param([string]$ClusterId)
    if (-not $ClusterId) { return '' }
    return "https://portal.azure.com/#@/resource$ClusterId/insights"
}

function Get-AksWorkloadDeepLinkUrl {
    param(
        [string]$ClusterId,
        [string]$Namespace,
        [string]$WorkloadName
    )

    if (-not $ClusterId) { return '' }
    $encodedClusterId = [System.Uri]::EscapeDataString($ClusterId)
    $encodedNamespace = [System.Uri]::EscapeDataString(($Namespace ?? '').Trim())
    $encodedWorkload = [System.Uri]::EscapeDataString(($WorkloadName ?? '').Trim())
    return "https://portal.azure.com/#blade/Microsoft_Azure_Monitoring/AzureMonitoringBrowseBlade/overview/resourceId/$encodedClusterId/namespace/$encodedNamespace/workload/$encodedWorkload"
}

function Get-AksQueryEvidenceUri {
    param(
        [string]$WorkspaceId,
        [string]$QueryName,
        [string]$QueryText
    )

    if (-not $WorkspaceId -or -not $QueryText) { return '' }
    $encodedWorkspace = [System.Uri]::EscapeDataString($WorkspaceId)
    $encodedQuery = [System.Uri]::EscapeDataString($QueryText)
    $encodedName = [System.Uri]::EscapeDataString(($QueryName ?? 'aks-rightsizing'))
    return "https://portal.azure.com/#blade/Microsoft_Azure_Monitoring/LogsBlade/resourceId/$encodedWorkspace/source/LogsBlade/query/$encodedQuery/queryTimeRange/P7D/queryDisplayName/$encodedName"
}

function Resolve-RightsizingPillar {
    param([string]$Category)
    switch -Regex (($Category ?? '').ToLowerInvariant()) {
        'overprovisioned|idle' { return 'Cost Optimization' }
        'underprovisioned|oomkilled|missinghpa' { return 'Performance Efficiency' }
        default { return 'Performance Efficiency' }
    }
}

function Get-RightsizingSignalPercent {
    param(
        [string]$Category,
        [double]$ObservedPercent
    )

    if (($Category ?? '') -match '(?i)overprovisioned|idle') {
        return [math]::Round([math]::Max(0, (100.0 - $ObservedPercent)), 2)
    }
    return [math]::Round([math]::Max(0, $ObservedPercent), 2)
}

function Resolve-RightsizingImpact {
    param(
        [string]$Category,
        [double]$ObservedPercent
    )

    $signal = Get-RightsizingSignalPercent -Category $Category -ObservedPercent $ObservedPercent
    if (($Category ?? '') -match '(?i)overprovisioned|idle') {
        if ($signal -ge 80) { return 'High' }
        if ($signal -ge 50) { return 'Medium' }
        return 'Low'
    }
    if ($signal -ge 90) { return 'High' }
    if ($signal -ge 50) { return 'Medium' }
    return 'Low'
}

function Resolve-RightsizingEffort {
    param([string]$Category)
    switch -Regex (($Category ?? '').ToLowerInvariant()) {
        'missinghpa' { return 'Medium' }
        'oomkilled' { return 'High' }
        default { return 'Low' }
    }
}

function Resolve-RightsizingBaselineTags {
    param(
        [string]$Category,
        [string]$MetricType
    )

    $tags = [System.Collections.Generic.List[string]]::new()
    $normalizedCategory = ($Category ?? '').Trim()
    $metric = ($MetricType ?? '').Trim()
    switch -Regex ($normalizedCategory.ToLowerInvariant()) {
        'overprovisionedcpu|underprovisionedcpu' { $tags.Add('AKS-RightSizing-CPU') | Out-Null }
        'overprovisionedmemory|underprovisionedmemory' { $tags.Add('AKS-RightSizing-Memory') | Out-Null }
        'missinghpa' { $tags.Add('AKS-MissingHPA') | Out-Null }
        'oomkilled' { $tags.Add('AKS-OOMKilled') | Out-Null }
    }
    if (-not [string]::IsNullOrWhiteSpace($normalizedCategory)) { $tags.Add("AKS-$normalizedCategory") | Out-Null }
    if (-not [string]::IsNullOrWhiteSpace($metric)) { $tags.Add("AKS-RightSizing-$($metric.ToUpperInvariant())") | Out-Null }
    return @($tags | Select-Object -Unique)
}

function New-RightsizingRemediationSnippets {
    param(
        [string]$Category,
        [string]$Namespace,
        [string]$WorkloadName,
        [string]$MetricType,
        [double]$RecommendedMillicores,
        [double]$RecommendedMemoryMiB
    )

    $ns = if ([string]::IsNullOrWhiteSpace($Namespace)) { 'default' } else { $Namespace.Trim() }
    $workload = if ([string]::IsNullOrWhiteSpace($WorkloadName)) { 'workload-name' } else { $WorkloadName.Trim() }
    $yaml = ''
    switch -Regex (($Category ?? '').ToLowerInvariant()) {
        'missinghpa' {
            $yaml = @"
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: $workload
  namespace: $ns
spec:
  minReplicas: 2
  maxReplicas: 10
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: $workload
"@

        }
        'overprovisionedcpu|underprovisionedcpu' {
            $targetCpu = if ($RecommendedMillicores -gt 0) { [math]::Round($RecommendedMillicores, 0) } else { 250 }
            $yaml = @"
kubectl patch deployment $workload -n $ns --type merge -p:
  spec:
    template:
      spec:
        containers:
          - name: app
            resources:
              requests:
                cpu: "${targetCpu}m"
              limits:
                cpu: "${targetCpu}m"
"@

        }
        default {
            $targetMemory = if ($RecommendedMemoryMiB -gt 0) { [math]::Round($RecommendedMemoryMiB, 0) } else { 512 }
            $yaml = @"
resources:
  requests:
    memory: "${targetMemory}Mi"
  limits:
    memory: "${targetMemory}Mi"
"@

        }
    }

    if ([string]::IsNullOrWhiteSpace($yaml)) { return @() }
    return @(@{ language = 'yaml'; content = $yaml.Trim() })
}

function Get-KubectlVersionText {
    try {
        $versionResult = Invoke-WithTimeout -Command 'kubectl' -Arguments @('version', '--client', '--output=json') -TimeoutSec 300
        if ($versionResult.ExitCode -ne 0 -or [string]::IsNullOrWhiteSpace($versionResult.Output)) { return '' }
        $payload = $versionResult.Output | ConvertFrom-Json -ErrorAction SilentlyContinue
        if ($payload -and $payload.clientVersion -and $payload.clientVersion.gitVersion) {
            return [string]$payload.clientVersion.gitVersion
        }
        return ''
    } catch {
        return ''
    }
}

function Get-AzAksModuleVersionText {
    try {
        $module = Get-Module -ListAvailable -Name Az.Aks | Sort-Object Version -Descending | Select-Object -First 1
        if ($module -and $module.Version) { return $module.Version.ToString() }
    } catch {
    }
    return ''
}

function Get-AksRightsizingToolVersion {
    $kubectlVersion = Get-KubectlVersionText
    $azAksVersion = Get-AzAksModuleVersionText
    if ($kubectlVersion -and $azAksVersion) { return "kubectl:$kubectlVersion; Az.Aks:$azAksVersion" }
    if ($kubectlVersion) { return "kubectl:$kubectlVersion" }
    if ($azAksVersion) { return "Az.Aks:$azAksVersion" }
    return ''
}

function Add-RightsizingFinding {
    param (
        [Parameter(Mandatory)][pscustomobject] $Cluster,
        [Parameter(Mandatory)][string] $Category,
        [Parameter(Mandatory)][string] $Severity,
        [Parameter(Mandatory)][string] $Title,
        [Parameter(Mandatory)][string] $Detail,
        [Parameter(Mandatory)][string] $Remediation,
        [Parameter(Mandatory)][bool] $Compliant,
        [string] $Namespace = '',
        [string] $WorkloadName = '',
        [string] $ContainerName = '',
        [string] $MetricType = '',
        [double] $ObservedPercent = 0,
        [double] $RecommendedMillicores = 0,
        [double] $RecommendedMemoryMiB = 0,
        [string] $WorkspaceId = '',
        [string] $QueryName = '',
        [string] $QueryText = '',
        [string] $ToolVersion = ''
    )

    $clusterId = [string]$Cluster.id
    $deepLinkUrl = Get-AksWorkloadDeepLinkUrl -ClusterId $clusterId -Namespace $Namespace -WorkloadName $WorkloadName
    $workloadInsightsUrl = Get-ClusterInsightsUrl -ClusterId $clusterId
    $queryEvidenceUrl = Get-AksQueryEvidenceUri -WorkspaceId $WorkspaceId -QueryName $QueryName -QueryText $QueryText
    $evidenceUris = [System.Collections.Generic.List[string]]::new()
    if (-not [string]::IsNullOrWhiteSpace($queryEvidenceUrl)) { $evidenceUris.Add($queryEvidenceUrl) | Out-Null }
    if (-not [string]::IsNullOrWhiteSpace($workloadInsightsUrl)) { $evidenceUris.Add($workloadInsightsUrl) | Out-Null }
    $baselineTags = @(Resolve-RightsizingBaselineTags -Category $Category -MetricType $MetricType)
    $scoreDelta = Get-RightsizingSignalPercent -Category $Category -ObservedPercent $ObservedPercent
    $entityRefs = @($clusterId, "namespace:$Namespace", "workload:$WorkloadName")
    $remediationSnippets = @(New-RightsizingRemediationSnippets -Category $Category -Namespace $Namespace -WorkloadName $WorkloadName -MetricType $MetricType -RecommendedMillicores $RecommendedMillicores -RecommendedMemoryMiB $RecommendedMemoryMiB)
    $findingId = "aks-rightsizing/$Category/$($cluster.name)/$([guid]::NewGuid().ToString('N'))"
    $finding = [ordered]@{
        Id                   = $findingId
        Source               = 'aks-rightsizing'
        Category             = 'Performance'
        Severity             = $Severity
        Compliant            = $Compliant
        Title                = $Title
        Detail               = $Detail
        Remediation          = $Remediation
        ResourceId           = $clusterId
        LearnMoreUrl         = (Get-ClusterInsightsUrl -ClusterId $clusterId)
        Pillar               = Resolve-RightsizingPillar -Category $Category
        Impact               = Resolve-RightsizingImpact -Category $Category -ObservedPercent $ObservedPercent
        Effort               = Resolve-RightsizingEffort -Category $Category
        DeepLinkUrl          = $deepLinkUrl
        RemediationSnippets  = $remediationSnippets
        EvidenceUris         = @($evidenceUris)
        BaselineTags         = @($baselineTags)
        ScoreDelta           = $scoreDelta
        EntityRefs           = @($entityRefs)
        ToolVersion          = $ToolVersion
        FindingCategory      = $Category
        ClusterName          = [string]$Cluster.name
        ClusterResourceGroup = [string]$Cluster.resourceGroup
        Namespace            = $Namespace
        WorkloadName         = $WorkloadName
        ContainerName        = $ContainerName
        MetricType           = $MetricType
        ObservedPercent      = [math]::Round($ObservedPercent, 2)
        RecommendedMillicores = [math]::Round($RecommendedMillicores, 2)
        RecommendedMemoryMiB = [math]::Round($RecommendedMemoryMiB, 2)
    }
    $script:findings.Add([PSCustomObject]$finding) | Out-Null
}

function Invoke-RightsizingKql {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)][string] $WorkspaceId,
        [Parameter(Mandatory)][string] $QueryName,
        [Parameter(Mandatory)][string] $QueryText
    )

    try {
        $ping = Invoke-WithTimeout -Command 'pwsh' -Arguments @('-NoProfile', '-NonInteractive', '-Command', 'exit 0') -TimeoutSec 300
        if ($ping.ExitCode -ne 0) {
            throw (Format-FindingErrorMessage (New-FindingError -Source 'wrapper:aks-rightsizing' -Category 'TimeoutExceeded' -Reason "Invoke-WithTimeout preflight failed for ${QueryName} (exit code $($ping.ExitCode))." -Remediation 'Verify pwsh can run non-interactive commands and that timeout helpers are functioning.' -Details ([string]$ping.Output)))
        }
    } catch {
        throw (Format-FindingErrorMessage (New-FindingError -Source 'wrapper:aks-rightsizing' -Category 'UnexpectedFailure' -Reason "Invoke-WithTimeout preflight failed for ${QueryName}." -Remediation 'Verify shared Installer.ps1 timeout helper availability and retry the rightsizing query.' -Details ([string]$_.Exception.Message)))
    }

    return Invoke-LogAnalyticsQuery -WorkspaceId $WorkspaceId -Query $QueryText -TimeoutSeconds 300
}

try {
    $clusters = @(Get-AksClustersInScope -SubscriptionId $SubscriptionId -ResourceGroup $ResourceGroup -ClusterName $ClusterName)
} catch {
    $result.Status  = 'Failed'
    $result.Message = "AKS discovery failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))"
    return [PSCustomObject]$result
}

if (-not $clusters -or $clusters.Count -eq 0) {
    $result.Status  = 'Skipped'
    $result.Message = 'No AKS managed clusters in scope.'
    return [PSCustomObject]$result
}

$findings = [System.Collections.Generic.List[object]]::new()
$workspaceErrors = [System.Collections.Generic.List[string]]::new()
$clusterCount = 0
$toolVersion = Get-AksRightsizingToolVersion

foreach ($cluster in $clusters) {
    $clusterCount++
    $workspaceId = if ($LogAnalyticsWorkspaceId) { $LogAnalyticsWorkspaceId } else { Resolve-WorkspaceIdFromCluster -Cluster $cluster }
    if ([string]::IsNullOrWhiteSpace($workspaceId)) {
        $workspaceErrors.Add("Cluster $($cluster.name): Container Insights workspace not found.") | Out-Null
        continue
    }

    $workspaceName = Get-WorkspaceNameFromArmId -WorkspaceArmId $workspaceId
    $timeFilter = "ago(${LookbackDays}d)"

    $overCpuQuery = @"
let lookback = $timeFilter;
KubePodInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| where isnotempty(ContainerName)
| summarize cpuLimitNano = max(todouble(ContainerCpuLimitNanoCores)) by Namespace, Name, ContainerName
| join kind=inner (
    Perf
    | where TimeGenerated >= lookback
    | where ObjectName == 'K8SContainer'
    | where CounterName == 'cpuUsageNanoCores'
    | summarize p95_cpu = percentile(CounterValue, 95), avg_cpu = avg(CounterValue) by Namespace = InstanceName, ContainerName
) on Namespace, ContainerName
| where cpuLimitNano > 0 and p95_cpu < cpuLimitNano * 0.2
| project Namespace, WorkloadName = Name, ContainerName, p95_cpu, avg_cpu, cpuLimitNano, observedPct = (p95_cpu / cpuLimitNano) * 100.0, recommendedMillicores = round((p95_cpu * 1.25) / 1000000.0, 2)
"@


    $underCpuQuery = @"
let lookback = $timeFilter;
KubePodInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| where isnotempty(ContainerName)
| summarize cpuLimitNano = max(todouble(ContainerCpuLimitNanoCores)) by Namespace, Name, ContainerName
| join kind=inner (
    Perf
    | where TimeGenerated >= lookback
    | where ObjectName == 'K8SContainer'
    | where CounterName == 'cpuUsageNanoCores'
    | summarize p95_cpu = percentile(CounterValue, 95) by Namespace = InstanceName, ContainerName
) on Namespace, ContainerName
| where cpuLimitNano > 0 and p95_cpu >= cpuLimitNano * 0.95
| project Namespace, WorkloadName = Name, ContainerName, p95_cpu, cpuLimitNano, observedPct = (p95_cpu / cpuLimitNano) * 100.0
"@


    $overMemoryQuery = @"
let lookback = $timeFilter;
KubePodInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| where isnotempty(ContainerName)
| summarize memoryLimitBytes = max(todouble(ContainerMemoryLimitBytes)) by Namespace, Name, ContainerName
| join kind=inner (
    Perf
    | where TimeGenerated >= lookback
    | where ObjectName == 'K8SContainer'
    | where CounterName == 'memoryRssBytes'
    | summarize p95_mem = percentile(CounterValue, 95) by Namespace = InstanceName, ContainerName
) on Namespace, ContainerName
| where memoryLimitBytes > 0 and p95_mem < memoryLimitBytes * 0.2
| project Namespace, WorkloadName = Name, ContainerName, p95_mem, memoryLimitBytes, observedPct = (p95_mem / memoryLimitBytes) * 100.0, recommendedMemoryMiB = round((p95_mem * 1.25) / 1048576.0, 2)
"@


    $underMemoryQuery = @"
let lookback = $timeFilter;
KubePodInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| where isnotempty(ContainerName)
| summarize memoryLimitBytes = max(todouble(ContainerMemoryLimitBytes)) by Namespace, Name, ContainerName
| join kind=inner (
    Perf
    | where TimeGenerated >= lookback
    | where ObjectName == 'K8SContainer'
    | where CounterName == 'memoryRssBytes'
    | summarize p95_mem = percentile(CounterValue, 95) by Namespace = InstanceName, ContainerName
) on Namespace, ContainerName
| where memoryLimitBytes > 0 and p95_mem >= memoryLimitBytes * 0.95
| project Namespace, WorkloadName = Name, ContainerName, p95_mem, memoryLimitBytes, observedPct = (p95_mem / memoryLimitBytes) * 100.0
"@


    $missingHpaQuery = @"
let lookback = $timeFilter;
KubePodInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| summarize replicasMin = min(tolong(PodRestartCount)), replicasMax = max(tolong(PodRestartCount)) by Namespace, ControllerName
| where isnotempty(ControllerName)
| where replicasMin == replicasMax and replicasMin > 1
| project Namespace, ControllerName, replicasMin, replicasMax
"@


    $oomQuery = @"
let lookback = $timeFilter;
KubePodInventory
| where TimeGenerated >= lookback
| where ClusterName =~ '$($cluster.name)'
| where ContainerStatusReason =~ 'OOMKilled' or PodStatus =~ 'Failed'
| project Namespace, WorkloadName = Name, ContainerName, PodStatus, ContainerStatusReason
"@


    $querySet = @(
        @{ Name = 'over-cpu'; Query = $overCpuQuery },
        @{ Name = 'under-cpu'; Query = $underCpuQuery },
        @{ Name = 'over-memory'; Query = $overMemoryQuery },
        @{ Name = 'under-memory'; Query = $underMemoryQuery },
        @{ Name = 'missing-hpa'; Query = $missingHpaQuery },
        @{ Name = 'oomkilled'; Query = $oomQuery }
    )

    foreach ($entry in $querySet) {
        $queryResponse = $null
        try {
            $queryName = [string]$entry.Name
            $queryText = [string]$entry.Query
            $queryResponse = Invoke-WithRetry -MaxAttempts 4 -InitialDelaySeconds 2 -MaxDelaySeconds 20 -ScriptBlock {
                Invoke-RightsizingKql -WorkspaceId $workspaceId -QueryName $queryName -QueryText $queryText
            }
        } catch {
            $workspaceErrors.Add("Cluster $($cluster.name) query '$($entry.Name)' failed: $(Remove-Credentials -Text ([string]$_.Exception.Message))") | Out-Null
            continue
        }

        $rows = @()
        if ($queryResponse.PSObject.Properties['Results'] -and $queryResponse.Results) {
            $rows = @($queryResponse.Results)
        } elseif ($queryResponse.PSObject.Properties['Value'] -and $queryResponse.Value) {
            $rows = @($queryResponse.Value)
        }

        foreach ($row in $rows) {
            switch ($entry.Name) {
                'over-cpu' {
                    $pct = [double]($row.observedPct ?? 0)
                    $rec = [double]($row.recommendedMillicores ?? 0)
                    $title = "Over-provisioned pod $($row.Namespace)/$($row.WorkloadName): P95 CPU $([math]::Round($pct, 1))% of limit (recommend $([math]::Round($rec, 0))m)"
                    $detail = "Container '$($row.ContainerName)' in cluster '$($cluster.name)' has P95 CPU below 20% of limit over ${LookbackDays}d."
                    Add-RightsizingFinding -Cluster $cluster -Category 'OverProvisionedCpu' -Severity 'Medium' -Compliant $false -Title $title -Detail $detail -Remediation 'Reduce CPU limit/request for this workload or raise HPA target utilization.' -Namespace ([string]$row.Namespace) -WorkloadName ([string]$row.WorkloadName) -ContainerName ([string]$row.ContainerName) -MetricType 'cpu' -ObservedPercent $pct -RecommendedMillicores $rec -WorkspaceId $workspaceId -QueryName $queryName -QueryText $queryText -ToolVersion $toolVersion
                }
                'under-cpu' {
                    $pct = [double]($row.observedPct ?? 0)
                    $title = "Under-provisioned pod $($row.Namespace)/$($row.WorkloadName): P95 CPU $([math]::Round($pct, 1))% of limit"
                    $detail = "Container '$($row.ContainerName)' in cluster '$($cluster.name)' is at or above 95% CPU limit over ${LookbackDays}d."
                    Add-RightsizingFinding -Cluster $cluster -Category 'UnderProvisionedCpu' -Severity 'High' -Compliant $false -Title $title -Detail $detail -Remediation 'Increase CPU limit/request or reduce load with HPA.' -Namespace ([string]$row.Namespace) -WorkloadName ([string]$row.WorkloadName) -ContainerName ([string]$row.ContainerName) -MetricType 'cpu' -ObservedPercent $pct -WorkspaceId $workspaceId -QueryName $queryName -QueryText $queryText -ToolVersion $toolVersion
                }
                'over-memory' {
                    $pct = [double]($row.observedPct ?? 0)
                    $rec = [double]($row.recommendedMemoryMiB ?? 0)
                    $title = "Over-provisioned pod $($row.Namespace)/$($row.WorkloadName): P95 memory $([math]::Round($pct, 1))% of limit (recommend $([math]::Round($rec, 0))Mi)"
                    $detail = "Container '$($row.ContainerName)' in cluster '$($cluster.name)' has P95 memory below 20% of limit over ${LookbackDays}d."
                    Add-RightsizingFinding -Cluster $cluster -Category 'OverProvisionedMemory' -Severity 'Medium' -Compliant $false -Title $title -Detail $detail -Remediation 'Reduce memory limit/request for this workload.' -Namespace ([string]$row.Namespace) -WorkloadName ([string]$row.WorkloadName) -ContainerName ([string]$row.ContainerName) -MetricType 'memory' -ObservedPercent $pct -RecommendedMemoryMiB $rec -WorkspaceId $workspaceId -QueryName $queryName -QueryText $queryText -ToolVersion $toolVersion
                }
                'under-memory' {
                    $pct = [double]($row.observedPct ?? 0)
                    $title = "Under-provisioned pod $($row.Namespace)/$($row.WorkloadName): P95 memory $([math]::Round($pct, 1))% of limit"
                    $detail = "Container '$($row.ContainerName)' in cluster '$($cluster.name)' is at or above 95% memory limit over ${LookbackDays}d."
                    Add-RightsizingFinding -Cluster $cluster -Category 'UnderProvisionedMemory' -Severity 'High' -Compliant $false -Title $title -Detail $detail -Remediation 'Increase memory limit/request and review memory leaks.' -Namespace ([string]$row.Namespace) -WorkloadName ([string]$row.WorkloadName) -ContainerName ([string]$row.ContainerName) -MetricType 'memory' -ObservedPercent $pct -WorkspaceId $workspaceId -QueryName $queryName -QueryText $queryText -ToolVersion $toolVersion
                }
                'missing-hpa' {
                    $title = "Static replicas for $($row.Namespace)/$($row.ControllerName): candidate for HPA"
                    $detail = "Replica pattern remained static (min=max=$($row.replicasMin)) over ${LookbackDays}d in cluster '$($cluster.name)'."
                    Add-RightsizingFinding -Cluster $cluster -Category 'MissingHpa' -Severity 'Info' -Compliant $false -Title $title -Detail $detail -Remediation 'Create an HPA policy for this workload and tune min/max replicas.' -Namespace ([string]$row.Namespace) -WorkloadName ([string]$row.ControllerName) -WorkspaceId $workspaceId -QueryName $queryName -QueryText $queryText -ToolVersion $toolVersion
                }
                'oomkilled' {
                    $title = "Pod OOMKilled $($row.Namespace)/$($row.WorkloadName): increase memory limit"
                    $detail = "Container '$($row.ContainerName)' reported '$($row.ContainerStatusReason)' in cluster '$($cluster.name)'."
                    Add-RightsizingFinding -Cluster $cluster -Category 'OomKilled' -Severity 'High' -Compliant $false -Title $title -Detail $detail -Remediation 'Increase memory limit/request and investigate memory growth.' -Namespace ([string]$row.Namespace) -WorkloadName ([string]$row.WorkloadName) -ContainerName ([string]$row.ContainerName) -MetricType 'memory' -WorkspaceId $workspaceId -QueryName $queryName -QueryText $queryText -ToolVersion $toolVersion
                }
            }
        }
    }

    if ($OutputPath) {
        try {
            if (-not (Test-Path $OutputPath)) { New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null }
            $sanitizedClusterName = ([string]$cluster.name) -replace '[^A-Za-z0-9._-]', '_'
            $rawOut = Join-Path $OutputPath "aks-rightsizing-$sanitizedClusterName-$workspaceName.json"
            $clusterFindings = @($findings | Where-Object { $_.ClusterName -eq $cluster.name })
            Set-Content -Path $rawOut -Value (Remove-Credentials ($clusterFindings | ConvertTo-Json -Depth 20)) -Encoding UTF8
        } catch {
            Write-Verbose "Failed writing rightsizing raw output for $($cluster.name): $(Remove-Credentials -Text ([string]$_.Exception.Message))"
        }
    }
}

$result.Findings = @($findings)
$result.ToolVersion = $toolVersion
if ($workspaceErrors.Count -gt 0 -and $findings.Count -gt 0) {
    $result.Status = 'PartialSuccess'
} elseif ($workspaceErrors.Count -gt 0 -and $findings.Count -eq 0) {
    $result.Status = 'Failed'
}

$baseMessage = "Scanned $clusterCount AKS cluster(s) over ${LookbackDays} day(s); emitted $($findings.Count) rightsizing finding(s)."
if ($workspaceErrors.Count -gt 0) {
    $result.Message = "$baseMessage Workspace/query errors: $($workspaceErrors -join ' | ')"
} else {
    $result.Message = $baseMessage
}

return [PSCustomObject]$result