Get-DefaultAlertRules.ps1
function Get-DefaultAlertRules {
    <#
    .SYNOPSIS
    Provide good default alert rules for environments.

    .DESCRIPTION
    Provide good default alert rules for environments. This will evolve over
    time to match future needs and upcoming resource types. This method works
    as entrypoint to create more specific alerting setup when needed.

    Each returned rule is a PSCustomObject with these members:
      ResourceType          Azure resource type the rule is written for.
      Name                  Short rule name.
      Description           Text describing the alert condition.
      AlertValidationSteps  Links for verifying the alert (only on some rules).
      AlertFixSteps         Links for fixing the cause (only on some rules).
      Criteria              Script block that builds the metric criteria with
                            New-AzMetricAlertRuleV2Criteria. It is returned
                            as-is; this function never invokes it.
      Severity              'Information', 'Warning', 'Error' or 'Critical'.
      WindowSize            TimeSpan (5 minutes for every default rule).
      Frequency             TimeSpan (5 minutes for every default rule).

    .OUTPUTS
    PSCustomObject[]

    .EXAMPLE
    Get-DefaultAlertRules

    Returns current well known setup of alert defaults.
    #>
    [CmdletBinding()]
    [OutputType([PSCustomObject[]])]
    Param()

    # Every default rule uses the same 5 minute window and evaluation frequency.
    $fiveMinutes = New-TimeSpan -Minutes 5

    # Documentation links that are shared by several rules. Only the strings
    # are shared; each rule still wraps them in its own array.
    $runtimeExceptionsDoc = "https://docs.microsoft.com/en-us/azure/azure-monitor/app/tutorial-runtime-exceptions#analyze-failures"
    $performanceDoc = "https://docs.microsoft.com/en-us/azure/azure-monitor/app/tutorial-performance"
    $webAppFixesDoc = "https://github.com/by-pinja/Pinja.Azure.Alerts/blob/master/doc/WebAppFixes.md"
    $webAppScalingDoc = "https://github.com/by-pinja/Pinja.Azure.Alerts/blob/master/doc/WebAppScaling.md"
    $sqlServerTierDoc = "https://github.com/by-pinja/Pinja.Azure.Alerts/blob/master/doc/SqlServerTier.md"

    # A 'Filter' expression was described here as a way to decide whether a
    # rule is applicable to a given resource, so that an alert can target
    # specific resources instead of all instances of one type. None of the
    # defaults below define one.
    # NOTE(review): confirm whether the consumer of these rules supports 'Filter'.
    @(
        # --- App Service web apps -------------------------------------------
        [PSCustomObject]@{
            ResourceType         = 'Microsoft.Web/Sites'
            Name                 = 'Few Server errors'
            Description          = 'Too many server errors!'
            AlertValidationSteps = @($runtimeExceptionsDoc)
            AlertFixSteps        = @($webAppFixesDoc)
            Criteria             = { New-AzMetricAlertRuleV2Criteria -MetricName 'Http5xx' -TimeAggregation Total -Operator GreaterThan -Threshold 5 }
            Severity             = "Warning"
            WindowSize           = $fiveMinutes
            Frequency            = $fiveMinutes
        }
        [PSCustomObject]@{
            ResourceType         = 'Microsoft.Web/Sites'
            Name                 = 'Many Server errors'
            Description          = 'Way too many server errors!'
            AlertValidationSteps = @($runtimeExceptionsDoc)
            AlertFixSteps        = @($webAppFixesDoc)
            Criteria             = { New-AzMetricAlertRuleV2Criteria -MetricName 'Http5xx' -TimeAggregation Total -Operator GreaterThan -Threshold 100 }
            Severity             = "Critical"
            WindowSize           = $fiveMinutes
            Frequency            = $fiveMinutes
        }

        # --- App Service plans ----------------------------------------------
        [PSCustomObject]@{
            ResourceType         = 'Microsoft.Web/serverFarms'
            Name                 = 'CPU percentage'
            Description          = 'CPU Usage too high!'
            AlertValidationSteps = @($performanceDoc)
            AlertFixSteps        = @($webAppScalingDoc)
            Criteria             = { New-AzMetricAlertRuleV2Criteria -MetricName 'CpuPercentage' -TimeAggregation Average -Operator GreaterThan -Threshold 80 }
            Severity             = "Warning"
            WindowSize           = $fiveMinutes
            Frequency            = $fiveMinutes
        }
        [PSCustomObject]@{
            ResourceType         = 'Microsoft.Web/serverFarms'
            Name                 = 'Memory percentage'
            Description          = 'Memory Usage too high!'
            AlertValidationSteps = @($performanceDoc)
            AlertFixSteps        = @("${webAppScalingDoc}#scaling-apps-up")
            Criteria             = { New-AzMetricAlertRuleV2Criteria -MetricName 'MemoryPercentage' -TimeAggregation Average -Operator GreaterThan -Threshold 90 }
            Severity             = "Warning"
            WindowSize           = $fiveMinutes
            Frequency            = $fiveMinutes
        }

        # --- SQL databases (no validation steps defined) ----------------------
        [PSCustomObject]@{
            ResourceType  = 'Microsoft.Sql/servers/databases'
            Name          = 'CPU Percentage'
            Description   = 'CPU Usage too high!'
            AlertFixSteps = @($sqlServerTierDoc)
            Criteria      = { New-AzMetricAlertRuleV2Criteria -MetricName 'cpu_percent' -TimeAggregation Average -Operator GreaterThan -Threshold 80 }
            Severity      = "Information"
            WindowSize    = $fiveMinutes
            Frequency     = $fiveMinutes
        }
        [PSCustomObject]@{
            ResourceType  = 'Microsoft.Sql/servers/databases'
            Name          = 'DTU consumption'
            Description   = 'DTU consumption too high!'
            AlertFixSteps = @($sqlServerTierDoc)
            Criteria      = { New-AzMetricAlertRuleV2Criteria -MetricName 'dtu_consumption_percent' -TimeAggregation Average -Operator GreaterThan -Threshold 80 }
            Severity      = "Information"
            WindowSize    = $fiveMinutes
            Frequency     = $fiveMinutes
        }
        [PSCustomObject]@{
            ResourceType  = 'Microsoft.Sql/servers/databases'
            Name          = 'SQL Storage'
            Description   = 'SQL storage space is getting low!'
            AlertFixSteps = @("https://github.com/by-pinja/Pinja.Azure.Alerts/blob/master/doc/SqlServerStorageSize.md")
            Criteria      = { New-AzMetricAlertRuleV2Criteria -MetricName 'storage_percent' -TimeAggregation Average -Operator GreaterThan -Threshold 80 }
            Severity      = "Error"
            WindowSize    = $fiveMinutes
            Frequency     = $fiveMinutes
        }

        # --- Redis caches (no validation or fix steps defined) ----------------
        [PSCustomObject]@{
            ResourceType = 'Microsoft.Cache/Redis'
            Name         = 'Server load'
            Description  = 'Server load too high!'
            Criteria     = { New-AzMetricAlertRuleV2Criteria -MetricName 'serverLoad' -TimeAggregation Average -Operator GreaterThan -Threshold 80 }
            Severity     = "Information"
            WindowSize   = $fiveMinutes
            Frequency    = $fiveMinutes
        }
        [PSCustomObject]@{
            ResourceType = 'Microsoft.Cache/Redis'
            Name         = 'Server memory'
            Description  = 'Server memory percentage too high!'
            Criteria     = { New-AzMetricAlertRuleV2Criteria -MetricName 'usedmemorypercentage' -TimeAggregation Average -Operator GreaterThan -Threshold 80 }
            Severity     = "Information"
            WindowSize   = $fiveMinutes
            Frequency    = $fiveMinutes
        }
        [PSCustomObject]@{
            ResourceType = 'Microsoft.Cache/Redis'
            Name         = 'Redis processor'
            Description  = 'Redis processor load too high!'
            Criteria     = { New-AzMetricAlertRuleV2Criteria -MetricName 'percentProcessorTime' -TimeAggregation Average -Operator GreaterThan -Threshold 80 }
            Severity     = "Information"
            WindowSize   = $fiveMinutes
            Frequency    = $fiveMinutes
        }

        # --- Application Insights availability tests --------------------------
        [PSCustomObject]@{
            ResourceType         = 'Microsoft.Insights/webtests'
            Name                 = "Availability percent"
            Description          = "Web app didn't respond multiple times in timely manner!"
            AlertValidationSteps = @($runtimeExceptionsDoc)
            AlertFixSteps        = @($webAppFixesDoc)
            # The criteria reads $_.Name to select the availability dimension.
            # NOTE(review): presumably the caller invokes this script block with
            # $_ bound to the web test resource - confirm against the caller.
            Criteria             = {
                New-AzMetricAlertRuleV2DimensionSelection -DimensionName "availabilityResult/name" -ValuesToInclude $($_.Name) |
                    New-AzMetricAlertRuleV2Criteria -MetricName "availabilityResults/availabilityPercentage" -TimeAggregation Average -Operator LessThan -Threshold 90
            }
            Severity             = "Critical"
            WindowSize           = $fiveMinutes
            Frequency            = $fiveMinutes
        }
    )
}