modules/sinks/Send-FindingsToLogAnalytics.ps1

#Requires -Version 7.4
<#
.SYNOPSIS
    Send findings and entities to Azure Monitor Logs Ingestion API.
.DESCRIPTION
    Reads entities.json output from azure-analyzer, derives finding rows from
    entity observations, and sends findings/entities to configured DCR streams.
    Uses Invoke-WithRetry for transient failures and enforces HTTPS endpoints.
#>

[CmdletBinding()]
param ()
# The script itself takes no parameters; it only defines the sink functions below.

# Strict mode plus 'Stop' turn undefined variables/properties and cmdlet errors
# into terminating errors for the code in this file.
Set-StrictMode -Version Latest
$ErrorActionPreference = 'Stop'

# Dot-source the shared helper scripts from the sibling 'shared' directory.
# A helper file that does not exist is skipped without an error here.
$sharedDir = Join-Path $PSScriptRoot '..' 'shared'
foreach ($sharedModule in @('Sanitize', 'Retry', 'Errors')) {
    $sharedPath = Join-Path $sharedDir "$sharedModule.ps1"
    if (Test-Path $sharedPath) { . $sharedPath }
}
# Fallback when no Remove-Credentials command is available: returns the text
# unchanged, i.e. no redaction is performed.
if (-not (Get-Command Remove-Credentials -ErrorAction SilentlyContinue)) {
    function Remove-Credentials { param ([string]$Text) return $Text }
}
# Fallback when no Invoke-WithRetry command is available: runs the script block
# exactly once, without any retry.
if (-not (Get-Command Invoke-WithRetry -ErrorAction SilentlyContinue)) {
    function Invoke-WithRetry { param([scriptblock]$ScriptBlock) & $ScriptBlock }
}
# The error helpers have no fallback: fail fast if either command is missing,
# because New-SinkFindingError below depends on both.
foreach ($required in @('New-FindingError', 'Format-FindingErrorMessage')) {
    if (-not (Get-Command $required -ErrorAction SilentlyContinue)) {
        throw "Send-FindingsToLogAnalytics requires modules/shared/Errors.ps1 (missing command: $required). Ensure the shared/Errors.ps1 module is present and loadable before invoking the sink."
    }
}

function New-SinkFindingError {
    <#
    .SYNOPSIS
        Builds the formatted error message thrown by this sink.
    .DESCRIPTION
        Creates a finding error tagged with the source 'sink:log-analytics' via
        New-FindingError and returns the result of Format-FindingErrorMessage.
        The details text is passed through Remove-Credentials first.
    .PARAMETER Category
        Error category forwarded to New-FindingError.
    .PARAMETER Reason
        Short description of what went wrong.
    .PARAMETER Remediation
        Guidance on how to resolve the problem.
    .PARAMETER Details
        Optional extra context; defaults to an empty string.
    .OUTPUTS
        Whatever Format-FindingErrorMessage returns for the created error.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string] $Category,

        [Parameter(Mandatory)]
        [string] $Reason,

        [Parameter(Mandatory)]
        [string] $Remediation,

        [string] $Details = ''
    )

    # Collect the arguments once and splat them instead of using line continuations.
    $errorArguments = @{
        Source      = 'sink:log-analytics'
        Category    = $Category
        Reason      = $Reason
        Remediation = $Remediation
        Details     = (Remove-Credentials ([string]$Details))
    }

    $sinkError = New-FindingError @errorArguments
    return (Format-FindingErrorMessage $sinkError)
}

# Batching limits read by New-LogAnalyticsBatches: the maximum UTF-8 size of one
# request body (1MB = 1,048,576 bytes) and the maximum number of records per batch.
# NOTE(review): presumably chosen to stay within the Logs Ingestion API request
# limits - confirm both values against the current service documentation.
$script:MaxIngestionBodyBytes = 1MB
$script:MaxIngestionRecordsPerBatch = 1500

function Test-LogAnalyticsEndpoint {
    <#
    .SYNOPSIS
        Validates that a Data Collection Endpoint URL is an HTTPS URI.
    .DESCRIPTION
        Throws a formatted sink error (category 'InvalidParameter') when the value
        does not start with https://, cannot be converted to System.Uri, or parses
        to a scheme other than https. Produces no output when the value is accepted.
    .PARAMETER DceEndpoint
        The endpoint URL to validate.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string] $DceEndpoint
    )

    # Cheap textual check first so obviously wrong input never reaches the URI parser.
    $hasHttpsPrefix = $DceEndpoint -match '^https://'
    if (-not $hasHttpsPrefix) {
        $notHttps = @{
            Category    = 'InvalidParameter'
            Reason      = 'DceEndpoint must use HTTPS.'
            Remediation = 'Provide a Logs Ingestion endpoint starting with https://.'
            Details     = "Received: $DceEndpoint"
        }
        throw (New-SinkFindingError @notHttps)
    }

    $parsedEndpoint = $null
    try {
        $parsedEndpoint = [System.Uri]$DceEndpoint
    } catch {
        $invalidUri = @{
            Category    = 'InvalidParameter'
            Reason      = 'DceEndpoint is not a valid URI.'
            Remediation = 'Provide a valid Logs Ingestion endpoint URI.'
            Details     = "Input: $DceEndpoint. Error: $_"
        }
        throw (New-SinkFindingError @invalidUri)
    }

    # Second check on the parsed scheme, independent of the textual prefix test.
    $scheme = $parsedEndpoint.Scheme
    if ($scheme -eq 'https') {
        return
    }

    $wrongScheme = @{
        Category    = 'InvalidParameter'
        Reason      = 'DceEndpoint must use HTTPS.'
        Remediation = 'Use an https:// Logs Ingestion endpoint.'
        Details     = "Received scheme: $scheme"
    }
    throw (New-SinkFindingError @wrongScheme)
}

function Read-EntitiesFromJson {
    <#
    .SYNOPSIS
        Loads the entity list from an entities.json file.
    .DESCRIPTION
        Reads and parses the file, then returns the entities for both supported
        shapes: a bare array of entities (v3.0) and the
        { SchemaVersion, Entities, Edges } envelope (v3.1).
        The path is treated literally (no wildcard expansion) and must point to a
        file. Null entries are dropped so callers never receive a $null entity.
    .PARAMETER EntitiesJson
        Path to the entities.json file.
    .OUTPUTS
        The entity objects, enumerated to the pipeline (nothing when there are none).
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string] $EntitiesJson
    )

    # -LiteralPath: characters such as [ and ] in a directory name must not be
    # interpreted as wildcards. -PathType Leaf: a directory is not a usable input
    # and would otherwise surface later as a misleading "failed to parse" error.
    if (-not (Test-Path -LiteralPath $EntitiesJson -PathType Leaf)) {
        throw (New-SinkFindingError `
            -Category 'NotFound' `
            -Reason 'EntitiesJson file was not found.' `
            -Remediation 'Pass a valid path to entities.json generated by Invoke-AzureAnalyzer.' `
            -Details "Path: $EntitiesJson")
    }

    try {
        $parsed = Get-Content -LiteralPath $EntitiesJson -Raw -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
    } catch {
        throw (New-SinkFindingError `
            -Category 'ConfigurationError' `
            -Reason 'Failed to parse entities JSON.' `
            -Remediation 'Validate that entities.json is valid JSON and re-run the sink.' `
            -Details "$_")
    }

    # Issue #187 / B3: entities.json envelope went from bare array (v3.0) to
    # { SchemaVersion, Entities, Edges } (v3.1). Sniff the shape here so the LA
    # sink continues to ingest entities (not the envelope) for both versions.
    $entities = if ($parsed -is [pscustomobject] -and $parsed.PSObject.Properties['Entities']) {
        $parsed.Entities
    } else {
        $parsed
    }

    # A JSON null document, "Entities": null, or null array items would otherwise
    # reach the callers as $null entities; filter them out here.
    return @($entities | Where-Object { $null -ne $_ })
}

function Get-LogAnalyticsAccessToken {
    <#
    .SYNOPSIS
        Acquires a bearer token for https://monitor.azure.com/ via Get-AzAccessToken.
    .DESCRIPTION
        Returns the token as plain text for use in an Authorization header.
        Handles both token shapes Get-AzAccessToken can return: a plain string and
        a SecureString (the default in newer Az.Accounts releases). Casting a
        SecureString with [string] yields the type name rather than the secret,
        so it is converted explicitly.
    .OUTPUTS
        System.String. The access token.
    .NOTES
        Throws a formatted sink error (category 'AuthenticationFailed') when the
        token cannot be acquired or is empty.
    #>
    [CmdletBinding()]
    param ()

    try {
        $token = Get-AzAccessToken -ResourceUrl 'https://monitor.azure.com/' -ErrorAction Stop
    } catch {
        throw (New-SinkFindingError `
            -Category 'AuthenticationFailed' `
            -Reason 'Failed to acquire Azure Monitor token via Get-AzAccessToken.' `
            -Remediation 'Run Connect-AzAccount with access to Azure Monitor Logs Ingestion and retry.' `
            -Details "$_")
    }

    # Guard against a null result as well as a result without a Token property.
    $rawToken = if ($null -ne $token -and $token.PSObject.Properties['Token']) { $token.Token } else { $null }

    $resolvedToken = if ($rawToken -is [System.Security.SecureString]) {
        # -AsPlainText is available because this script requires PowerShell 7.4.
        ConvertFrom-SecureString -SecureString $rawToken -AsPlainText
    } else {
        [string]$rawToken
    }

    if ([string]::IsNullOrWhiteSpace($resolvedToken)) {
        throw (New-SinkFindingError `
            -Category 'AuthenticationFailed' `
            -Reason 'Get-AzAccessToken returned an empty token.' `
            -Remediation 'Refresh your Azure session and ensure token acquisition is allowed for https://monitor.azure.com/.')
    }

    return $resolvedToken
}

function Convert-ToLogAnalyticsScalar {
    <#
    .SYNOPSIS
        Converts a value into something that fits a single Log Analytics column.
    .DESCRIPTION
        - $null is returned as $null.
        - string, bool, int, long, double and decimal values are returned unchanged.
        - datetime values are returned as UTC round-trip ('o') strings.
        - Everything else (collections and objects) is returned as compressed JSON.
        Collections keep their shape: a one-element array serializes as a
        one-element JSON array and an empty array as '[]'.
    .PARAMETER Value
        The value to convert. May be $null.
    .OUTPUTS
        The unchanged scalar, a string, or $null.
    #>
    [CmdletBinding()]
    param (
        [Parameter()]
        [AllowNull()]
        [object] $Value
    )

    if ($null -eq $Value) { return $null }
    if ($Value -is [string] -or $Value -is [bool] -or $Value -is [int] -or $Value -is [long] -or $Value -is [double] -or $Value -is [decimal]) {
        return $Value
    }
    if ($Value -is [datetime]) {
        return $Value.ToUniversalTime().ToString('o')
    }

    # Use -InputObject rather than the pipeline: piping a collection enumerates
    # it, which turns @('a') into the scalar "a" and @() into no output at all.
    return (ConvertTo-Json -InputObject $Value -Depth 30 -Compress)
}

function Convert-FindingToLogAnalyticsRecord {
    <#
    .SYNOPSIS
        Flattens one finding (entity observation) into a Log Analytics row.
    .DESCRIPTION
        Copies the finding fields into a flat PSCustomObject. The optional
        Provenance sub-object supplies RunId, Source, Timestamp and RawRecordRef;
        each becomes an empty string when Provenance or the field is absent.
        TimeGenerated uses the provenance timestamp when present, otherwise the
        current UTC time.
        A provenance timestamp that arrives as [datetime] or [DateTimeOffset]
        (ConvertFrom-Json can materialize ISO 8601 strings as [datetime]) is
        written as a UTC round-trip ('o') string. A plain [string] cast would give
        'MM/dd/yyyy HH:mm:ss' and drop both the ISO format and the zone.
    .PARAMETER Finding
        The finding object. Apart from Provenance and EvidenceCount, the fields
        are read directly from it.
    .OUTPUTS
        PSCustomObject with one property per Log Analytics column.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [pscustomobject] $Finding
    )

    $provenance = if ($Finding.PSObject.Properties['Provenance']) { $Finding.Provenance } else { $null }

    # One lookup loop instead of four copies of the same presence check.
    $provenanceFields = @{}
    foreach ($fieldName in 'RunId', 'Source', 'Timestamp', 'RawRecordRef') {
        $provenanceFields[$fieldName] = if ($provenance -and $provenance.PSObject.Properties[$fieldName]) {
            $provenance.$fieldName
        } else {
            $null
        }
    }

    $rawTimestamp = $provenanceFields['Timestamp']
    $provenanceTimestamp = if ($rawTimestamp -is [datetime]) {
        # Same UTC round-trip format Convert-ToLogAnalyticsScalar uses for datetimes.
        $rawTimestamp.ToUniversalTime().ToString('o')
    } elseif ($rawTimestamp -is [System.DateTimeOffset]) {
        $rawTimestamp.UtcDateTime.ToString('o')
    } else {
        [string]$rawTimestamp
    }

    [PSCustomObject]@{
        TimeGenerated          = if ($provenanceTimestamp) { $provenanceTimestamp } else { (Get-Date).ToUniversalTime().ToString('o') }
        RunId                  = [string]$provenanceFields['RunId']
        FindingId              = [string]$Finding.Id
        Source                 = [string]$Finding.Source
        Category               = [string]$Finding.Category
        Title                  = [string]$Finding.Title
        Severity               = [string]$Finding.Severity
        Compliant              = [bool]$Finding.Compliant
        Detail                 = [string]$Finding.Detail
        Remediation            = [string]$Finding.Remediation
        ResourceId             = [string]$Finding.ResourceId
        LearnMoreUrl           = [string]$Finding.LearnMoreUrl
        EntityId               = [string]$Finding.EntityId
        EntityType             = [string]$Finding.EntityType
        Platform               = [string]$Finding.Platform
        SubscriptionId         = [string]$Finding.SubscriptionId
        SubscriptionName       = [string]$Finding.SubscriptionName
        ResourceGroup          = [string]$Finding.ResourceGroup
        ManagementGroupPath    = Convert-ToLogAnalyticsScalar -Value $Finding.ManagementGroupPath
        Frameworks             = Convert-ToLogAnalyticsScalar -Value $Finding.Frameworks
        Controls               = Convert-ToLogAnalyticsScalar -Value $Finding.Controls
        Confidence             = [string]$Finding.Confidence
        EvidenceCount          = if ($Finding.PSObject.Properties['EvidenceCount']) { [int]$Finding.EvidenceCount } else { 0 }
        MissingDimensions      = Convert-ToLogAnalyticsScalar -Value $Finding.MissingDimensions
        SchemaVersion          = [string]$Finding.SchemaVersion
        ProvenanceSource       = [string]$provenanceFields['Source']
        ProvenanceTimestamp    = $provenanceTimestamp
        ProvenanceRawRecordRef = [string]$provenanceFields['RawRecordRef']
    }
}

function Convert-EntityToLogAnalyticsRecord {
    <#
    .SYNOPSIS
        Flattens one entity into a Log Analytics row.
    .DESCRIPTION
        Emits a PSCustomObject holding the entity's identity fields, its
        aggregate counters, the distinct non-blank provenance run IDs of its
        observations (comma-joined), and the entity serialized through
        Convert-ToLogAnalyticsScalar as EntityRecord. Optional properties fall
        back to '' (text), 0 (counters) or an empty collection (list fields).
        TimeGenerated is the current UTC time.
    .PARAMETER Entity
        The entity object to convert.
    .OUTPUTS
        PSCustomObject with one property per Log Analytics column.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [pscustomobject] $Entity
    )

    $entityProperties = $Entity.PSObject.Properties
    $observationList = @($Entity.Observations)

    # Gather every usable run ID first, then de-duplicate in one step.
    $collectedRunIds = @(
        foreach ($observation in $observationList) {
            $hasRunId = $observation.PSObject.Properties['Provenance'] -and $observation.Provenance -and $observation.Provenance.PSObject.Properties['RunId']
            if ($hasRunId) {
                $candidate = [string]$observation.Provenance.RunId
                if (-not [string]::IsNullOrWhiteSpace($candidate)) { $candidate }
            }
        }
    )
    $distinctRunIds = @($collectedRunIds | Select-Object -Unique)

    # Optional text and counter fields, resolved up front.
    $worstSeverity = ''
    if ($entityProperties['WorstSeverity']) { $worstSeverity = [string]$Entity.WorstSeverity }

    $confidence = ''
    if ($entityProperties['Confidence']) { $confidence = [string]$Entity.Confidence }

    $compliantCount = 0
    if ($entityProperties['CompliantCount']) { $compliantCount = [int]$Entity.CompliantCount }

    $nonCompliantCount = 0
    if ($entityProperties['NonCompliantCount']) { $nonCompliantCount = [int]$Entity.NonCompliantCount }

    [PSCustomObject]@{
        TimeGenerated       = (Get-Date).ToUniversalTime().ToString('o')
        RunId               = ($distinctRunIds -join ',')
        FindingId           = ''
        EntityId            = [string]$Entity.EntityId
        EntityType          = [string]$Entity.EntityType
        Platform            = [string]$Entity.Platform
        DisplayName         = [string]$Entity.DisplayName
        SubscriptionId      = [string]$Entity.SubscriptionId
        SubscriptionName    = [string]$Entity.SubscriptionName
        ResourceGroup       = [string]$Entity.ResourceGroup
        ManagementGroupPath = Convert-ToLogAnalyticsScalar -Value $(if ($entityProperties['ManagementGroupPath']) { $Entity.ManagementGroupPath } else { @() })
        WorstSeverity       = $worstSeverity
        CompliantCount      = $compliantCount
        NonCompliantCount   = $nonCompliantCount
        ObservationCount    = $observationList.Count
        Sources             = Convert-ToLogAnalyticsScalar -Value $(if ($entityProperties['Sources']) { $Entity.Sources } else { @() })
        Frameworks          = Convert-ToLogAnalyticsScalar -Value $(if ($entityProperties['Frameworks']) { $Entity.Frameworks } else { @() })
        Controls            = Convert-ToLogAnalyticsScalar -Value $(if ($entityProperties['Controls']) { $Entity.Controls } else { @() })
        Confidence          = $confidence
        MissingDimensions   = Convert-ToLogAnalyticsScalar -Value $(if ($entityProperties['MissingDimensions']) { $Entity.MissingDimensions } else { @() })
        EntityRecord        = Convert-ToLogAnalyticsScalar -Value $Entity
    }
}

function New-LogAnalyticsBatches {
    <#
    .SYNOPSIS
        Splits records into batches that respect the ingestion size and count limits.
    .DESCRIPTION
        Walks the records in order and starts a new batch whenever adding the next
        record would exceed $script:MaxIngestionRecordsPerBatch or make the
        serialized JSON array larger than $script:MaxIngestionBodyBytes.
        Sizes are measured as the UTF-8 byte count of each record's compressed
        JSON, plus one byte per separating comma and two bytes for the enclosing
        brackets.
    .PARAMETER Records
        The records to batch. Null or empty input yields no batches.
    .OUTPUTS
        One object[] per batch, in input order.
    .NOTES
        Throws a formatted sink error (category 'InvalidParameter') when a single
        record cannot fit into a request body on its own.
    #>
    [CmdletBinding()]
    param (
        [object[]] $Records = @()
    )

    if (-not $Records -or $Records.Count -eq 0) { return @() }

    $envelopeBytes = 2 # the '[' and ']' wrapping every batch
    $maxBytes = $script:MaxIngestionBodyBytes
    $maxRecords = $script:MaxIngestionRecordsPerBatch

    $batches = [System.Collections.Generic.List[object[]]]::new()
    $currentBatch = [System.Collections.Generic.List[object]]::new()
    $currentBytes = $envelopeBytes

    foreach ($record in $Records) {
        $recordJson = $record | ConvertTo-Json -Depth 30 -Compress
        $recordBytes = [System.Text.Encoding]::UTF8.GetByteCount($recordJson)

        # Include the brackets: a record alone in a batch is still sent as
        # '[' + record + ']', so it must fit together with them.
        if (($recordBytes + $envelopeBytes) -gt $maxBytes) {
            throw (New-SinkFindingError `
                -Category 'InvalidParameter' `
                -Reason 'A single record exceeds the Logs Ingestion payload limit.' `
                -Remediation 'Reduce record size before sending to Log Analytics (for example trim oversized fields).' `
                -Details "RecordBytes=$recordBytes; MaxBytes=$($script:MaxIngestionBodyBytes)")
        }

        $separatorBytes = if ($currentBatch.Count -gt 0) { 1 } else { 0 }
        $exceedsCount = ($currentBatch.Count + 1) -gt $maxRecords
        $exceedsBytes = ($currentBytes + $separatorBytes + $recordBytes) -gt $maxBytes

        if (($exceedsCount -or $exceedsBytes) -and $currentBatch.Count -gt 0) {
            # Close the current batch; the record opens the next one, so it
            # needs no leading comma.
            $batches.Add($currentBatch.ToArray())
            $currentBatch = [System.Collections.Generic.List[object]]::new()
            $currentBytes = $envelopeBytes
            $separatorBytes = 0
        }

        $currentBatch.Add($record)
        $currentBytes += $separatorBytes + $recordBytes
    }

    if ($currentBatch.Count -gt 0) {
        $batches.Add($currentBatch.ToArray())
    }

    return $batches.ToArray()
}

function Invoke-LogAnalyticsIngestion {
    <#
    .SYNOPSIS
        Sends records to a Logs Ingestion stream in batches, or captures the batches in dry-run mode.
    .DESCRIPTION
        Validates the endpoint, DCR immutable ID and stream name, splits the
        records with New-LogAnalyticsBatches and posts each batch as a JSON array
        through Invoke-WithRetry. With -DryRun no token is requested and nothing
        is sent; if -DryRunOutputPath is also supplied, the prepared batches are
        written to that file after passing through Remove-Credentials.
    .PARAMETER Records
        The records to send. Empty input results in no requests.
    .PARAMETER DceEndpoint
        Data Collection Endpoint URL; must be https.
    .PARAMETER DcrImmutableId
        Immutable ID of the Data Collection Rule.
    .PARAMETER StreamName
        Name of the DCR stream to post to.
    .PARAMETER DryRun
        Prepare the batches without sending them.
    .PARAMETER DryRunOutputPath
        File that receives the prepared batches in dry-run mode.
    .OUTPUTS
        PSCustomObject with RecordsProcessed, BatchesProcessed, Uri, DryRun and
        DryRunOutputPath.
    .NOTES
        Throws a formatted sink error (category 'InvalidParameter') for blank
        arguments; endpoint, batching, token and request errors propagate.
    #>
    [CmdletBinding()]
    param (
        [object[]] $Records = @(),

        [Parameter(Mandatory)]
        [string] $DceEndpoint,

        [Parameter(Mandatory)]
        [string] $DcrImmutableId,

        [Parameter(Mandatory)]
        [string] $StreamName,

        [switch] $DryRun,

        [string] $DryRunOutputPath
    )

    if ([string]::IsNullOrWhiteSpace($DceEndpoint)) {
        throw (New-SinkFindingError `
            -Category 'InvalidParameter' `
            -Reason 'DceEndpoint is required.' `
            -Remediation 'Provide a non-empty Data Collection Endpoint URL.')
    }
    if ([string]::IsNullOrWhiteSpace($DcrImmutableId)) {
        throw (New-SinkFindingError `
            -Category 'InvalidParameter' `
            -Reason 'DcrImmutableId is required.' `
            -Remediation 'Provide a non-empty Data Collection Rule immutable ID.')
    }
    if ([string]::IsNullOrWhiteSpace($StreamName)) {
        throw (New-SinkFindingError `
            -Category 'InvalidParameter' `
            -Reason 'StreamName is required.' `
            -Remediation 'Provide a non-empty custom stream name from the DCR.')
    }

    Test-LogAnalyticsEndpoint -DceEndpoint $DceEndpoint
    $endpoint = $DceEndpoint.TrimEnd('/')
    $uri = "$endpoint/dataCollectionRules/$DcrImmutableId/streams/${StreamName}?api-version=2023-01-01"

    $batches = @(New-LogAnalyticsBatches -Records $Records)
    if ($batches.Count -eq 0) {
        # Same property set as the regular result so callers running under
        # strict mode can read DryRunOutputPath without a property-not-found error.
        # Nothing is written for empty input, hence the empty path.
        return [PSCustomObject]@{
            RecordsProcessed = 0
            BatchesProcessed = 0
            Uri              = $uri
            DryRun           = [bool]$DryRun
            DryRunOutputPath = ''
        }
    }

    # The token is only needed when requests are actually sent.
    $token = $null
    if (-not $DryRun) {
        $token = Get-LogAnalyticsAccessToken
    }

    $dryRunRows = [System.Collections.Generic.List[object]]::new()
    $batchIndex = 0
    foreach ($batch in $batches) {
        $batchIndex++

        # The request body must always be a JSON array. Piping the batch into
        # ConvertTo-Json would unroll a one-record batch into a bare object, so
        # pass the array through -InputObject instead.
        $body = ConvertTo-Json -InputObject @($batch) -Depth 30 -Compress
        $bodyBytes = [System.Text.Encoding]::UTF8.GetByteCount($body)

        if ($DryRun) {
            $dryRunRows.Add([PSCustomObject]@{
                    BatchIndex  = $batchIndex
                    Uri         = $uri
                    StreamName  = $StreamName
                    RecordCount = @($batch).Count
                    BodyBytes   = $bodyBytes
                    Body        = $body
                }) | Out-Null
            continue
        }

        # A fresh client request id per batch.
        $headers = @{
            Authorization            = "Bearer $token"
            'x-ms-client-request-id' = [guid]::NewGuid().ToString()
        }

        Invoke-WithRetry -ScriptBlock {
            Invoke-RestMethod -Method Post -Uri $uri -Headers $headers -ContentType 'application/json; charset=utf-8' -Body $body -TimeoutSec 300 -ErrorAction Stop | Out-Null
        } | Out-Null
    }

    if ($DryRun -and $DryRunOutputPath) {
        $dryRunJson = $dryRunRows.ToArray() | ConvertTo-Json -Depth 40
        $dryRunJson = Remove-Credentials $dryRunJson
        Set-Content -Path $DryRunOutputPath -Value $dryRunJson -Encoding UTF8
    }

    return [PSCustomObject]@{
        RecordsProcessed = @($Records).Count
        BatchesProcessed = $batches.Count
        Uri              = $uri
        DryRun           = [bool]$DryRun
        DryRunOutputPath = if ($DryRun) { $DryRunOutputPath } else { '' }
    }
}

function Send-FindingsToLogAnalytics {
    <#
    .SYNOPSIS
        Sends the findings contained in an entities.json file to a Logs Ingestion stream.
    .DESCRIPTION
        Reads the entities, collects every non-null observation as a finding,
        converts each with Convert-FindingToLogAnalyticsRecord and hands the
        records to Invoke-LogAnalyticsIngestion. In dry-run mode the batches are
        written to 'log-analytics-findings-dryrun.json' next to the input file.
    .PARAMETER EntitiesJson
        Path to entities.json. A bare file name (no directory part) is accepted;
        the dry-run file then goes to the current location.
    .PARAMETER DceEndpoint
        Data Collection Endpoint URL.
    .PARAMETER DcrImmutableId
        Immutable ID of the Data Collection Rule.
    .PARAMETER StreamName
        Name of the DCR stream that receives findings.
    .PARAMETER DryRun
        Prepare the batches without sending them.
    .OUTPUTS
        The result object returned by Invoke-LogAnalyticsIngestion.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string] $EntitiesJson,

        [Parameter(Mandatory)]
        [string] $DceEndpoint,

        [Parameter(Mandatory)]
        [string] $DcrImmutableId,

        [Parameter(Mandatory)]
        [string] $StreamName,

        [switch] $DryRun
    )

    $entities = @(Read-EntitiesFromJson -EntitiesJson $EntitiesJson)
    $findings = [System.Collections.Generic.List[object]]::new()
    foreach ($entity in $entities) {
        foreach ($obs in @($entity.Observations)) {
            if ($obs) { $findings.Add($obs) | Out-Null }
        }
    }

    $records = @($findings.ToArray() | ForEach-Object { Convert-FindingToLogAnalyticsRecord -Finding $_ })

    # Split-Path -Parent returns an empty string for a bare file name, and
    # Join-Path rejects an empty -Path; fall back to the current location.
    $outputDirectory = Split-Path -Parent $EntitiesJson
    if ([string]::IsNullOrEmpty($outputDirectory)) { $outputDirectory = '.' }
    $dryRunOutputPath = Join-Path $outputDirectory 'log-analytics-findings-dryrun.json'

    return Invoke-LogAnalyticsIngestion -Records $records -DceEndpoint $DceEndpoint -DcrImmutableId $DcrImmutableId -StreamName $StreamName -DryRun:$DryRun -DryRunOutputPath $dryRunOutputPath
}

function Send-EntitiesToLogAnalytics {
    <#
    .SYNOPSIS
        Sends the entities contained in an entities.json file to a Logs Ingestion stream.
    .DESCRIPTION
        Reads the entities, converts each with Convert-EntityToLogAnalyticsRecord
        and hands the records to Invoke-LogAnalyticsIngestion. In dry-run mode the
        batches are written to 'log-analytics-entities-dryrun.json' next to the
        input file.
    .PARAMETER EntitiesJson
        Path to entities.json. A bare file name (no directory part) is accepted;
        the dry-run file then goes to the current location.
    .PARAMETER DceEndpoint
        Data Collection Endpoint URL.
    .PARAMETER DcrImmutableId
        Immutable ID of the Data Collection Rule.
    .PARAMETER StreamName
        Name of the DCR stream that receives entities.
    .PARAMETER DryRun
        Prepare the batches without sending them.
    .OUTPUTS
        The result object returned by Invoke-LogAnalyticsIngestion.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string] $EntitiesJson,

        [Parameter(Mandatory)]
        [string] $DceEndpoint,

        [Parameter(Mandatory)]
        [string] $DcrImmutableId,

        [Parameter(Mandatory)]
        [string] $StreamName,

        [switch] $DryRun
    )

    $entities = @(Read-EntitiesFromJson -EntitiesJson $EntitiesJson)
    $records = @($entities | ForEach-Object { Convert-EntityToLogAnalyticsRecord -Entity $_ })

    # Split-Path -Parent returns an empty string for a bare file name, and
    # Join-Path rejects an empty -Path; fall back to the current location.
    $outputDirectory = Split-Path -Parent $EntitiesJson
    if ([string]::IsNullOrEmpty($outputDirectory)) { $outputDirectory = '.' }
    $dryRunOutputPath = Join-Path $outputDirectory 'log-analytics-entities-dryrun.json'

    return Invoke-LogAnalyticsIngestion -Records $records -DceEndpoint $DceEndpoint -DcrImmutableId $DcrImmutableId -StreamName $StreamName -DryRun:$DryRun -DryRunOutputPath $dryRunOutputPath
}