modules/Azure/Discovery/Public/Start-CIEMAzureDiscovery.ps1

function Start-CIEMAzureDiscovery {
    <#
    .SYNOPSIS
        Runs a full Azure discovery scan collecting ARM resources, Entra entities, permissions, and relationships.

    .DESCRIPTION
        Connects via Connect-CIEMAzure when no connected auth context is present, refuses to start
        while another discovery run has status 'Running', then creates a new run record and:
          - collects and persists ARM resources (Scope 'All' or 'ARM'),
          - collects and persists Entra entities, permissions and relationships (Scope 'All' or 'Entra'),
          - rebuilds effective role assignments and the graph nodes/edges from the stored data
            (performed for every scope).
        Individual collector failures are recorded as warnings on the run; any other error marks
        the run as 'Failed' and is rethrown to the caller.

    .PARAMETER Scope
        Which sources to collect: 'All' (default), 'ARM', or 'Entra'.

    .OUTPUTS
        CIEMAzureDiscoveryRun. The updated run record, whose status is 'Completed', 'Partial'
        (rows were collected but at least one warning occurred) or 'Failed' (no rows collected).
    #>

    [CmdletBinding()]
    [OutputType('CIEMAzureDiscoveryRun')]
    param(
        [Parameter()]
        [ValidateSet('All', 'ARM', 'Entra')]
        [string]$Scope = 'All'
    )

    # Treat every error in this function as terminating so the try/catch blocks below see it.
    $ErrorActionPreference = 'Stop'

    function Write-PhaseLog {
        param(
            [Parameter(Mandatory)]
            [string]$PhaseName,
            [Parameter(Mandatory)]
            [System.Diagnostics.Stopwatch]$Stopwatch,
            [Parameter()]
            [string]$Detail
        )

        $Stopwatch.Stop()
        $message = "Phase $PhaseName completed in $([math]::Round($Stopwatch.Elapsed.TotalSeconds, 2))s"
        if ($Detail) {
            $message += " - $Detail"
        }
        Write-CIEMLog -Message $message -Component 'Discovery'
    }

    function Save-ResourceTypes {
        param(
            [Parameter(Mandatory)]
            [AllowEmptyCollection()]
            [object[]]$Resources,
            [Parameter(Mandatory)]
            [object]$Connection,
            [Parameter(Mandatory)]
            [string]$DiscoveredAt
        )

        foreach ($typeGroup in @($Resources | Group-Object Type)) {
            if (-not $typeGroup.Name) {
                continue
            }

            $apiSource = if ($typeGroup.Name -match '^microsoft\.') { 'ResourceGraph' } else { 'Graph' }
            $graphTable = if ($typeGroup.Name -match '^microsoft\.resources/') {
                'ResourceContainers'
            }
            elseif ($typeGroup.Name -match '^microsoft\.authorization/') {
                'AuthorizationResources'
            }
            elseif ($apiSource -eq 'Graph') {
                $null
            }
            else {
                'Resources'
            }

            SaveCIEMAzureResourceType -Type $typeGroup.Name -ApiSource $apiSource -GraphTable $graphTable -ResourceCount $typeGroup.Count -DiscoveredAt $DiscoveredAt -Connection $Connection
        }
    }

    if (-not $script:AzureAuthContext -or -not $script:AzureAuthContext.IsConnected) {
        Write-CIEMLog "Start-CIEMAzureDiscovery: No auth context, calling Connect-CIEMAzure..." -Severity INFO -Component 'Discovery'
        Connect-CIEMAzure | Out-Null
    }

    $runningRuns = @(Get-CIEMAzureDiscoveryRun -Status 'Running')
    if ($runningRuns.Count -gt 0) {
        throw "A discovery run is already in progress (Id=$($runningRuns[0].Id)). Wait for it to complete or clear stale runs."
    }

    $run = New-CIEMAzureDiscoveryRun -Scope $Scope -Status 'Running' -StartedAt (Get-Date).ToString('o')
    Write-CIEMLog "Start-CIEMAzureDiscovery: run #$($run.Id) started, Scope=$Scope" -Severity INFO -Component 'Discovery'

    $warningCount = 0
    $errorMessages = [System.Collections.Generic.List[string]]::new()
    $runStart = [DateTimeOffset]::UtcNow.ToUnixTimeSeconds()
    $subscriptionIds = @($script:AzureAuthContext.SubscriptionIds)
    $armDiscoverySucceeded = $Scope -eq 'Entra'
    $entraDiscoverySucceeded = $Scope -eq 'ARM'
    $relationshipsSucceeded = $true
    $armRowCount = 0
    $entraRowCount = 0
    $armTypeCount = 0
    $entraTypeCount = 0
    $relationshipCount = 0

    try {
        if ($Scope -eq 'All' -or $Scope -eq 'ARM') {
            if ($subscriptionIds.Count -eq 0) {
                throw 'Start-CIEMAzureDiscovery requires at least one subscription ID for ARM discovery.'
            }

            $armDiscoverySucceeded = $true
            $armResources = [System.Collections.Generic.List[object]]::new()
            $armCollectTimer = [Diagnostics.Stopwatch]::StartNew()
            Write-Progress -Activity 'Azure Discovery' -Status 'Collecting ARM resources' -PercentComplete 10

            foreach ($table in @('Resources', 'ResourceContainers', 'AuthorizationResources')) {
                try {
                    $results = @(InvokeCIEMResourceGraphQuery -Query $table -SubscriptionId $subscriptionIds)
                    $armResources.AddRange($results)
                    Write-CIEMLog "ResourceGraph/${table}: $($results.Count) rows" -Component 'Discovery'
                }
                catch {
                    $armDiscoverySucceeded = $false
                    $warningCount++
                    $message = "ResourceGraph/${table} failed: $($_.Exception.Message)"
                    $errorMessages.Add($message)
                    Write-Warning $message
                }
            }

            try {
                $builtInRoles = @(GetCIEMBuiltInRoleDefinitions)
                $armResources.AddRange($builtInRoles)
                Write-CIEMLog "BuiltInRoleDefinitions: $($builtInRoles.Count) rows" -Component 'Discovery'
            }
            catch {
                $armDiscoverySucceeded = $false
                $warningCount++
                $message = "BuiltInRoleDefinitions failed: $($_.Exception.Message)"
                $errorMessages.Add($message)
                Write-Warning $message
            }

            $armRowCount = $armResources.Count
            $armTypeCount = (@($armResources | Group-Object Type)).Count
            Write-PhaseLog -PhaseName 'ARM collection' -Stopwatch $armCollectTimer -Detail "$armRowCount rows"

            $armPersistTimer = [Diagnostics.Stopwatch]::StartNew()
            InvokeCIEMTransaction {
                param($conn)

                if ($armResources.Count -gt 0) {
                    foreach ($resource in $armResources) {
                        $resource.LastSeenAt = $runStart
                    }

                    Save-CIEMAzureArmResource -InputObject $armResources -Connection $conn
                    Save-ResourceTypes -Resources $armResources -Connection $conn -DiscoveredAt (Get-Date).ToString('o')
                }

                if ($armDiscoverySucceeded) {
                    Invoke-PSUSQLiteQuery -Connection $conn -Query 'DELETE FROM azure_arm_resources WHERE last_seen_at < @last_seen_at' -Parameters @{ last_seen_at = $runStart } -AsNonQuery | Out-Null
                }
            }
            Write-PhaseLog -PhaseName 'ARM persistence' -Stopwatch $armPersistTimer -Detail "$armRowCount rows"
        }

        $entraResources = [System.Collections.Generic.List[object]]::new()
        $entraPermissions = [System.Collections.Generic.List[object]]::new()
        $relationships = [System.Collections.Generic.List[object]]::new()

        if ($Scope -eq 'All' -or $Scope -eq 'Entra') {
            $entraDiscoverySucceeded = $true

            $entityCollectTimer = [Diagnostics.Stopwatch]::StartNew()
            Write-Progress -Activity 'Azure Discovery' -Status 'Collecting Entra entities' -PercentComplete 55
            try {
                $entities = @(InvokeCIEMEntraEntityCollection)
                $entraResources.AddRange($entities)
                Write-CIEMLog "Entra entities: $($entities.Count) rows" -Component 'Discovery'
            }
            catch {
                $entraDiscoverySucceeded = $false
                $warningCount++
                $message = "EntraEntityCollection failed: $($_.Exception.Message)"
                $errorMessages.Add($message)
                Write-Warning $message
            }
            Write-PhaseLog -PhaseName 'Entra entity collection' -Stopwatch $entityCollectTimer -Detail "$($entraResources.Count) rows"

            $collectedServicePrincipals = @($entraResources | Where-Object { $_.Type -eq 'servicePrincipal' })
            if ($collectedServicePrincipals.Count -gt 0) {
                $permissionCollectTimer = [Diagnostics.Stopwatch]::StartNew()
                try {
                    $permissions = @(InvokeCIEMEntraPermissionCollection -ServicePrincipals $collectedServicePrincipals)
                    $entraPermissions.AddRange($permissions)
                    Write-CIEMLog "Entra permissions: $($permissions.Count) rows" -Component 'Discovery'
                }
                catch {
                    $entraDiscoverySucceeded = $false
                    $warningCount++
                    $message = "EntraPermissionCollection failed: $($_.Exception.Message)"
                    $errorMessages.Add($message)
                    Write-Warning $message
                }
                Write-PhaseLog -PhaseName 'Entra permission collection' -Stopwatch $permissionCollectTimer -Detail "$($entraPermissions.Count) rows"
            }

            $entraRowCount = $entraResources.Count + $entraPermissions.Count
            $entraTypeCount = (@((@($entraResources) + @($entraPermissions)) | Group-Object Type)).Count

            $entraPersistTimer = [Diagnostics.Stopwatch]::StartNew()
            InvokeCIEMTransaction {
                param($conn)

                if ($entraResources.Count -gt 0) {
                    foreach ($resource in $entraResources) {
                        $resource.LastSeenAt = $runStart
                    }
                    Save-CIEMAzureEntraResource -InputObject $entraResources -Connection $conn
                }

                if ($entraPermissions.Count -gt 0) {
                    foreach ($resource in $entraPermissions) {
                        $resource.LastSeenAt = $runStart
                    }
                    Save-CIEMAzureEntraResource -InputObject $entraPermissions -Connection $conn
                }

                Save-ResourceTypes -Resources (@($entraResources) + @($entraPermissions)) -Connection $conn -DiscoveredAt (Get-Date).ToString('o')

                if ($entraDiscoverySucceeded) {
                    Invoke-PSUSQLiteQuery -Connection $conn -Query 'DELETE FROM azure_entra_resources WHERE last_seen_at < @last_seen_at' -Parameters @{ last_seen_at = $runStart } -AsNonQuery | Out-Null
                }
            }
            Write-PhaseLog -PhaseName 'Entra persistence' -Stopwatch $entraPersistTimer -Detail "$entraRowCount rows"

            $collectedGroups = @($entraResources | Where-Object { $_.Type -eq 'group' })
            $collectedRoles = @($entraResources | Where-Object { $_.Type -eq 'directoryRole' })
            $collectedUsers = @($entraResources | Where-Object { $_.Type -eq 'user' })

            if ($collectedGroups.Count -gt 0 -or $collectedRoles.Count -gt 0 -or $collectedUsers.Count -gt 0) {
                $relationshipCollectTimer = [Diagnostics.Stopwatch]::StartNew()
                try {
                    $relationships.AddRange(@(
                        InvokeCIEMEntraRelationshipCollection -Groups $collectedGroups -DirectoryRoles $collectedRoles -Users $collectedUsers
                    ))
                    $relationshipCount = $relationships.Count
                    Write-CIEMLog "Entra relationships: $relationshipCount rows" -Component 'Discovery'
                }
                catch {
                    $relationshipsSucceeded = $false
                    $warningCount++
                    $message = "EntraRelationshipCollection failed: $($_.Exception.Message)"
                    $errorMessages.Add($message)
                    Write-Warning $message
                }
                Write-PhaseLog -PhaseName 'Entra relationship collection' -Stopwatch $relationshipCollectTimer -Detail "$relationshipCount rows"
            }
        }

        if ($relationshipsSucceeded -and $relationships.Count -gt 0) {
            $relationshipPersistTimer = [Diagnostics.Stopwatch]::StartNew()
            InvokeCIEMTransaction {
                param($conn)
                Remove-CIEMAzureResourceRelationship -All -Connection $conn -Confirm:$false
                Save-CIEMAzureResourceRelationship -InputObject $relationships -Connection $conn
            }
            Write-PhaseLog -PhaseName 'Relationship persistence' -Stopwatch $relationshipPersistTimer -Detail "$relationshipCount rows"
        }

        $loadTimer = [Diagnostics.Stopwatch]::StartNew()
        $allArmResources = @(Get-CIEMAzureArmResource)
        $allEntraResources = @(Get-CIEMAzureEntraResource)
        $allRelationships = @(Get-CIEMAzureResourceRelationship)
        Write-PhaseLog -PhaseName 'Build data load' -Stopwatch $loadTimer -Detail "$($allArmResources.Count) ARM, $($allEntraResources.Count) Entra, $($allRelationships.Count) relationships"

        $eraTimer = [Diagnostics.Stopwatch]::StartNew()
        InvokeCIEMTransaction {
            param($conn)
            Remove-CIEMAzureEffectiveRoleAssignment -All -Confirm:$false -Connection $conn
            $eraCount = InvokeCIEMAzureEffectiveRoleAssignmentBuild -ArmResources $allArmResources -EntraResources $allEntraResources -Relationships $allRelationships -Connection $conn -ComputedAt (Get-Date).ToString('o')
            Write-CIEMLog "EffectiveRoleAssignments: $eraCount rows inserted" -Component 'Discovery'
        }
        Write-PhaseLog -PhaseName 'ERA build' -Stopwatch $eraTimer

        $graphTimer = [Diagnostics.Stopwatch]::StartNew()
        InvokeCIEMTransaction {
            param($conn)
            Remove-CIEMGraphEdge -All -Confirm:$false -Connection $conn
            Remove-CIEMGraphNode -All -Confirm:$false -Connection $conn

            $collectedAt = (Get-Date).ToString('o')
            $nodeCount = InvokeCIEMGraphNodeBuild -ArmResources $allArmResources -EntraResources $allEntraResources -Connection $conn -CollectedAt $collectedAt
            Write-CIEMLog "GraphNodes: $nodeCount nodes created" -Component 'Discovery'

            $collectedEdgeCount = InvokeCIEMGraphEdgeBuild -Relationships $allRelationships -Connection $conn -CollectedAt $collectedAt
            Write-CIEMLog "GraphEdges: $collectedEdgeCount collected edges created" -Component 'Discovery'

            $computedEdgeCount = InvokeCIEMGraphComputedEdgeBuild -ArmResources $allArmResources -EntraResources $allEntraResources -Relationships $allRelationships -Connection $conn -CollectedAt $collectedAt
            Write-CIEMLog "GraphEdges: $computedEdgeCount computed edges created" -Component 'Discovery'
        }
        Write-PhaseLog -PhaseName 'Graph build' -Stopwatch $graphTimer

        $allArmResources = $null
        $allEntraResources = $null
        $allRelationships = $null

        $totalCollected = $armRowCount + $entraRowCount
        $finalStatus = if ($warningCount -gt 0 -and $totalCollected -gt 0) {
            'Partial'
        }
        elseif ($totalCollected -eq 0) {
            'Failed'
        }
        else {
            'Completed'
        }

        $run = Update-CIEMAzureDiscoveryRun -Id $run.Id `
            -Status $finalStatus `
            -CompletedAt (Get-Date).ToString('o') `
            -ArmTypeCount $armTypeCount `
            -ArmRowCount $armRowCount `
            -EntraTypeCount $entraTypeCount `
            -EntraRowCount $entraRowCount `
            -WarningCount $warningCount `
            -ErrorMessage ($errorMessages -join '; ') `
            -PassThru

        Write-CIEMLog "Discovery run #$($run.Id) finished: Status=$finalStatus, ARM=$armRowCount, Entra=$entraRowCount, Relationships=$relationshipCount, Warnings=$warningCount" -Severity INFO -Component 'Discovery'
        Write-Progress -Activity 'Azure Discovery' -Completed
        $run
    }
    catch {
        $errorMessage = $_.Exception.Message
        Write-CIEMLog "Discovery run #$($run.Id) FAILED: $errorMessage" -Severity ERROR -Component 'Discovery'
        Update-CIEMAzureDiscoveryRun -Id $run.Id -Status 'Failed' -CompletedAt (Get-Date).ToString('o') -ErrorMessage $errorMessage | Out-Null
        throw
    }
}