Scripts/pipeline.ps1

<#
.SYNOPSIS
Pipeline file used to manage notebooks
.DESCRIPTION
Files used to push and pull notebooks from a local directory.
.PARAMETER environment
mandatory. name of one of the files stored in config/stages
.PARAMETER email
optional. email address filed under 'users' on workspace
If not specified, the email is fetched via an LDAP query, so you need to be connected to the VPN for it to be fetched.
The email is stored as an environment variable, so once fetched you can disconnect from the VPN and re-run without specifying it.
.PARAMETER branch
optional. name of the git branch being worked against
.PARAMETER pullNoteBooksFromWorkspace
optional. Downloads files from value of 'dataBricksPath' in config file to value of 'localoutputpath'.
.PARAMETER pushNoteBooksToWorkspace
optional. Publishes files to value of 'dataBricksPath' in config file from value of 'localoutputpath'.
.PARAMETER configFile
optional. Sets location of config file
.PARAMETER jobsFolder
optional. Sets location of folder that contains jobs to be deployed to environment.
.PARAMETER BearerToken
optional. Your Databricks Bearer token to authenticate to your workspace (see User Settings in Databricks WebUI)
.PARAMETER clearDataBricksPath
optional. Will remove all contents of the root of the dataBricksPath prior to pushing the local notebooks. Only works with pushNoteBooksToWorkspace
.PARAMETER clearLocalPath
optional. Will remove all contents of the root of the LocalPath prior to pulling the notebooks. Only works with pullNoteBooksFromWorkspace
.PARAMETER PullJobsFromWorkspace
optional. Will pull jobs from workspace and store them in the Jobs folder, which is set by parameter $jobsFolder.
Jobs are identified by their jobId. Store the ints as an array in the config, e.g. "JobIds" : [123, 345, 4356]. These jobIds are environment-specific.
NOTE - only the settings are saved.
.PARAMETER PushJobsToWorkspace
optional. Will push all the jobs that are stored in the environment-based $jobsFolder.
.PARAMETER returnJobDetails
optional. Will return the details of the job when publishing them
.EXAMPLE
Will overwrite branch name in config file and push Notebooks to directory.
& ".\adb.cicd.tools\pipeline.ps1" -environment localdev -branch fakebranch -pushNoteBooksToWorkspace
 
Will overwrite branch name in config file and pull Notebooks from Workspace.
& ".\adb.cicd.tools\pipeline.ps1" -environment localdev -branch fakebranch -pullNoteBooksFromWorkspace
 
Will pull notebooks from default dataBricksPath
& ".\adb.cicd.tools\pipeline.ps1" -environment localdev -pullNoteBooksFromWorkspace
 
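Will run notebook tests (NOTE: -runNotebookTests is not declared in this script's param block - confirm it is still supported before using this example)
& ".\adb.cicd.tools\pipeline.ps1" -environment localdev -pullNoteBooksFromWorkspace -runNotebookTests -email richie.lee@effem.com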
 
get jobs
& ".\adb.cicd.tools\pipeline.ps1" -environment localdev -PullJobsFromWorkspace -email richie.lee@effem.com
 
useful commands I tend to run
 
& ".\adb.cicd.tools\pipeline.ps1" -environment localdev -email firstname.lastname@effem.com -pushNoteBooksToWorkspace
 
& ".\adb.cicd.tools\pipeline.ps1" -environment localdev -email firstname.lastname@effem.com -PullJobsFromWorkspace
 
.NOTES
Author: Richie Lee
#>

#Requires -Version 5.1
param(
    # Mandatory. Name of the stage; used to build the default -configFile and -jobsFolder paths.
    # The value 'localdev' additionally enables the branch/email overrides and -setSecrets.
    [Parameter(Mandatory = $true)]
    [string] $environment,
    # Passed to Set-LocalDevEmail, and only when -environment is 'localdev'.
    [string] $email,
    # Passed to Set-LocalDevBranch, and only when -environment is 'localdev'.
    [string] $branch,
    # [string] $notebookConfigEnvironment,
    # Export notebooks from config.dataBricksPath into the local notebooks folder.
    [switch] $pullNoteBooksFromWorkspace,
    # Import notebooks from the local notebooks folder into config.dataBricksPath.
    [switch] $PushNoteBooksToWorkspace,
    # JSON settings file that is loaded into $config.
    [string] $configFile = "$PSScriptroot\config\stages\$environment\$environment.json",
    # Folder written to by -PullJobsFromWorkspace and read by -PushJobsToWorkspace.
    [string] $jobsFolder = "$PSScriptroot\config\stages\$environment\jobs\",
    # When empty, the token is read from .\myBearerToken.txt instead.
    [string] $bearerToken,
   
    # With -PushNoteBooksToWorkspace: remove config.dataBricksPath recursively before importing.
    # Mutually exclusive with config.deleteOnWorkspaceIfNotInLocal = true.
    [switch] $clearDataBricksPath,
    # With -pullNoteBooksFromWorkspace: delete the contents of the local notebooks folder before exporting.
    # Mutually exclusive with config.deleteLocallyIfNotInWorkspace = true.
    [switch] $clearLocalPath,
    # Call Export-WorkspaceJobs for config.JobIds, writing to -jobsFolder.
    [switch] $PullJobsFromWorkspace,
    # Call Import-WorkspaceJobs for each file found under -jobsFolder.
    [switch] $PushJobsToWorkspace,
    # Forwarded as -returnJobDetails to Import-WorkspaceNotebooks during -PushNoteBooksToWorkspace.
    [switch] $returnJobDetails,
    # Call Get-SecretScopes and write its result to the output stream.
    [switch] $getSecretScopes,
    # When -environment is 'localdev': read .\secrets.csv and call Set-DatabricksSecret for each row.
    # Has no effect for any other environment.
    [switch] $setSecrets
)
# Treat every error, including normally non-terminating ones, as fatal for the rest of the script.
$ErrorActionPreference = "Stop"

# Load the stage's JSON settings file into an object.
[PSCustomObject]$config = Get-Content -Raw -Path $configFile -Encoding UTF8 | ConvertFrom-Json

# The local notebooks folder is config.localOutputPath, taken relative to the parent of this script's folder.
$localoutputPath = Split-Path -Path $PSScriptroot -Parent
$localoutputPath = Join-Path $localoutputPath $config.localOutputPath
if (-not (Test-Path $localoutputPath)) {
    # Progress goes to the host (like the "resolved to" message below), not into the script's output.
    Write-Host "Creating directory $localoutputPath"
    # New-Item returns the created DirectoryInfo; discard it so it does not leak into the script's output.
    New-Item -Path $localoutputPath -ItemType Directory -Force | Out-Null
}
$localoutputPath = Resolve-Path $localoutputPath -ErrorAction Stop
Write-Host "Local notebooks folder resolved to $localoutputPath"
if ($null -eq $localoutputPath) {
    # A single throw is enough: under ErrorActionPreference = Stop the former Write-Error already
    # terminated the script, so the bare Throw that followed it could never run.
    throw "variable localoutputpath still set to null somehow"
}

# Only for the 'localdev' stage: run the loaded config through the two local-dev helpers.
# NOTE(review): presumably these substitute the __branch__ / __email__ placeholders seen in the
# default dataBricksPath - confirm against Set-LocalDevBranch / Set-LocalDevEmail.
if ('localdev' -eq $environment) {
    $config = Set-LocalDevBranch -config $config -branch $branch
    $config = Set-LocalDevEmail -config $config -email $email
}

# Resolve the bearer token: an explicit -bearerToken wins; otherwise fall back to
# .\myBearerToken.txt, which is resolved against the current working directory.
if ([string]::IsNullOrEmpty($bearerToken)) {
    $myBearerTokenFile = ".\myBearerToken.txt"
    if (-not (Test-Path $myBearerTokenFile -PathType Leaf)) {
        throw "$myBearerTokenFile does not exist. Create and add a bearerToken from workspace to file."
    }
    # -Raw returns the whole file including its trailing newline (and $null for an empty file).
    # Cast and trim so stray CR/LF or spaces never end up inside the token value.
    $bearerToken = ([string](Get-Content -Path $myBearerTokenFile -Raw)).Trim()
    if ([string]::IsNullOrEmpty($bearerToken)) {
        # Fail here rather than with an opaque authentication error on the first workspace call.
        throw "$myBearerTokenFile is empty. Add a bearerToken from workspace to file."
    }
}

# Warn when dataBricksPath is still the unsubstituted template value.
$templateDataBricksPath = '/Users/__email__/__branch__'
if ($config.dataBricksPath -eq $templateDataBricksPath) {
    Write-Warning "dataBricksPath in config is set to default dataBricksPath - this may be incorrect and could result in overwriting other users work!"
}

# Show the effective configuration before any action is taken.
$config | Format-List

# -getSecretScopes: query the workspace and emit whatever Get-SecretScopes returns.
if ($getSecretScopes) {
    $scopeQuery = @{
        BearerToken = $bearerToken
        Config      = $config
    }
    $workspaceScopes = Get-SecretScopes @scopeQuery
    $workspaceScopes
}

# -setSecrets only acts on the 'localdev' stage; for any other environment it is a no-op.
if ($setSecrets -and ($environment -eq 'localdev')) {
    # .\secrets.csv is resolved against the current working directory.
    # The rows' scopeName, SecretName and SecretValue columns are read below.
    $databricksSecrets = Import-Csv -Path ".\secrets.csv"
    foreach ($secretRow in $databricksSecrets) {
        $secretArgs = @{
            BearerToken   = $bearerToken
            region        = $config.region
            ScopeName     = $secretRow.scopeName
            SecretName    = $secretRow.SecretName
            SecretValue   = $secretRow.SecretValue
            AllUserAccess = $true
        }
        Set-DatabricksSecret @secretArgs
    }
}

# -pullNoteBooksFromWorkspace: optionally clean the local notebooks folder, then export
# config.dataBricksPath from the workspace into it.
if ($pullNotebooksFromWorkspace) {
    if (($config.deleteLocallyIfNotInWorkspace -eq $true) -and ($clearLocalPath)) {
        # The two clean-up strategies cannot be combined; abort before touching anything.
        throw "deleteLocallyIfNotInWorkspace in config set to true and switch clearLocalPath is enabled. Options are mutually exclusive!"
    }
    elseif ($config.deleteLocallyIfNotInWorkspace -eq $true) {
        Write-Host "Delete notebooks locally if they do not exist on Workspace."
        Remove-LocalNotebooks -localPath $localoutputPath -config $config
    }
    elseif ($clearLocalPath) {
        # Remove each top-level entry together with everything beneath it.
        # The previous per-item $_.Delete() over a -Recurse listing reached directories before
        # their children, and DirectoryInfo.Delete() refuses to delete a non-empty directory,
        # so any populated sub-folder aborted the run.
        # Hidden top-level entries are still left alone, as before (no -Force on Get-ChildItem).
        Get-ChildItem -Path $localoutputPath | Remove-Item -Recurse -Force
    }
    #download from notebooks
    Export-WorkspaceNotebooks -region $config.region -localOutputPath $localOutputPath -exportPath $config.dataBricksPath -bearerToken $bearerToken
}

# -PushNoteBooksToWorkspace: optionally clean config.dataBricksPath on the workspace, then
# import the local notebooks folder into it.
if ($PushNotebooksToWorkspace) {
    # Connection details shared by every workspace call in this section.
    $workspaceArgs = @{
        region      = $config.region
        bearerToken = $bearerToken
    }
    $mirrorLocalToWorkspace = ($config.deleteOnWorkspaceIfNotInLocal -eq $true)

    if ($mirrorLocalToWorkspace -and $clearDataBricksPath) {
        # The two clean-up strategies cannot be combined.
        Write-Error "deleteOnWorkspaceIfNotInLocal in config set to true and switch clearDataBricksPath is enabled. Options are mutually exclusive!"
        Throw
    }
    elseif ($mirrorLocalToWorkspace) {
        Write-Host "Delete notebooks in workspace if they do not exist locally."
        $folderContents = Get-WorkspaceFolderContents @workspaceArgs -Path $config.dataBricksPath
        Remove-WorkspaceNotebooks @workspaceArgs -FolderContents $folderContents -localPath $localoutputPath -Path $config.dataBricksPath
    }
    elseif ($clearDataBricksPath) {
        Write-Verbose "Clearing out $($config.dataBricksPath) before uploading new version."
        Remove-DatabricksNotebook @workspaceArgs -Path $config.dataBricksPath -Recursive
    }

    #publish to workspace
    $importArgs = @{
        localPath      = $localOutputPath
        dataBricksPath = $config.dataBricksPath
    }
    if ($returnJobDetails) {
        # Add the switch only when the caller asked for it, so it is otherwise left unbound.
        $importArgs['returnJobDetails'] = $true
    }
    Import-WorkspaceNotebooks @workspaceArgs @importArgs

}

# -PullJobsFromWorkspace: export the jobs listed in config.JobIds into the jobs folder.
if ($PullJobsFromWorkspace) {
    $exportJobsArgs = @{
        config          = $config
        bearerToken     = $bearerToken
        jobIds          = $config.JobIds
        LocalOutputPath = $jobsFolder
    }
    Export-WorkspaceJobs @exportJobsArgs
}

if ($PushJobsToWorkspace) {
    #jobs
    #loop through any jobs that are stored in the jobsFolder and publish
    #if the job already exists it will update the job on the workspace
    #libraries do not have to exist for them to be included, just pointed to the right location

    # -File: a recursive listing also returns the sub-directories themselves, and Get-Content
    # cannot read a directory, so only files are taken.
    $jobFiles = Get-ChildItem -Path $jobsFolder -Recurse -File
    foreach ($jobFile in $jobFiles) {
        Write-Verbose "Working on $($jobFile.FullName)..."
        # -Raw hands ConvertFrom-Json the document as one string instead of an array of lines.
        # -LiteralPath stops wildcard characters such as [ ] in a file name from being interpreted.
        $jobSettings = Get-Content -LiteralPath $jobFile.FullName -Raw | ConvertFrom-Json
        Import-WorkspaceJobs -config $config -jobSettings $jobSettings -BearerToken $bearerToken
    }
}