
# TODO: need login?
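# NOTE: This function is called as a script block by Start-ThreadJob and thus runs in its own
# runspace: it must be self-contained and must not call other functions defined in this file.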
function Add-AcmVm {
  # Registers a single Azure VM with ACM:
  #   1. enables the system-assigned identity (MSI) if it is not enabled yet,
  #   2. grants that identity 'Reader' on the VM's resource group and
  #      'Storage Account Contributor' on the given storage account,
  #   3. installs the HpcAcmAgent VM extension (optionally reusing an existing one).
  #
  # Parameters:
  #   vm                 - VM object; its Name, ResourceGroupName, Location, Identity,
  #                        Extensions and OSProfile properties are read.
  #   storageAccountName - name of the storage account the VM gets access to.
  #   storageAccountRG   - resource group of that storage account.
  #   useExistingAgent   - when $true, keep an already installed HpcAcmAgent extension;
  #                        otherwise try to remove it and install again.
  param(
    [Parameter(Mandatory = $true)]
    [object] $vm,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountName,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountRG,

    # NOTE: [switch] type can't be passed in an argument list, which is required by Start-ThreadJob.
    [bool] $useExistingAgent = $false
  )

  Write-Host "Enable MSI for VM $($vm.Name)"
  if ($null -eq $vm.Identity -or !($vm.Identity.Type -contains "SystemAssigned")) {
    Write-Host "Executing for VM $($vm.Name)"
    Update-AzVM -ResourceGroupName $vm.ResourceGroupName -VM $vm -IdentityType "SystemAssigned"
  }
  else {
    Write-Host "The VM $($vm.Name) already has an System Assigned Identity"
  }

  # Update $vm for new $vm.Identity.PrincipalId
  $vm = Get-AzVM -Name $vm.Name -ResourceGroupName $vm.ResourceGroupName

  Write-Host "Add role 'reader' to VM $($vm.Name)"
  try {
    New-AzRoleAssignment -ObjectId $vm.Identity.PrincipalId -RoleDefinitionName "Reader" -ResourceGroupName $vm.ResourceGroupName
  }
  catch {
    # -like with wildcards is a substring match on the error text. The previous -contains
    # is a collection-membership test and never matched, so this branch was unreachable.
    if ("$_" -like '*already exists*') {
      Write-Host "The VM $($vm.Name) already has role 'Reader' on resouce group $($vm.ResourceGroupName)"
    }
    else {
      # NOTE(review): this branch was empty in the reviewed source; rethrowing is assumed.
      throw
    }
  }

  Write-Host "Add role 'Storage Account Contributor' to VM $($vm.Name)"
  try {
    New-AzRoleAssignment -ObjectId $vm.Identity.PrincipalId -RoleDefinitionName "Storage Account Contributor" -ResourceName $storageAccountName -ResourceType "Microsoft.Storage/storageAccounts" -ResourceGroupName $storageAccountRG
  }
  catch {
    Write-Host $_
    if ("$_" -like '*already exists*') {
      Write-Host "The VM $($vm.Name) already has role 'Storage Account Contributor' on storage account $($storageAccountName)"
    }
    else {
      # NOTE(review): this branch was empty in the reviewed source; rethrowing is assumed.
      throw
    }
  }

  Write-Host "Install HpcAcmAgent for VM $($vm.Name)"
  $hasExistingAgent = $false
  if ($useExistingAgent) {
    $extensions = $vm.Extensions
    if ($extensions) {
      foreach ($extension in $extensions) {
        if ($extension.Id -like '*/extensions/HpcAcmAgent') {
          $hasExistingAgent = $true
          break
        }
      }
    }
    Write-Host "VM $($vm.Name) has existing agent: $($hasExistingAgent)"
  }
  else {
    Write-Host "Try to remove existing agent from VM $($vm.Name)"
    try {
      Remove-AzVMExtension -ResourceGroupName $vm.ResourceGroupName -VMName $vm.Name -Name "HpcAcmAgent" -Force
    }
    catch {
      # Best effort: a failure to remove is ignored and installation is attempted anyway.
    }
  }

  if (!$hasExistingAgent) {
    if ($vm.OSProfile.LinuxConfiguration) {
      $extensionType = "HpcAcmAgent"
    }
    else {
      # Suppose there're only Linux and Windows
      $extensionType = "HpcAcmAgentWin"
    }
    Set-AzVMExtension -Publisher "Microsoft.HpcPack" -ExtensionType $extensionType -ResourceGroupName $vm.ResourceGroupName `
      -TypeHandlerVersion 1.0 -VMName $vm.Name -Location $vm.Location -Name "HpcAcmAgent"
  }
}

function Remove-AcmVm {
  # Unregisters a single Azure VM from ACM, undoing Add-AcmVm in reverse order:
  # removes the HpcAcmAgent extension, removes the two role assignments of the VM's
  # identity, then disables the system-assigned identity. Every step is best effort:
  # a failure is logged with Write-Host and the next step still runs.
  #
  # Parameters:
  #   vm                 - VM object; Name, ResourceGroupName and Identity are read.
  #   storageAccountName - storage account the role assignment was made on.
  #   storageAccountRG   - resource group of that storage account.
  param(
    [Parameter(Mandatory = $true)]
    [object] $vm,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountName,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountRG
  )

  Write-Host "Uninstall HpcAcmAgent for VM $($vm.Name)"
  try {
    Remove-AzVMExtension -ResourceGroupName $vm.ResourceGroupName -VMName $vm.Name -Name "HpcAcmAgent" -Force
  }
  catch {
    Write-Host "Caught exception: $($_)"
  }

  Write-Host "Remove role 'Storage Account Contributor' from VM $($vm.Name)"
  try {
    Remove-AzRoleAssignment -ObjectId $vm.Identity.PrincipalId -RoleDefinitionName "Storage Account Contributor" -ResourceName $storageAccountName -ResourceType "Microsoft.Storage/storageAccounts" -ResourceGroupName $storageAccountRG
  }
  catch {
    Write-Host "Caught exception: $($_)"
  }

  Write-Host "Remove role 'reader' from VM $($vm.Name)"
  try {
    Remove-AzRoleAssignment -ObjectId $vm.Identity.PrincipalId -RoleDefinitionName "Reader" -ResourceGroupName $vm.ResourceGroupName
  }
  catch {
    Write-Host "Caught exception: $($_)"
  }

  Write-Host "Disable MSI for VM $($vm.Name)"
  if ($vm.Identity -and $vm.Identity.Type -contains "SystemAssigned") {
    try {
      Update-AzVM -ResourceGroupName $vm.ResourceGroupName -VM $vm -IdentityType "None"
    }
    catch {
      Write-Host "Caught exception: $($_)"
    }
  }
}

function Add-AcmVmScaleSet {
  # Registers a single Azure VM scale set with ACM:
  #   1. enables the system-assigned identity (MSI) if it is not enabled yet,
  #   2. grants that identity 'Reader' on the scale set's resource group and
  #      'Storage Account Contributor' on the given storage account,
  #   3. adds the HpcAcmAgent extension to the scale set model and rolls the model
  #      out to all instances (optionally reusing an existing extension).
  #
  # Parameters:
  #   vmss               - scale set object; Name, ResourceGroupName, Identity and
  #                        VirtualMachineProfile are read.
  #   storageAccountName - name of the storage account the scale set gets access to.
  #   storageAccountRG   - resource group of that storage account.
  #   useExistingAgent   - when $true, keep an already configured HpcAcmAgent extension;
  #                        otherwise try to remove it and add it again.
  param(
    [Parameter(Mandatory = $true)]
    [object] $vmss,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountName,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountRG,

    # NOTE: [switch] type can't be passed in an argument list, which is required by Start-ThreadJob.
    [bool] $useExistingAgent = $false
  )

  Write-Host "Enable MSI for VM Scale Set $($vmss.Name)"
  if ($null -eq $vmss.Identity -or !($vmss.Identity.Type -contains "SystemAssigned")) {
    Write-Host "Executing for VMSS $($vmss.Name)"
    Update-AzVmss -ResourceGroupName $vmss.resourceGroupName -VMScaleSetName $vmss.Name -IdentityType "SystemAssigned"
  }
  else {
    Write-Host "The VMSS $($vmss.Name) already has an System Assigned Identity"
  }

  # Update $vmss for new $vmss.Identity.PrincipalId
  $vmss = Get-AzVmss -Name $vmss.Name -ResourceGroupName $vmss.ResourceGroupName

  Write-Host "Add role 'reader' to VMSS $($vmss.Name)"
  try {
    New-AzRoleAssignment -ObjectId $vmss.Identity.PrincipalId -RoleDefinitionName "Reader" -ResourceGroupName $vmss.ResourceGroupName
  }
  catch {
    # -like with wildcards is a substring match on the error text. The previous -contains
    # is a collection-membership test and never matched, so this branch was unreachable.
    if ("$_" -like '*already exists*') {
      Write-Host "The VMSS $($vmss.Name) already has role 'Reader' on resouce group $($vmss.ResourceGroupName)"
    }
    else {
      # NOTE(review): this branch was empty in the reviewed source; rethrowing is assumed.
      throw
    }
  }

  Write-Host "Add role 'Storage Account Contributor' to VMSS $($vmss.Name)"
  try {
    New-AzRoleAssignment -ObjectId $vmss.Identity.PrincipalId -RoleDefinitionName "Storage Account Contributor" -ResourceName $storageAccountName -ResourceType "Microsoft.Storage/storageAccounts" -ResourceGroupName $storageAccountRG
  }
  catch {
    if ("$_" -like '*already exists*') {
      Write-Host "The VMSS $($vmss.Name) already has role 'Storage Account Contributor' on storage account $($storageAccountName)"
    }
    else {
      # NOTE(review): this branch was empty in the reviewed source; rethrowing is assumed.
      throw
    }
  }

  Write-Host "Install HpcAcmAgent for VM Scale Set $($vmss.Name)"
  $hasExistingAgent = $false
  if ($useExistingAgent) {
    $extensions = $vmss.VirtualMachineProfile.ExtensionProfile.Extensions
    if ($extensions) {
      foreach ($extension in $extensions) {
        if ($extension.Name -eq 'HpcAcmAgent') {
          $hasExistingAgent = $true
          break
        }
      }
    }
    Write-Host "VM scale set $($vmss.Name) has existing agent: $($hasExistingAgent)"
  }
  else {
    Write-Host "Try to remove existing agent from VM scale set $($vmss.Name)"
    try {
      Remove-AzVmssExtension -VirtualMachineScaleSet $vmss -Name "HpcAcmAgent"
      Update-AzVmss -ResourceGroupName $vmss.ResourceGroupName -VMScaleSetName $vmss.Name -VirtualMachineScaleSet $vmss
      Update-AzVmssInstance -ResourceGroupName $vmss.ResourceGroupName -VMScaleSetName $vmss.Name -InstanceId "*"
    }
    catch {
      # Best effort: a failure to remove is ignored and installation is attempted anyway.
    }
  }

  if (!$hasExistingAgent) {
    if ($vmss.VirtualMachineProfile.OsProfile.LinuxConfiguration) {
      $extensionType = "HpcAcmAgent"
    }
    else {
      # Suppose there're only Linux and Windows
      $extensionType = "HpcAcmAgentWin"
    }
    Add-AzVmssExtension -VirtualMachineScaleSet $vmss -Name "HpcAcmAgent" -Publisher "Microsoft.HpcPack" `
      -Type $extensionType -TypeHandlerVersion 1.0
    Update-AzVmss -ResourceGroupName $vmss.ResourceGroupName -VMScaleSetName $vmss.Name -VirtualMachineScaleSet $vmss
    Update-AzVmssInstance -ResourceGroupName $vmss.ResourceGroupName -VMScaleSetName $vmss.Name -InstanceId "*"
  }
}

function Remove-AcmVmScaleSet {
  # Unregisters a single Azure VM scale set from ACM, undoing Add-AcmVmScaleSet in
  # reverse order: removes the HpcAcmAgent extension and rolls the model out to all
  # instances, removes the two role assignments of the scale set's identity, then
  # disables the system-assigned identity. Every step is best effort: a failure is
  # logged with Write-Host and the next step still runs.
  #
  # Parameters:
  #   vmss               - scale set object; Name, ResourceGroupName and Identity are read.
  #   storageAccountName - storage account the role assignment was made on.
  #   storageAccountRG   - resource group of that storage account.
  param(
    [Parameter(Mandatory = $true)]
    [object] $vmss,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountName,

    [Parameter(Mandatory = $true)]
    [string] $storageAccountRG
  )

  Write-Host "Uninstall HpcAcmAgent for VM Scale Set $($vmss.Name)"
  try {
    Remove-AzVmssExtension -VirtualMachineScaleSet $vmss -Name "HpcAcmAgent"
    Update-AzVmss -ResourceGroupName $vmss.ResourceGroupName -VMScaleSetName $vmss.Name -VirtualMachineScaleSet $vmss
    Update-AzVmssInstance -ResourceGroupName $vmss.ResourceGroupName -VMScaleSetName $vmss.Name -InstanceId "*"
  }
  catch {
    Write-Host "Caught exception: $($_)"
  }

  Write-Host "Remove role 'Storage Account Contributor' from VMSS $($vmss.Name)"
  try {
    Remove-AzRoleAssignment -ObjectId $vmss.Identity.PrincipalId -RoleDefinitionName "Storage Account Contributor" -ResourceName $storageAccountName -ResourceType "Microsoft.Storage/storageAccounts" -ResourceGroupName $storageAccountRG
  }
  catch {
    Write-Host "Caught exception: $($_)"
  }

  Write-Host "Remove role 'reader' from VMSS $($vmss.Name)"
  try {
    Remove-AzRoleAssignment -ObjectId $vmss.Identity.PrincipalId -RoleDefinitionName "Reader" -ResourceGroupName $vmss.ResourceGroupName
  }
  catch {
    Write-Host "Caught exception: $($_)"
  }

  Write-Host "Disable MSI for VMSS $($vmss.Name)"
  if ($vmss.Identity -and $vmss.Identity.Type -contains "SystemAssigned") {
    try {
      Update-AzVmss -ResourceGroupName $vmss.resourceGroupName -VMScaleSetName $vmss.Name -IdentityType "None"
    }
    catch {
      Write-Host "Caught exception: $($_)"
    }
  }
}

function Set-AcmClusterTag {
  # Writes the 'StorageConfiguration' tag on a resource group. The tag value is a small
  # JSON document naming the storage account and its resource group. Existing tags on
  # the resource group are preserved.
  #
  # Parameters:
  #   ResourceGroup      - resource group to tag.
  #   StorageAccountName - value stored under "AccountName".
  #   StorageAccountRG   - value stored under "ResourceGroup".
  param(
    [Parameter(Mandatory = $true)]
    [string] $ResourceGroup,

    [Parameter(Mandatory = $true)]
    [string] $StorageAccountName,

    [Parameter(Mandatory = $true)]
    [string] $StorageAccountRG
  )

  $rg = Get-AzResourceGroup -Name $ResourceGroup
  $tags = $rg.Tags
  $key = "StorageConfiguration"
  $value = "{ `"AccountName`": `"$($StorageAccountName)`", `"ResourceGroup`":`"$($StorageAccountRG)`" }"
  if ($null -eq $tags) {
    # The resource group has no tags at all yet.
    $tags = @{ "$key" = "$value" }
  }
  else {
    $tags[$key] = $value
  }
  Set-AzResourceGroup -Tags $tags -Name $ResourceGroup
}

function Reset-AcmClusterTag {
  # Removes the 'StorageConfiguration' tag from a resource group and leaves all other
  # tags in place. Does nothing when the resource group has no tags.
  #
  # Parameters:
  #   ResourceGroup - resource group to update.
  param(
    [Parameter(Mandatory = $true)]
    [string] $ResourceGroup
  )

  $rg = Get-AzResourceGroup -Name $ResourceGroup
  $tags = $rg.Tags
  if ($tags) {
    $key = "StorageConfiguration"
    # Previously $key was assigned but never used, so the unchanged tags were written
    # back and the tag survived.
    $tags.Remove($key)
    Set-AzResourceGroup -Tags $tags -Name $ResourceGroup
  }
}

function Login {
  # Makes sure there is a signed-in Azure context. A login is needed when Get-AzContext
  # returns nothing, returns a context without an account, or fails with the
  # "Login-AzAccount to login" error. Any other error is propagated.
  $ErrorActionPreference = 'Stop'

  $needLogin = $true
  try {
    $content = Get-AzContext
    if ($content) {
      $needLogin = ([string]::IsNullOrEmpty($content.Account))
    }
  }
  catch {
    if ($_ -like "*Login-AzAccount to login*") {
      $needLogin = $true
    }
    else {
      # NOTE(review): this branch was empty in the reviewed source; rethrowing is assumed.
      throw
    }
  }

  if ($needLogin) {
    # NOTE(review): this body was empty in the reviewed source. An interactive login is
    # assumed; Login-AzAccount is an alias of Connect-AzAccount.
    Connect-AzAccount
  }
}

function Prepare-AcmAzureCtx {
  # Selects the Azure subscription that subsequent Az cmdlets operate on.
  #
  # Parameters:
  #   SubscriptionId - ID of the subscription to select; callers pass it positionally.
  #
  # NOTE(review): the Login function in this file is not invoked here, although callers
  # report "Login to Azure..." right before calling this. Confirm whether Login should
  # run first.
  param($SubscriptionId)

  Select-AzSubscription -SubscriptionId $SubscriptionId
}

function ShowProgress {
  # Shows or updates a progress bar whose percentage and remaining time are derived
  # from the elapsed time since $startTime relative to $timeout (in seconds).
  #
  # Parameters (positional, in this order):
  #   startTime - [datetime] when the tracked operation began.
  #   timeout   - total time budget in seconds.
  #   activity  - text for Write-Progress -Activity.
  #   status    - optional text for -Status.
  #   op        - optional text for -CurrentOperation.
  #   id        - optional progress bar id.
  #   parentId  - optional parent progress bar id. Also accepted as -pid for backward
  #               compatibility; the variable itself must not be named $pid because
  #               that collides with the automatic $PID (process id) variable.
  param(
    $startTime, $timeout, $activity, $status, $op, $id,
    [Alias('pid')] $parentId
  )

  $elapsed = ((Get-Date) - $startTime).TotalSeconds

  # Clamp both values: once the deadline has passed the raw percentage exceeds 100 and
  # the remaining time goes negative, and a zero timeout would divide by zero.
  if ($timeout -gt 0) {
    $percent = [int][Math]::Min(100, [Math]::Max(0, $elapsed * 100 / $timeout))
  }
  else {
    $percent = 100
  }
  $remaining = [int][Math]::Max(0, $timeout - $elapsed)

  # Not named $args: that is an automatic variable.
  $progressArgs = @{
    Activity = $activity
    PercentComplete = $percent
    SecondsRemaining = $remaining
  }
  if ($id) {
    $progressArgs['Id'] = $id
  }
  if ($parentId) {
    $progressArgs['ParentId'] = $parentId
  }
  if ($status) {
    $progressArgs['Status'] = $status
  }
  if ($op) {
    $progressArgs['CurrentOperation'] = $op
  }
  Write-Progress @progressArgs
}

function HideProgress {
  # Marks the progress bar with the given id as completed so it is no longer shown.
  #
  # Parameters:
  #   id - id of the progress bar to complete; callers pass it positionally.
  param($id)

  Write-Progress -Activity "END" -Completed -Id $id
}

function Wait-AcmJob {
  # Polls the given jobs once per second until all of them are in a final state
  # (Completed, Failed or Stopped) or until $timeout seconds have passed since
  # $startTime. While waiting it updates the progress bar and prints job output to
  # the host. Returns nothing.
  #
  # Parameters:
  #   jobs      - jobs to wait for.
  #   startTime - [datetime] the overall operation started at.
  #   timeout   - overall time budget in seconds, measured from startTime.
  #   activity  - progress bar activity text.
  #   progId    - progress bar id.
  param($jobs, $startTime, $timeout, $activity, $progId)

  $ids = $jobs.foreach('id')
  while ($true) {
    $elapsed = ($(Get-Date) - $startTime).TotalSeconds
    if ($elapsed -ge $timeout) {
      # NOTE(review): this body was empty in the reviewed source; leaving the loop on
      # timeout is assumed.
      break
    }
    # TODO: Optimize counting?
    $doneJobCount = @(Get-Job -Id $ids).where({ $_.state -in 'Completed', 'Failed', 'Stopped' }).Count
    if ($doneJobCount -eq $ids.Count) {
      # NOTE(review): this body was empty in the reviewed source; leaving the loop when
      # all jobs are done is assumed.
      break
    }
    ShowProgress $startTime $timeout $activity -Status "Waiting jobs to finish...." `
      -Op "Completed jobs: $($doneJobCount)/$($jobs.Count)" -Id $progId

    # NOTE: DO NOT simply
    # Receive-Job $jobs
    # because that will implicitly add the output to the return value and thus
    # pollute the caller's return value.

    $output = Receive-Job $jobs
    if ($output) {
      Write-Host $output
    }
    Start-Sleep 1
  }

  # Flush whatever the jobs produced since the last poll.
  $output = Receive-Job $jobs
  if ($output) {
    Write-Host $output
  }
}

function Remove-AcmJob {
  # Stops and removes the jobs with the given ids in the background, without waiting
  # for the removal to finish.
  #
  # Parameters:
  #   ids - ids of the jobs to stop and remove; callers pass them positionally.
  param($ids)

  # Remove-Job somtimes don't return even with "-Force". So do it in another job and forget it.
  Start-ThreadJob -ScriptBlock {
    # The script block does not see the caller's variables, so the ids have to arrive
    # as a parameter.
    param($ids)
    Stop-Job -Id $ids
    Remove-Job -Force -Id $ids
  } -ArgumentList (, @($ids)) | Out-Null
  # The unary comma passes the whole id list as ONE argument. A bare -ArgumentList $ids
  # would spread the ids over several positional arguments and only the first would
  # bind to $ids.
}

function CollectResult {
  # Builds one result record per VM / VM scale set job.
  #
  # Parameters:
  #   names - display names, index-aligned with $jobs.
  #   jobs  - the jobs that were started.
  #
  # Index 0 is skipped on purpose: the caller puts a $null placeholder in $names[0]
  # for the resource-group tag job, which is not reported.
  #
  # Returns objects with Name, Completed (job state is 'Completed') and JobId.
  param($names, $jobs)

  # Collect the loop output instead of growing an array with += on every iteration.
  $result = @(
    for ($idx = 1; $idx -lt $names.Length; $idx++) {
      [PSCustomObject]@{
        Name = $names[$idx]
        Completed = $jobs[$idx].State -eq 'Completed'
        JobId = $jobs[$idx].Id
      }
    }
  )
  return $result
}

function OutputResult {
  # Prints the per-resource result table followed by a one-line summary (total,
  # completed, percentage) to the host. Nothing is written to the output stream.
  #
  # Parameters:
  #   result - records with Name, Completed and JobId; callers pass them positionally.
  param($result)

  # Normalise to an array so that .Count and .where() behave the same for zero, one or
  # many records.
  if ($null -eq $result) {
    $result = @()
  }
  else {
    $result = @($result)
  }

  # NOTE(review): the pipeline ended in a dangling '|' in the reviewed source. Out-Default
  # is assumed because the summary table below is written that way.
  $result |
    Sort-Object -Property Completed, Name |
    Format-Table -Property @{Name = 'VM/VM Scale Set'; Expression = {$_.Name}}, Completed, JobId -Wrap |
    Out-Default

  if ($result.Count -gt 0) {
    $completed = $result.where({ $_.Completed }).Count
    $summary = [PSCustomObject]@{
      Total = $result.Count
      Completed = $completed
      Percent = "$('{0:0.00}' -f ($completed * 100 / $result.Count))%"
    }
    $summary | Format-Table -Property Total, Completed, Percent -Wrap | Out-Default
  }
}

function Initialize-AcmCluster {
  # Adds every VM and VM scale set of a resource group to ACM, or removes them when
  # -Uninitialize is given. One thread job is started per VM / scale set plus one job
  # that sets or resets the storage tag on the resource group. The function waits for
  # the jobs, prints a result table and optionally returns the result records.
  #
  # Parameters:
  #   SubscriptionId   - subscription containing both resource groups.
  #   ResourceGroup    - resource group with the VMs / VM scale sets.
  #   AcmResourceGroup - resource group of the ACM service; its first storage account is used.
  #   Timeout          - overall time budget in seconds; estimated from the number of
  #                      VMs / scale sets when omitted or 0.
  #   ConcurrentLimit  - Start-ThreadJob throttle limit.
  #   RetainJobs       - keep the jobs after the run, for debugging.
  #   Return           - return the result records.
  #   UseExistingAgent - forwarded to the add jobs; ignored with -Uninitialize.
  #   Uninitialize     - remove from ACM instead of adding.
  param(
    [Parameter(Mandatory = $true)]
    [string] $SubscriptionId,

    [Parameter(Mandatory = $true)]
    [string] $ResourceGroup,

    [Parameter(Mandatory = $true)]
    [string] $AcmResourceGroup,

    [int] $Timeout,

    # NOTE: Do not change the default value and do not provide a bigger one,
    # as Start-ThreadJob won't accept a value > 50 and will raise an error.
    [int] $ConcurrentLimit = 50,

    [switch] $RetainJobs,

    [switch] $Return,

    [switch] $UseExistingAgent,

    [switch] $Uninitialize
  )

  $startTime = Get-Date
  if ($Uninitialize) {
    $activity = 'Removing cluster from ACM service...'
  }
  else {
    $activity = 'Adding cluster to ACM service...'
  }

  $basetime = 360 # Max time to add one VM/VM scale set
  if (!$Timeout) {
    # timelimit will be recomputed later based on number of vms
    $timelimit = $basetime
  }
  else {
    $timelimit = $Timeout
  }

  ShowProgress $startTime $timelimit $activity -Status "Login to Azure..." -id 1
  Prepare-AcmAzureCtx $SubscriptionId | Out-Null

  ShowProgress $startTime $timelimit $activity -Status "Preparing for jobs..." -id 1

  $jobs = @()
  # $names stays index-aligned with $jobs. Slot 0 belongs to the tag job, which has no name.
  $names = @($null)
  $acmRg = Get-AzResourceGroup -Name $AcmResourceGroup
  $storageAccounts = @(Get-AzStorageAccount -ResourceGroupName $acmRg.ResourceGroupName)
  if ($storageAccounts.Count -eq 0) {
    throw "No storage account is found in resource group $($AcmResourceGroup)"
  }
  $storageAccount = $storageAccounts[0]
  # TODO: Filter out only running vms, but what about VM scale set?
  # @() keeps .Count and foreach well-defined for zero or one returned object.
  $vms = @(Get-AzVm -ResourceGroupName $ResourceGroup)
  $vmssSet = @(Get-AzVmss -ResourceGroupName $ResourceGroup)

  if (!$Timeout) {
    $total = $vms.Count + $vmssSet.Count
    # One $basetime per batch of $ConcurrentLimit jobs. At least one batch is always
    # budgeted because the tag job runs even when there is no VM / scale set.
    $batches = [Math]::Max(1, [Math]::Ceiling($total / $ConcurrentLimit))
    $timelimit = $basetime * $batches
  }

  ShowProgress $startTime $timelimit $activity -Status "Starting jobs..." -id 1

  # Configure storage information for the resource group
  if ($Uninitialize) {
    $jobs += Start-ThreadJob -ScriptBlock ${function:Reset-AcmClusterTag} -ArgumentList $ResourceGroup
  }
  else {
    $jobs += Start-ThreadJob -ScriptBlock ${function:Set-AcmClusterTag} `
      -ArgumentList $ResourceGroup, $storageAccount.StorageAccountName, $storageAccount.ResourceGroupName
  }

  # Register each vm and vm scale set to ACM
  foreach ($vm in $vms) {
    $jobArgs = @($vm, $storageAccount.StorageAccountName, $storageAccount.ResourceGroupName)
    if ($Uninitialize) {
      $func = ${function:Remove-AcmVm}
    }
    else {
      $func = ${function:Add-AcmVm}
      # The job functions take a [bool]; see the note on their useExistingAgent parameter.
      $jobArgs += [bool] $UseExistingAgent
    }
    $jobs += Start-ThreadJob -ThrottleLimit $ConcurrentLimit -ScriptBlock $func -ArgumentList $jobArgs
    $names += $vm.Name
  }
  foreach ($vmss in $vmssSet) {
    $jobArgs = @($vmss, $storageAccount.StorageAccountName, $storageAccount.ResourceGroupName)
    if ($Uninitialize) {
      $func = ${function:Remove-AcmVmScaleSet}
    }
    else {
      $func = ${function:Add-AcmVmScaleSet}
      $jobArgs += [bool] $UseExistingAgent
    }
    $jobs += Start-ThreadJob -ThrottleLimit $ConcurrentLimit -ScriptBlock $func -ArgumentList $jobArgs
    $names += $vmss.Name
  }

  Wait-AcmJob $jobs $startTime $timelimit $activity -ProgId 1

  # Snapshot the job states before cleanup starts stopping / removing jobs in the background.
  $result = CollectResult $names $jobs

  if (!$RetainJobs) {
    ShowProgress $startTime $timelimit $activity -Status "Cleaning jobs..." -id 1
    $ids = $jobs.foreach('Id')
    Remove-AcmJob $ids
  }
  HideProgress 1

  OutputResult $result
  if ($Return) {
    return $result
  }
}

function Add-AcmCluster {
  <#
  .SYNOPSIS
  Add an Azure cluster of VMs/VM scale sets to ACM.

  .PARAMETER ResourceGroup
  The name of an Azure resource group containing the VMs/VM scale sets to test.

  .PARAMETER AcmResourceGroup
  The name of an Azure resource group containing the ACM service.

  .PARAMETER SubscriptionId
  The ID of an Azure subscription containing both the ResourceGroup and AcmResourceGroup.

  .PARAMETER Timeout
  The timeout value for adding cluster to ACM. By default, an estimated value will be set based on the number of VMs/VM scale sets in a cluster. A value shorter than necessary will fail the setup procedure. You could specify a larger value to ensure the success of setup.

  .PARAMETER UseExistingAgent
  When adding a VM to ACM, use existing HPC ACM agent if any. By default, existing agent will be uninstalled before installing. This is to ensure the newest version is installed and may also fix problems of a bad installation. But it takes longer time. This switch may save some time on VM setup by reusing existing agent, but has the risk of reusing a bad agent.

  .PARAMETER RetainJobs
  Do not remove PowerShell jobs after. This is for checking the job state for debug purpose.

  .PARAMETER Return
  Return the result. By default, the function returns nothing.

  .EXAMPLE
  Add-AcmCluster -SubscriptionId a486e243-747b-42de-8c4c-379f8295a746 -ResourceGroup 'my-cluster-1' -AcmResourceGroup 'my-acm-cluster'
  Add a cluster of VMs/VM scale sets to ACM.
  #>
  param(
    [Parameter(Mandatory = $true)]
    [string] $SubscriptionId,

    [Parameter(Mandatory = $true)]
    [string] $ResourceGroup,

    [Parameter(Mandatory = $true)]
    [string] $AcmResourceGroup,

    [int] $Timeout,

    [switch] $UseExistingAgent,

    [switch] $RetainJobs,

    [switch] $Return
  )

  # Forward exactly the parameters the caller supplied.
  Initialize-AcmCluster @PSBoundParameters
}

function Remove-AcmCluster {
  <#
  .SYNOPSIS
  Remove an Azure cluster of VMs/VM scale sets from ACM.

  .PARAMETER ResourceGroup
  The name of an Azure resource group containing the VMs/VM scale sets to remove.

  .PARAMETER AcmResourceGroup
  The name of an Azure resource group containing the ACM service.

  .PARAMETER SubscriptionId
  The ID of an Azure subscription containing both the ResourceGroup and AcmResourceGroup.

  .PARAMETER Timeout
  The timeout value for removing cluster from ACM. By default, an estimated value will be set based on the number of VMs/VM scale sets in a cluster. A value shorter than necessary will fail the removal procedure. You could specify a larger value to ensure the success of removal.

  .PARAMETER RetainJobs
  Do not remove PowerShell jobs after. This is for checking the job state for debug purpose.

  .PARAMETER Return
  Return the result. By default, the function returns nothing.

  .EXAMPLE
  Remove-AcmCluster -SubscriptionId a486e243-747b-42de-8c4c-379f8295a746 -ResourceGroup 'my-cluster-1' -AcmResourceGroup 'my-acm-cluster'
  Remove a cluster of VMs/VM scale sets from ACM.
  #>
  param(
    [Parameter(Mandatory = $true)]
    [string] $SubscriptionId,

    [Parameter(Mandatory = $true)]
    [string] $ResourceGroup,

    [Parameter(Mandatory = $true)]
    [string] $AcmResourceGroup,

    [int] $Timeout,

    [switch] $RetainJobs,

    [switch] $Return
  )

  # Forward exactly the parameters the caller supplied and switch to removal mode.
  Initialize-AcmCluster @PSBoundParameters -Uninitialize
}

function Wait-AcmDiagnosticJob {
  # Polls an ACM diagnostic job once per second until it reaches a final state
  # (Finished, Failed or Canceled) or until $timeout seconds have passed since
  # $startTime, updating the progress bar while waiting.
  #
  # Parameters:
  #   job       - diagnostic job object; its State and Id are read.
  #   conn      - ACM connection used to refresh the job.
  #   startTime - [datetime] the overall operation started at.
  #   timeout   - overall time budget in seconds, measured from startTime.
  #   activity  - progress bar activity text.
  #   status    - progress bar status text.
  #   progId    - progress bar id.
  #
  # Returns $true when the job reached a final state, $false on timeout.
  param($job, $conn, $startTime, $timeout, $activity, $status, $progId)

  $finished = $false
  while ($true) {
    $elapsed = ($(Get-Date) - $startTime).TotalSeconds
    if ($elapsed -ge $timeout) {
      # NOTE(review): this body was empty in the reviewed source; leaving the loop on
      # timeout is assumed.
      break
    }
    if ($job.State -in "Finished", "Failed", "Canceled") {
      $finished = $true
      # NOTE(review): the loop exit after setting $finished was not visible in the
      # reviewed source; it is assumed.
      break
    }
    $op = "Diagnostic job state: $($job.State)"
    ShowProgress $startTime $timeout $activity $status $op $progId
    $job = Get-AcmDiagnosticJob -Id $job.Id -Connection $conn
    Start-Sleep 1
  }
  return $finished
}

<#
.SYNOPSIS
Test Azure cluster of VMs/VM scale sets in ACM.

.PARAMETER ApiBasePoint
The URL of ACM web service. The value can be found by the result of Get-AcmAppInfo.

.PARAMETER IssuerUrl
The issuer URL of ACM web service, may be empty if the ACM web service is not protected by Azure AD. The value can be found by the result of Get-AcmAppInfo.

.PARAMETER ClientId
The client id of ACM web service, may be empty if the ACM web service is not protected by Azure AD. The value can be found by the result of Get-AcmAppInfo.

.PARAMETER ClientSecret
The client secret of ACM web service, may be empty if the ACM web service is not protected by Azure AD. The value can be found by the result of Get-AcmAppInfo.

.PARAMETER Timeout
The timeout value for performing test on cluster. By default, an estimated value will be set based on the number of nodes in a cluster. A value shorter than necessary will cause no test result, since the test can't complete without enough time. You could specify a larger value to ensure the test to complete.

.PARAMETER Return
Return the result. By default, the function returns nothing.

.EXAMPLE
  $app = Get-AcmAppInfo -SubscriptionId 'my-id' -ResourceGroup 'my-group'; Test-AcmCluster @app
#>

function Test-AcmCluster {
  # Runs the MPI Pingpong diagnostic on every healthy, online node known to the ACM
  # service and prints a per-node table plus a summary.
  #
  # Steps: connect to ACM, list the nodes, install the MPI prerequisites on the Linux
  # and Windows nodes, run the 'MPI' / 'Pingpong' diagnostic, fetch its aggregation
  # result and mark each node as good or not.
  #
  # Parameters:
  #   ApiBasePoint - base URL of the ACM web service.
  #   IssuerUrl, ClientId, ClientSecret - authentication settings; the connection is
  #                  made without them when IssuerUrl is empty.
  #   Timeout      - overall time budget in seconds; estimated from the node count
  #                  when omitted or 0.
  #   Return       - return the per-node records.
  #
  # Throws when a prerequisite installation or the test job does not finish in time.
  param(
    [Parameter(Mandatory = $true)]
    [string] $ApiBasePoint,

    [string] $IssuerUrl,

    [string] $ClientId,

    [string] $ClientSecret,

    [int] $Timeout,

    [switch] $Return
  )

  $startTime = Get-Date
  $activity = "Testing cluster in ACM service..."

  # The meanings of basetime and basesize are:
  # every basesize number of nodes requires basetime to run
  $basetime = 600
  $basesize = 80

  if (!$Timeout) {
    # timelimit will be recomputed later based on number of test nodes
    $timelimit = $basetime
  }
  else {
    $timelimit = $Timeout
  }

  $status = "Connecting to ACM service..."
  Write-Host $status
  ShowProgress $startTime $timelimit $activity -Status $status -id 1

  # Not named $args: that is an automatic variable.
  $connArgs = @{
    ApiBasePoint = $ApiBasePoint
  }
  # Allow unauthenticated access if the ACM service allows.
  if (![string]::IsNullOrEmpty($IssuerUrl)) {
    $connArgs['IssuerUrl'] = $IssuerUrl
    $connArgs['ClientId'] = $ClientId
    $connArgs['ClientSecret'] = $ClientSecret
  }
  $conn = Connect-Acm @connArgs

  $status = "Getting ACM nodes..."
  Write-Host $status
  ShowProgress $startTime $timelimit $activity -Status $status -id 1

  # @() keeps .where() / .foreach() / .Count well-defined for zero or one node.
  $nodes = @(Get-AcmNode -Connection $conn -Count 100000)
  $nodesInTest = $nodes.where({ $_.Health -eq 'OK' -and $_.State -eq 'Online' })
  $linuxNodeNames = $nodesInTest.where({ $_.NodeRegistrationInfo.DistroInfo -like '*Linux*' }).foreach('Name')
  $winNodeNames = $nodesInTest.where({ $_.NodeRegistrationInfo.DistroInfo -like '*Windows*' }).foreach('Name')
  $names = $linuxNodeNames + $winNodeNames

  if ($names.Count -gt 0) {
    if (!$Timeout) {
      # Recompute timelimit based on node number.
      $timelimit = [Math]::Truncate($names.Count / $basesize) * $basetime
      if ($names.Count % $basesize -gt 0) {
        $timelimit += $basetime
      }
      $timelimit += 60 # Additional time for installation of prerequisites
    }

    # First, install necessary tools
    if ($linuxNodeNames.Count -gt 0) {
      $status = "Installing test prerequisites on Linux nodes..."
      Write-Host $status
      ShowProgress $startTime $timelimit $activity -Status $status -id 1

      $job = Start-AcmDiagnosticJob -Connection $conn -Nodes $linuxNodeNames -Category 'Prerequisite' -Name 'Intel MPI Installation'
      $finished = Wait-AcmDiagnosticJob $job $conn $startTime $timelimit $activity $status -progId 1
      if (!$finished) {
        throw "Linux prerequisite installation timed out. Job id: $($job.id)"
      }
    }

    if ($winNodeNames.Count -gt 0) {
      $status = "Installing test prerequisites on Windows nodes..."
      Write-Host $status
      ShowProgress $startTime $timelimit $activity -Status $status -id 1

      $job = Start-AcmDiagnosticJob -Connection $conn -Nodes $winNodeNames -Category 'Prerequisite' -Name 'Microsoft MPI Installation'
      $finished = Wait-AcmDiagnosticJob $job $conn $startTime $timelimit $activity $status -progId 1
      if (!$finished) {
        throw "Windows prerequisite installation timed out. Job id: $($job.id)"
      }
    }

    # Then, do test
    $status = "Performing test on nodes..."
    Write-Host $status
    ShowProgress $startTime $timelimit $activity -Status $status -id 1

    $job = Start-AcmDiagnosticJob -Connection $conn -Nodes $names -Category 'MPI' -Name 'Pingpong'
    $finished = Wait-AcmDiagnosticJob $job $conn $startTime $timelimit $activity $status -progId 1
    if (!$finished) {
      throw "Test job timed out. Job id: $($job.id)"
    }

    # Finally, get aggregation result
    $status = "Fetching test aggregation result..."
    Write-Host $status
    ShowProgress $startTime $timelimit $activity -Status $status -id 1

    $testResult = Get-AcmDiagnosticJobAggregationResult -Connection $conn -Id $job.Id
    $testResult = ConvertFrom-JsonNewtonsoft $testResult.ToString()

    # NOTE: Conversion to [string[]] is required, otherwise creating object will fail as
    # it can't find a proper constructor for HashSet type.
    $goodNodes = New-Object -TypeName System.Collections.Generic.HashSet[string] `
      -ArgumentList @(, ($testResult['GoodNodes'] -as [string[]]))
    $goodCount = $goodNodes.Count
  }
  else {
    $goodNodes = $null
    $goodCount = 0
  }

  HideProgress 1

  Write-Host "Generating result..."

  $nodes = $nodes.foreach({
    $val = [ordered]@{
      Name = $_.Name
      InTest = $_.Health -eq 'OK' -and $_.State -eq 'Online'
    }
    if ($null -ne $goodNodes) {
      $val['Good'] = $goodNodes.Contains($_.Name)
    }
    else {
      $val['Good'] = $null
    }
    # NOTE(review): the statement emitting the record was not visible in the reviewed
    # source. An object is emitted because the code below sorts and formats by the
    # Name / InTest / Good properties.
    [PSCustomObject] $val
  })

  $nodes | Sort-Object -Property InTest, Good, Name |
    Format-Table -Wrap -Property `
      @{Name = 'Node'; Expression = {$_.Name}}, `
      @{Name = 'Good for Test'; Expression = {$_.InTest}}, `
      @{Name = 'Good in MPI Pingpong'; Expression = {$_.Good}} | Out-Default

  if ($nodes.Count -gt 0) {
    $summary = [PSCustomObject]@{
      Total = $nodes.Count
      Good = $goodCount
      Percent = "$('{0:0.00}' -f ($goodCount * 100 / $nodes.Count))%"
    }
    $summary | Format-Table -Property Total, Good, Percent -Wrap | Out-Default
  }

  if ($Return) {
    return $nodes
  }
}

# TODO: optional param: app name
function Get-AcmAppInfo {
  <#
  .SYNOPSIS
  Get ACM app/service info for use of Test-AcmCluster.

  .DESCRIPTION
  Reads the authentication settings of the first web app in the resource group and
  returns a hashtable with the keys IssuerUrl, ClientId, ClientSecret and ApiBasePoint.
  The hashtable can be splatted to Test-AcmCluster.

  .PARAMETER ResourceGroup
  The name of an Azure resource group containing the ACM service.

  .PARAMETER SubscriptionId
  The ID of an Azure subscription containing the ResourceGroup.
  #>
  param(
    [Parameter(Mandatory = $true)]
    [string] $ResourceGroup,

    [Parameter(Mandatory = $true)]
    [string] $SubscriptionId
  )

  $ErrorActionPreference = 'Stop'

  Prepare-AcmAzureCtx $SubscriptionId | Out-Null

  $apps = @(Get-AzWebApp -ResourceGroupName $ResourceGroup)
  if ($apps.Count -eq 0) {
    # Fail with a clear message instead of an indexing error on an empty result.
    throw "No web app is found in resource group $($ResourceGroup)"
  }
  $app = $apps[0]
  $config = Invoke-AzResourceAction -ApiVersion 2016-08-01 -Action list `
    -ResourceGroupName $app.ResourceGroup `
    -ResourceType Microsoft.Web/sites/config `
    -ResourceName "$($app.Name)/authsettings" -Force
  $auth = $config.properties
  return @{
    'IssuerUrl' = $auth.issuer
    'ClientId' = $auth.clientId
    'ClientSecret' = $auth.clientSecret
    'ApiBasePoint' = "https://$($app.DefaultHostName)/v1"
  }
}

function New-AcmTest {
  <#
  .SYNOPSIS
  Add an Azure cluster of VMs/VM scale sets to ACM and perform MPI Pingpong test on it.

  .PARAMETER ResourceGroup
  The name of an Azure resource group containing the VMs/VM scale sets to test.

  .PARAMETER AcmResourceGroup
  The name of an Azure resource group containing the ACM service.

  .PARAMETER SubscriptionId
  The ID of an Azure subscription containing both the ResourceGroup and AcmResourceGroup.

  .PARAMETER SetupTimeout
  The timeout value for adding cluster to ACM. By default, an estimated value will be set based on the number of VMs/VM scale sets in a cluster. A value shorter than necessary will fail the setup procedure. You could specify a larger value to ensure the success of setup.

  .PARAMETER TestTimeout
  The timeout value for performing test on cluster. By default, an estimated value will be set based on the number of nodes in a cluster. A value shorter than necessary will cause no test result, since the test can't complete without enough time. You could specify a larger value to ensure the test to complete.

  .PARAMETER NoSetup
  Do not add cluster to ACM but only do test on it. This is for repeated test on a cluster that already has been added to ACM.

  .PARAMETER UseExistingAgent
  When adding a VM to ACM, use existing HPC ACM agent if any. By default, existing agent will be uninstalled before installing. This is to ensure the newest version is installed and may also fix problems of a bad installation. But it takes longer time. This switch may save some time on VM setup by reusing existing agent, but has the risk of reusing a bad agent.

  .EXAMPLE
  New-AcmTest -SubscriptionId a486e243-747b-42de-8c4c-379f8295a746 -ResourceGroup 'my-cluster-1' -AcmResourceGroup 'my-acm-cluster'
  Perform test on a cluster of VMs/VM scale sets that has not been added to ACM before.

  .EXAMPLE
  New-AcmTest -SubscriptionId a486e243-747b-42de-8c4c-379f8295a746 -ResourceGroup 'my-cluster-1' -AcmResourceGroup 'my-acm-cluster' -NoSetup
  Perform test on a cluster of VMs/VM scale sets that has been added to ACM already.
  #>
  param(
    [Parameter(Mandatory = $true)]
    [string] $ResourceGroup,

    [Parameter(Mandatory = $true)]
    [string] $AcmResourceGroup,

    [Parameter(Mandatory = $true)]
    [string] $SubscriptionId,

    [int] $SetupTimeout,

    [int] $TestTimeout,

    [switch] $NoSetup,

    [switch] $UseExistingAgent
  )

  if (!$NoSetup) {
    Write-Host "Adding cluster to ACM service..."
    # Not named $args: that is an automatic variable.
    $setupArgs = @{
      SubscriptionId = $SubscriptionId
      ResourceGroup = $ResourceGroup
      AcmResourceGroup = $AcmResourceGroup
      UseExistingAgent = $UseExistingAgent
    }
    if ($SetupTimeout) {
      $setupArgs['Timeout'] = $SetupTimeout
    }
    Add-AcmCluster @setupArgs
  }

  Write-Host "Getting ACM service app configuration..."
  $app = Get-AcmAppInfo -SubscriptionId $SubscriptionId -ResourceGroup $AcmResourceGroup
  if (!$app['IssuerUrl']) {
    Write-Warning "No authentication configuration is found for the ACM app in $($AcmResourceGroup)!"
  }

  Write-Host "Testing cluster in ACM service..."
  if ($TestTimeout) {
    # $app is splatted below, so the timeout travels as one more named argument.
    $app['Timeout'] = $TestTimeout
  }
  Test-AcmCluster @app
}