# Examples/05-SystemManagement.ps1

<#
.SYNOPSIS
    Examples for VergeOS system management and monitoring.

.DESCRIPTION
    This script demonstrates system administration tasks:
    - Checking VergeOS version information
    - Managing clusters (list, create, modify, remove)
    - Managing nodes
    - Node maintenance operations
    - System statistics and dashboard overview
    - System settings management
    - License information
    - Node driver status
    - Hardware device discovery (PCI, USB, GPU)
    - Comprehensive system health report

.NOTES
    Prerequisites:
    - PowerShell 7.4 or later
    - PSVergeOS module installed
    - Connected to a VergeOS system
#>


# Import the module
Import-Module PSVergeOS

#region Version Information
# ============================================================================
# GETTING VERGEOS VERSION INFORMATION
# ============================================================================

# Emit the version object for the connected system
Get-VergeVersion

# Print each component version on its own line
$versionInfo = Get-VergeVersion
Write-Host -Object "Connected to VergeOS $($versionInfo.VergeOSVersion)"
Write-Host -Object " Kernel: $($versionInfo.KernelVersion)"
Write-Host -Object " vSAN: $($versionInfo.vSANVersion)"
Write-Host -Object " QEMU: $($versionInfo.QEMUVersion)"

# Compatibility gate: take the first dot-separated field of the version
# string as the major release and warn when it is below 26
$connected = Get-VergeVersion
$versionFields = $connected.VergeOSVersion -split '\.'
$majorRelease = [int]$versionFields[0]
if (26 -gt $majorRelease) {
    Write-Warning -Message "This script requires VergeOS 26.0 or later"
}

#endregion

#region Cluster Management
# ============================================================================
# MANAGING CLUSTERS
# ============================================================================

# Enumerate every cluster on the connected system
Get-VergeCluster

# Fetch a single cluster by its name
Get-VergeCluster -Name "Production"

# Tabulate per-cluster resource utilization
Get-VergeCluster |
    Format-Table -Property Name, Status, OnlineNodes, UsedCores, OnlineCores, UsedRAM, OnlineRAM

# Capacity summary: one row per cluster, CPU and RAM shown as used/online (percent)
Get-VergeCluster | ForEach-Object {
    # Percentages stay at 0 when nothing is online, which also avoids dividing by zero
    $cpuPct = 0
    if ($_.OnlineCores -gt 0) {
        $cpuPct = [math]::Round(($_.UsedCores / $_.OnlineCores) * 100, 1)
    }

    $ramPct = 0
    if ($_.OnlineRAM -gt 0) {
        $ramPct = [math]::Round(($_.UsedRAM / $_.OnlineRAM) * 100, 1)
    }

    # RAM values are divided by 1024 before being labelled GB
    # (presumably the cmdlet reports MB - confirm against the module docs)
    $capacityRow = [ordered]@{
        Cluster = $_.Name
        Status  = $_.Status
        CPUUsed = "$($_.UsedCores)/$($_.OnlineCores) ($cpuPct%)"
        RAMUsed = "$([math]::Round($_.UsedRAM/1024, 1))/$([math]::Round($_.OnlineRAM/1024, 1)) GB ($ramPct%)"
        VMs     = $_.RunningMachines
    }
    [PSCustomObject]$capacityRow
} | Format-Table

# Show the CPU-type and nested-virtualization details of each cluster
Get-VergeCluster |
    Select-Object -Property Name, DefaultCPUType, RecommendedCPUType, NestedVirtualization

#endregion

#region Cluster Creation and Modification
# ============================================================================
# CREATING AND MODIFYING CLUSTERS
# ============================================================================

# Create a basic cluster
# New-VergeCluster -Name "Development" -Description "Development workloads"

# Create a cluster with compute enabled and resource limits
# New-VergeCluster -Name "Production" -Description "Production VMs" -Compute -MaxRAMPerVM 131072 -MaxCoresPerVM 32

# Create a cluster optimized for nested virtualization
# New-VergeCluster -Name "Lab-Cluster" -Compute -NestedVirtualization -AllowNestedVirtMigration $true -PassThru

# Create a cluster with specific CPU type and power settings
# New-VergeCluster -Name "HPC-Cluster" -Compute `
# -DefaultCPUType "EPYC-Milan" `
# -EnergyPerfPolicy Performance `
# -ScalingGovernor Performance `
# -PassThru

# Create a cluster for GPU workloads
# New-VergeCluster -Name "GPU-Cluster" -Compute `
# -NestedVirtualization `
# -AllowVGPUMigration `
# -Description "GPU passthrough workloads" `
# -PassThru

# Modify cluster settings - update resource limits
# Set-VergeCluster -Name "Production" -MaxRAMPerVM 262144 -MaxCoresPerVM 64

# Enable nested virtualization on existing cluster
# Set-VergeCluster -Name "Development" -NestedVirtualization $true

# Change cluster CPU type
# Set-VergeCluster -Name "Production" -DefaultCPUType "Cascadelake-Server"

# Update power management settings
# Set-VergeCluster -Name "Production" -EnergyPerfPolicy BalancePerformance -ScalingGovernor OnDemand

# Rename a cluster
# Set-VergeCluster -Name "OldName" -NewName "NewName" -PassThru

# Modify cluster using pipeline
# Get-VergeCluster -Name "Development" | Set-VergeCluster -Description "Updated description" -PassThru

# Disable a cluster
# Set-VergeCluster -Name "Maintenance-Cluster" -Enabled $false

# Update storage settings
# Set-VergeCluster -Name "Production" -StorageCachePerNode 8192 -StorageHugepages $true

# Configure temperature monitoring
# Set-VergeCluster -Name "Production" -MaxCoreTemp 85 -CriticalCoreTemp 95 -MaxCoreTempWarnPercent 10

#endregion

#region Cluster Deletion
# ============================================================================
# REMOVING CLUSTERS
# ============================================================================

# Remove a cluster by name (requires confirmation)
# Remove-VergeCluster -Name "Test-Cluster"

# Remove without confirmation prompt
# Remove-VergeCluster -Name "Temp-Cluster" -Confirm:$false

# Remove using pipeline
# Get-VergeCluster -Name "Temp-*" | Remove-VergeCluster

# Preview what would be deleted (WhatIf)
# Remove-VergeCluster -Name "Development" -WhatIf

# Safe cluster removal workflow:
# inspect the cluster first and only suggest removal when it is empty
$clusterName = "Cluster-To-Remove"

# 1. Look the cluster up and report what is still attached to it
$cluster = Get-VergeCluster -Name $clusterName
if ($cluster) {
    Write-Host -Object "Cluster: $($cluster.Name)"
    Write-Host -Object " Total Nodes: $($cluster.TotalNodes)"
    Write-Host -Object " Running VMs: $($cluster.RunningMachines)"

    # Nodes take precedence: the VM warning is only shown when no nodes remain
    $hasNodes = $cluster.TotalNodes -gt 0
    $hasRunningVMs = $cluster.RunningMachines -gt 0

    if ($hasNodes) {
        Write-Warning -Message "Cluster has $($cluster.TotalNodes) nodes. Reassign nodes before deletion."
        # Get-VergeNode -Cluster $clusterName
    }
    elseif ($hasRunningVMs) {
        Write-Warning -Message "Cluster has $($cluster.RunningMachines) running VMs. Stop/move VMs before deletion."
    }
    else {
        Write-Host -Object "Cluster can be safely removed." -ForegroundColor Green
        # Remove-VergeCluster -Name $clusterName -Confirm:$false
    }
}

#endregion

#region Node Management
# ============================================================================
# MANAGING NODES
# ============================================================================

# Enumerate every node
Get-VergeNode

# Tabulate key node facts; RAM_GB is the RAM value divided by 1024, rounded to one decimal
$ramColumn = @{ Name = 'RAM_GB'; Expression = { [math]::Round($_.RAM/1024,1) } }
Get-VergeNode |
    Format-Table -Property Name, Status, Cluster, Cores, $ramColumn, MaintenanceMode

# Look up one node by name
Get-VergeNode -Name "node1"

# Restrict the listing to a single cluster
Get-VergeNode -Cluster "Production"

# Show only the nodes that are in maintenance mode
Get-VergeNode -MaintenanceMode $true

# Health overview: one row per node with restart and maintenance indicators
Get-VergeNode | ForEach-Object {
    $healthRow = [ordered]@{
        Node          = $_.Name
        Status        = $_.Status
        NeedsRestart  = $_.NeedsRestart
        RestartReason = $_.RestartReason
        IOMMU         = $_.IOMMU
        Maintenance   = $_.MaintenanceMode
    }
    [PSCustomObject]$healthRow
} | Format-Table

# Per-node software versions
Get-VergeNode | Select-Object -Property Name, VergeOSVersion, KernelVersion, vSANVersion

# Warn about nodes whose NeedsRestart flag is set, listing the reason for each
$pendingRestart = Get-VergeNode | Where-Object { $_.NeedsRestart }
if ($pendingRestart) {
    Write-Warning -Message "The following nodes need to be restarted:"
    $pendingRestart | Format-Table -Property Name, RestartReason
}

#endregion

#region Node Maintenance Operations
# ============================================================================
# NODE MAINTENANCE MODE AND REBOOT
# ============================================================================

# Enable maintenance mode on a node (migrates VMs off)
# Enable-VergeNodeMaintenance -Name "node2"

# Preview what would happen (WhatIf)
Enable-VergeNodeMaintenance -Name "node2" -WhatIf

# Disable maintenance mode (allows VMs to run again)
# Disable-VergeNodeMaintenance -Name "node2"

# Preview disabling maintenance
Disable-VergeNodeMaintenance -Name "node2" -WhatIf

# Perform a maintenance reboot (safe reboot with VM migration)
# Restart-VergeNode -Name "node2"

# Preview maintenance reboot
Restart-VergeNode -Name "node2" -WhatIf

# Pipeline: Find and put specific nodes in maintenance
# Get-VergeNode -Cluster "Development" | Enable-VergeNodeMaintenance

# Maintenance workflow example
$nodeName = "node2"

# Check current state. The lookup is guarded so that a missing or misspelled
# node name produces a clear warning instead of a summary with blank fields.
$node = Get-VergeNode -Name $nodeName
if ($node) {
    Write-Host "Node: $($node.Name)"
    Write-Host " Status: $($node.Status)"
    Write-Host " Maintenance Mode: $($node.MaintenanceMode)"
    Write-Host " Running VMs on this node: Check via Get-VergeVM"
}
else {
    Write-Warning "Node '$nodeName' was not found. Verify the name with Get-VergeNode."
}

# To perform maintenance:
# 1. Enable maintenance mode (VMs will migrate)
# Enable-VergeNodeMaintenance -Name $nodeName

# 2. Wait for VMs to migrate off
# while ((Get-VergeNode -Name $nodeName).Status -ne 'Maintenance') {
# Start-Sleep -Seconds 10
# }

# 3. Perform reboot if needed
# Restart-VergeNode -Name $nodeName

# 4. When done, disable maintenance
# Disable-VergeNodeMaintenance -Name $nodeName

#endregion

#region System Statistics
# ============================================================================
# SYSTEM DASHBOARD AND STATISTICS
# ============================================================================

# Emit the raw system statistics object
Get-VergeSystemStatistics

# Quick health check: online/total counters for the main resource types
$stats = Get-VergeSystemStatistics
Write-Host -Object "`nSystem Health Overview"
Write-Host -Object "======================"
Write-Host -Object "VMs: $($stats.VMsOnline) running / $($stats.VMsTotal) total"
Write-Host -Object "Nodes: $($stats.NodesOnline) online / $($stats.NodesTotal) total"
Write-Host -Object "Networks: $($stats.NetworksOnline) online / $($stats.NetworksTotal) total"
Write-Host -Object "Tenants: $($stats.TenantsOnline) online / $($stats.TenantsTotal) total"
Write-Host -Object "Alarms: $($stats.AlarmsTotal) ($($stats.AlarmsWarning) warnings, $($stats.AlarmsError) errors)"

# Issue detection: every failed check contributes one message to the list
$stats = Get-VergeSystemStatistics
$issues = @(
    if ($stats.NodesOnline -lt $stats.NodesTotal) {
        "WARN: Not all nodes online ($($stats.NodesOnline)/$($stats.NodesTotal))"
    }
    if ($stats.ClustersOnline -lt $stats.ClustersTotal) {
        "WARN: Not all clusters online ($($stats.ClustersOnline)/$($stats.ClustersTotal))"
    }
    if ($stats.AlarmsError -gt 0) {
        "ERROR: $($stats.AlarmsError) error alarm(s) present"
    }
    if ($stats.ClusterTiersError -gt 0) {
        "ERROR: $($stats.ClusterTiersError) storage tier(s) in error state"
    }
)

if ($issues.Count -gt 0) {
    Write-Host -Object "`n[ATTENTION] Issues detected:" -ForegroundColor Yellow
    $issues | ForEach-Object { Write-Host -Object " - $_" }
}
else {
    Write-Host -Object "`n[OK] All systems healthy" -ForegroundColor Green
}

# Health report: a timestamped snapshot of the key counters, shown as a list
$stats = Get-VergeSystemStatistics
$snapshot = [ordered]@{
    Timestamp     = Get-Date
    Server        = $stats.Server
    VMsRunning    = $stats.VMsOnline
    VMsTotal      = $stats.VMsTotal
    NodesOnline   = $stats.NodesOnline
    NodesTotal    = $stats.NodesTotal
    StorageTiers  = $stats.StorageTiersTotal
    ActiveAlarms  = $stats.AlarmsTotal
    AlarmWarnings = $stats.AlarmsWarning
    AlarmErrors   = $stats.AlarmsError
}
[PSCustomObject]$snapshot | Format-List

#endregion

#region System Settings
# ============================================================================
# SYSTEM SETTINGS
# ============================================================================

# Tabulate every system setting alongside its default
Get-VergeSystemSetting | Format-Table -Property Key, Value, DefaultValue, IsModified

# Look up settings by exact key or by wildcard
Get-VergeSystemSetting -Key "cloud_name"
Get-VergeSystemSetting -Key "max*"

# Show only the settings whose IsModified flag is set
Get-VergeSystemSetting |
    Where-Object { $_.IsModified } |
    Format-Table -Property Key, Value, DefaultValue

# Keys worth reviewing on any system
$importantSettings = 'cloud_name', 'cloud_domain', 'ntp_servers', 'max_connections', 'default_tenant_network'

Write-Host -Object "`nImportant System Settings:"
Write-Host -Object "=========================="
foreach ($key in $importantSettings) {
    # Keys that return nothing are skipped silently
    $setting = Get-VergeSystemSetting -Key $key
    if ($setting) {
        Write-Host -Object "$($key): $($setting.Value)"
    }
}

# Network-related settings, then storage-related settings, matched by wildcard
foreach ($pattern in "*network*", "*mtu*", "*sync*", "*snap*") {
    Get-VergeSystemSetting -Key $pattern | Format-Table -Property Key, Value
}

#endregion

#region License Information
# ============================================================================
# LICENSE MANAGEMENT
# ============================================================================

# Get license information
Get-VergeLicense

# Check license validity (only the first license returned is inspected)
$license = Get-VergeLicense | Select-Object -First 1
if ($license) {
    Write-Host "`nLicense Information"
    Write-Host "==================="
    Write-Host "Name: $($license.Name)"
    Write-Host "Valid: $($license.IsValid)"
    Write-Host "Valid Until: $($license.ValidUntil)"
    Write-Host "Auto-Renew: $($license.AutoRenewal)"

    # Check days until expiration.
    # NOTE(review): the subtraction assumes ValidUntil is a [datetime] - confirm.
    if ($license.ValidUntil) {
        $timeRemaining = $license.ValidUntil - (Get-Date)
        $daysRemaining = $timeRemaining.Days

        if ($timeRemaining.Ticks -le 0) {
            # Already past the end date: report it as expired rather than
            # printing a negative "expires in" day count.
            Write-Warning "License expired on $($license.ValidUntil)!"
        }
        elseif ($daysRemaining -lt 30) {
            Write-Warning "License expires in $daysRemaining days!"
        }
        else {
            Write-Host "Days Until Expiration: $daysRemaining"
        }
    }
}

# List all licenses with expiration dates
Get-VergeLicense | Select-Object Name, IsValid, ValidFrom, ValidUntil, AutoRenewal | Format-Table

#endregion

#region Node Drivers
# ============================================================================
# NODE DRIVERS (GPU, NETWORK, ETC.)
# ============================================================================

# Enumerate every custom driver
Get-VergeNodeDriver

# Restrict the listing to one node
Get-VergeNodeDriver -Node "node1"

# Query each driver status in turn
foreach ($driverStatus in 'Installed', 'Verifying', 'Error') {
    Get-VergeNodeDriver -Status $driverStatus
}

# Match NVIDIA drivers by wildcard on the driver name
Get-VergeNodeDriver -DriverName "*nvidia*"

# Tabulate driver status across all nodes
Get-VergeNodeDriver |
    Format-Table -Property Node, DriverName, Status, StatusInfo

# Pipeline form: feed a node object into the driver query
Get-VergeNode -Name "node1" |
    Get-VergeNodeDriver

#endregion

#region Node Hardware Devices
# ============================================================================
# HARDWARE DEVICE DISCOVERY (PCI, USB, GPU)
# ============================================================================

# List all PCI devices
Get-VergeNodeDevice -DeviceType PCI | Format-Table Node, Name, Class, Vendor -AutoSize

# List all USB devices
Get-VergeNodeDevice -DeviceType USB | Format-Table Node, Name, Vendor, USBVersion

# List all GPUs (display controllers)
Get-VergeNodeDevice -DeviceType GPU | Format-Table Node, Name, Vendor

# Get devices for a specific node
Get-VergeNodeDevice -Node "node1" -DeviceType PCI | Format-Table Name, Class

# Filter by device class
Get-VergeNodeDevice -DeviceType PCI -DeviceClass "Network controller" |
    Format-Table Node, Name, Vendor, Driver

Get-VergeNodeDevice -DeviceType PCI -DeviceClass "Mass storage" |
    Format-Table Node, Name, Vendor, Driver

# Pipeline: Get GPU devices from specific nodes
Get-VergeNode -Name "node1" | Get-VergeNodeDevice -DeviceType GPU

# Find devices with SR-IOV support
Get-VergeNodeDevice -DeviceType PCI |
    Where-Object { $_.SRIOVTotalVFs -gt 0 } |
    Format-Table Node, Name, SRIOVTotalVFs, SRIOVNumVFs

# Hardware inventory report
Write-Host "`nHardware Summary"
Write-Host "================"
# @() forces each result into an array, so .Count is well-defined whether the
# cmdlet returns nothing, a single device, or many devices.
$pci = @(Get-VergeNodeDevice -DeviceType PCI)
$usb = @(Get-VergeNodeDevice -DeviceType USB)
$gpu = @(Get-VergeNodeDevice -DeviceType GPU)

Write-Host "PCI Devices: $($pci.Count)"
Write-Host "USB Devices: $($usb.Count)"
Write-Host "GPUs: $($gpu.Count)"

# Group PCI devices by class, most common class first.
# Reuses the PCI inventory collected for the summary instead of querying again.
Write-Host "`nPCI Devices by Class:"
$pci |
    Group-Object Class |
    Sort-Object Count -Descending |
    Format-Table @{N='Class';E={$_.Name}}, Count

#endregion

#region System Health Report
# ============================================================================
# COMPREHENSIVE SYSTEM HEALTH REPORT
# ============================================================================

function Get-VergeSystemHealthReport {
    <#
    .SYNOPSIS
        Generates a comprehensive system health report.

    .DESCRIPTION
        Queries Get-VergeVersion, Get-VergeCluster, Get-VergeNode,
        Get-VergeSystemStatistics and Get-VergeLicense, and condenses the
        results into a single summary object.

    .OUTPUTS
        PSCustomObject with the properties Version, Server, Clusters,
        ClusterStatus, Nodes, NodesNeedingRestart, VMs, Networks, Tenants and
        Alarms, in that order. LicenseValid and LicenseExpires are appended
        only when Get-VergeLicense returns at least one license.

    .EXAMPLE
        Get-VergeSystemHealthReport | Format-List
    #>

    # Ordered so the report properties appear in insertion order
    $report = [ordered]@{}

    # Version info
    $version = Get-VergeVersion
    $report['Version'] = $version.VergeOSVersion
    $report['Server'] = $version.Server

    # Cluster health
    # @() forces an array so .Count is well-defined for zero, one, or many results
    $clusters = @(Get-VergeCluster)
    $report['Clusters'] = "$($clusters.Count) total"
    $report['ClusterStatus'] = ($clusters | ForEach-Object { "$($_.Name): $($_.Status)" }) -join ', '

    # Node health: a node counts as online when its Status is 'Running'
    $nodes = @(Get-VergeNode)
    $onlineNodes = @($nodes | Where-Object Status -eq 'Running').Count
    $report['Nodes'] = "$onlineNodes/$($nodes.Count) online"
    $needRestart = @($nodes | Where-Object NeedsRestart).Count
    $report['NodesNeedingRestart'] = $needRestart

    # Statistics
    $stats = Get-VergeSystemStatistics
    $report['VMs'] = "$($stats.VMsOnline)/$($stats.VMsTotal) running"
    $report['Networks'] = "$($stats.NetworksOnline)/$($stats.NetworksTotal) online"
    $report['Tenants'] = "$($stats.TenantsOnline)/$($stats.TenantsTotal) online"
    $report['Alarms'] = "$($stats.AlarmsTotal) ($($stats.AlarmsError) errors)"

    # License (only the first license returned is considered)
    $license = Get-VergeLicense | Select-Object -First 1
    if ($license) {
        $report['LicenseValid'] = $license.IsValid

        # Append the "days" unit only when a day count exists, so a missing end
        # date reads "N/A" rather than "N/A days". A negative count means the
        # end date has already passed.
        # NOTE(review): the subtraction assumes ValidUntil is a [datetime] - confirm.
        $report['LicenseExpires'] = if ($license.ValidUntil) {
            "$(($license.ValidUntil - (Get-Date)).Days) days"
        }
        else {
            'N/A'
        }
    }

    # Output
    [PSCustomObject]$report
}

# Generate the report
Get-VergeSystemHealthReport | Format-List

# Export health report to file
# Get-VergeSystemHealthReport | Export-Csv "health-report-$(Get-Date -Format 'yyyyMMdd').csv" -NoTypeInformation

#endregion