# VBAF.RL.Example-CastleLearning.ps1
#Requires -Version 5.1
<#
.SYNOPSIS
    Q-Learning Castle Agent - Training Demo

.DESCRIPTION
    Demonstrates Q-Learning agent learning to generate castles.
    Shows exploration/exploitation balance and reward improvement.

    The script dot-sources the Q-table, experience-replay and agent scripts,
    creates a QLearningAgent over a fixed list of castle types, runs a
    training loop with a simulated outcome per step, and prints progress,
    final statistics and the learned Q-values for states "0" through "5".

.NOTES
    The outcome fed to the agent is simulated: VisualBalance and Engagement
    are drawn with Get-Random from [0.0, 1.0), so runs are not reproducible.
#>

# Folder holding the dependency scripts. The original hard-coded location is
# still preferred; the script's own folder is only used when that location
# does not exist.
# NOTE(review): the fallback assumes the dependency scripts sit next to this
# script - confirm against the repository layout.
$basePath = "C:\Users\henni\OneDrive\WindowsPowerShell"
if ((-not (Test-Path -LiteralPath $basePath -PathType Container)) -and $PSScriptRoot) {
    $basePath = $PSScriptRoot
}

# Load dependencies
. "$basePath\VBAF.RL.QTable.ps1"
. "$basePath\VBAF.RL.ExperienceReplay.ps1"
. "$basePath\VBAF.RL.QLearningAgent.ps1"

Write-Host "`n╔══════════════════════════════════════════════╗" -ForegroundColor Cyan
Write-Host "║ Q-LEARNING CASTLE AGENT - TRAINING DEMO ║" -ForegroundColor Cyan
Write-Host "╚══════════════════════════════════════════════╝" -ForegroundColor Cyan

# The action space: one action per castle type.
$castleTypes = @(
    "Gothic", "FairyTale", "Fortress", "Palace",
    "Wizard", "Cathedral", "Oriental", "Ruins"
)

Write-Host "`nAvailable Castle Types:" -ForegroundColor Yellow
foreach ($type in $castleTypes) {
    Write-Host " • $type"
}

Write-Host "`nCreating Q-Learning Agent..." -ForegroundColor Yellow
# @(,$castleTypes) wraps the array so it is passed as ONE constructor
# argument instead of being spread into eight separate arguments.
$agent = New-Object QLearningAgent -ArgumentList @(,$castleTypes)
Write-Host " Alpha (learning rate): $($agent.Alpha)"
Write-Host " Gamma (discount): $($agent.Gamma)"
Write-Host " Epsilon (exploration): $($agent.Epsilon)"

$episodes = 100
$stepsPerEpisode = 10

Write-Host "`nTraining Configuration:" -ForegroundColor Yellow
Write-Host " Episodes: $episodes"
Write-Host " Steps per episode: $stepsPerEpisode"
Write-Host " Total interactions: $($episodes * $stepsPerEpisode)"

# Sliding window of the most recently chosen castle types (capped at 5 below).
$recentCastles = New-Object System.Collections.ArrayList

# The concatenation must be parenthesised: in argument mode a bare `+` is
# passed to Write-Host as a literal argument and printed as " + ".
Write-Host ("`n" + ("═" * 60)) -ForegroundColor Cyan
Write-Host "TRAINING IN PROGRESS" -ForegroundColor Cyan
Write-Host ("═" * 60) -ForegroundColor Cyan
Write-Host ""

$progressFormat = "Episode {0,3} | Reward: {1,6:F2} | Epsilon: {2:F3} | Exploit: {3,5:F1}% | Q-Table: {4,3} entries"

for ($ep = 1; $ep -le $episodes; $ep++) {
    $episodeReward = 0.0

    for ($step = 1; $step -le $stepsPerEpisode; $step++) {
        # Observe the current state and let the agent pick a castle type.
        $context = @{ RecentTypes = $recentCastles }
        $state = $agent.GetState($context)
        $action = $agent.ChooseAction($state)

        # "Varied" means the choice differs from the immediately preceding one
        # (or there is no preceding one yet).
        $isVaried = ($recentCastles.Count -eq 0) -or ($recentCastles[-1] -ne $action)

        # Simulated quality signals, uniformly random in [0.0, 1.0).
        $visualBalance = Get-Random -Minimum 0.0 -Maximum 1.0
        $engagement = Get-Random -Minimum 0.0 -Maximum 1.0

        $outcome = @{
            CastleType    = $action
            IsVaried      = $isVaried
            VisualBalance = $visualBalance
            Engagement    = $engagement
        }

        $reward = $agent.CalculateReward($outcome)
        $episodeReward += $reward

        # ArrayList.Add returns the new index; discard it to keep output clean.
        $recentCastles.Add($action) | Out-Null
        if ($recentCastles.Count -gt 5) {
            $recentCastles.RemoveAt(0)
        }

        # The next state is derived from the history AFTER recording the action.
        $nextContext = @{ RecentTypes = $recentCastles }
        $nextState = $agent.GetState($nextContext)

        $agent.Learn($state, $action, $reward, $nextState)
    }

    $agent.EndEpisode($episodeReward)

    # Report on the first episode, every tenth episode, and the last one.
    if ($ep % 10 -eq 0 -or $ep -eq 1 -or $ep -eq $episodes) {
        $stats = $agent.GetStats()
        $exploitPct = (1.0 - $stats.ExplorationRatio) * 100
        Write-Host ($progressFormat -f $ep, $episodeReward, $stats.Epsilon, $exploitPct, $stats.QTableSize)
    }
}

Write-Host "`n✓ Training complete!" -ForegroundColor Green

Write-Host ("`n" + ("═" * 60)) -ForegroundColor Cyan
Write-Host "FINAL RESULTS" -ForegroundColor Cyan
Write-Host ("═" * 60) -ForegroundColor Cyan

$finalStats = $agent.GetStats()

Write-Host "`nLearning Progress:" -ForegroundColor Yellow
Write-Host " Total Episodes: $($finalStats.Episode)"
Write-Host " Total Reward: $($finalStats.TotalReward.ToString('F2'))"
Write-Host " Average Reward: $($finalStats.AverageReward.ToString('F2'))"
Write-Host " Recent Average (last 10): $($finalStats.RecentAverageReward.ToString('F2'))"

Write-Host "`nExploration vs Exploitation:" -ForegroundColor Yellow
Write-Host " Explorations: $($finalStats.ExplorationCount)"
Write-Host " Exploitations: $($finalStats.ExploitationCount)"
Write-Host " Final Epsilon: $($finalStats.Epsilon.ToString('F3'))"

Write-Host "`nKnowledge Base:" -ForegroundColor Yellow
Write-Host " Q-Table Entries: $($finalStats.QTableSize)"
Write-Host " Experiences Stored: $($finalStats.MemorySize)"

# FIX 1: Show Q-values for ACTUAL states used
Write-Host "`nLearned Q-Values BY STATE:" -ForegroundColor Yellow
Write-Host "(State = number of recent castles shown)" -ForegroundColor Gray

# States "0".."5" are probed because the history window holds at most 5 items.
# NOTE(review): presumably GetState returns the history count as the state
# key, as the message above says - confirm in VBAF.RL.QLearningAgent.ps1.
$statesFound = @()
for ($stateNum = 0; $stateNum -le 5; $stateNum++) {
    $qValues = $agent.GetQValues("$stateNum")

    # Check if this state has any non-zero values
    $hasLearning = $false
    foreach ($val in $qValues.Values) {
        if ($val -ne 0) {
            $hasLearning = $true
            break
        }
    }

    if ($hasLearning) {
        $statesFound += $stateNum
        Write-Host "`n State '$stateNum':" -ForegroundColor Cyan

        # Best action first; zero-valued actions are skipped.
        $sorted = $qValues.GetEnumerator() | Sort-Object Value -Descending
        foreach ($item in $sorted) {
            if ($item.Value -ne 0) {
                $color = if ($item.Value -gt 0) { "Green" } elseif ($item.Value -lt 0) { "Red" } else { "Gray" }
                Write-Host (" {0,-15} {1,8:F4}" -f $item.Key, $item.Value) -ForegroundColor $color
            }
        }
    }
}

if ($statesFound.Count -eq 0) {
    Write-Host "`n ⚠ No learning detected in any state!" -ForegroundColor Red
    Write-Host " This suggests the Q-Learning update isn't working." -ForegroundColor Red
} else {
    Write-Host "`n✓ Learning detected in states: $($statesFound -join ', ')" -ForegroundColor Green
}

if ($finalStats.RecentAverageReward -gt $finalStats.AverageReward) {
    Write-Host "`n🎉 Agent is IMPROVING! Recent rewards higher than average!" -ForegroundColor Green
} else {
    Write-Host "`n⚠ Agent performance stable" -ForegroundColor Yellow
}

Write-Host ""