#Requires -Version 5.1
<#
.SYNOPSIS
    Q-Learning Agent for Reinforcement Learning.

.DESCRIPTION
    Implements the tabular Q-Learning algorithm with epsilon-greedy
    exploration. Q-values are stored in a flat hashtable keyed by the
    string "state|action" (no separate QTable class needed).

.NOTES
    Part of VBAF - Reinforcement Learning Module.
    File: VBAF.RL.QLearningAgent.ps1
    COMPLETE VERSION - All methods included.
#>

# NOTE(review): machine-specific hard-coded path — consider $PSScriptRoot so the
# module loads on any machine. Left as-is to preserve current load behavior.
$basePath = "C:\Users\henni\OneDrive\WindowsPowerShell"

# Load dependencies
. "$basePath\VBAF.RL.QTable.ps1"
. "$basePath\VBAF.RL.ExperienceReplay.ps1"

class QLearningAgent {
    # --- Learning parameters ---
    [string[]]$Actions          # Available actions
    [hashtable]$QTable          # Learned Q-values, keyed "state|action"
    [double]$LearningRate       # Alpha (step size of the TD update)
    [double]$DiscountFactor     # Gamma (weight of future reward)
    [double]$Epsilon            # Current exploration probability
    [double]$EpsilonDecay       # Multiplicative decay applied per episode
    [double]$MinEpsilon         # Exploration floor

    # --- Statistics ---
    [int]$TotalSteps                                # Learn() calls
    [int]$TotalEpisodes                             # EndEpisode() calls
    [double]$TotalReward                            # Sum of all step rewards
    [System.Collections.ArrayList]$EpisodeRewards   # Per-episode reward history
    [int]$ExplorationCount                          # Random actions chosen
    [int]$ExploitationCount                         # Greedy actions chosen
    [double]$Alpha              # Alias for LearningRate (compatibility)
    [double]$Gamma              # Alias for DiscountFactor (compatibility)
    [int]$Episode               # Alias for TotalEpisodes (compatibility)
    [int]$MemorySize            # For compatibility with replay-based agents

    # Simplified constructor: sensible defaults (alpha=0.1, full exploration).
    QLearningAgent([string[]]$actions) {
        $this.Initialize($actions, 0.1, 1.0)
    }

    # Full constructor with custom learning rate and starting epsilon.
    QLearningAgent([string[]]$actions, [double]$learningRate, [double]$epsilon) {
        $this.Initialize($actions, $learningRate, $epsilon)
    }

    # Shared initialization for both constructors (removes duplicated setup).
    hidden [void] Initialize([string[]]$actions, [double]$learningRate, [double]$epsilon) {
        $this.Actions = $actions
        $this.QTable = @{}
        $this.LearningRate = $learningRate
        $this.DiscountFactor = 0.9      # Default gamma
        $this.Epsilon = $epsilon
        $this.EpsilonDecay = 0.995      # Decay 0.5% per episode
        $this.MinEpsilon = 0.01         # Always explore at least 1%
        $this.TotalSteps = 0
        $this.TotalEpisodes = 0
        $this.TotalReward = 0.0
        $this.EpisodeRewards = New-Object System.Collections.ArrayList
        $this.ExplorationCount = 0
        $this.ExploitationCount = 0
        $this.Alpha = $this.LearningRate
        $this.Gamma = $this.DiscountFactor
        $this.Episode = 0
        $this.MemorySize = 0
    }

    # Derive a state string from the context (castle-parade specific):
    # the last one or two recent castle types, joined with '|'.
    # Returns "START" when no history is available.
    [string] GetState([hashtable]$context) {
        # Guard: tolerate a context without a RecentTypes entry.
        if (-not $context.ContainsKey('RecentTypes') -or $context.RecentTypes.Count -eq 0) {
            return "START"
        }
        $recent = $context.RecentTypes
        if ($recent.Count -eq 1) {
            return $recent[-1]
        }
        return "$($recent[-2])|$($recent[-1])"
    }

    # Compute the scalar reward for an outcome:
    #   +2 for variety / -1 for repetition, plus weighted visual balance
    #   and engagement terms.
    [double] CalculateReward([hashtable]$outcome) {
        $reward = 0.0
        if ($outcome.IsVaried) {
            $reward += 2.0
        } else {
            $reward -= 1.0   # Penalty for repetition
        }
        $reward += $outcome.VisualBalance * 1.5
        $reward += $outcome.Engagement * 2.0
        return $reward
    }

    # Q(s,a) lookup; unseen pairs default to 0.0 (optimistic-neutral init).
    [double] GetQValue([string]$state, [string]$action) {
        $key = "$state|$action"
        if ($this.QTable.ContainsKey($key)) {
            return [double]$this.QTable[$key]
        }
        return 0.0
    }

    # Q(s,a) write.
    [void] SetQValue([string]$state, [string]$action, [double]$value) {
        $this.QTable["$state|$action"] = $value
    }

    # Greedy action: argmax over Q(state, a). Ties keep the earliest action,
    # so the first element of Actions wins when all values are equal.
    [string] GetBestAction([string]$state) {
        $bestAction = $this.Actions[0]
        $bestValue = $this.GetQValue($state, $bestAction)
        foreach ($action in $this.Actions) {
            $qValue = $this.GetQValue($state, $action)
            if ($qValue -gt $bestValue) {
                $bestValue = $qValue
                $bestAction = $action
            }
        }
        return $bestAction
    }

    # max over a of Q(state, a) — the bootstrap target for the TD update.
    [double] GetMaxQValue([string]$state) {
        $maxValue = $this.GetQValue($state, $this.Actions[0])
        foreach ($action in $this.Actions) {
            $qValue = $this.GetQValue($state, $action)
            if ($qValue -gt $maxValue) {
                $maxValue = $qValue
            }
        }
        return $maxValue
    }

    # All Q-values for a state, keyed by action (diagnostic/analysis helper).
    [hashtable] GetQValues([string]$state) {
        $values = @{}
        foreach ($action in $this.Actions) {
            $values[$action] = $this.GetQValue($state, $action)
        }
        return $values
    }

    # Epsilon-greedy action selection: with probability Epsilon pick a
    # uniformly random action (exploration), otherwise the greedy one.
    [string] ChooseAction([string]$state) {
        if ((Get-Random -Minimum 0.0 -Maximum 1.0) -lt $this.Epsilon) {
            $this.ExplorationCount++
            $randomIndex = Get-Random -Minimum 0 -Maximum $this.Actions.Count
            return $this.Actions[$randomIndex]
        }
        $this.ExploitationCount++
        return $this.GetBestAction($state)
    }

    # Q-Learning update rule:
    #   Q(s,a) <- Q(s,a) + alpha * [r + gamma * max_a' Q(s',a') - Q(s,a)]
    # Also accumulates step/reward statistics.
    [void] Learn([string]$state, [string]$action, [double]$reward, [string]$nextState) {
        $currentQ = $this.GetQValue($state, $action)
        $maxNextQ = $this.GetMaxQValue($nextState)
        $tdTarget = $reward + ($this.DiscountFactor * $maxNextQ)
        $tdError = $tdTarget - $currentQ
        $newQ = $currentQ + ($this.LearningRate * $tdError)
        $this.SetQValue($state, $action, $newQ)
        $this.TotalSteps++
        $this.TotalReward += $reward
    }

    # End an episode: record its reward and decay epsilon toward MinEpsilon.
    [void] EndEpisode([double]$episodeReward) {
        $this.TotalEpisodes++
        $this.Episode = $this.TotalEpisodes
        $this.EpisodeRewards.Add($episodeReward) | Out-Null
        $this.Epsilon *= $this.EpsilonDecay
        if ($this.Epsilon -lt $this.MinEpsilon) {
            $this.Epsilon = $this.MinEpsilon
        }
    }

    # Manual epsilon decay with a caller-supplied rate (clamped at MinEpsilon).
    [void] DecayEpsilon([double]$decayRate) {
        $this.Epsilon *= $decayRate
        if ($this.Epsilon -lt $this.MinEpsilon) {
            $this.Epsilon = $this.MinEpsilon
        }
    }

    # Reset all learned state and statistics to a fresh-agent baseline.
    # FIX: now also resets exploration/exploitation counters and the
    # Episode alias, which the previous version left stale.
    [void] Reset() {
        $this.QTable.Clear()
        $this.TotalSteps = 0
        $this.TotalEpisodes = 0
        $this.Episode = 0
        $this.TotalReward = 0.0
        $this.EpisodeRewards.Clear()
        $this.ExplorationCount = 0
        $this.ExploitationCount = 0
        $this.Epsilon = 1.0
    }

    # Snapshot of learning statistics.
    [hashtable] GetStats() {
        $avgReward = if ($this.TotalEpisodes -gt 0) {
            $this.TotalReward / $this.TotalEpisodes
        } else {
            0.0
        }

        # Recent average reward over (up to) the last 10 episodes.
        $recentAvg = 0.0
        if ($this.EpisodeRewards.Count -gt 0) {
            $recentCount = [Math]::Min(10, $this.EpisodeRewards.Count)
            $start = $this.EpisodeRewards.Count - $recentCount
            $recentSum = 0.0
            for ($i = $start; $i -lt $this.EpisodeRewards.Count; $i++) {
                $recentSum += [double]$this.EpisodeRewards[$i]
            }
            $recentAvg = $recentSum / $recentCount
        }

        # FIX: previously this returned Epsilon (already reported under its own
        # key). Report the observed fraction of random choices instead.
        $decisions = $this.ExplorationCount + $this.ExploitationCount
        $explorationRatio = if ($decisions -gt 0) {
            $this.ExplorationCount / [double]$decisions
        } else {
            1.0
        }

        return @{
            TotalSteps          = $this.TotalSteps
            TotalEpisodes       = $this.TotalEpisodes
            Episode             = $this.TotalEpisodes
            TotalReward         = $this.TotalReward
            QTableSize          = $this.QTable.Count
            Epsilon             = $this.Epsilon
            LearningRate        = $this.LearningRate
            AverageReward       = $avgReward
            RecentAverageReward = $recentAvg
            ExplorationRatio    = $explorationRatio
            ExplorationCount    = $this.ExplorationCount
            ExploitationCount   = $this.ExploitationCount
            MemorySize          = $this.MemorySize
        }
    }

    # Alias kept for compatibility with callers expecting GetStatistics().
    [hashtable] GetStatistics() {
        return $this.GetStats()
    }
}