# VBAF.RL.QTable.ps1

#Requires -Version 5.1

<#
.SYNOPSIS
    Q-Table for storing state-action values
.DESCRIPTION
    Hash table-based storage for Q-Learning.
    Maps (state, action) pairs to Q-values.
.NOTES
    Part of VBAF (Visual Business Automation Framework)
#>


class QTable {
    # Properties
    [hashtable]$Table           # Stores Q-values
    [double]$DefaultValue       # Initial Q-value for new state-action pairs
    [int]$AccessCount           # Statistics: total lookups
    [int]$UpdateCount           # Statistics: total updates
    
    # Constructor
    QTable([double]$defaultValue) {
        $this.Table = @{}
        $this.DefaultValue = $defaultValue
        $this.AccessCount = 0
        $this.UpdateCount = 0
    }
    
    # Default constructor (Q-values start at 0)
    QTable() {
        $this.Table = @{}
        $this.DefaultValue = 0.0
        $this.AccessCount = 0
        $this.UpdateCount = 0
    }
    
    # Create key from state and action
    hidden [string] MakeKey([string]$state, [string]$action) {
        return "$state|$action"
    }
    
    # Get Q-value for (state, action)
    [double] Get([string]$state, [string]$action) {
        $key = $this.MakeKey($state, $action)
        $this.AccessCount++
        
        if ($this.Table.ContainsKey($key)) {
            return $this.Table[$key]
        } else {
            # First time seeing this state-action: return default
            return $this.DefaultValue
        }
    }
    
    # Set Q-value for (state, action)
    [void] Set([string]$state, [string]$action, [double]$value) {
        $key = $this.MakeKey($state, $action)
        $this.Table[$key] = $value
        $this.UpdateCount++
    }
    
    # Update Q-value using Q-Learning formula
    # Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
    [void] Update([string]$state, [string]$action, [double]$reward, 
                  [string]$nextState, [string[]]$possibleActions, 
                  [double]$alpha, [double]$gamma) {
        
        # Current Q-value
        $currentQ = $this.Get($state, $action)
        
        # Maximum Q-value in next state
        $maxNextQ = $this.DefaultValue
        if ($possibleActions.Count -gt 0) {
            foreach ($nextAction in $possibleActions) {
                $nextQ = $this.Get($nextState, $nextAction)
                if ($nextQ -gt $maxNextQ) {
                    $maxNextQ = $nextQ
                }
            }
        }
        
        # Q-Learning update
        $newQ = $currentQ + $alpha * ($reward + $gamma * $maxNextQ - $currentQ)
        
        # Store updated value
        $this.Set($state, $action, $newQ)
    }
    
    # Get best action for a state (highest Q-value)
    [string] GetBestAction([string]$state, [string[]]$possibleActions) {
        if ($possibleActions.Count -eq 0) {
            throw "No possible actions provided"
        }
        
        $bestAction = $possibleActions[0]
        $bestQ = $this.Get($state, $bestAction)
        
        for ($i = 1; $i -lt $possibleActions.Count; $i++) {
            $action = $possibleActions[$i]
            $q = $this.Get($state, $action)
            
            if ($q -gt $bestQ) {
                $bestQ = $q
                $bestAction = $action
            }
        }
        
        return $bestAction
    }
    
    # Get all Q-values for a state
    [hashtable] GetStateValues([string]$state, [string[]]$possibleActions) {
        $values = @{}
        
        foreach ($action in $possibleActions) {
            $values[$action] = $this.Get($state, $action)
        }
        
        return $values
    }
    
    # Export Q-table (save learned knowledge)
    [hashtable] ExportTable() {
        return @{
            Table = $this.Table
            DefaultValue = $this.DefaultValue
            AccessCount = $this.AccessCount
            UpdateCount = $this.UpdateCount
        }
    }
    
    # Import Q-table (load learned knowledge)
    [void] ImportTable([hashtable]$data) {
        $this.Table = $data.Table
        $this.DefaultValue = $data.DefaultValue
        $this.AccessCount = $data.AccessCount
        $this.UpdateCount = $data.UpdateCount
    }
    
    # Get statistics
    [hashtable] GetStats() {
        return @{
            TotalEntries = $this.Table.Count
            AccessCount = $this.AccessCount
            UpdateCount = $this.UpdateCount
            DefaultValue = $this.DefaultValue
        }
    }
    
    # Clear all learned values
    [void] Reset() {
        $this.Table.Clear()
        $this.AccessCount = 0
        $this.UpdateCount = 0
    }
}