# VBAF.Public.New-Agent.ps1

function New-VBAFAgent {
    <#
    .SYNOPSIS
        Creates a new reinforcement learning agent.
    
    .DESCRIPTION
        Creates a Q-Learning agent that can learn from interaction with an environment.
        The agent uses epsilon-greedy exploration and can optionally use experience replay.
        
        The function constructs a QLearningAgent from the actions, learning rate and
        initial epsilon, then assigns the remaining hyper-parameters as properties.
        The QLearningAgent and ExperienceReplay types must already be loaded in the
        session when this function runs.
        
        Perfect for:
        - Game playing (grid worlds, puzzles)
        - Resource optimization
        - Sequential decision making
        - Multi-agent simulations
    
    .PARAMETER Actions
        Array of available actions the agent can take (1 to 1000 entries).
        Example: @("up", "down", "left", "right")
    
    .PARAMETER LearningRate
        Learning rate (alpha) for Q-value updates.
        Accepted range: 0.001 to 1.0
        Typical values: 0.05 to 0.3
        Default: 0.1
        
        Higher = learns faster but less stable
        Lower = learns slower but more stable
    
    .PARAMETER DiscountFactor
        Discount factor (gamma) for future rewards.
        Range: 0.0 to 1.0
        Default: 0.9
        
        0.0 = only immediate rewards matter
        1.0 = future rewards as important as immediate
    
    .PARAMETER Epsilon
        Initial exploration rate (epsilon).
        Range: 0.0 to 1.0
        Default: 1.0 (full exploration)
        
        Probability of taking random action vs. best known action.
        Decays over time as agent learns.
    
    .PARAMETER EpsilonDecay
        Epsilon decay rate per episode.
        Range: 0.9 to 0.9999
        Default: 0.995
        
        epsilon *= decay after each episode
    
    .PARAMETER MinEpsilon
        Minimum epsilon value.
        Range: 0.0 to 0.2
        Default: 0.01
        
        Agent always explores at least this much.
    
    .PARAMETER UseExperienceReplay
        Enable experience replay memory.
        Improves learning stability.
    
    .PARAMETER MemorySize
        Size of experience replay buffer.
        Accepted range: 10 to 100000
        Only used if UseExperienceReplay is true.
        Default: 1000
    
    .EXAMPLE
        # Simple grid world agent
        $agent = New-VBAFAgent -Actions @("up", "down", "left", "right")
        
        # Agent decides action
        $action = $agent.ChooseAction($currentState)
        
        # Agent learns from outcome
        $agent.Learn($state, $action, $reward, $nextState)
    
    .EXAMPLE
        # Castle generation agent with experience replay
        $castleTypes = @("Gothic", "FairyTale", "Cathedral", "Wizard", "Palace", "Oriental", "Fortress", "Ruins")
        $agent = New-VBAFAgent -Actions $castleTypes -LearningRate 0.15 -UseExperienceReplay -MemorySize 500
    
    .EXAMPLE
        # Conservative learning agent (low learning rate, slow epsilon decay)
        $agent = New-VBAFAgent -Actions @("buy", "sell", "hold") -LearningRate 0.05 -EpsilonDecay 0.999 -MinEpsilon 0.05
    
    .OUTPUTS
        QLearningAgent object with methods:
        - ChooseAction($state) - Select action using epsilon-greedy
        - Learn($state, $action, $reward, $nextState) - Update Q-values
        - GetQValue($state, $action) - Get Q-value for state-action pair
        - GetBestAction($state) - Get best known action for state
        - EndEpisode($episodeReward) - Decay epsilon
        - GetStats() - Get agent statistics
    
    .NOTES
        Author: Henning
        Part of VBAF Module
        
        Any failure while constructing or configuring the agent is reported with
        Write-Error and then rethrown to the caller.
        
    .LINK
        Train-VBAFAgent
        Get-VBAFAgentStats
    #>

    
    [CmdletBinding()]
    # The type is named as a string so the attribute does not need the
    # QLearningAgent class to be resolvable when this function is defined.
    [OutputType('QLearningAgent')]
    param(
        [Parameter(Mandatory = $true, Position = 0)]
        [ValidateNotNullOrEmpty()]
        [ValidateCount(1, 1000)]
        [string[]]$Actions,
        
        [Parameter(Mandatory = $false)]
        [ValidateRange(0.001, 1.0)]
        [double]$LearningRate = 0.1,
        
        [Parameter(Mandatory = $false)]
        [ValidateRange(0.0, 1.0)]
        [double]$DiscountFactor = 0.9,
        
        [Parameter(Mandatory = $false)]
        [ValidateRange(0.0, 1.0)]
        [double]$Epsilon = 1.0,
        
        [Parameter(Mandatory = $false)]
        [ValidateRange(0.9, 0.9999)]
        [double]$EpsilonDecay = 0.995,
        
        [Parameter(Mandatory = $false)]
        [ValidateRange(0.0, 0.2)]
        [double]$MinEpsilon = 0.01,
        
        [Parameter(Mandatory = $false)]
        [switch]$UseExperienceReplay,
        
        [Parameter(Mandatory = $false)]
        [ValidateRange(10, 100000)]
        [int]$MemorySize = 1000
    )
    
    begin {
        Write-Verbose "Creating Q-Learning agent"
        Write-Verbose " Actions: $($Actions.Count) available"
        Write-Verbose " Learning rate: $LearningRate"
        Write-Verbose " Discount factor: $DiscountFactor"
        Write-Verbose " Initial epsilon: $Epsilon"
    }
    
    process {
        try {
            # Constructor arguments: the action list is passed as ONE element
            # (a string[]), followed by the learning rate and initial epsilon.
            # A dedicated local is used instead of the automatic variable $args.
            $constructorArgs = @($Actions, $LearningRate, $Epsilon)
            $agent = New-Object QLearningAgent -ArgumentList $constructorArgs
            
            # Set additional parameters.
            # NOTE(review): DiscountFactor and Gamma both receive the same value;
            # presumably two names for the same setting on QLearningAgent - confirm
            # against the class definition.
            $agent.DiscountFactor = $DiscountFactor
            $agent.Gamma = $DiscountFactor
            $agent.EpsilonDecay = $EpsilonDecay
            $agent.MinEpsilon = $MinEpsilon
            
            Write-Verbose "✓ Q-Learning agent created"
            
            # Create experience replay if requested
            if ($UseExperienceReplay) {
                $agent.ExperienceReplay = New-Object ExperienceReplay -ArgumentList $MemorySize
                $agent.MemorySize = $MemorySize
                Write-Verbose "✓ Experience replay enabled (size: $MemorySize)"
            }
            
            Write-Verbose "Agent ready for learning!"
            
            return $agent
            
        } catch {
            # Report the failure with context, then rethrow the original error
            # so callers can still catch it.
            Write-Error "Failed to create agent: $_"
            throw
        }
    }
    
    end {
        Write-Verbose "New-VBAFAgent completed"
    }
}