# VBAF.RL.Example-CastleLearning.ps1

#Requires -Version 5.1

<#
.SYNOPSIS
    Q-Learning Castle Agent - Training Demo
.DESCRIPTION
    Demonstrates Q-Learning agent learning to generate castles.
    Shows exploration/exploitation balance and reward improvement.
#>


# Directory containing this script; the dependency scripts are expected next to it.
$basePath = $PSScriptRoot

# Load dependencies (order preserved: Q-table, experience replay, then the agent).
# Join-Path uses the platform's path separator instead of a hard-coded '\'.
. (Join-Path $basePath 'VBAF.RL.QTable.ps1')
. (Join-Path $basePath 'VBAF.RL.ExperienceReplay.ps1')
. (Join-Path $basePath 'VBAF.RL.QLearningAgent.ps1')

# Title banner. The side bars are plain ASCII '|' (the original glyphs were lost
# to an encoding mismatch), and the title is padded so the right-hand bar lines
# up with the rule: rule width minus the 4 characters of "| " and " |".
$bannerRule  = "+----------------------------------------------+"
$bannerTitle = "Q-LEARNING CASTLE AGENT - TRAINING DEMO"
$bannerLine  = "| " + $bannerTitle.PadRight($bannerRule.Length - 4) + " |"

Write-Host "`n$bannerRule" -ForegroundColor Cyan
Write-Host $bannerLine -ForegroundColor Cyan
Write-Host $bannerRule -ForegroundColor Cyan

# Castle types the agent chooses between (its action set for this demo).
$castleTypes = @(
    "Gothic", "FairyTale", "Fortress", "Palace",
    "Wizard", "Cathedral", "Oriental", "Ruins"
)

Write-Host "`nAvailable Castle Types:" -ForegroundColor Yellow
foreach ($type in $castleTypes) {
    # ASCII bullet: the original bullet glyph was corrupted into U+FFFD, and a
    # plain '-' renders correctly regardless of the file or console encoding.
    Write-Host " - $type"
}

# --- Agent construction -------------------------------------------------------
Write-Host "`nCreating Q-Learning Agent..." -ForegroundColor Yellow

# The unary comma wraps the type list so New-Object hands it to the constructor
# as ONE array argument instead of spreading it into eight separate arguments.
$agent = New-Object -TypeName QLearningAgent -ArgumentList (,$castleTypes)

# Echo the hyperparameters the agent reports for itself.
$hyperParameterLines = @(
    " Alpha (learning rate): $($agent.Alpha)",
    " Gamma (discount): $($agent.Gamma)",
    " Epsilon (exploration): $($agent.Epsilon)"
)
$hyperParameterLines | ForEach-Object { Write-Host $_ }

# --- Training schedule --------------------------------------------------------
$episodes = 100
$stepsPerEpisode = 10
$totalInteractions = $episodes * $stepsPerEpisode

Write-Host "`nTraining Configuration:" -ForegroundColor Yellow
Write-Host " Episodes: $episodes"
Write-Host " Steps per episode: $stepsPerEpisode"
Write-Host " Total interactions: $totalInteractions"

# History of recently generated castle types; the training loop appends to it
# and trims it from the front once it grows past five entries.
$recentCastles = New-Object System.Collections.ArrayList

# Build the rule as a single string first. Writing
#   Write-Host "`n" + ("-" * 60)
# is parsed in argument mode, so Write-Host receives three separate arguments
# ("`n", "+", and the rule) and prints a stray " + " in front of the dashes.
$trainingRule = "-" * 60

Write-Host "`n$trainingRule" -ForegroundColor Cyan
Write-Host "TRAINING IN PROGRESS" -ForegroundColor Cyan
Write-Host $trainingRule -ForegroundColor Cyan
Write-Host ""
# Main training loop: each episode runs a fixed number of steps in which the
# agent observes the recent-castle history, picks a castle type, is scored on a
# simulated outcome, and learns from the transition.
for ($episodeNo = 1; $episodeNo -le $episodes; $episodeNo++) {
    $rewardSum = 0.0

    for ($stepNo = 1; $stepNo -le $stepsPerEpisode; $stepNo++) {
        # State before acting, derived from the current history.
        $stateBefore = $agent.GetState(@{ RecentTypes = $recentCastles })

        $chosenType = $agent.ChooseAction($stateBefore)

        # A pick counts as varied unless it repeats the most recent castle.
        $isVaried = $true
        if (($recentCastles.Count -gt 0) -and ($recentCastles[-1] -eq $chosenType)) {
            $isVaried = $false
        }

        # Simulated feedback signals: random values drawn from [0.0, 1.0).
        $visualBalance = Get-Random -Minimum 0.0 -Maximum 1.0
        $engagement = Get-Random -Minimum 0.0 -Maximum 1.0

        $outcome = @{
            CastleType = $chosenType
            IsVaried = $isVaried
            VisualBalance = $visualBalance
            Engagement = $engagement
        }

        $stepReward = $agent.CalculateReward($outcome)
        $rewardSum += $stepReward

        # Append to the history (Add returns the new index; discard it) and
        # drop the oldest entry once more than five are held.
        [void]$recentCastles.Add($chosenType)
        if ($recentCastles.Count -gt 5) {
            $recentCastles.RemoveAt(0)
        }

        # State after acting, derived from the updated history.
        $stateAfter = $agent.GetState(@{ RecentTypes = $recentCastles })

        $agent.Learn($stateBefore, $chosenType, $stepReward, $stateAfter)
    }

    $agent.EndEpisode($rewardSum)

    # Progress line on the first episode, every tenth episode, and the last one.
    $isTenthEpisode = ($episodeNo % 10) -eq 0
    $isFirstEpisode = $episodeNo -eq 1
    $isLastEpisode = $episodeNo -eq $episodes

    if ($isTenthEpisode -or $isFirstEpisode -or $isLastEpisode) {
        $snapshot = $agent.GetStats()
        $exploitShare = (1.0 - $snapshot.ExplorationRatio) * 100

        $progressLine = "Episode {0,3} | Reward: {1,6:F2} | Epsilon: {2:F3} | Exploit: {3,5:F1}% | Q-Table: {4,3} entries" -f `
            $episodeNo, $rewardSum, $snapshot.Epsilon, $exploitShare, $snapshot.QTableSize
        Write-Host $progressLine
    }
}

# ASCII status marker: the original glyph was corrupted into a bare '?', which
# made the message read like a question.
Write-Host "`n[OK] Training complete!" -ForegroundColor Green

# Build the rule as one string. Passing "`n" + ("-" * 60) directly to Write-Host
# is parsed in argument mode and prints a literal " + " before the dashes.
$resultsRule = "-" * 60

Write-Host "`n$resultsRule" -ForegroundColor Cyan
Write-Host "FINAL RESULTS" -ForegroundColor Cyan
Write-Host $resultsRule -ForegroundColor Cyan

# Snapshot of the agent's statistics after training; reused by the summary below.
$finalStats = $agent.GetStats()

Write-Host "`nLearning Progress:" -ForegroundColor Yellow
Write-Host " Total Episodes: $($finalStats.Episode)"
Write-Host " Total Reward: $($finalStats.TotalReward.ToString('F2'))"
Write-Host " Average Reward: $($finalStats.AverageReward.ToString('F2'))"
Write-Host " Recent Average (last 10): $($finalStats.RecentAverageReward.ToString('F2'))"

Write-Host "`nExploration vs Exploitation:" -ForegroundColor Yellow
Write-Host " Explorations: $($finalStats.ExplorationCount)"
Write-Host " Exploitations: $($finalStats.ExploitationCount)"
Write-Host " Final Epsilon: $($finalStats.Epsilon.ToString('F3'))"

Write-Host "`nKnowledge Base:" -ForegroundColor Yellow
Write-Host " Q-Table Entries: $($finalStats.QTableSize)"
Write-Host " Experiences Stored: $($finalStats.MemorySize)"

# Show the learned Q-values per state, for the state keys this demo probes.
Write-Host "`nLearned Q-Values BY STATE:" -ForegroundColor Yellow
Write-Host "(State = number of recent castles shown)" -ForegroundColor Gray

# State numbers for which at least one non-zero Q-value was found.
$statesFound = @()

# Probe state keys "0" through "5"; the upper bound matches the five-entry cap
# the training loop puts on the recent-castle history.
for ($stateNum = 0; $stateNum -le 5; $stateNum++) {
    $qValues = $agent.GetQValues("$stateNum")
    if ($null -eq $qValues) { continue }

    # Keep only actions whose Q-value moved away from zero, best first.
    $learned = @($qValues.GetEnumerator() |
        Where-Object { $_.Value -ne 0 } |
        Sort-Object Value -Descending)
    if ($learned.Count -eq 0) { continue }

    $statesFound += $stateNum
    Write-Host "`n State '$stateNum':" -ForegroundColor Cyan

    foreach ($item in $learned) {
        # Zero values were filtered out above, so the sign alone picks the color.
        $color = if ($item.Value -gt 0) { "Green" } else { "Red" }

        Write-Host (" {0,-15} {1,8:F4}" -f $item.Key, $item.Value) -ForegroundColor $color
    }
}

# ASCII status markers replace glyphs that were corrupted into '?'.
if ($statesFound.Count -eq 0) {
    Write-Host "`n [!!] No learning detected in any state!" -ForegroundColor Red
    Write-Host " This suggests the Q-Learning update isn't working." -ForegroundColor Red
} else {
    Write-Host "`n[OK] Learning detected in states: $($statesFound -join ', ')" -ForegroundColor Green
}

# Verdict: compare the recent average reward against the overall average.
# ASCII markers replace the glyphs that were corrupted into '??' and '?'.
if ($finalStats.RecentAverageReward -gt $finalStats.AverageReward) {
    Write-Host "`n[++] Agent is IMPROVING! Recent rewards higher than average!" -ForegroundColor Green
} else {
    Write-Host "`n[--] Agent performance stable" -ForegroundColor Yellow
}

Write-Host ""