VBAF.Business.CompanyAgent.ps1
|
#Requires -Version 5.1
# VBAF.Business.CompanyAgent.ps1
<#
.SYNOPSIS
    Intelligent company agent with reinforcement learning
.DESCRIPTION
    Company that learns optimal business strategies through
    Q-Learning and experience replay.
.NOTES
    Part of VBAF Phase 2 - Business Applications
    Dependencies: QLearningAgent, ExperienceReplay, CompanyState, BusinessAction
    These must be loaded BEFORE this file is dot-sourced
#>

class CompanyAgent {
    # Identity
    [string]$Name
    [string]$Industry

    # State
    [CompanyState]$State
    [CompanyState]$PreviousState

    # Learning
    [QLearningAgent]$Brain
    [ExperienceReplay]$Memory
    [double]$TotalReward
    [int]$Episode

    # Actions
    [BusinessAction[]]$AvailableActions
    [BusinessAction]$LastAction

    # Performance tracking
    [System.Collections.ArrayList]$RewardHistory
    [System.Collections.ArrayList]$ProfitHistory
    [System.Collections.ArrayList]$MarketShareHistory

    # Constructor.
    #   name            - display name of the company
    #   industry        - industry label (only used for display)
    #   startingCapital - passed to the CompanyState constructor
    CompanyAgent([string]$name, [string]$industry, [double]$startingCapital) {
        $this.Name = $name
        $this.Industry = $industry
        $this.State = New-Object CompanyState -ArgumentList $startingCapital
        $this.Episode = 0
        $this.TotalReward = 0.0

        # Initialize available actions
        $this.AvailableActions = [BusinessAction]::GetAllActions()
        $actionNames = $this.AvailableActions | ForEach-Object { $_.ToString() }

        # Create Q-Learning brain
        # Learning rate: 0.1, Epsilon: 0.3 (30% exploration)
        $this.Brain = New-Object QLearningAgent -ArgumentList $actionNames, 0.1, 0.3

        # Experience replay memory (max 500 experiences)
        $this.Memory = New-Object ExperienceReplay -ArgumentList 500

        # Performance tracking
        $this.RewardHistory = New-Object System.Collections.ArrayList
        $this.ProfitHistory = New-Object System.Collections.ArrayList
        $this.MarketShareHistory = New-Object System.Collections.ArrayList
    }

    # Observe current state. Returns the live state object, not a copy.
    [CompanyState] ObserveState() {
        return $this.State
    }

    # Decide next action using Q-Learning.
    # Asks the brain for an action name for the current state string, maps it
    # back to a BusinessAction and remembers it in LastAction.
    [BusinessAction] DecideAction() {
        # Get state representation
        $stateStr = $this.State.ToStateString()

        # Agent chooses action (epsilon-greedy)
        $actionName = $this.Brain.ChooseAction($stateStr)

        # Find corresponding BusinessAction
        $action = $this.AvailableActions |
            Where-Object { $_.ToString() -eq $actionName } |
            Select-Object -First 1

        if ($null -eq $action) {
            # Fallback: do nothing
            $action = [BusinessAction]::DoNothing()
        }

        $this.LastAction = $action
        return $action
    }

    # Execute action and update state.
    # Snapshots the state into PreviousState, applies the action's effects and
    # computes the reward. Returns a hashtable with the keys
    # Action, Results, Reward and NewState.
    [hashtable] ExecuteAction([BusinessAction]$action) {
        # Save previous state
        $this.PreviousState = $this.State.Clone()

        # Apply action effects
        $results = $this.ApplyActionEffects($action)

        # NOTE: The quarter is deliberately NOT simulated here - the
        # MarketEnvironment handles that. SimulateQuarter() is only meant for
        # a company running standalone (not in a market).

        # Calculate reward
        $reward = $this.CalculateReward($results)

        return @{
            Action   = $action
            Results  = $results
            Reward   = $reward
            NewState = $this.State
        }
    }

    # Apply action effects to state.
    # Dispatches on $action.Type and mutates $this.State accordingly.
    # Returns a hashtable with the keys Type, Success and Message; Success is
    # $false when the action could not be afforded, was not applicable, or a
    # probabilistic action failed.
    hidden [hashtable] ApplyActionEffects([BusinessAction]$action) {
        $results = @{
            Type    = $action.Type
            Success = $true
            Message = ""
        }

        switch ($action.Type) {
            "Pricing" {
                $oldPrice = $this.State.AveragePrice
                $this.State.AveragePrice = $oldPrice * $action.Parameters.PriceMultiplier
                $results.Message = "Price changed from `$$($oldPrice) to `$$($this.State.AveragePrice)"
            }

            "Investment" {
                $cost = $action.Parameters.Amount
                if ($this.State.Cash -ge $cost) {
                    $this.State.Cash -= $cost

                    # Apply investment benefits
                    if ($action.Parameters.ContainsKey('InnovationBoost')) {
                        $this.State.InnovationScore += $action.Parameters.InnovationBoost
                    }
                    if ($action.Parameters.ContainsKey('BrandBoost')) {
                        $this.State.BrandValue *= (1 + $action.Parameters.BrandBoost)
                    }
                    if ($action.Parameters.ContainsKey('CapacityIncrease')) {
                        $this.State.ProductionCapacity *= (1 + $action.Parameters.CapacityIncrease)
                    }

                    $results.Message = "Invested `$$cost in $($action.Name)"
                }
                else {
                    $results.Success = $false
                    $results.Message = "Insufficient cash for investment (need `$$cost, have `$$($this.State.Cash))"
                }
            }

            "Operational" {
                if ($action.Name.StartsWith("Hire")) {
                    # Index explicitly: with dot notation a missing 'Count' key
                    # would fall back to the dictionary's own Count property.
                    $count = $action.Parameters['Count']
                    $cost = $action.Parameters.Cost

                    if ($this.State.Cash -ge $cost) {
                        $this.State.EmployeeCount += $count
                        $this.State.Cash -= $cost
                        $this.State.ProductionCapacity *= (1 + $count * 0.05)
                        $results.Message = "Hired $count employees"
                    }
                    else {
                        $results.Success = $false
                        $results.Message = "Cannot afford to hire"
                    }
                }
                elseif ($action.Name.StartsWith("Layoff")) {
                    # Same explicit indexing as for "Hire" (see above).
                    $count = $action.Parameters['Count']

                    if ($this.State.EmployeeCount -gt $count) {
                        $this.State.EmployeeCount -= $count
                        $this.State.Cash -= $action.Parameters.SeveranceCost
                        $this.State.ProductionCapacity *= 0.95
                        $results.Message = "Laid off $count employees"
                    }
                    else {
                        $results.Success = $false
                        $results.Message = "Not enough employees to layoff"
                    }
                }
                elseif ($action.Name -eq "Quality_Improve") {
                    if ($this.State.Cash -ge $action.Parameters.Cost) {
                        $this.State.Cash -= $action.Parameters.Cost
                        $this.State.ProductQuality += $action.Parameters.QualityIncrease
                        # Quality is capped at 1.0
                        $this.State.ProductQuality = [Math]::Min($this.State.ProductQuality, 1.0)
                        $results.Message = "Improved quality"
                    }
                    else {
                        $results.Success = $false
                        $results.Message = "Cannot afford quality improvement"
                    }
                }
                elseif ($action.Name -eq "Cost_Reduction") {
                    $this.State.Costs *= (1 - $action.Parameters.CostSavings)
                    $this.State.ProductQuality *= (1 + $action.Parameters.QualityImpact)
                    $results.Message = "Reduced costs by $($action.Parameters.CostSavings.ToString('P0'))"
                }
            }

            "Strategic" {
                if ($action.Name -eq "Market_Expand") {
                    if ($this.State.Cash -ge $action.Parameters.Cost) {
                        $this.State.Cash -= $action.Parameters.Cost

                        # Success is probabilistic
                        $success = (Get-Random -Minimum 0.0 -Maximum 1.0) -gt $action.Parameters.RiskLevel

                        if ($success) {
                            $this.State.MarketShare *= (1 + $action.Parameters.NewCustomerPotential)
                            $results.Message = "Market expansion successful!"
                        }
                        else {
                            $results.Message = "Market expansion failed"
                            $results.Success = $false
                        }
                    }
                    else {
                        # Report the unaffordable action as a failure, in line
                        # with the Investment/Hire/Quality_Improve branches,
                        # instead of a silent "success" with an empty message.
                        $results.Success = $false
                        $results.Message = "Cannot afford market expansion"
                    }
                }
                elseif ($action.Name -eq "Product_Launch") {
                    if ($this.State.Cash -ge $action.Parameters.Cost) {
                        $this.State.Cash -= $action.Parameters.Cost

                        $success = (Get-Random -Minimum 0.0 -Maximum 1.0) -lt $action.Parameters.SuccessProbability

                        if ($success) {
                            $this.State.ProductsInPipeline++
                            $results.Message = "Product launch successful!"
                        }
                        else {
                            $results.Message = "Product launch failed"
                            $results.Success = $false
                        }
                    }
                    else {
                        # Same reasoning as for Market_Expand above.
                        $results.Success = $false
                        $results.Message = "Cannot afford product launch"
                    }
                }
                elseif ($action.Name -eq "Hold_Position") {
                    $results.Message = "Maintaining current strategy"
                }
            }
        }

        return $results
    }

    # Simulate one quarter of business.
    # Recomputes demand, production, revenue, costs, profit, cash, customer
    # satisfaction and market share on $this.State, advances Quarter/Year and
    # appends to ProfitHistory and MarketShareHistory.
    hidden [void] SimulateQuarter() {
        # Use GDPGrowth from market state (injected by MarketEnvironment)
        $economyGrowth = if ($this.State.PSObject.Properties['GDPGrowth']) {
            $this.State.GDPGrowth
        }
        else {
            0.03  # Default 3% growth if not set
        }

        $baseDemand = 1000.0 * (1.0 + $economyGrowth)
        $baseDemand *= (1.0 + ($this.State.MarketShare * 2.0))  # Market share boost
        $baseDemand *= (0.5 + ($this.State.CustomerSatisfaction * 0.5))
        $baseDemand *= (0.7 + ($this.State.ProductQuality * 0.3))

        # Price elasticity (bounded to prevent extreme values)
        $priceEffect = 1.0
        if ($this.State.AveragePrice -gt 100) {
            $priceDiff = [Math]::Min(($this.State.AveragePrice - 100) / 100, 2.0)
            $priceEffect = [Math]::Max(1.0 - ($priceDiff * 0.3), 0.3)
        }
        else {
            $priceDiff = [Math]::Min((100 - $this.State.AveragePrice) / 100, 2.0)
            $priceEffect = [Math]::Min(1.0 + ($priceDiff * 0.2), 2.0)
        }

        $demand = [Math]::Max($baseDemand * $priceEffect, 0)

        # Production constraint
        $production = [Math]::Min($demand, $this.State.ProductionCapacity)
        $production = [Math]::Max($production, 0)  # Never negative
        $this.State.ProductsSold = [int]$production

        # Capacity utilization (bounded 0-1)
        if ($this.State.ProductionCapacity -gt 0) {
            $this.State.CapacityUtilization = [Math]::Min($production / $this.State.ProductionCapacity, 1.0)
        }
        else {
            $this.State.CapacityUtilization = 0.0
        }

        # Revenue (always positive or zero)
        $this.State.Revenue = [Math]::Max($production * $this.State.AveragePrice, 0)

        # Costs (realistic and bounded)
        $fixedCosts = [Math]::Max($this.State.EmployeeCount * 12500, 0)  # ~$50k/year per employee
        $variableCosts = [Math]::Max($production * 30, 0)                # $30 per unit
        $this.State.Costs = $fixedCosts + $variableCosts

        # Profit
        $this.State.Profit = $this.State.Revenue - $this.State.Costs
        if ($this.State.Revenue -gt 0) {
            $this.State.ProfitMargin = $this.State.Profit / $this.State.Revenue
        }
        else {
            $this.State.ProfitMargin = 0.0
        }

        # Update cash
        $this.State.Cash += $this.State.Profit

        # Update customer satisfaction (bounded changes).
        # Separate variable name so it is not confused with the demand-side
        # $priceEffect computed above.
        $qualityEffect = [Math]::Max([Math]::Min($this.State.ProductQuality * 0.05, 0.1), -0.1)
        $priceSatisfactionEffect = if ($this.State.AveragePrice -lt 100) { 0.02 } else { -0.02 }
        $this.State.CustomerSatisfaction += $qualityEffect + $priceSatisfactionEffect
        $this.State.CustomerSatisfaction = [Math]::Max(0.1, [Math]::Min(1.0, $this.State.CustomerSatisfaction))

        # Update market share (bounded growth)
        $shareChange = ($this.State.InnovationScore * 0.01) + ($this.State.CustomerSatisfaction * 0.005)
        $shareChange = [Math]::Max([Math]::Min($shareChange, 0.05), -0.05)  # Max 5% change per quarter
        $this.State.MarketShare += $shareChange
        $this.State.MarketShare = [Math]::Max(0.0, [Math]::Min(0.5, $this.State.MarketShare))

        # Increment time
        $this.State.Quarter++
        if ($this.State.Quarter -gt 4) {
            $this.State.Quarter = 1
            $this.State.Year++
        }

        # Track history
        [void]$this.ProfitHistory.Add([double]$this.State.Profit)
        [void]$this.MarketShareHistory.Add([double]$this.State.MarketShare)
    }

    # Calculate reward for RL.
    # Combines scaled profit, revenue growth versus PreviousState, market
    # share and customer satisfaction bonuses, a -5 penalty for a failed
    # action and a -50 penalty for negative cash.
    hidden [double] CalculateReward([hashtable]$results) {
        $reward = 0.0

        # Primary: Profit
        $reward += $this.State.Profit / 10000.0  # Normalize to reasonable scale

        # Growth
        if ($null -ne $this.PreviousState) {
            $growthRate = if ($this.PreviousState.Revenue -gt 0) {
                ($this.State.Revenue - $this.PreviousState.Revenue) / $this.PreviousState.Revenue
            }
            else {
                0.0
            }
            $reward += $growthRate * 20.0
        }

        # Market share bonus
        $reward += $this.State.MarketShare * 10.0

        # Customer satisfaction bonus
        $reward += $this.State.CustomerSatisfaction * 5.0

        # Penalty for failed actions
        if (-not $results.Success) {
            $reward -= 5.0
        }

        # Penalty for running out of cash
        if ($this.State.Cash -lt 0) {
            $reward -= 50.0
        }

        return $reward
    }

    # Learn from experience.
    # Stores the (previous state, last action, reward, current state)
    # transition, updates the brain with it, replays a sampled batch once the
    # memory holds at least 10 entries, and records the reward.
    # Throws InvalidOperationException when no action has been decided and
    # executed yet (PreviousState or LastAction is still null).
    [void] Learn([double]$reward) {
        if ($null -eq $this.PreviousState -or $null -eq $this.LastAction) {
            throw [System.InvalidOperationException]::new(
                'Learn() requires a prior DecideAction() and ExecuteAction() call: PreviousState and LastAction must be set.')
        }

        $prevStateStr = $this.PreviousState.ToStateString()
        $currStateStr = $this.State.ToStateString()
        $actionStr = $this.LastAction.ToString()

        # Store experience
        $this.Memory.Add(@{
            State     = $prevStateStr
            Action    = $actionStr
            Reward    = $reward
            NextState = $currStateStr
        })

        # Learn from this experience
        $this.Brain.Learn($prevStateStr, $actionStr, $reward, $currStateStr)

        # Also learn from a random batch (experience replay)
        if ($this.Memory.Memory.Count -ge 10) {
            $batch = $this.Memory.Sample(5)
            foreach ($exp in $batch) {
                $this.Brain.Learn($exp.State, $exp.Action, $exp.Reward, $exp.NextState)
            }
        }

        # Track reward
        $this.TotalReward += $reward
        [void]$this.RewardHistory.Add([double]$reward)
    }

    # Run one episode (one quarter): decide, execute, learn, decay epsilon.
    # Returns the hashtable produced by ExecuteAction.
    [hashtable] RunEpisode() {
        # Decide action
        $action = $this.DecideAction()

        # Execute action
        $results = $this.ExecuteAction($action)

        # Learn from result
        $this.Learn($results.Reward)

        # Decay exploration
        $this.Brain.DecayEpsilon(0.995)

        # Increment episode
        $this.Episode++

        return $results
    }

    # Get performance summary.
    # Returns a hashtable with the keys Company, Episodes, TotalReward,
    # AverageReward, CurrentProfit, AverageProfit, MarketShare, Cash and
    # Epsilon. Averages are 0.0 while the corresponding history is empty.
    [hashtable] GetPerformanceSummary() {
        $avgReward = if ($this.RewardHistory.Count -gt 0) {
            ($this.RewardHistory | Measure-Object -Average).Average
        }
        else {
            0.0
        }

        $avgProfit = if ($this.ProfitHistory.Count -gt 0) {
            ($this.ProfitHistory | Measure-Object -Average).Average
        }
        else {
            0.0
        }

        return @{
            Company       = $this.Name
            Episodes      = $this.Episode
            TotalReward   = $this.TotalReward
            AverageReward = $avgReward
            CurrentProfit = $this.State.Profit
            AverageProfit = $avgProfit
            MarketShare   = $this.State.MarketShare
            Cash          = $this.State.Cash
            Epsilon       = $this.Brain.Epsilon
        }
    }

    # Display current state and learning progress on the host.
    [void] DisplayState() {
        Write-Host "`n=== $($this.Name) ($($this.Industry)) ===" -ForegroundColor Cyan
        Write-Host $this.State.ToString()
        Write-Host "Learning:" -ForegroundColor Yellow
        Write-Host " Episode: $($this.Episode)"
        Write-Host " Total Reward: $($this.TotalReward.ToString('F2'))"
        Write-Host " Exploration (ε): $($this.Brain.Epsilon.ToString('F3'))"
    }
}