VBAF

5.0.0

VBAF.Business.CompanyAgent.ps1

                                #Requires -Version 5.1

# VBAF.Business.CompanyAgent.ps1

<#

.SYNOPSIS

    Intelligent company agent with reinforcement learning brain

.DESCRIPTION

    A company that learns optimal business strategies through Q-Learning.

    Each company is an agent in a multi-agent market simulation.

    WHAT YOU ARE LEARNING HERE:

    ============================

    CompanyAgent is the bridge between abstract RL concepts and a

    concrete business problem. It wraps a QLearningAgent (the "Brain")

    inside a business context with:

      - Company state (cash, market share, profit, employees)

      - Business actions (price, invest, hire, expand)

      - Reward function (profit-based, with growth and satisfaction bonuses)

    AGENT ARCHITECTURE:

    ===================

    CompanyAgent contains:

      Brain:  QLearningAgent -- the learning algorithm

      Memory: ExperienceReplay -- stores past (state, action, reward, next state) tuples

      State:  CompanyState -- the current business situation

      AvailableActions: BusinessAction[] -- what the agent can do

    This is the ADAPTER PATTERN -- wrapping a general-purpose RL agent

    (QLearningAgent) inside a domain-specific shell (CompanyAgent).

    The Brain knows nothing about companies or markets.

    The CompanyAgent knows nothing about Q-tables or epsilon-greedy.

    They communicate through strings (state representations) and numbers (rewards).

    STATE REPRESENTATION:

    =====================

    The QLearningAgent needs states as STRINGS (for Q-table keys).

    CompanyState.ToStateString() converts the company's situation

    into a compact string like "ProfitHigh_ShareMid_CashGood".

    This discretises the continuous business state into categories

    the Q-table can store and look up efficiently.

    REWARD FUNCTION DESIGN:

    =======================

    CalculateReward() defines WHAT the company is trying to optimise.

    Current reward components:

      +profit / 10000    -- primary objective: make money

      +growth * 20       -- bonus for revenue growth quarter-over-quarter

      +market_share * 10 -- bonus for gaining market position

      +satisfaction * 5  -- bonus for customer quality

      -5  if action failed (e.g. not enough cash)

      -50 if cash runs negative (bankruptcy warning)

    This reward function shapes what the agent learns.

    Different reward functions = different company personalities:

    - Remove market share bonus -> company optimises pure profit

    - Increase growth weight -> company becomes aggressive expander

    - Add competitor penalty -> company becomes more defensive

    EXPERIENCE REPLAY IN COMPANY CONTEXT:

    ======================================

    After each action, the agent:

    1. Stores the experience: (prevState, action, reward, newState)

    2. Immediately learns from it (online learning)

    3. Once at least 10 experiences are stored, samples 5 random past experiences and learns from those too

    The random batch learning is experience replay -- the same technique

    used in DQN. It prevents the agent overfitting to the most recent

    experience and helps consolidate older lessons.

    QUARTER vs EPISODE:

    ===================

    In the multi-agent market simulation:

      Episode = one quarter of business operations

      MarketEnvironment calls SimulateQuarter() externally

    CompanyAgent.RunEpisode() is used for STANDALONE operation only.

    When running in MarketEnvironment, ExecuteAction() is called directly.

.NOTES

    Part of VBAF (Visual AI & Reinforcement Learning Framework)

    Educational use -- trace one full episode to see RL in a business context.

    Dependencies (must be loaded before this file):

      VBAF.RL.QLearningAgent.ps1

      VBAF.RL.ExperienceReplay.ps1

      VBAF.Business.CompanyState.ps1

      VBAF.Business.BusinessAction.ps1

#>

class CompanyAgent {

    #  Identity -- both values are supplied to the constructor

    [string]$Name

    [string]$Industry

    #  State

    # State is the live business situation that actions and SimulateQuarter() mutate.

    # PreviousState is the snapshot ExecuteAction() takes with State.Clone() before

    # applying an action; CalculateReward() compares revenue against it and Learn()

    # uses it as the "from" state. The constructor does not assign PreviousState.

    [CompanyState]$State

    [CompanyState]$PreviousState

    #  The Learning Brain

    # Brain is a QLearningAgent -- the same class from VBAF.RL.QLearningAgent.ps1

    # It knows nothing about companies -- it just sees state strings and action strings.

    # Memory is an ExperienceReplay buffer -- same class from VBAF.RL.ExperienceReplay.ps1

    # TotalReward is the running sum of every reward passed to Learn().

    # Episode counts completed RunEpisode() calls.

    [QLearningAgent]$Brain

    [ExperienceReplay]$Memory

    [double]$TotalReward

    [int]$Episode

    #  Actions

    # Available actions are predefined business moves: price, invest, hire, etc.

    # The Brain selects from these using epsilon-greedy Q-learning.

    # LastAction is the most recently chosen action; Learn() reads it as the

    # action to credit with the reward.

    [BusinessAction[]]$AvailableActions

    [BusinessAction]$LastAction

    #  Performance tracking

    # RewardHistory gains one entry per Learn() call.

    # ProfitHistory and MarketShareHistory gain one entry per SimulateQuarter() call.

    [System.Collections.ArrayList]$RewardHistory

    [System.Collections.ArrayList]$ProfitHistory

    [System.Collections.ArrayList]$MarketShareHistory

    # Constructor: create a company with starting capital.

    # Brain uses:

    #   LearningRate = 0.1 -- moderate learning speed

    #   Epsilon = 0.3      -- 30% exploration (less than pure RL -- business context)

    # Memory holds up to 500 past experiences (small -- business simulation is fast)

    CompanyAgent([string]$name, [string]$industry, [double]$startingCapital) {

        $this.Name     = $name

        $this.Industry = $industry

        $this.State    = New-Object CompanyState -ArgumentList $startingCapital

        $this.Episode  = 0

        $this.TotalReward = 0.0

        # Get all available business actions as string names for the Q-table

        $this.AvailableActions = [BusinessAction]::GetAllActions()

        $actionNames = $this.AvailableActions | ForEach-Object { $_.ToString() }

        # Create the Q-learning brain

        # Lower epsilon (0.3) than typical RL -- business agents exploit more

        # because random actions can waste large amounts of money

        $this.Brain  = New-Object QLearningAgent -ArgumentList $actionNames, 0.1, 0.3

        # Experience replay -- small buffer sufficient for business simulation

        $this.Memory = New-Object ExperienceReplay -ArgumentList 500

        $this.RewardHistory     = New-Object System.Collections.ArrayList

        $this.ProfitHistory     = New-Object System.Collections.ArrayList

        $this.MarketShareHistory = New-Object System.Collections.ArrayList

    }

    # Return the current company state.

    [CompanyState] ObserveState() {

        return $this.State

    }

    # DECIDE ACTION: the epsilon-greedy decision step.

    #

    # 1. Convert company state to a string the Q-table understands

    # 2. Brain.ChooseAction() returns the action string (epsilon-greedy)

    # 3. Find the matching BusinessAction object

    #

    # This is where the RL agent "thinks" -- the Q-table lookup happens here.

    # After many quarters, the Q-table learns: in state "ProfitLow_CashGood"

    # the best action is "Investment_RnD" (not "Pricing_Increase").

    [BusinessAction] DecideAction() {

        $stateStr   = $this.State.ToStateString()

        $actionName = $this.Brain.ChooseAction($stateStr)

        $action = $this.AvailableActions |

            Where-Object { $_.ToString() -eq $actionName } |

            Select-Object -First 1

        if ($null -eq $action) { $action = [BusinessAction]::DoNothing() }

        $this.LastAction = $action

        return $action

    }

    # EXECUTE ACTION: apply the chosen action and calculate reward.

    #

    # NOTE: This does NOT simulate the quarter -- that is done by MarketEnvironment.

    # MarketEnvironment.SimulateQuarter() handles market-level interactions

    # (how companies compete for market share, how prices affect demand).

    # CompanyAgent only handles the immediate effects of its own action.

    [hashtable] ExecuteAction([BusinessAction]$action) {

        $this.PreviousState = $this.State.Clone()

        $results = $this.ApplyActionEffects($action)

        $reward  = $this.CalculateReward($results)

        return @{

            Action   = $action

            Results  = $results

            Reward   = $reward

            NewState = $this.State

        }

    }

    # APPLY ACTION EFFECTS: change company state based on chosen action.

    #

    # Each action type has different effects:

    #   Pricing:     change AveragePrice (affects demand and revenue next quarter)

    #   Investment:  spend cash to improve InnovationScore, BrandValue, or Capacity

    #   Operational: hire/layoff employees, improve quality, reduce costs

    #   Strategic:   expand to new markets, launch products (probabilistic success)

    #

    # Actions that require cash check State.Cash before applying.

    # Failed actions (insufficient cash) are recorded in results.Success = false.

    hidden [hashtable] ApplyActionEffects([BusinessAction]$action) {

        $results = @{ Type = $action.Type; Success = $true; Message = "" }

        switch ($action.Type) {

            "Pricing" {

                $oldPrice = $this.State.AveragePrice

                $this.State.AveragePrice = $oldPrice * $action.Parameters.PriceMultiplier

                $results.Message = "Price changed from `$$($oldPrice) to `$$($this.State.AveragePrice)"

            }

            "Investment" {

                $cost = $action.Parameters.Amount

                if ($this.State.Cash -ge $cost) {

                    $this.State.Cash -= $cost

                    if ($action.Parameters.ContainsKey('InnovationBoost'))   { $this.State.InnovationScore    += $action.Parameters.InnovationBoost }

                    if ($action.Parameters.ContainsKey('BrandBoost'))        { $this.State.BrandValue         *= (1 + $action.Parameters.BrandBoost) }

                    if ($action.Parameters.ContainsKey('CapacityIncrease'))  { $this.State.ProductionCapacity *= (1 + $action.Parameters.CapacityIncrease) }

                    $results.Message = "Invested `$$cost in $($action.Name)"

                } else {

                    $results.Success = $false

                    $results.Message = "Insufficient cash (need `$$cost, have `$$($this.State.Cash))"

                }

            }

            "Operational" {

                if ($action.Name.StartsWith("Hire")) {

                    $count = $action.Parameters.Count

                    $cost  = $action.Parameters.Cost

                    if ($this.State.Cash -ge $cost) {

                        $this.State.EmployeeCount        += $count

                        $this.State.Cash                 -= $cost

                        $this.State.ProductionCapacity   *= (1 + $count * 0.05)

                        $results.Message = "Hired $count employees"

                    } else {

                        $results.Success = $false; $results.Message = "Cannot afford to hire"

                    }

                }

                elseif ($action.Name.StartsWith("Layoff")) {

                    $count = $action.Parameters.Count

                    if ($this.State.EmployeeCount -gt $count) {

                        $this.State.EmployeeCount      -= $count

                        $this.State.Cash               -= $action.Parameters.SeveranceCost

                        $this.State.ProductionCapacity *= 0.95

                        $results.Message = "Laid off $count employees"

                    } else {

                        $results.Success = $false; $results.Message = "Not enough employees to lay off"

                    }

                }

                elseif ($action.Name -eq "Quality_Improve") {

                    if ($this.State.Cash -ge $action.Parameters.Cost) {

                        $this.State.Cash            -= $action.Parameters.Cost

                        $this.State.ProductQuality  += $action.Parameters.QualityIncrease

                        $this.State.ProductQuality   = [Math]::Min($this.State.ProductQuality, 1.0)

                        $results.Message = "Improved product quality"

                    } else {

                        $results.Success = $false; $results.Message = "Cannot afford quality improvement"

                    }

                }

                elseif ($action.Name -eq "Cost_Reduction") {

                    $this.State.Costs          *= (1 - $action.Parameters.CostSavings)

                    $this.State.ProductQuality *= (1 + $action.Parameters.QualityImpact)

                    $results.Message = "Reduced costs by $($action.Parameters.CostSavings.ToString('P0'))"

                }

            }

            "Strategic" {

                if ($action.Name -eq "Market_Expand") {

                    if ($this.State.Cash -ge $action.Parameters.Cost) {

                        $this.State.Cash -= $action.Parameters.Cost

                        # Market expansion is PROBABILISTIC -- reflects real business uncertainty

                        $success = (Get-Random -Minimum 0.0 -Maximum 1.0) -gt $action.Parameters.RiskLevel

                        if ($success) {

                            $this.State.MarketShare *= (1 + $action.Parameters.NewCustomerPotential)

                            $results.Message = "Market expansion successful!"

                        } else {

                            $results.Success = $false; $results.Message = "Market expansion failed (bad luck)"

                        }

                    }

                }

                elseif ($action.Name -eq "Product_Launch") {

                    if ($this.State.Cash -ge $action.Parameters.Cost) {

                        $this.State.Cash -= $action.Parameters.Cost

                        $success = (Get-Random -Minimum 0.0 -Maximum 1.0) -lt $action.Parameters.SuccessProbability

                        if ($success) {

                            $this.State.ProductsInPipeline++

                            $results.Message = "Product launch successful!"

                        } else {

                            $results.Success = $false; $results.Message = "Product launch failed"

                        }

                    }

                }

                elseif ($action.Name -eq "Hold_Position") {

                    $results.Message = "Maintaining current strategy"

                }

            }

        }

        return $results

    }

    # SIMULATE QUARTER: compute one quarter of business results.

    #

    # Called by MarketEnvironment after all companies have chosen actions.

    # Economics modelled:

    #   Demand:  base demand adjusted for market share, price, quality, satisfaction

    #   Supply:  capped at ProductionCapacity

    #   Revenue: units sold * price

    #   Costs:   fixed (employees) + variable (per unit) costs

    #   Profit:  revenue - costs

    #

    # PRICE ELASTICITY:

    # Price above 100 -> demand falls (customers buy less at higher prices)

    # Price below 100 -> demand rises (price attracts more customers)

    # Capped to prevent extreme values from destabilising the simulation.

    hidden [void] SimulateQuarter() {

        $economyGrowth = if ($this.State.PSObject.Properties['GDPGrowth']) { $this.State.GDPGrowth } else { 0.03 }

        $baseDemand  = 1000.0 * (1.0 + $economyGrowth)

        $baseDemand *= (1.0 + ($this.State.MarketShare * 2.0))

        $baseDemand *= (0.5 + ($this.State.CustomerSatisfaction * 0.5))

        $baseDemand *= (0.7 + ($this.State.ProductQuality * 0.3))

        # Price elasticity -- higher price reduces demand, lower price increases it

        $priceEffect = 1.0

        if ($this.State.AveragePrice -gt 100) {

            $priceDiff   = [Math]::Min(($this.State.AveragePrice - 100) / 100, 2.0)

            $priceEffect = [Math]::Max(1.0 - ($priceDiff * 0.3), 0.3)

        } else {

            $priceDiff   = [Math]::Min((100 - $this.State.AveragePrice) / 100, 2.0)

            $priceEffect = [Math]::Min(1.0 + ($priceDiff * 0.2), 2.0)

        }

        $demand                      = [Math]::Max($baseDemand * $priceEffect, 0)

        $production                  = [Math]::Max([Math]::Min($demand, $this.State.ProductionCapacity), 0)

        $this.State.ProductsSold     = [int]$production

        $this.State.CapacityUtilization = if ($this.State.ProductionCapacity -gt 0) {

            [Math]::Min($production / $this.State.ProductionCapacity, 1.0)

        } else { 0.0 }

        $this.State.Revenue          = [Math]::Max($production * $this.State.AveragePrice, 0)

        $fixedCosts                  = [Math]::Max($this.State.EmployeeCount * 12500, 0)

        $variableCosts               = [Math]::Max($production * 30, 0)

        $this.State.Costs            = $fixedCosts + $variableCosts

        $this.State.Profit           = $this.State.Revenue - $this.State.Costs

        $this.State.ProfitMargin     = if ($this.State.Revenue -gt 0) { $this.State.Profit / $this.State.Revenue } else { 0.0 }

        $this.State.Cash            += $this.State.Profit

        # Customer satisfaction drifts based on quality and price

        $qualityEffect = [Math]::Max([Math]::Min($this.State.ProductQuality * 0.05, 0.1), -0.1)

        $priceEff      = if ($this.State.AveragePrice -lt 100) { 0.02 } else { -0.02 }

        $this.State.CustomerSatisfaction += $qualityEffect + $priceEff

        $this.State.CustomerSatisfaction  = [Math]::Max(0.1, [Math]::Min(1.0, $this.State.CustomerSatisfaction))

        # Market share drifts based on innovation and satisfaction

        $shareChange = ($this.State.InnovationScore * 0.01) + ($this.State.CustomerSatisfaction * 0.005)

        $shareChange = [Math]::Max([Math]::Min($shareChange, 0.05), -0.05)

        $this.State.MarketShare = [Math]::Max(0.0, [Math]::Min(0.5, $this.State.MarketShare + $shareChange))

        $this.State.Quarter++

        if ($this.State.Quarter -gt 4) { $this.State.Quarter = 1; $this.State.Year++ }

        $this.ProfitHistory.Add([double]$this.State.Profit) | Out-Null

        $this.MarketShareHistory.Add([double]$this.State.MarketShare) | Out-Null

    }

    # CALCULATE REWARD: how good was this quarter for the company?

    #

    # The reward function defines what "success" means.

    # Components:

    #   profit / 10000     -- normalise profit to a reasonable scale

    #   growth * 20        -- reward increasing revenue quarter-over-quarter

    #   market_share * 10  -- reward gaining market position

    #   satisfaction * 5   -- reward keeping customers happy

    #   -5 if action failed -- penalise wasted moves (e.g. insufficient cash)

    #   -50 if cash < 0    -- strong penalty for approaching bankruptcy

    #

    # Experimenting with reward weights changes company behaviour:

    # Increase growth weight -> more aggressive expansion

    # Reduce market share weight -> focus on profit over growth

    hidden [double] CalculateReward([hashtable]$results) {

        $reward = 0.0

        $reward += $this.State.Profit / 10000.0

        if ($null -ne $this.PreviousState) {

            $growthRate = if ($this.PreviousState.Revenue -gt 0) {

                ($this.State.Revenue - $this.PreviousState.Revenue) / $this.PreviousState.Revenue

            } else { 0.0 }

            $reward += $growthRate * 20.0

        }

        $reward += $this.State.MarketShare * 10.0

        $reward += $this.State.CustomerSatisfaction * 5.0

        if (-not $results.Success) { $reward -= 5.0 }    # Penalise failed actions

        if ($this.State.Cash -lt 0) { $reward -= 50.0 }  # Penalise near-bankruptcy

        return $reward

    }

    # LEARN: update the Q-table from this quarter's experience.

    #

    # Two types of learning happen here:

    #   1. Immediate learning: learn from the just-completed (state, action, reward) tuple

    #   2. Experience replay: sample 5 random past experiences and learn from those too

    #

    # Experience replay prevents the agent forgetting older strategies

    # and helps consolidate learning across many quarters.

    [void] Learn([double]$reward) {

        $prevStateStr = $this.PreviousState.ToStateString()

        $currStateStr = $this.State.ToStateString()

        $actionStr    = $this.LastAction.ToString()

        # Store this experience for future replay

        $this.Memory.Add(@{

            State     = $prevStateStr

            Action    = $actionStr

            Reward    = $reward

            NextState = $currStateStr

        })

        # Immediate learning from this experience

        $this.Brain.Learn($prevStateStr, $actionStr, $reward, $currStateStr)

        # Experience replay -- learn from 5 random past experiences

        if ($this.Memory.Memory.Count -ge 10) {

            $batch = $this.Memory.Sample(5)

            foreach ($exp in $batch) {

                $this.Brain.Learn($exp.State, $exp.Action, $exp.Reward, $exp.NextState)

            }

        }

        $this.TotalReward += $reward

        $this.RewardHistory.Add([double]$reward) | Out-Null

    }

    # RUN EPISODE: one full quarter cycle for standalone operation.

    # In multi-agent mode (MarketEnvironment), this is NOT called --

    # instead, ExecuteAction() is called directly by MarketEnvironment.

    [hashtable] RunEpisode() {

        $action  = $this.DecideAction()

        $results = $this.ExecuteAction($action)

        $this.Learn($results.Reward)

        $this.Brain.DecayEpsilon(0.995)

        $this.Episode++

        return $results

    }

    [hashtable] GetPerformanceSummary() {

        $avgReward = if ($this.RewardHistory.Count  -gt 0) { ($this.RewardHistory  | Measure-Object -Average).Average } else { 0.0 }

        $avgProfit = if ($this.ProfitHistory.Count  -gt 0) { ($this.ProfitHistory  | Measure-Object -Average).Average } else { 0.0 }

        return @{

            Company       = $this.Name

            Episodes      = $this.Episode

            TotalReward   = $this.TotalReward

            AverageReward = $avgReward

            CurrentProfit = $this.State.Profit

            AverageProfit = $avgProfit

            MarketShare   = $this.State.MarketShare

            Cash          = $this.State.Cash

            Epsilon       = $this.Brain.Epsilon

        }

    }

    [void] DisplayState() {

        Write-Host ""

        Write-Host "  === $($this.Name) ($($this.Industry)) ===" -ForegroundColor Cyan

        Write-Host $this.State.ToString()

        Write-Host "  Learning:" -ForegroundColor Yellow

        Write-Host "    Episode    : $($this.Episode)"

        Write-Host "    TotalReward: $($this.TotalReward.ToString('F2'))"

        Write-Host "    Epsilon    : $($this.Brain.Epsilon.ToString('F3'))"

    }

}