# AI/MiMoTTS.psm1

# MiMo-V2.5-TTS Integration Module for MiMo CLI
# Provides text-to-speech synthesis using MiMo-V2.5-TTS model

function Invoke-MiMoTTS {
    <#
    .SYNOPSIS
        Requests speech synthesis for a text string from the MiMo-V2.5-TTS model.
    .DESCRIPTION
        Posts a JSON request to "<BaseUrl>/chat/completions" containing the text as a
        single assistant message plus the requested audio format and voice, then
        returns the value found at choices[0].message.audio.data in the response.
    .PARAMETER Text
        The text to synthesize.
    .PARAMETER Voice
        Voice identifier sent as audio.voice. Defaults to "mimo_default".
    .PARAMETER OutputFormat
        Audio format sent as audio.format. Defaults to "wav".
    .PARAMETER ApiKey
        Bearer token for the API. Defaults to the MIMO_API_KEY environment variable.
    .PARAMETER BaseUrl
        API base URL; a trailing slash is tolerated.
    .OUTPUTS
        System.String. The audio payload (base64 encoded), or $null after a
        Write-Error when the key is missing, the call fails, or no audio is returned.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory=$true)]
        [string]$Text,
        
        [string]$Voice = "mimo_default",
        [string]$OutputFormat = "wav",
        [string]$ApiKey = $env:MIMO_API_KEY,
        [string]$BaseUrl = "https://api.xiaomimimo.com/v1"
    )
    
    if (-not $ApiKey) {
        Write-Error "MiMo API key not found. Set MIMO_API_KEY environment variable."
        return $null
    }
    
    # MiMo-V2.5-TTS uses the chat completions endpoint.
    # Trim a trailing slash so "https://host/v1/" does not yield "//chat/completions".
    $url = "$($BaseUrl.TrimEnd('/'))/chat/completions"
    
    # Content-Type is supplied through -ContentType below so the charset is explicit.
    $headers = @{
        "Authorization" = "Bearer $ApiKey"
    }
    
    # The text to speak is carried in an assistant message (per the API
    # documentation, as noted by the original author).
    $body = @{
        model = "mimo-v2.5-tts"
        messages = @(
            @{
                role = "assistant"
                content = $Text
            }
        )
        audio = @{
            format = $OutputFormat
            voice = $Voice
        }
    } | ConvertTo-Json -Depth 10
    
    # Send the payload as UTF-8 bytes. When a string body is passed, Windows
    # PowerShell may encode it with a non-UTF-8 default, which corrupts
    # non-ASCII text before it reaches the service.
    $bodyBytes = [System.Text.Encoding]::UTF8.GetBytes($body)
    
    $audioData = $null
    try {
        $response = Invoke-RestMethod -Uri $url -Method Post -Headers $headers `
            -ContentType "application/json; charset=utf-8" -Body $bodyBytes -ErrorAction Stop
        # Audio data (base64 encoded) lives under the first choice's message.
        $audioData = $response.choices[0].message.audio.data
    }
    catch {
        Write-Error "Failed to call MiMo TTS API: $_"
        return $null
    }
    
    if (-not $audioData) {
        # Surface an unexpected response shape instead of silently returning nothing.
        Write-Error "MiMo TTS API response did not contain audio data."
        return $null
    }
    
    return $audioData
}

function Save-MiMoTTSAudio {
    <#
    .SYNOPSIS
        Synthesizes speech for a text string and writes the audio to a file.
    .DESCRIPTION
        Calls Invoke-MiMoTTS with the supplied arguments, decodes the returned
        base64 string, and writes the bytes to OutputPath. Progress and outcome are
        reported with Write-Host.
    .PARAMETER Text
        The text to synthesize.
    .PARAMETER OutputPath
        Destination file. Relative paths are resolved against the current
        PowerShell location.
    .PARAMETER Voice
        Voice identifier forwarded to Invoke-MiMoTTS. Defaults to "mimo_default".
    .PARAMETER OutputFormat
        Audio format forwarded to Invoke-MiMoTTS. Defaults to "wav".
    .PARAMETER ApiKey
        API key forwarded to Invoke-MiMoTTS. Defaults to the MIMO_API_KEY
        environment variable.
    .PARAMETER BaseUrl
        API base URL forwarded to Invoke-MiMoTTS.
    .OUTPUTS
        System.Boolean. $true when the file was written, $false otherwise.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory=$true)]
        [string]$Text,
        
        [Parameter(Mandatory=$true)]
        [string]$OutputPath,
        
        [string]$Voice = "mimo_default",
        [string]$OutputFormat = "wav",
        [string]$ApiKey = $env:MIMO_API_KEY,
        [string]$BaseUrl = "https://api.xiaomimimo.com/v1"
    )
    
    Write-Host "Generating speech for: $Text" -ForegroundColor Cyan
    
    # Get audio data (base64 encoded)
    $audioData = Invoke-MiMoTTS -Text $Text -Voice $Voice -OutputFormat $OutputFormat -ApiKey $ApiKey -BaseUrl $BaseUrl
    
    if (-not $audioData) {
        Write-Host "Failed to generate speech" -ForegroundColor Red
        return $false
    }
    
    try {
        # Decode base64 audio data; malformed input raises here and is reported below.
        $audioBytes = [System.Convert]::FromBase64String($audioData)
        
        # .NET file APIs resolve relative paths against the process working
        # directory, which can differ from the PowerShell location. Resolve
        # through the session first so the file lands where the caller expects.
        $resolvedPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($OutputPath)
        
        # Save to file
        [System.IO.File]::WriteAllBytes($resolvedPath, $audioBytes)
    }
    catch {
        Write-Error "Failed to save audio to '$OutputPath': $_"
        return $false
    }
    
    Write-Host "Audio saved to: $OutputPath" -ForegroundColor Green
    return $true
}

function Get-MiMoTTSVoices {
    <#
    .SYNOPSIS
        Lists the voices this module knows about.
    .DESCRIPTION
        Emits one hashtable per voice with the keys Name, Description and Language.
        The list is hard-coded in this function; no request is made, and the
        ApiKey and BaseUrl parameters are accepted but not used by the body.
    .PARAMETER ApiKey
        Unused. Defaults to the MIMO_API_KEY environment variable.
    .PARAMETER BaseUrl
        Unused. Defaults to the MiMo API base URL.
    .OUTPUTS
        System.Collections.Hashtable. Four entries, in a fixed order.
    #>
    [CmdletBinding()]
    param(
        [string]$ApiKey = $env:MIMO_API_KEY,
        [string]$BaseUrl = "https://api.xiaomimimo.com/v1"
    )
    
    # Voice name -> description, kept ordered so output order is stable.
    # The original author attributes these names to the API documentation.
    $descriptionByName = [ordered]@{
        "mimo_default"    = "Default voice"
        "mimo_default_v2" = "Default voice v2"
        "mimo_female_v1"  = "Female voice"
        "mimo_male_v1"    = "Male voice"
    }
    
    foreach ($voiceName in $descriptionByName.Keys) {
        # Each entry is written to the pipeline as its own hashtable.
        @{
            Name = $voiceName
            Description = $descriptionByName[$voiceName]
            Language = "en-US"
        }
    }
}

# Export the module's public functions.
Export-ModuleMember -Function @(
    'Invoke-MiMoTTS'
    'Save-MiMoTTSAudio'
    'Get-MiMoTTSVoices'
)