PwshCopilot

1.2.1

Private/VoiceInput.ps1

                                <#

.SYNOPSIS

    Speech-to-Text helper (Azure Speech preferred; falls back to Windows offline engine).

.DESCRIPTION

    Provides a simple function Invoke-PSCopilotVoiceInput that:

      1. If Azure Speech credentials (env vars AZ_SPEECH_KEY + AZ_SPEECH_REGION) are present and a WAV/PCM file path is passed, sends it to Azure Speech REST API and returns the transcript.

      2. If -UseMicrophone is specified, attempts a quick one-shot microphone capture to a temp WAV (requires ffmpeg.exe on PATH) and then transcribes it. If ffmpeg is missing or produces no audio, falls back to live dictation through System.Speech.

      3. If Azure creds not present, falls back to the legacy System.Speech.Recognition API (Windows only) for a short dictation (English locale assumed) when -UseMicrophone.

    This is intentionally lightweight and not a full streaming implementation. Improve as needed.

.NOTES

    For Azure Speech:

      Set-Item Env:AZ_SPEECH_KEY    "<your key>"

      Set-Item Env:AZ_SPEECH_REGION "<region>"   # e.g. eastus

    Optional config extension: you can also store these in the JSON config if you extend Config.ps1.

#>

function Invoke-PSCopilotVoiceInput {
    <#
    .SYNOPSIS
        Transcribes speech to text with Azure Speech or the Windows System.Speech engine.
    .DESCRIPTION
        Resolves the engine first ('auto' picks Azure only when both AZ_SPEECH_KEY and
        AZ_SPEECH_REGION are set), then:
          * Local engine: performs a single live dictation from the default audio device.
            Requires -UseMicrophone.
          * Azure engine: uploads a WAV file to the Azure Speech REST endpoint (en-US).
            With -UseMicrophone and no -AudioPath, a temporary WAV is recorded with ffmpeg
            first; if that recording fails, live System.Speech dictation is used instead.
        Failures are reported through Write-Error (non-terminating unless the caller
        requests otherwise).
    .PARAMETER AudioPath
        Path to an existing WAV file to transcribe with Azure. Relative paths are resolved
        against the current PowerShell location.
    .PARAMETER UseMicrophone
        Capture speech from an input device instead of reading an existing file.
    .PARAMETER Seconds
        Duration of the ffmpeg recording. Not used by live System.Speech dictation.
    .PARAMETER Engine
        'azure', 'local' or 'auto' (default).
    .OUTPUTS
        System.String. The recognized text, or $null when nothing was recognized.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Position=0)] [string] $AudioPath,
        [switch] $UseMicrophone,
        [int] $Seconds = 5,
        [ValidateSet('azure','local','auto')] [string] $Engine = 'auto'
    )

    # Private helper: one synchronous dictation from the default audio device (System.Speech).
    function Invoke-LocalDictation {
        $recognizer = $null
        try {
            Add-Type -AssemblyName System.Speech -ErrorAction Stop
            $recognizer = New-Object System.Speech.Recognition.SpeechRecognitionEngine
            $recognizer.SetInputToDefaultAudioDevice()
            $recognizer.LoadGrammar([System.Speech.Recognition.DictationGrammar]::new())
            Write-Host "Speak now..." -ForegroundColor Cyan
            # Only the synchronous API is used: starting RecognizeAsync first would make
            # this call fail because a recognition would already be in progress.
            $result = $recognizer.Recognize()
            if ($result) { return $result.Text }
            return $null
        }
        catch { Write-Error "Local recognition failed: $_"; return }
        finally {
            # The engine holds the audio device; always release it.
            if ($recognizer) { $recognizer.Dispose() }
        }
    }

    # Decide the engine up front so the capture strategy matches it.
    $haveAzure = [bool]($env:AZ_SPEECH_KEY -and $env:AZ_SPEECH_REGION)
    $useAzure  = ($Engine -eq 'azure') -or ($Engine -eq 'auto' -and $haveAzure)

    if (-not $useAzure) {
        if (-not $UseMicrophone) { Write-Error "Local engine only supports -UseMicrophone currently."; return }
        # Listen directly; recording a WAV first would leave nothing to transcribe it with.
        return Invoke-LocalDictation
    }

    if (-not $haveAzure) {
        # Without this check the request would target "https://.stt.speech.microsoft.com/...".
        Write-Error "Azure STT requires the AZ_SPEECH_KEY and AZ_SPEECH_REGION environment variables."
        return
    }

    $tempCapture = $null
    try {
        if ($UseMicrophone -and -not $AudioPath) {
            $AudioPath   = Join-Path ([IO.Path]::GetTempPath()) ("pscopilot_voice_" + [guid]::NewGuid().ToString() + ".wav")
            $tempCapture = $AudioPath
            Write-Verbose "Capturing microphone to $AudioPath for $Seconds second(s)..."

            $captured    = $false
            $haveFfmpeg  = [bool](Get-Command ffmpeg -ErrorAction SilentlyContinue)
            if ($haveFfmpeg) {
                # DirectShow capture (Windows). No embedded double quotes: the value is already
                # passed as one argument, and literal quotes would become part of the device
                # name on PowerShell versions that escape native arguments.
                # NOTE(review): 'virtual-audio-capturer' is a specific DirectShow device name,
                # not necessarily the default microphone - adjust for your hardware.
                $device = 'audio=virtual-audio-capturer'
                try {
                    ffmpeg -y -f dshow -i $device -t $Seconds -ac 1 -ar 16000 -acodec pcm_s16le $AudioPath 2>$null | Out-Null
                    # Native commands do not throw on failure; check the exit code as well.
                    $captured = ($LASTEXITCODE -eq 0) -and (Test-Path -LiteralPath $AudioPath -PathType Leaf)
                } catch { Write-Verbose "ffmpeg capture failed: $_" }
                if (-not $captured) { Write-Warning "ffmpeg didn't produce audio. Falling back to System.Speech capture." }
            }
            else {
                Write-Verbose "ffmpeg not found on PATH; using System.Speech dictation instead."
            }

            if (-not $captured) { return Invoke-LocalDictation }
        }

        if (-not $AudioPath) { Write-Error "AudioPath required for Azure STT (or use -UseMicrophone)."; return }
        if (-not (Test-Path -LiteralPath $AudioPath -PathType Leaf)) { Write-Error "Audio file not found: $AudioPath"; return }

        try {
            # .NET file APIs resolve relative paths against the process working directory,
            # which can differ from the PowerShell location, so resolve explicitly.
            $fullPath = (Resolve-Path -LiteralPath $AudioPath -ErrorAction Stop).ProviderPath
            $bytes    = [IO.File]::ReadAllBytes($fullPath)

            $endpoint = "https://$($env:AZ_SPEECH_REGION).stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US"
            $request  = @{
                Uri         = $endpoint
                Method      = 'POST'
                Headers     = @{ 'Ocp-Apim-Subscription-Key' = $env:AZ_SPEECH_KEY }
                ContentType = 'audio/wav; codecs=audio/pcm; samplerate=16000'
                Body        = $bytes
                ErrorAction = 'Stop'
            }
            # PowerShell 6+ validates header formats strictly and the "codecs=audio/pcm"
            # parameter is not a plain token; the switch does not exist on Windows PowerShell 5.1.
            if ($PSVersionTable.PSVersion.Major -ge 6) { $request['SkipHeaderValidation'] = $true }

            $resp = Invoke-RestMethod @request
            if ($resp.DisplayText) { return $resp.DisplayText }
            if ($resp.RecognitionStatus) { Write-Verbose ($resp | ConvertTo-Json -Depth 5) }
            return $null
        } catch { Write-Error "Azure STT failed: $_"; return }
    }
    finally {
        # Remove only the recording this call created, never a caller-supplied file.
        if ($tempCapture -and (Test-Path -LiteralPath $tempCapture)) {
            Remove-Item -LiteralPath $tempCapture -Force -ErrorAction SilentlyContinue
        }
    }
}

# Publish the voice-input entry point as the module's exported function from this file.
Export-ModuleMember -Function 'Invoke-PSCopilotVoiceInput'