# Private/VoiceInput.ps1
<#
.SYNOPSIS
Speech-to-Text helper (Azure Speech preferred; falls back to the Windows offline engine).

.DESCRIPTION
Invoke-PSCopilotVoiceInput returns the recognized text as a string, or $null when
nothing was recognized.

Engine selection (-Engine):
  azure  Use the Azure Speech REST API. Requires the AZ_SPEECH_KEY and
         AZ_SPEECH_REGION environment variables.
  local  Use the legacy System.Speech.Recognition API (Windows only) for a short
         microphone dictation. Requires -UseMicrophone.
  auto   Azure when both environment variables are set, otherwise local.

Azure path:
  * If -AudioPath is given, that WAV/PCM file is posted to the service.
  * If only -UseMicrophone is given, ffmpeg (when on PATH) records -Seconds seconds
    to a temporary 16 kHz mono WAV, which is transcribed and then deleted.
  * If ffmpeg is missing or produces no file, the function falls back to
    System.Speech dictation from the default audio device.

Local path:
  * Listens on the default audio device with a dictation grammar. -Seconds is used
    as the initial-silence timeout so the call does not wait forever on silence.

This is intentionally lightweight and not a full streaming implementation.

.PARAMETER AudioPath
Path to an existing audio file to transcribe with Azure Speech.

.PARAMETER UseMicrophone
Capture from the microphone instead of (or in the absence of) an audio file.

.PARAMETER Seconds
ffmpeg recording length, and initial-silence timeout for local dictation. Default 5.

.PARAMETER Engine
'azure', 'local' or 'auto' (default).

.NOTES
For Azure Speech:
    Set-Item Env:AZ_SPEECH_KEY "<your key>"
    Set-Item Env:AZ_SPEECH_REGION "<region>"   # e.g. eastus
Optional config extension: you can also store these in the JSON config if you
extend Config.ps1.
The request language is fixed to en-US.
#>
function Invoke-PSCopilotVoiceInput {
    [CmdletBinding()]
    param(
        [Parameter(Position=0)]
        [string] $AudioPath,
        [switch] $UseMicrophone,
        [int] $Seconds = 5,
        [ValidateSet('azure','local','auto')]
        [string] $Engine = 'auto'
    )

    # One-shot dictation from the default audio device via System.Speech.
    # Emits the recognized text, $null when nothing was recognized, or nothing
    # (after Write-Error) when the engine could not be used.
    function Invoke-LocalDictation {
        param([int] $TimeoutSeconds)

        $recognizer = $null
        try {
            Add-Type -AssemblyName System.Speech -ErrorAction Stop
            $recognizer = New-Object System.Speech.Recognition.SpeechRecognitionEngine
            $recognizer.SetInputToDefaultAudioDevice()
            $recognizer.LoadGrammar([System.Speech.Recognition.DictationGrammar]::new())
            Write-Host "Speak now..." -ForegroundColor Cyan

            # Use the synchronous API only: starting RecognizeAsync first and then
            # calling Recognize() makes Recognize() fail because a recognition is
            # already in progress on the engine.
            if ($TimeoutSeconds -gt 0) {
                # The TimeSpan overload bounds how long silence is accepted
                # before the call gives up and returns $null.
                $result = $recognizer.Recognize([TimeSpan]::FromSeconds($TimeoutSeconds))
            } else {
                $result = $recognizer.Recognize()
            }

            if ($result) { return $result.Text } else { return $null }
        } catch {
            Write-Error "Local recognition failed: $_"
            return
        } finally {
            # The engine holds the audio device; release it on every exit path.
            if ($recognizer) { $recognizer.Dispose() }
        }
    }

    # Decide the engine up front so the capture strategy matches it.
    $haveAzure = [bool]($env:AZ_SPEECH_KEY -and $env:AZ_SPEECH_REGION)
    $useAzure  = ($Engine -eq 'azure') -or ($Engine -eq 'auto' -and $haveAzure)

    if (-not $useAzure) {
        if (-not $UseMicrophone) {
            Write-Error "Local engine only supports -UseMicrophone currently."
            return
        }
        return Invoke-LocalDictation -TimeoutSeconds $Seconds
    }

    if (-not $haveAzure) {
        # Without this check the endpoint would be built with an empty region
        # and fail with an unhelpful network error.
        Write-Error "Azure STT requires the AZ_SPEECH_KEY and AZ_SPEECH_REGION environment variables."
        return
    }

    # Set only when this call creates the WAV, so cleanup never touches a
    # caller-supplied file.
    $tempCapture = $null
    try {
        if ($UseMicrophone -and -not $AudioPath) {
            $tempCapture = Join-Path ([IO.Path]::GetTempPath()) ("pscopilot_voice_" + [guid]::NewGuid().ToString() + ".wav")
            $AudioPath = $tempCapture
            Write-Verbose "Capturing microphone to $AudioPath for $Seconds second(s)..."

            if (Get-Command ffmpeg -ErrorAction SilentlyContinue) {
                # Capture via DirectShow (Windows). The device name is hard-coded;
                # adjust as needed for specific devices.
                $device = 'audio="virtual-audio-capturer"'
                try {
                    ffmpeg -y -f dshow -i $device -t $Seconds -ac 1 -ar 16000 -acodec pcm_s16le $AudioPath 2>$null | Out-Null
                } catch {
                    Write-Verbose "ffmpeg capture failed: $_"
                }
                if (-not (Test-Path -LiteralPath $AudioPath)) {
                    Write-Warning "ffmpeg didn't produce audio. Falling back to System.Speech capture."
                }
            } else {
                Write-Verbose "ffmpeg not found on PATH; using System.Speech capture."
            }

            if (-not (Test-Path -LiteralPath $AudioPath)) {
                # No recording to upload: dictate locally instead.
                return Invoke-LocalDictation -TimeoutSeconds $Seconds
            }
        }

        if (-not $AudioPath) {
            Write-Error "AudioPath required for Azure STT (or use -UseMicrophone)."
            return
        }
        if (-not (Test-Path -LiteralPath $AudioPath)) {
            Write-Error "Audio file not found: $AudioPath"
            return
        }

        try {
            # .NET file APIs resolve relative paths against the process working
            # directory, not the PowerShell location, so resolve explicitly.
            $fullPath = (Resolve-Path -LiteralPath $AudioPath -ErrorAction Stop).ProviderPath
            $bytes = [IO.File]::ReadAllBytes($fullPath)

            $endpoint = "https://$($env:AZ_SPEECH_REGION).stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US"
            $request = @{
                Uri         = $endpoint
                Method      = 'POST'
                Headers     = @{ 'Ocp-Apim-Subscription-Key' = $env:AZ_SPEECH_KEY }
                ContentType = 'audio/wav; codecs=audio/pcm; samplerate=16000'
                Body        = $bytes
                ErrorAction = 'Stop'
            }
            # NOTE(review): PowerShell 6+ validates header values and appears to
            # reject the unquoted 'codecs=audio/pcm' parameter; skip validation
            # where the switch exists (it does not in Windows PowerShell 5.1).
            if ((Get-Command Invoke-RestMethod).Parameters.ContainsKey('SkipHeaderValidation')) {
                $request['SkipHeaderValidation'] = $true
            }

            $resp = Invoke-RestMethod @request
            if ($resp.DisplayText) { return $resp.DisplayText }
            # No transcript: surface the service's status payload for diagnosis.
            if ($resp.RecognitionStatus) { Write-Verbose ($resp | ConvertTo-Json -Depth 5) }
            return $null
        } catch {
            Write-Error "Azure STT failed: $_"
            return
        }
    } finally {
        # Remove the temporary recording on every exit path.
        if ($tempCapture -and (Test-Path -LiteralPath $tempCapture)) {
            Remove-Item -LiteralPath $tempCapture -Force -ErrorAction SilentlyContinue
        }
    }
}

Export-ModuleMember -Function Invoke-PSCopilotVoiceInput