# PSSpeech.psm1


function Get-SpeechToken {
    <#
    .SYNOPSIS
    Requests an access token from the speech service token endpoint.

    .DESCRIPTION
    Sends an empty POST to the regional "issueToken" endpoint, authenticating
    with the supplied subscription key, and wraps the response in an object
    together with the time at which the request was started.

    .PARAMETER Region
    Service region used to build the endpoint host name. Defaults to "westeurope".

    .PARAMETER Key
    Subscription key, sent in the 'Ocp-Apim-Subscription-Key' header.

    .OUTPUTS
    psobject with two properties:
      TimeStamp - result of Get-Date taken just before the request is sent
      Token     - whatever Invoke-RestMethod returns for the issueToken call
    #>
    [CmdletBinding()]
    param (
        [Parameter()]
        [ValidateSet("westeurope","northeurope")]
        $Region = "westeurope",
        [Parameter(Mandatory)]
        [ValidateNotNullOrEmpty()]
        $Key
    )

    # Headers for the token request; the body is empty, hence Content-Length 0.
    $requestHeaders = @{
        'Content-type'              = 'application/x-www-form-urlencoded'
        'Content-Length'            = '0'
        'Ocp-Apim-Subscription-Key' = $Key
    }

    # Collected as a splat so the call below stays readable.
    $tokenRequest = @{
        Method  = 'POST'
        Uri     = "https://$Region.api.cognitive.microsoft.com/sts/v1.0/issueToken"
        Headers = $requestHeaders
    }

    # The timestamp is taken first, then the token is fetched.
    $tokenProperties = @{
        TimeStamp = Get-Date
        Token     = Invoke-RestMethod @tokenRequest
    }
    New-Object -TypeName psobject -Property $tokenProperties
}

function Save-SpeechToken {
    <#
    .SYNOPSIS
    Stores a token object in the global variable PSSpeechToken.

    .DESCRIPTION
    Writes the supplied value to $Global:PSSpeechToken, which the other
    functions in this file use as the default for their -Token parameter.
    The function has no process block, so when several objects are piped in
    only the last one is stored.

    .PARAMETER Token
    The value to store. Accepts pipeline input.
    #>
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline)]
        $Token
    )

    end {
        $globalTokenVariable = @{
            Name  = 'PSSpeechToken'
            Value = $Token
            Scope = 'global'
        }
        Set-Variable @globalTokenVariable
    }
}

function Get-SpeechVoicesList {
    <#
    .SYNOPSIS
    Retrieves the voices list from the regional text-to-speech endpoint.

    .DESCRIPTION
    Issues a GET against "cognitiveservices/voices/list" for the chosen region,
    authenticating with the bearer token held in the Token property of -Token,
    and returns whatever Invoke-RestMethod produces.

    .PARAMETER Region
    Service region used to build the endpoint host name. Defaults to "westeurope".

    .PARAMETER Token
    Object whose Token property carries the bearer token. Defaults to
    $Global:PSSpeechToken.
    #>
    [CmdletBinding()]
    param (
        [ValidateSet("westeurope","northeurope")]
        $Region = "westeurope",
        $Token = $Global:PSSpeechToken
    )

    $bearerValue = "Bearer $($Token.Token)"
    $requestHeaders = @{
        'Content-type'   = 'application/ssml+xml'
        'Authorization'  = $bearerValue
        'Content-Length' = '0'
    }

    $voicesRequest = @{
        Uri     = "https://$Region.tts.speech.microsoft.com/cognitiveservices/voices/list"
        Headers = $requestHeaders
        Method  = 'Get'
    }
    Invoke-RestMethod @voicesRequest
}

function Convert-TextToSpeech {
    <#
    .SYNOPSIS
    Sends text to the regional text-to-speech endpoint and saves the audio to a file.

    .DESCRIPTION
    Wraps -Text in an SSML document for the selected voice, POSTs it to
    "cognitiveservices/v1" for the chosen region and hands -Path to
    Invoke-RestMethod -OutFile.

    The text is first used as-is, so inline SSML markup keeps working. If the
    resulting document is not well-formed XML (for example the text contains a
    bare '&' or '<'), the text is XML-escaped and treated as plain text.

    .PARAMETER Region
    Service region used to build the endpoint host name. Defaults to "westeurope".

    .PARAMETER Token
    Object whose Token property carries the bearer token. Defaults to
    $Global:PSSpeechToken.

    .PARAMETER Text
    The text to be spoken.

    .PARAMETER Path
    File that receives the response body (passed to -OutFile).

    .PARAMETER Voice
    Name of the voice to use. Defaults to 'en-GB-HarryNeural'.

    .PARAMETER OutputFormat
    Value sent in the 'X-Microsoft-OutputFormat' header. Defaults to
    "audio-16khz-32kbitrate-mono-mp3".
    #>
    [CmdletBinding()]
    param (
        [Parameter()]
        [ValidateSet("westeurope","northeurope")]
        $Region = "westeurope",
        [Parameter()]
        $Token = $Global:PSSpeechToken,
        [Parameter()]
        [ValidateNotNullOrEmpty()]
        [String]
        $Text,
        [Parameter()]
        [System.IO.FileInfo]
        $Path,
        [Parameter()]
        [ValidateSet('en-US-GuyNeural','en-US-JessaNeural','zh-CN-XiaoxiaoNeural','it-IT-ElsaNeural','de-DE-KatjaNeural','en-GB-HarryNeural','fr-FR-HortenseNeural','pt-BR-FranciscaNeural')]
        $Voice = 'en-GB-HarryNeural',
        [Parameter()]
        [ValidateSet("raw-16khz-16bit-mono-pcm","audio-16khz-128kbitrate-mono-mp3","audio-16khz-32kbitrate-mono-mp3","audio-24khz-96kbitrate-mono-mp3","audio-24khz-48kbitrate-mono-mp3","audio-24khz-160kbitrate-mono-mp3","audio-16khz-64kbitrate-mono-mp3")]
        $OutputFormat = "audio-16khz-32kbitrate-mono-mp3"
    )
    $AuthHeader = @{
        'Content-type' = 'application/ssml+xml'
        'Authorization' = "Bearer $($Token.Token)"
        'X-Microsoft-OutputFormat' = $OutputFormat
        'User-Agent' = "powershell"
    }

    # Voice names have the form <language>-<REGION>-<Name>; the first two
    # segments are the locale, so the SSML language matches the chosen voice
    # instead of always claiming en-GB.
    $locale = ($Voice -split '-')[0..1] -join '-'

    # NOTE(review): xml:gender is hard-coded to 'Female' even for the male
    # voices in the list; left unchanged here, presumably the service selects
    # the voice by name - confirm before touching it.
    $ssmlTemplate = "<speak version='1.0' xml:lang='{0}'><voice xml:lang='{0}' xml:gender='Female' name='{1}'>{2}</voice></speak>"

    # build the ssml xml
    try {
        # First attempt keeps the text verbatim so inline SSML markup still works.
        [xml]$xml = $ssmlTemplate -f $locale, $Voice, $Text
    }
    catch {
        # The text made the document malformed (bare '&', stray '<', ...):
        # escape it and send it as plain text instead of failing.
        [xml]$xml = $ssmlTemplate -f $locale, $Voice, [System.Security.SecurityElement]::Escape($Text)
    }

    # send to speech service and save output in file
    Invoke-RestMethod -Uri "https://$region.tts.speech.microsoft.com/cognitiveservices/v1" -Headers $AuthHeader -Method Post -Body $xml -OutFile $Path
}