SpeechToText_Example

1.0.0

SpeechToText_Example.ps1

                                
<#PSScriptInfo

.VERSION 1.0.0

.GUID fab4bdf6-b198-4bf4-9eac-87c18122fdac

.AUTHOR mikko@lavento.com

.COMPANYNAME 

.COPYRIGHT 

.TAGS Google, Speech, GoogleCloud, API, Speech-to-text

.LICENSEURI 

.PROJECTURI 

.ICONURI 

.EXTERNALMODULEDEPENDENCIES 

.REQUIREDSCRIPTS 

.EXTERNALSCRIPTDEPENDENCIES 

.RELEASENOTES

#>

<# 

.DESCRIPTION 

 Speech to Text API example using Google's cloud.

 pre-requisites: assumption that these are done: https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries

 API for audio shorter than one minute (not used in this example). Oddly, in my experiments audio clips under 30 seconds were accepted, but clips of 30-60 seconds were not.

 https://speech.googleapis.com/v1/speech

 API for audio files longer than one minute

 https://speech.googleapis.com/v1/speech:longrunningrecognize

 xxxxxxx gcloud commands for debugging reason xxxxxxx

 Problem: when using the REST API from PowerShell, the errors you get tell you nothing: 400 Bad Request.

 Debugging with the gcloud console gives exact errors, such as no access to the bucket.

 List which account you are using.

 It should basically be the same account as the one in your .json key file.

 gcloud auth list

 Setting the account to use

 speechtotext@speechtotext-<numbers>.iam.gserviceaccount.com

 gcloud auth activate-service-account speechtotext@speechtotext-<numbers>.iam.gserviceaccount.com --key-file="C:\Skriptit\SpeechToText\speechtotext-<numbers>-72583c2223fe.json"

 Testing the API

 helpful site: https://cloud.google.com/speech-to-text/docs/async-recognize speech-async-recognize-gcs-gcloud

 gcloud ml speech recognize-long-running gs://<yourbucket>_bucket/Test.flac --language-code=en-US --async

 Getting the status of the job

 gcloud ml speech operations describe 591271978XXXX801384

 gcloud ml speech operations wait 1727881XXXX9022087

#> 

Param(
    # Text file that receives the recognition results.
    [string]$outputfile = "C:\Skriptit\SpeechToText\outputtext.txt",

    # Service-account key file, exported as GOOGLE_APPLICATION_CREDENTIALS before gcloud is called.
    [string]$CredentialFile = "C:\Skriptit\SpeechToText\speechtotext-<numbers>-72583c2223fe.json",

    # Cloud Storage URI of the audio file to transcribe.
    [string]$AudioUri = 'gs://<your_bucket>/audio.flac',

    # Language code sent in the recognition config.
    [string]$LanguageCode = 'en-US'
)

# Speech to Text example using Google's Speech to Text API
# 18.9.2019 M.Lavento
#
# This section obtains an access token through gcloud, submits a
# long-running recognition request, and builds the URI ($joburi) that the
# rest of the script polls. It stops with an error if no token or no
# operation name could be obtained, because polling would otherwise never end.

# Credentials for the project and bucket
$env:GOOGLE_APPLICATION_CREDENTIALS = $CredentialFile

$cred = gcloud auth application-default print-access-token

# Without a token every later request fails, so stop here with a clear message.
if (($LASTEXITCODE -ne 0) -or [string]::IsNullOrWhiteSpace($cred)) {
    throw "Could not obtain an access token from gcloud (exit code: $LASTEXITCODE)."
}

$headers = @{ Authorization = "Bearer $cred" }

$body = @{
    audio = @{
        uri = $AudioUri
    }
    config = @{
        languageCode = $LanguageCode
    }
}

# Build JSON body for the request
$jbody = ConvertTo-Json ($body)

$result = Invoke-WebRequest -Method Post -Headers $headers -ContentType: "application/json; charset=utf-8" -Body $jbody -Uri "https://speech.googleapis.com/v1/speech:longrunningrecognize"

Write-Host "Processing.....Webrequest code:" $result.StatusDescription

# The answer is JSON; its "name" field identifies the long-running operation.
$JobNameFromJSON = $result | Select-Object -Expand Content | ConvertFrom-Json

$jobnumber = $JobNameFromJSON.name

# If the request failed there is no operation to poll; the status loop would
# otherwise keep querying a URI with an empty operation name.
if ([string]::IsNullOrWhiteSpace($jobnumber)) {
    throw "The recognition request did not return an operation name; nothing to poll."
}

# URI used to query the status of the job
$joburi = "https://speech.googleapis.com/v1/operations/$jobnumber"

# Poll the operation once a minute until the audio has been processed.
# The loop ends when the operation reports done = true or 100 percent progress.
# An operation that carries an "error" object stops the script, since it
# will not produce a result no matter how long we wait.
do
{
    # Wait one minute between status queries
    Start-Sleep -Seconds 60

    $Jobstatus = Invoke-WebRequest -Method Get -Headers $headers -ContentType: "application/json; charset=utf-8" -Uri $joburi

    # The answer is JSON
    $JobStatusFromJSON = $Jobstatus.Content | ConvertFrom-Json

    # Percent complete (empty when the service does not report it)
    $JobStatusPercentage = $JobStatusFromJSON.metadata.progressPercent

    write-host "Job progress percentage: $JobStatusPercentage"

    # A failed operation never reaches 100 percent; surface the error instead of looping forever.
    if ($JobStatusFromJSON.error) {
        throw "Speech-to-Text operation failed: $($JobStatusFromJSON.error.message)"
    }
}
until ($JobStatusFromJSON.done -or ($JobStatusPercentage -eq "100"))

# We are interested in the "response" part of the final status answer.
# Parse the Content (the JSON text) of the last status response explicitly
# instead of relying on implicit string conversion of the response object.
$response = $JobStatus.Content | ConvertFrom-Json

# Write the recognition results to the output file, overwriting any previous run.
$response.response.results | Out-File $outputfile -Force

# Open the output file with its associated application.
Invoke-Item $outputfile