# SpeechToText_Example.ps1


<#PSScriptInfo
 
.VERSION 1.0.0
 
.GUID fab4bdf6-b198-4bf4-9eac-87c18122fdac
 
.AUTHOR mikko@lavento.com
 
.COMPANYNAME
 
.COPYRIGHT
 
.TAGS Google, Speech, GoogleCloud, API, Speech-to-text
 
.LICENSEURI
 
.PROJECTURI
 
.ICONURI
 
.EXTERNALMODULEDEPENDENCIES
 
.REQUIREDSCRIPTS
 
.EXTERNALSCRIPTDEPENDENCIES
 
.RELEASENOTES
 
 
#>


<#
 
.DESCRIPTION
 Speech to Text API example using Google's cloud.
 
 pre-requisites: assumption that these are done: https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries
 
 API for audio shorter than one minute (not used in this example). Strangely, while experimenting, audio under 30 seconds was accepted, but audio of 30-60 seconds was not.
 https://speech.googleapis.com/v1/speech:recognize
 
 API for audio files longer than one minute
 https://speech.googleapis.com/v1/speech:longrunningrecognize
 
 
 xxxxxxx gcloud commands for debugging purposes xxxxxxx
 Problem: when calling the REST API from PowerShell, the errors you get tell you nothing: 400 Bad Request.
 Debugging with the gcloud console gives exact errors, such as no access to the bucket.
  
 List which account you are using.
 It should basically be the same as the one in your .json key file.
 gcloud auth list
  
 setting the usable account
 speechtotext@speechtotext-<numbers>.iam.gserviceaccount.com
 gcloud auth activate-service-account speechtotext@speechtotext-<numbers>.iam.gserviceaccount.com --key-file="C:\Skriptit\SpeechToText\speechtotext-<numbers>-72583c2223fe.json"
  
 Testing the API
 helpful site: https://cloud.google.com/speech-to-text/docs/async-recognize speech-async-recognize-gcs-gcloud
 gcloud ml speech recognize-long-running gs://<yourbucket>_bucket/Test.flac --language-code=en-US --async
  
 Getting the status of the job
 gcloud ml speech operations describe 591271978XXXX801384
 gcloud ml speech operations wait 1727881XXXX9022087
                            
 
#>
 

Param(
    # Text file that receives the recognition results (overwritten on each run).
    [string]$OutputFile = "C:\Skriptit\SpeechToText\outputtext.txt",

    # Service-account key file (.json); exported as GOOGLE_APPLICATION_CREDENTIALS for gcloud.
    [string]$CredentialFile = "C:\Skriptit\SpeechToText\speechtotext-<numbers>-72583c2223fe.json",

    # Cloud Storage URI of the audio file to transcribe.
    [string]$AudioUri = 'gs://<your_bucket>/audio.flac',

    # Language code sent in the recognition config.
    [string]$LanguageCode = 'en-US',

    # Seconds to wait between two status queries of the long-running job.
    [ValidateRange(1, 3600)]
    [int]$PollIntervalSeconds = 60
)


# Speech to Text example using Google's Speech to Text API
# 18.9.2019 M.Lavento
#
# Flow: get an access token from gcloud, start a long-running recognition job
# for the audio in the bucket, poll the job until it is done, then write the
# results to $OutputFile and open that file.


# Credentials for the project and bucket.
$env:GOOGLE_APPLICATION_CREDENTIALS = $CredentialFile

# Join and trim the gcloud output so a stray newline cannot end up in the header.
$cred = "$(gcloud auth application-default print-access-token)".Trim()
if ([string]::IsNullOrWhiteSpace($cred)) {
    # Without a token every request would fail with an unhelpful HTTP error.
    throw "Could not obtain an access token from gcloud. Check that gcloud is installed and that '$CredentialFile' is a valid key file."
}
$headers = @{ Authorization = "Bearer $cred" }

$body = @{
    audio  = @{
        uri = $AudioUri
    }
    config = @{
        languageCode = $LanguageCode
    }
}

# Build JSON body for the request
$jbody = ConvertTo-Json ($body)

$startRequest = @{
    Method          = 'Post'
    Headers         = $headers
    ContentType     = 'application/json; charset=utf-8'
    Body            = $jbody
    Uri             = 'https://speech.googleapis.com/v1/speech:longrunningrecognize'
    UseBasicParsing = $true   # no Internet Explorer DOM parsing needed for a JSON answer
}

try {
    $result = Invoke-WebRequest @startRequest
}
catch {
    # An HTTP error only ends the failing statement by default; stop the script
    # here instead of polling a job that was never created.
    throw "Starting the recognition job failed: $($_.Exception.Message) $($_.ErrorDetails.Message)"
}

Write-Host "Processing.....Webrequest code:" $result.StatusDescription

# We get JSON as an answer; its 'name' identifies the job.
$JobNameFromJSON = $result.Content | ConvertFrom-Json
$jobnumber = $JobNameFromJSON.name
if ([string]::IsNullOrWhiteSpace($jobnumber)) {
    throw "The Speech-to-Text API did not return a job name."
}

# Query the status of the job
$joburi = "https://speech.googleapis.com/v1/operations/$jobnumber"

$pollRequest = @{
    Method          = 'Get'
    Headers         = $headers
    ContentType     = 'application/json; charset=utf-8'
    Uri             = $joburi
    UseBasicParsing = $true
}

# Loop to wait for the audio to be processed
do {
    # Wait between status queries
    Start-Sleep -Seconds $PollIntervalSeconds

    try {
        $Jobstatus = Invoke-WebRequest @pollRequest
    }
    catch {
        throw "Querying job $jobnumber failed: $($_.Exception.Message) $($_.ErrorDetails.Message)"
    }

    # We get JSON as an answer
    $JobStatusFromJSON = $Jobstatus.Content | ConvertFrom-Json
    # Percent complete
    $JobStatusPercentage = $JobStatusFromJSON.metadata.progressPercent

    Write-Host "Job progress percentage: $JobStatusPercentage"
}
# 'done' is set for finished jobs whether they succeeded or failed, so a
# failed job ends the loop too instead of waiting for 100 percent forever.
until ($JobStatusFromJSON.done)

if ($JobStatusFromJSON.error) {
    throw "Speech-to-Text job $jobnumber failed: $($JobStatusFromJSON.error.message)"
}

# We are interested in the response; reuse the JSON parsed in the last poll.
$JobStatusFromJSON.response.results | Out-File $OutputFile -Force

Invoke-Item $OutputFile