// google-cloud-playground/src/main.rs

use std::io::Cursor;
use bytes::BytesMut;
use std::path::PathBuf;
use std::pin::Pin;
use google_api_proto::google::cloud::speech::v1::streaming_recognize_request::StreamingRequest;
use google_api_proto::google::cloud::speech::v1::{
    recognition_config::AudioEncoding, speech_client::SpeechClient, RecognitionConfig,
    StreamingRecognitionConfig, StreamingRecognizeRequest,
};
use google_api_proto::google::cloud::translation::v3::{TranslateTextRequest, Translation};
use google_api_proto::google::cloud::translation::v3::translation_service_client::TranslationServiceClient;
use google_authz::{Credentials, GoogleAuthz};
use log::{debug, info};
use prost_types::Duration;
use tokio::io::AsyncReadExt;
use tokio_stream::wrappers::ReceiverStream;
use tonic::IntoStreamingRequest;
use tonic::transport::Channel;

/// Streams a local FLAC file to the Google Cloud Speech-to-Text streaming API and logs
/// every transcription result together with its per-word time offsets.
///
/// The flow is:
/// 1. open a channel to `speech.googleapis.com` and wrap it with service-account auth;
/// 2. queue the streaming configuration, then start the `streaming_recognize` RPC;
/// 3. upload the audio file in chunks from a background task;
/// 4. log responses as they arrive, concurrently with the upload.
///
/// # Errors
///
/// Returns an error if the connection or credentials cannot be set up, the audio file
/// cannot be opened or read, the RPC fails, or the upload task panics.
#[tokio::main]
async fn main() -> eyre::Result<()> {
    // Number of audio bytes read from the file and sent per streaming request.
    const AUDIO_CHUNK_SIZE: usize = 1024 * 5;
    // Maximum number of requests buffered between the file reader and the RPC.
    const REQUEST_QUEUE_CAPACITY: usize = 1024;

    tracing_subscriber::fmt::init();
    //console_subscriber::init();

    debug!("starting...");

    let channel = Channel::from_static("https://speech.googleapis.com")
        .connect()
        .await?;
    // let channel_translate = Channel::from_static("https://translate.googleapis.com")
    //     .connect()
    //     .await?;

    let credentials = Credentials::builder()
        .json_file("i-centralvideo-dictate-dev-c184dd68967a.json".as_ref())
        .build()
        .await?;
    let channel = GoogleAuthz::builder(channel)
        .credentials(credentials)
        .build()
        .await;

    debug!("authenticated channel created!");

    // Disabled translation experiment, kept for reference:
    // let mut translate = TranslationServiceClient::new(channel_translate);
    // let resp = translate.translate_text(TranslateTextRequest {
    //     contents: vec!["Que palhacada danada".to_string()],
    //     mime_type: "text/plain".to_string(),
    //     target_language_code: "en_US".to_string(),
    //     ..Default::default()
    // }).await.unwrap();
    // debug!("requested translation");
    //
    // for trans in resp.into_inner().translations.iter() {
    //     debug!("translation = {} // {}", trans.translated_text, trans.detected_language_code);
    // }

    let mut client = SpeechClient::new(channel);

    let (sender, receiver) = tokio::sync::mpsc::channel(REQUEST_QUEUE_CAPACITY);

    // Queue the configuration before starting the RPC so that it is guaranteed to be the
    // first message on the request stream. The queue is empty at this point, so
    // `try_send` cannot fail because of a full buffer.
    sender.try_send(StreamingRecognizeRequest {
        streaming_request: Some(StreamingRequest::StreamingConfig(
            StreamingRecognitionConfig {
                config: Some(RecognitionConfig {
                    encoding: AudioEncoding::Flac.into(), // matching current example file
                    sample_rate_hertz: 48000,             // matching current example file
                    language_code: "en-US".to_string(),   // we only support en-US to start with
                    model: "video".to_string(),           // dictate does not set this option
                    use_enhanced: true,                   // dictate does not set this option
                    profanity_filter: true,               // used by Dictate, so we also use it here
                    enable_word_time_offsets: true, // important so we can get the spoken word time ranges
                    max_alternatives: 1,            // make sure the default is used
                    ..Default::default()
                }),
                single_utterance: false,
                interim_results: false,
            },
        )),
    })?;

    debug!("sent streaming request configurations");

    let receiver_stream = Box::pin(ReceiverStream::new(receiver));
    let mut stream = client.streaming_recognize(receiver_stream).await?.into_inner();
    debug!("Called the streaming_recognize method");

    let file = tokio::fs::File::open("some-audio.flac").await?;
    let mut audio_file = tokio::io::BufReader::new(file);

    // Spawn a task that reads the file and uploads it to the Google Speech API. It is NOT
    // awaited here: responses are consumed below while the upload is still in progress.
    let upload = tokio::spawn(async move {
        let mut buffer = [0u8; AUDIO_CHUNK_SIZE];
        loop {
            // `read` returns `Ok(0)` at end-of-file; that is the signal to stop.
            let bytes_read = audio_file.read(&mut buffer).await?;
            if bytes_read == 0 {
                break;
            }

            // Send only the bytes that were actually read, not the whole buffer, and
            // wait for queue space instead of panicking when the queue is full.
            let request = StreamingRecognizeRequest {
                streaming_request: Some(StreamingRequest::AudioContent(
                    BytesMut::from(&buffer[..bytes_read]).freeze(),
                )),
            };
            if sender.send(request).await.is_err() {
                // The receiving half was dropped, i.e. the RPC is already over.
                debug!("request stream closed before the whole file was uploaded");
                break;
            }
            debug!("added a buffer to the sender queue");
        }
        // `sender` is dropped when this task ends, which closes the request stream.
        Ok::<(), std::io::Error>(())
    });

    debug!("waiting for responses...");
    // Continuously receive the transcribed responses.
    while let Some(response) = stream.message().await? {
        for (index, res) in response.results.iter().enumerate() {
            info!("Result {} {{", index + 1);
            if let Some(rec) = res.alternatives.first() {
                info!("\tTranscription: {}", rec.transcript);
                for word_info in &rec.words {
                    match (word_info.start_time.as_ref(), word_info.end_time.as_ref()) {
                        // Nanoseconds are zero-padded to nine digits so the value reads
                        // as a correct decimal fraction of a second.
                        (Some(start_time), Some(end_time)) => info!(
                            "\t - {}: [{}.{:09} - {}.{:09}]",
                            word_info.word,
                            start_time.seconds,
                            start_time.nanos,
                            end_time.seconds,
                            end_time.nanos
                        ),
                        // Do not panic if a word comes back without time offsets.
                        _ => info!("\t - {}: [no time offsets]", word_info.word),
                    }
                }
            }
            info!("}}");
        }
    }

    // Surface a panic in the upload task (outer `?`) or a file read error (inner `?`).
    upload.await??;

    Ok(())
}