use std::io::Cursor; use bytes::BytesMut; use std::path::PathBuf; use std::pin::Pin; use google_api_proto::google::cloud::speech::v1::streaming_recognize_request::StreamingRequest; use google_api_proto::google::cloud::speech::v1::{ recognition_config::AudioEncoding, speech_client::SpeechClient, RecognitionConfig, StreamingRecognitionConfig, StreamingRecognizeRequest, }; use google_api_proto::google::cloud::translation::v3::{TranslateTextRequest, Translation}; use google_api_proto::google::cloud::translation::v3::translation_service_client::TranslationServiceClient; use google_authz::{Credentials, GoogleAuthz}; use log::{debug, info}; use prost_types::Duration; use tokio::io::AsyncReadExt; use tokio_stream::wrappers::ReceiverStream; use tonic::IntoStreamingRequest; use tonic::transport::Channel; #[tokio::main] async fn main() -> eyre::Result<()> { tracing_subscriber::fmt::init(); //console_subscriber::init(); debug!("starting..."); let channel = Channel::from_static("https://speech.googleapis.com") .connect() .await?; // let channel_translate = Channel::from_static("https://translate.googleapis.com") // .connect() // .await?; let credentials = Credentials::builder() .json_file("i-centralvideo-dictate-dev-c184dd68967a.json".as_ref()) .build() .await?; let channel = GoogleAuthz::builder(channel) .credentials(credentials) .build() .await; debug!("authenticated channel created!"); // let mut translate = TranslationServiceClient::new(channel_translate); // let resp = translate.translate_text(TranslateTextRequest { // contents: vec!["Que palhacada danada".to_string()], // mime_type: "text/plain".to_string(), // target_language_code: "en_US".to_string(), // ..Default::default() // }).await.unwrap(); // debug!("requested translation"); // // for trans in resp.into_inner().translations.iter() { // debug!("translation = {} // {}", trans.translated_text, trans.detected_language_code); // } let mut client = SpeechClient::new(channel); let (sender, receiver) = tokio::sync::mpsc::channel(1024); let receiver_stream = Box::pin(ReceiverStream::new(receiver)); let mut stream = client.streaming_recognize(receiver_stream).await?.into_inner(); debug!("Called the streaming_recognize method"); sender.try_send(StreamingRecognizeRequest { streaming_request: Some(StreamingRequest::StreamingConfig( StreamingRecognitionConfig { config: Some(RecognitionConfig { encoding: AudioEncoding::Flac.into(), // matching current example file sample_rate_hertz: 48000, // matching current example file language_code: "en-US".to_string(), // we only support en-US to start with model: "video".to_string(), // dictate does not set this option use_enhanced: true, // dictate does not set this option profanity_filter: true, // used by Dictate, so we also use it here enable_word_time_offsets: true, // important so we can get the spoken word time ranges max_alternatives: 1, // make sure the default is used ..Default::default() }), single_utterance: false, interim_results: false, }, )), })?; debug!("sent streaming request configurations"); let file = tokio::fs::File::open("some-audio.flac").await?; let mut audio_file = tokio::io::BufReader::new(file); // spawn task reading from file and uploading to Google Speech API tokio::spawn(async move { // read file chunk let mut buffer = [0; 1024 * 5]; while let Ok(_) = audio_file.read(&mut buffer).await { // send to server sender .try_send(StreamingRecognizeRequest { streaming_request: Some(StreamingRequest::AudioContent( BytesMut::from(buffer.as_slice()).freeze(), )), }) .unwrap(); debug!("added a buffer to the sender queue"); } }) .await?; debug!("waiting for responses..."); // continuous receiving the transcribed response while let Some(response) = stream.message().await? { let mut num_results = 0; for res in &response.results { num_results = num_results + 1; info!("Result {} {{", num_results); if let Some(rec) = res.alternatives.first() { info!("\tTranscription: {}", rec.transcript); for word_info in &rec.words { // let start_time: WordTimestamp = word_info.start_time.into(); let start_time = word_info.start_time.as_ref().unwrap(); let end_time = word_info.end_time.as_ref().unwrap(); info!( "\t - {}: [{}.{} - {}.{}]", word_info.word, start_time.seconds, start_time.nanos, end_time.seconds, end_time.nanos ); } } info!("}}"); } } Ok(()) }