From d9245945877b07670c37ee8310bcb48a7a0fca4c Mon Sep 17 00:00:00 2001 From: Chris McCord Date: Mon, 6 Mar 2023 23:07:26 -0500 Subject: [PATCH] WIP --- lib/live_beats/application.ex | 40 ++++++++++------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/lib/live_beats/application.ex b/lib/live_beats/application.ex index 0c5d2b5..0bbe148 100644 --- a/lib/live_beats/application.ex +++ b/lib/live_beats/application.ex @@ -5,41 +5,23 @@ defmodule LiveBeats.Application do use Application - def speech_to_text(serving, path, chunk_time_sec \\ 5) do + def speech_to_text(serving, path, chunk_time \\ 5) do {:ok, stat} = LiveBeats.MP3Stat.parse(path) - chunks = trunc(Float.ceil(stat.duration / chunk_time_sec)) - {ffmpeg_args, _} = - Enum.reduce(1..(chunks - 1), {[], 0}, fn _chunk, {args, ss} -> - chunk_args = ~w( - -i #{path} - -ac 1 - -ar 16000 - -f f32le - -ss #{ss} - -t #{chunk_time_sec} - -hide_banner - -loglevel quiet - pipe:1 - ) - - {[chunk_args | args], ss + chunk_time_sec} - end) - - ffmpeg_args - |> Enum.reverse() - |> Task.async_stream( - fn args -> - {data, 0} = System.cmd("ffmpeg", args) - Nx.Serving.batched_run(serving, Nx.from_binary(data, :f32)) - end, - timeout: 20_000 - ) + 0..stat.duration//chunk_time + |> Task.async_stream(&ffmpeg_to_nx(serving, path, &1, chunk_time), timeout: 20_000) |> Enum.each(fn {:ok, %{results: [%{text: text} | _]}} -> IO.puts(">> #{text}") end) end + defp ffmpeg_to_nx(serving, path, ss, duration) do + args = ~w(-i #{path} -ac 1 -ar 16000 -f f32le -ss #{ss} -t #{duration} -v quiet pipe:1) + {data, 0} = System.cmd("ffmpeg", args) + + Nx.Serving.batched_run(serving, Nx.from_binary(data, :f32)) + end + @impl true def start(_type, _args) do LiveBeats.MediaLibrary.attach() @@ -53,7 +35,7 @@ defmodule LiveBeats.Application do {Nx.Serving, serving: Bumblebee.Audio.speech_to_text(whisper, featurizer, tokenizer, - max_new_tokens: 100, + max_new_tokens: 50, defn_options: [batch_size: 10, compiler: EXLA] ), name: WhisperServing,