From 57f5c0f14277c6e476b9998c953e58d7deb0cd9d Mon Sep 17 00:00:00 2001 From: Chris McCord Date: Mon, 22 May 2023 13:57:29 -0400 Subject: [PATCH] Whisper --- Dockerfile | 10 +++- config/config.exs | 2 +- config/dev.exs | 2 +- fly.toml | 43 +++++++------- lib/live_beats/application.ex | 56 ++++++++++++------- lib/live_beats/audio.ex | 17 ++++++ lib/live_beats/media_library.ex | 31 +++++++--- lib/live_beats/media_library/song.ex | 5 ++ .../controllers/oauth_callback_controller.ex | 4 +- lib/live_beats_web/live/profile_live.ex | 19 ++++++- mix.exs | 7 ++- mix.lock | 3 +- ...0230314150807_add_transcripts_to_songs.exs | 9 +++ 13 files changed, 148 insertions(+), 60 deletions(-) create mode 100644 lib/live_beats/audio.ex create mode 100644 priv/repo/migrations/20230314150807_add_transcripts_to_songs.exs diff --git a/Dockerfile b/Dockerfile index 6ceef20..b5b8f67 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,13 +12,13 @@ # - https://pkgs.org/ - resource for finding needed packages # - Ex: hexpm/elixir:1.12.0-erlang-24.0.1-debian-bullseye-20210902-slim # -ARG BUILDER_IMAGE="hexpm/elixir:1.12.0-erlang-24.0.1-debian-bullseye-20210902-slim" +ARG BUILDER_IMAGE="hexpm/elixir:1.14.0-erlang-24.0.1-debian-bullseye-20210902-slim" ARG RUNNER_IMAGE="debian:bullseye-20210902-slim" FROM ${BUILDER_IMAGE} as builder # install build dependencies -RUN apt-get update -y && apt-get install -y build-essential git \ +RUN apt-get update -y && apt-get install -y build-essential git curl ffmpeg \ && apt-get clean && rm -f /var/lib/apt/lists/*_* # prepare build dir @@ -30,6 +30,7 @@ RUN mix local.hex --force && \ # set build ENV ENV MIX_ENV="prod" +ENV BUMBLEBEE_CACHE_DIR="/app/.bumblebee" # install mix dependencies COPY mix.exs mix.lock ./ @@ -57,6 +58,7 @@ COPY assets assets RUN mix assets.deploy RUN mix compile +RUN mix run -e 'LiveBeats.Application.load_serving()' --no-start # Changes to config/runtime.exs don't require recompiling the code COPY config/runtime.exs config/ @@ -68,7 +70,7 @@ RUN mix release # the compiled release and other runtime necessities FROM ${RUNNER_IMAGE} -RUN apt-get update -y && apt-get install -y libstdc++6 openssl libncurses5 locales \ +RUN apt-get update -y && apt-get install -y libstdc++6 openssl libncurses5 locales curl ffmpeg \ && apt-get clean && rm -f /var/lib/apt/lists/*_* # Set the locale @@ -80,9 +82,11 @@ ENV LC_ALL en_US.UTF-8 WORKDIR "/app" RUN chown nobody /app +ENV BUMBLEBEE_CACHE_DIR="/app/.bumblebee" # Only copy the final release from the build stage COPY --from=builder --chown=nobody:root /app/_build/prod/rel/live_beats ./ +COPY --from=builder --chown=nobody:root /app/.bumblebee/ ./.bumblebee USER nobody diff --git a/config/config.exs b/config/config.exs index e60bde9..a0d2a9f 100644 --- a/config/config.exs +++ b/config/config.exs @@ -34,7 +34,7 @@ config :esbuild, # Configure tailwind (the version is required) config :tailwind, - version: "3.1.8", + version: "3.2.7", default: [ args: ~w( --config=tailwind.config.js diff --git a/config/dev.exs b/config/dev.exs index 7401360..0686acb 100644 --- a/config/dev.exs +++ b/config/dev.exs @@ -2,7 +2,7 @@ import Config config :live_beats, :files, uploads_dir: Path.expand("../priv/uploads", __DIR__), - host: [scheme: "http", host: "localhost", port: 4000], + host: [scheme: "http", host: "localhost", port: 4001], server_ip: "127.0.0.1", hostname: "localhost", transport_opts: [] diff --git a/fly.toml b/fly.toml index 4f9205d..23116c5 100644 --- a/fly.toml +++ b/fly.toml @@ -1,45 +1,46 @@ -app = "livebeats" +# fly.toml app configuration file generated for livebeats on 2023-05-19T22:42:40-04:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. +# +app = "livebeats" +primary_region = "ord" kill_signal = "SIGTERM" -kill_timeout = 5 -processes = [] +kill_timeout = "5s" + +[experimental] + auto_rollback = true [deploy] release_command = "/app/bin/migrate" [env] + BUMBLEBEE_CACHE_DIR = "/app/.bumblebee" PHX_HOST = "livebeats.fly.dev" [mounts] source="data" destination="/app/uploads" -[experimental] - allowed_public_ports = [] - auto_rollback = true - [[services]] - http_checks = [] + protocol = "tcp" internal_port = 4000 processes = ["app"] - protocol = "tcp" - script_checks = [] + [[services.ports]] + port = 80 + handlers = ["http"] + + [[services.ports]] + port = 443 + handlers = ["tls", "http"] [services.concurrency] + type = "connections" hard_limit = 2500 soft_limit = 2000 - type = "connections" - - [[services.ports]] - handlers = ["http"] - port = 80 - - [[services.ports]] - handlers = ["tls", "http"] - port = 443 [[services.tcp_checks]] - grace_period = "20s" # allow some time for startup interval = "15s" + timeout = "2s" + grace_period = "20s" restart_limit = 0 - timeout = "2s" \ No newline at end of file diff --git a/lib/live_beats/application.ex b/lib/live_beats/application.ex index 7cac791..f5cd977 100644 --- a/lib/live_beats/application.ex +++ b/lib/live_beats/application.ex @@ -5,32 +5,46 @@ defmodule LiveBeats.Application do use Application + def load_serving do + {:ok, whisper} = Bumblebee.load_model({:hf, "openai/whisper-tiny"}) + {:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"}) + {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"}) + + Bumblebee.Audio.speech_to_text(whisper, featurizer, tokenizer, + compile: [batch_size: 1], + max_new_tokens: 100, + defn_options: [compiler: EXLA] + ) + end + @impl true def start(_type, _args) do LiveBeats.MediaLibrary.attach() topologies = Application.get_env(:libcluster, :topologies) || [] - children = [ - {Cluster.Supervisor, [topologies, [name: LiveBeats.ClusterSupervisor]]}, - {Task.Supervisor, name: LiveBeats.TaskSupervisor}, - # Start the Ecto repository - LiveBeats.Repo, - LiveBeats.ReplicaRepo, - # Start the Telemetry supervisor - LiveBeatsWeb.Telemetry, - # Start the PubSub system - {Phoenix.PubSub, name: LiveBeats.PubSub}, - # start presence - LiveBeatsWeb.Presence, - {Finch, name: LiveBeats.Finch}, - # Start the Endpoint (http/https) - LiveBeatsWeb.Endpoint, - # Expire songs every six hours - {LiveBeats.SongsCleaner, interval: {3600 * 6, :second}} - - # Start a worker by calling: LiveBeats.Worker.start_link(arg) - # {LiveBeats.Worker, arg} - ] + children = + [ + {Nx.Serving, name: WhisperServing, serving: load_serving()}, + {Cluster.Supervisor, [topologies, [name: LiveBeats.ClusterSupervisor]]}, + {Task.Supervisor, name: LiveBeats.TaskSupervisor}, + {Task.Supervisor, name: Fly.Machine.TaskSupervisor}, + # Start the Ecto repository + LiveBeats.Repo, + LiveBeats.ReplicaRepo, + # Start the Telemetry supervisor + LiveBeatsWeb.Telemetry, + # Start the PubSub system + {Phoenix.PubSub, name: LiveBeats.PubSub}, + # start presence + LiveBeatsWeb.Presence, + {Finch, name: LiveBeats.Finch}, + # Start the Endpoint (http/https) + LiveBeatsWeb.Endpoint, + # Expire songs every six hours + {LiveBeats.SongsCleaner, interval: {3600 * 6, :second}} + # Start a worker by calling: LiveBeats.Worker.start_link(arg) + # {LiveBeats.Worker, arg} + ] # See https://hexdocs.pm/elixir/Supervisor.html # for other strategies and supported options diff --git a/lib/live_beats/audio.ex b/lib/live_beats/audio.ex new file mode 100644 index 0000000..f4340b5 --- /dev/null +++ b/lib/live_beats/audio.ex @@ -0,0 +1,17 @@ +defmodule LiveBeats.Audio do + def speech_to_text(path, chunk_time, func) do + {:ok, stat} = LiveBeats.MP3Stat.parse(path) + + 0..stat.duration//chunk_time + |> Task.async_stream( + fn ss -> + args = ~w(-ac 1 -ar 16k -f f32le -ss #{ss} -t #{chunk_time} -v quiet -) + {data, 0} = System.cmd("ffmpeg", ["-i", path] ++ args) + {ss, Nx.Serving.batched_run(WhisperServing, Nx.from_binary(data, :f32))} + end, + max_concurrency: 2, + timeout: :infinity + ) + |> Enum.map(fn {:ok, {ss, %{results: [%{text: text}]}}} -> func.(ss, text) end) + end +end diff --git a/lib/live_beats/media_library.ex b/lib/live_beats/media_library.ex index ac9b959..e5b6f94 100644 --- a/lib/live_beats/media_library.ex +++ b/lib/live_beats/media_library.ex @@ -52,13 +52,7 @@ defmodule LiveBeats.MediaLibrary do user.id == song.user_id end - def play_song(%Song{id: id}) do - play_song(id) - end - - def play_song(id) do - song = get_song!(id) - + def play_song(%Song{} = song) do played_at = cond do playing?(song) -> @@ -97,6 +91,12 @@ defmodule LiveBeats.MediaLibrary do new_song end + def play_song(id) do + id + |> get_song!() + |> play_song() + end + def pause_song(%Song{} = song) do now = DateTime.truncate(DateTime.utc_now(), :second) set = [status: :paused, paused_at: now] @@ -211,6 +211,7 @@ defmodule LiveBeats.MediaLibrary do |> Enum.filter(&match?({{:song, _ref}, _}, &1)) |> Enum.map(fn {{:song, ref}, song} -> consume_file.(ref, fn tmp_path -> store_mp3(song, tmp_path) end) + async_transcribe(song, user) {ref, song} end) @@ -231,6 +232,22 @@ defmodule LiveBeats.MediaLibrary do end end + defp async_transcribe(%Song{} = song, %Accounts.User{} = user) do + Task.Supervisor.start_child(LiveBeats.TaskSupervisor, fn -> + segments = + LiveBeats.Audio.speech_to_text(song.mp3_filepath, 20, fn ss, text -> + segment = %Song.TranscriptSegment{ss: ss, text: text} + broadcast!(user.id, {segment, song.id}) + + segment + end) + + Repo.update_all(from(s in Song, where: s.id == ^song.id), + set: [transcript_segments: segments] + ) + end) + end + defp broadcast_imported(%Accounts.User{} = user, songs) do songs = Enum.map(songs, fn {_ref, song} -> song end) broadcast!(user.id, %Events.SongsImported{user_id: user.id, songs: songs}) diff --git a/lib/live_beats/media_library/song.ex b/lib/live_beats/media_library/song.ex index e445212..43c1513 100644 --- a/lib/live_beats/media_library/song.ex +++ b/lib/live_beats/media_library/song.ex @@ -25,6 +25,11 @@ defmodule LiveBeats.MediaLibrary.Song do belongs_to :user, Accounts.User belongs_to :genre, LiveBeats.MediaLibrary.Genre + embeds_many :transcript_segments, TranscriptSegment do + field :ss, :integer + field :text, :string + end + timestamps() end diff --git a/lib/live_beats_web/controllers/oauth_callback_controller.ex b/lib/live_beats_web/controllers/oauth_callback_controller.ex index d4ebc54..bc45b2d 100644 --- a/lib/live_beats_web/controllers/oauth_callback_controller.ex +++ b/lib/live_beats_web/controllers/oauth_callback_controller.ex @@ -15,7 +15,7 @@ defmodule LiveBeatsWeb.OAuthCallbackController do |> LiveBeatsWeb.UserAuth.log_in_user(user) else {:error, %Ecto.Changeset{} = changeset} -> - Logger.debug("failed GitHub insert #{inspect(changeset.errors)}") + Logger.info("failed GitHub insert #{inspect(changeset.errors)}") conn |> put_flash( @@ -25,7 +25,7 @@ defmodule LiveBeatsWeb.OAuthCallbackController do |> redirect(to: "/") {:error, reason} -> - Logger.debug("failed GitHub exchange #{inspect(reason)}") + Logger.info("failed GitHub exchange #{inspect(reason)}") conn |> put_flash(:error, "We were unable to contact GitHub. Please try again later") diff --git a/lib/live_beats_web/live/profile_live.ex b/lib/live_beats_web/live/profile_live.ex index 4fe7cee..bc89302 100644 --- a/lib/live_beats_web/live/profile_live.ex +++ b/lib/live_beats_web/live/profile_live.ex @@ -60,6 +60,13 @@ defmodule LiveBeatsWeb.ProfileLive do total_count={@presences_count} /> +
+
+ [<%= LiveBeats.MP3Stat.to_mmss(segment.ss) %>] + <%= segment.text %> +
+
+
<%= for {_id, song} <- if(@owns_profile?, do: @streams.songs, else: []), id = "delete-modal-#{song.id}" do %> <.modal @@ -151,6 +158,7 @@ defmodule LiveBeatsWeb.ProfileLive do end active_song = MediaLibrary.get_current_active_song(profile) + segments = if active_song, do: active_song.transcript_segments, else: [] songs = MediaLibrary.list_profile_songs(profile, 50) @@ -164,6 +172,7 @@ defmodule LiveBeatsWeb.ProfileLive do songs_count: Enum.count(songs) ) |> stream(:songs, songs) + |> stream(:transcript_segments, segments, dom_id: &"ss-#{&1.ss}") |> assign_presences() {:ok, socket, temporary_assigns: [presences: %{}]} @@ -255,12 +264,20 @@ defmodule LiveBeatsWeb.ProfileLive do {:noreply, pause_song(socket, song)} end + def handle_info({MediaLibrary, {%MediaLibrary.Song.TranscriptSegment{} = seg, song_id}}, socket) do + if socket.assigns.active_song_id == song_id do + {:noreply, stream_insert(socket, :transcript_segments, seg)} + else + {:noreply, socket} + end + end + def handle_info({MediaLibrary, %MediaLibrary.Events.SongsImported{songs: songs}}, socket) do %{current_user: current_user, active_song_id: active_song_id} = socket.assigns first = hd(songs) if !active_song_id && MediaLibrary.can_control_playback?(current_user, first) do - MediaLibrary.play_song(first.id) + MediaLibrary.play_song(first) end {:noreply, diff --git a/mix.exs b/mix.exs index b164b32..461854f 100644 --- a/mix.exs +++ b/mix.exs @@ -53,8 +53,11 @@ defmodule LiveBeats.MixProject do {:mint, "~> 1.0"}, {:heroicons, "~> 0.2.2"}, {:castore, "~> 0.1.13"}, - {:tailwind, "~> 0.1"}, - {:libcluster, "~> 3.3.1"} + {:tailwind, "~> 0.2.0"}, + {:libcluster, "~> 3.3.1"}, + {:bumblebee, github: "elixir-nx/bumblebee"}, + {:exla, ">= 0.0.0"}, + {:req, "~> 0.3.7"} ] end diff --git a/mix.lock b/mix.lock index d223b38..78b322a 100644 --- a/mix.lock +++ b/mix.lock @@ -45,9 +45,10 @@ "postgrex": {:hex, :postgrex, "0.16.5", "fcc4035cc90e23933c5d69a9cd686e329469446ef7abba2cf70f08e2c4b69810", [:mix], [{:connection, "~> 1.1", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "edead639dc6e882618c01d8fc891214c481ab9a3788dfe38dd5e37fd1d5fb2e8"}, "progress_bar": {:hex, :progress_bar, "2.0.1", "7b40200112ae533d5adceb80ff75fbe66dc753bca5f6c55c073bfc122d71896d", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "2519eb58a2f149a3a094e729378256d8cb6d96a259ec94841bd69fdc71f18f87"}, "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"}, + "req": {:hex, :req, "0.3.7", "e4ea5d73e3f434c0a15601bb85330ffd0e57860c098283e98c28d21172a1f749", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "a7d3c0bec7d2d23198ef12676d2c950bec258308c6a5123eb98465030205f39c"}, "rustler_precompiled": {:hex, :rustler_precompiled, "0.6.1", "160b545bce8bf9a3f1b436b2c10f53574036a0db628e40f393328cbbe593602f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "0dd269fa261c4e3df290b12031c575fff07a542749f7b0e8b744d72d66c43600"}, "swoosh": {:hex, :swoosh, "1.8.2", "af9a22ab2c0d20b266f61acca737fa11a121902de9466a39e91bacdce012101c", [:mix], [{:cowboy, "~> 1.1 or ~> 2.4", [hex: :cowboy, repo: "hexpm", optional: true]}, {:ex_aws, "~> 2.1", [hex: :ex_aws, repo: "hexpm", optional: true]}, {:finch, "~> 0.6", [hex: :finch, repo: "hexpm", optional: true]}, {:gen_smtp, "~> 0.13 or ~> 1.0", [hex: :gen_smtp, repo: "hexpm", optional: true]}, {:hackney, "~> 1.9", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mail, "~> 0.2", [hex: :mail, repo: "hexpm", optional: true]}, {:mime, "~> 1.1 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_cowboy, ">= 1.0.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "d058ba750eafadb6c09a84a352c14c5d1eeeda6e84945fcc95785b7f3067b7db"}, - "tailwind": {:hex, :tailwind, "0.1.9", "25ba09d42f7bfabe170eb67683a76d6ec2061952dc9bd263a52a99ba3d24bd4d", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "9213f87709c458aaec313bb5f2df2b4d2cedc2b630e4ae821bf3c54c47a56d0b"}, + "tailwind": {:hex, :tailwind, "0.2.0", "95f9e4a32020c5bec480f1d6a43a49ac8030b13183127b577605f506d6e13a66", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "385e939fcd7fe4654be5130b187e358aaabade385513f9d200ffecdbb9552a9e"}, "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, "telemetry_metrics": {:hex, :telemetry_metrics, "0.6.1", "315d9163a1d4660aedc3fee73f33f1d355dcc76c5c3ab3d59e76e3edf80eef1f", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7be9e0871c41732c233be71e4be11b96e56177bf15dde64a8ac9ce72ac9834c6"}, "telemetry_poller": {:hex, :telemetry_poller, "1.0.0", "db91bb424e07f2bb6e73926fcafbfcbcb295f0193e0a00e825e589a0a47e8453", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b3a24eafd66c3f42da30fc3ca7dda1e9d546c12250a2d60d7b81d264fbec4f6e"}, diff --git a/priv/repo/migrations/20230314150807_add_transcripts_to_songs.exs b/priv/repo/migrations/20230314150807_add_transcripts_to_songs.exs new file mode 100644 index 0000000..7c4f83c --- /dev/null +++ b/priv/repo/migrations/20230314150807_add_transcripts_to_songs.exs @@ -0,0 +1,9 @@ +defmodule LiveBeats.Repo.Migrations.AddTranscriptsToSongs do + use Ecto.Migration + + def change do + alter table(:songs) do + add :transcript_segments, {:array, :map}, null: false, default: [] + end + end +end