This commit is contained in:
Chris McCord 2023-03-09 09:29:22 -05:00
parent d924594587
commit a1c6c454f0
12 changed files with 115 additions and 29 deletions

View file

@ -5,23 +5,6 @@ defmodule LiveBeats.Application do
use Application
@doc """
Transcribes the audio file at `path` with `serving` and prints each chunk's text.

The file duration is read with `LiveBeats.MP3Stat.parse/1`; the file is then processed
in steps of `chunk_time` seconds (default 5), each chunk being handed to `ffmpeg_to_nx/4`
in its own task with a 20 second timeout. The first result of every chunk is written to
standard output prefixed with ">> ". Returns `:ok`.
"""
def speech_to_text(serving, path, chunk_time \\ 5) do
  {:ok, stat} = LiveBeats.MP3Stat.parse(path)

  offsets = Range.new(0, stat.duration, chunk_time)
  transcribe = fn offset -> ffmpeg_to_nx(serving, path, offset, chunk_time) end

  offsets
  |> Task.async_stream(transcribe, timeout: 20_000)
  |> Enum.each(fn {:ok, %{results: [%{text: text} | _]}} -> IO.puts(">> #{text}") end)
end
# Decodes `duration` seconds of the file at `path`, starting at offset `ss`, into raw
# mono 16 kHz 32-bit float samples via `ffmpeg` and runs them through `serving`.
#
# Raises a `MatchError` when `ffmpeg` exits with a non-zero status.
defp ffmpeg_to_nx(serving, path, ss, duration) do
  # `path` is passed as its own argv element: interpolating it into `~w` would split a
  # path containing whitespace into several ffmpeg arguments.
  args =
    ["-i", path] ++ ~w(-ac 1 -ar 16000 -f f32le -ss #{ss} -t #{duration} -v quiet pipe:1)

  {data, 0} = System.cmd("ffmpeg", args)
  Nx.Serving.batched_run(serving, Nx.from_binary(data, :f32))
end
@impl true
def start(_type, _args) do
LiveBeats.MediaLibrary.attach()
@ -35,7 +18,7 @@ defmodule LiveBeats.Application do
{Nx.Serving,
serving:
Bumblebee.Audio.speech_to_text(whisper, featurizer, tokenizer,
max_new_tokens: 50,
max_new_tokens: 200,
defn_options: [batch_size: 10, compiler: EXLA]
),
name: WhisperServing,

17
lib/live_beats/audio.ex Normal file
View file

@ -0,0 +1,17 @@
defmodule LiveBeats.Audio do
  @moduledoc """
  Speech-to-text transcription of audio files using `ffmpeg` and the `WhisperServing`
  Nx serving.
  """

  @doc """
  Transcribes the audio file at `path` in chunks of `chunk_time` seconds.

  The file duration is read with `LiveBeats.MP3Stat.parse/1`. Each chunk is decoded by
  `ffmpeg` into raw mono 16 kHz 32-bit float samples and run through `WhisperServing`,
  with at most two chunks in flight at a time. `func` is invoked as
  `func.(start_seconds, text)` for every chunk, in chunk order, and the list of its
  return values is returned.

  `chunk_time` must be greater than zero and at most 30 seconds (default 15.0); any
  other value raises a `FunctionClauseError`. A non-positive step would otherwise
  generate chunk offsets forever.

  Raises a `MatchError` when the file cannot be parsed or `ffmpeg` exits with a non-zero
  status.
  """
  def speech_to_text(path, chunk_time \\ 15.0, func)
      when chunk_time > 0 and chunk_time <= 30.0 do
    {:ok, stat} = LiveBeats.MP3Stat.parse(path)

    0
    |> Stream.iterate(&(&1 + chunk_time))
    |> Enum.take_while(&(&1 < stat.duration))
    |> Task.async_stream(
      fn ss ->
        # `path` is passed as its own argv element: interpolating it into `~w` would
        # split a path containing whitespace into several ffmpeg arguments.
        args = ["-i", path] ++ ~w(-ac 1 -ar 16k -f f32le -ss #{ss} -t #{chunk_time} -v quiet -)
        {data, 0} = System.cmd("ffmpeg", args)
        {ss, Nx.Serving.batched_run(WhisperServing, Nx.from_binary(data, :f32))}
      end,
      timeout: :infinity,
      max_concurrency: 2
    )
    |> Enum.map(fn {:ok, {ss, %{results: [%{text: text}]}}} -> func.(ss, text) end)
  end
end

View file

@ -6,7 +6,7 @@ defmodule LiveBeats.MediaLibrary do
require Logger
import Ecto.Query, warn: false
alias LiveBeats.{Repo, MP3Stat, Accounts}
alias LiveBeats.MediaLibrary.{Profile, Song, Events, Genre}
alias LiveBeats.MediaLibrary.{Profile, Song, Events, Genre, TextSegment}
alias Ecto.{Multi, Changeset}
@pubsub LiveBeats.PubSub
@ -209,6 +209,18 @@ defmodule LiveBeats.MediaLibrary do
|> Enum.filter(&match?({{:song, _ref}, _}, &1))
|> Enum.map(fn {{:song, ref}, song} ->
consume_file.(ref, fn tmp_path -> store_mp3(song, tmp_path) end)
Task.Supervisor.start_child(LiveBeats.TaskSupervisor, fn ->
segments =
LiveBeats.Audio.speech_to_text(song.mp3_filepath, 20.0, fn ss, text ->
segment = %TextSegment{start_time: ss, text: text}
broadcast!(user.id, %Events.SpeechToText{song_id: song.id, segment: segment})
segment
end)
insert_text_segments(song, segments)
end)
{ref, song}
end)
@ -228,6 +240,10 @@ defmodule LiveBeats.MediaLibrary do
end
end
# Overwrites the `text_segments` column of the row matching `song.id` with `segments`
# through a single `Repo.update_all/2` call.
defp insert_text_segments(song, segments) do
  Song
  |> where([s], s.id == ^song.id)
  |> Repo.update_all(set: [text_segments: segments])
end
defp broadcast_imported(%Accounts.User{} = user, songs) do
songs = Enum.map(songs, fn {_ref, song} -> song end)
broadcast!(user.id, %Events.SongsImported{user_id: user.id, songs: songs})

View file

@ -22,4 +22,8 @@ defmodule LiveBeats.MediaLibrary.Events do
defmodule SongDeleted do
  @moduledoc """
  Event struct with a single `song` field (defaults to `nil`) identifying the deleted song.
  """
  defstruct [:song]
end
defmodule SpeechToText do
  @moduledoc """
  Event struct carrying one transcribed `segment` together with the `song_id` it
  belongs to. Both fields default to `nil`.
  """
  defstruct [:song_id, :segment]
end
end

View file

@ -24,6 +24,7 @@ defmodule LiveBeats.MediaLibrary.Song do
field :position, :integer, default: 0
belongs_to :user, Accounts.User
belongs_to :genre, LiveBeats.MediaLibrary.Genre
embeds_many :text_segments, LiveBeats.MediaLibrary.TextSegment
timestamps()
end

View file

@ -0,0 +1,8 @@
defmodule LiveBeats.MediaLibrary.TextSegment do
  @moduledoc """
  Embedded schema for one transcribed piece of audio: its `text` and the `start_time`
  offset at which it begins.
  """
  use Ecto.Schema

  embedded_schema do
    field(:start_time, :float)
    field(:text, :string)
  end
end

View file

@ -332,6 +332,14 @@ defmodule LiveBeatsWeb.PlayerLive do
{:noreply, play_song(socket, play.song, play.elapsed)}
end
# Calls `stop_song/1` when the deleted song is the one currently assigned to the socket;
# otherwise the socket is left untouched.
def handle_info({MediaLibrary, %MediaLibrary.Events.SongDeleted{song: deleted}}, socket) do
  current = socket.assigns.song

  socket =
    if current && current.id == deleted.id do
      stop_song(socket)
    else
      socket
    end

  {:noreply, socket}
end

# Any other MediaLibrary event is ignored.
def handle_info({MediaLibrary, _event}, socket) do
  {:noreply, socket}
end
defp play_song(socket, %Song{} = song, elapsed) do

View file

@ -54,6 +54,19 @@ defmodule LiveBeatsWeb.ProfileLive do
</:actions>
</.title_bar>
<div
id={"text-to-speech-#{@active_song_id}"}
phx-update="stream"
class="p-6 max-h-[200px] overflow-y-scroll"
>
<div :for={{id, segment} <- @streams.speech_segments} id={id}>
<span class="min-w-[40px] inline-block text-gray-400">
[<%= seconds_to_mm_ss(segment.start_time) %>]
</span>
<%= segment.text %>
</div>
</div>
<Presence.listening_now
presences={@presences}
presence_ids={@presence_ids}
@ -150,23 +163,22 @@ defmodule LiveBeatsWeb.ProfileLive do
Presence.subscribe(profile)
end
active_song_id =
if song = MediaLibrary.get_current_active_song(profile) do
song.id
end
active_song = MediaLibrary.get_current_active_song(profile)
speech_segments = if active_song, do: active_song.text_segments, else: []
songs = MediaLibrary.list_profile_songs(profile, 50)
socket =
socket
|> assign(
active_song_id: active_song_id,
active_song_id: active_song && active_song.id,
active_profile_id: current_user.active_profile_user_id,
profile: profile,
owns_profile?: MediaLibrary.owns_profile?(current_user, profile),
songs_count: Enum.count(songs)
)
|> stream(:songs, songs)
|> stream(:speech_segments, speech_segments, dom_id: &"ss-#{&1.start_time}")
|> assign_presences()
{:ok, socket, temporary_assigns: [presences: %{}]}
@ -202,7 +214,11 @@ defmodule LiveBeatsWeb.ProfileLive do
:ok = MediaLibrary.delete_song(song)
end
{:noreply, socket}
if song.id == socket.assigns.active_song_id do
{:noreply, assign(socket, :active_song_id, nil)}
else
{:noreply, socket}
end
end
def handle_event("row_dropped", %{"id" => dom_id, "old" => old_idx, "new" => new_idx}, socket) do
@ -255,6 +271,13 @@ defmodule LiveBeatsWeb.ProfileLive do
end
def handle_info({MediaLibrary, %MediaLibrary.Events.SongsImported{songs: songs}}, socket) do
%{current_user: current_user, active_song_id: active_song_id} = socket.assigns
first = hd(songs)
if !active_song_id && MediaLibrary.can_control_playback?(current_user, first) do
MediaLibrary.play_song(first.id)
end
{:noreply,
Enum.reduce(songs, socket, fn song, acc ->
acc
@ -263,6 +286,17 @@ defmodule LiveBeatsWeb.ProfileLive do
end)}
end
# Inserts an incoming transcription segment into the `:speech_segments` stream, but only
# when it belongs to the currently active song; segments for other songs are dropped.
def handle_info(
      {MediaLibrary, %MediaLibrary.Events.SpeechToText{song_id: song_id, segment: segment}},
      socket
    ) do
  socket =
    if socket.assigns.active_song_id == song_id do
      stream_insert(socket, :speech_segments, segment)
    else
      socket
    end

  {:noreply, socket}
end
def handle_info({MediaLibrary, %MediaLibrary.Events.SongDeleted{song: song}}, socket) do
{:noreply,
socket
@ -310,7 +344,9 @@ defmodule LiveBeatsWeb.ProfileLive do
stream_insert(socket, :songs, %MediaLibrary.Song{song | status: :playing})
active_song_id ->
socket
Enum.reduce(song.text_segments, socket, fn seg, acc ->
stream_insert(acc, :speech_segments, seg)
end)
|> stop_song(active_song_id)
|> stream_insert(:songs, %MediaLibrary.Song{song | status: :playing})
|> assign(active_song_id: song.id)
@ -398,4 +434,8 @@ defmodule LiveBeatsWeb.ProfileLive do
uri = URI.parse(url_str)
uri.host <> uri.path
end
# Formats a duration in seconds as zero-padded "MM:SS", discarding any fractional part.
#
# Minutes are not wrapped at one hour (3725 seconds renders as "62:05"). Converting
# through `Time` would reset the minute field every hour and show "02:05" instead.
defp seconds_to_mm_ss(seconds) do
  total = trunc(seconds)
  minutes = total |> div(60) |> Integer.to_string() |> String.pad_leading(2, "0")
  secs = total |> rem(60) |> Integer.to_string() |> String.pad_leading(2, "0")
  minutes <> ":" <> secs
end
end

View file

@ -2,7 +2,7 @@
<p class="inline text-gray-500 text-sm">(songs expire every six hours)</p>
<.form
for={:songs}
as={:songs}
id="song-form"
class="space-y-8"
phx-target={@myself}

View file

@ -33,7 +33,7 @@ defmodule LiveBeats.MixProject do
defp deps do
[
{:phoenix, "~> 1.7.1"},
{:phoenix_live_view, "~> 0.18.16"},
{:phoenix_live_view, github: "phoenixframework/phoenix_live_view", override: true},
{:phoenix_live_dashboard, "~> 0.7.2"},
{:phoenix_ecto, "~> 4.4"},
{:ecto_sql, "~> 3.6"},

View file

@ -36,7 +36,7 @@
"phoenix_html": {:hex, :phoenix_html, "3.3.1", "4788757e804a30baac6b3fc9695bf5562465dd3f1da8eb8460ad5b404d9a2178", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "bed1906edd4906a15fd7b412b85b05e521e1f67c9a85418c55999277e553d0d3"},
"phoenix_live_dashboard": {:hex, :phoenix_live_dashboard, "0.7.2", "97cc4ff2dba1ebe504db72cb45098cb8e91f11160528b980bd282cc45c73b29c", [:mix], [{:ecto, "~> 3.6.2 or ~> 3.7", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_mysql_extras, "~> 0.5", [hex: :ecto_mysql_extras, repo: "hexpm", optional: true]}, {:ecto_psql_extras, "~> 0.7", [hex: :ecto_psql_extras, repo: "hexpm", optional: true]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 0.18.3", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.6 or ~> 1.0", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}], "hexpm", "0e5fdf063c7a3b620c566a30fcf68b7ee02e5e46fe48ee46a6ec3ba382dc05b7"},
"phoenix_live_reload": {:hex, :phoenix_live_reload, "1.4.0", "4fe222c0be55fdc3f9c711e24955fc42a7cd9b7a2f5f406f2580a567c335a573", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm", "bebf0fc2d2113b61cb5968f585367234b7b4c21d963d691de7b4b2dc6cdaae6f"},
"phoenix_live_view": {:hex, :phoenix_live_view, "0.18.16", "781c6a3ac49e0451ca403848b40807171caea400896fe8ed8e5ddd6106ad5580", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.6.15 or ~> 1.7.0", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "09e6ae2babe62f74bfcd1e3cac1a9b0e2c262557cc566300a843425c9cb6842a"},
"phoenix_live_view": {:git, "https://github.com/phoenixframework/phoenix_live_view.git", "f397224a39d6d10e5a4fcbbd9363f9f87e5121c9", []},
"phoenix_pubsub": {:hex, :phoenix_pubsub, "2.1.1", "ba04e489ef03763bf28a17eb2eaddc2c20c6d217e2150a61e3298b0f4c2012b5", [:mix], [], "hexpm", "81367c6d1eea5878ad726be80808eb5a787a23dee699f96e72b1109c57cdd8d9"},
"phoenix_template": {:hex, :phoenix_template, "1.0.1", "85f79e3ad1b0180abb43f9725973e3b8c2c3354a87245f91431eec60553ed3ef", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "157dc078f6226334c91cb32c1865bf3911686f8bcd6bcff86736f6253e6993ee"},
"plug": {:hex, :plug, "1.14.0", "ba4f558468f69cbd9f6b356d25443d0b796fbdc887e03fa89001384a9cac638f", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "bf020432c7d4feb7b3af16a0c2701455cbbbb95e5b6866132cb09eb0c29adc14"},

View file

@ -0,0 +1,9 @@
defmodule LiveBeats.Repo.Migrations.AddLyricsToSongs do
  use Ecto.Migration

  # Adds a non-null `text_segments` column to `songs`, typed as an array of maps and
  # defaulting to an empty list.
  def change do
    alter(table(:songs)) do
      add(:text_segments, {:array, :map}, null: false, default: [])
    end
  end
end