Skip to content

Commit

Permalink
use xav instead of ffmpeg
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinschweikert committed Oct 22, 2024
1 parent 94fa47a commit 1b6e128
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 45 deletions.
64 changes: 21 additions & 43 deletions lib/bumblebee/audio/speech_to_text_whisper.ex
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ defmodule Bumblebee.Audio.SpeechToTextWhisper do
{:ok, [Nx.backend_transfer(input, Nx.BinaryBackend)]}

{:file, path} when is_binary(path) ->
ffmpeg_read_as_pcm(path, sampling_rate)
from_file(path, sampling_rate)

other ->
cond do
Expand Down Expand Up @@ -164,49 +164,27 @@ defmodule Bumblebee.Audio.SpeechToTextWhisper do
end
end

defp ffmpeg_read_as_pcm(path, sampling_rate) do
channels = 1
defp from_file(path, sampling_rate) do
# This chunk can be of arbitrary size, the serving accumulates
# and overlaps chunks internally as needed.

format =
case System.endianness() do
:little -> "f32le"
:big -> "f32be"
end

cond do
System.find_executable("ffmpeg") == nil ->
{:error, "ffmpeg not found in PATH"}

not File.exists?(path) ->
{:error, "no file found at #{path}"}

true ->
# This chunk can be of arbitrary size, the serving accumulates
# and overlaps chunks internally as needed. We read the file
# as stream to reduce memory usage
chunk_size = 30

stream =
Stream.iterate(0, fn offset -> offset + chunk_size end)
|> Stream.transform({}, fn offset, acc ->
System.cmd(
"ffmpeg",
~w[-ss #{offset} -t #{chunk_size} -i #{path} -ac #{channels} -ar #{sampling_rate} -f #{format} -hide_banner -loglevel quiet pipe:1]
)
|> case do
{<<>>, 0} ->
{:halt, acc}

{data, 0} ->
chunk = Nx.from_binary(data, :f32, backend: Nx.BinaryBackend)
{[chunk], acc}

{_, 1} ->
raise "ffmpeg failed to decode the given file"
end
end)

{:ok, stream}
# Build the audio stream only when the path exists; otherwise return an
# error tuple describing the missing file.
if File.exists?(path) do
stream =
path
# Read only the audio track, requesting `:f32` samples, a single channel,
# and the sampling rate passed in by the caller.
|> Xav.Reader.stream!(
read: :audio,
out_format: :f32,
out_channels: 1,
out_sample_rate: sampling_rate
)
# Convert each frame yielded by the reader with Xav.Frame.to_nx/1.
|> Stream.map(fn frame -> Xav.Frame.to_nx(frame) end)
# Group the converted frames into lists of up to 1000 elements (the last
# list may be shorter), presumably so that consumers receive fewer, larger
# chunks instead of one small tensor per frame.
|> Stream.chunk_every(1000)
# Combine each group of frames into a single Nx.Batch.
|> Stream.map(&Nx.Batch.concatenate/1)
# Pass the batch through a jitted identity function, presumably to
# materialize it as one tensor.
# NOTE(review): confirm against the Nx.Batch documentation.
|> Stream.map(fn batch -> Nx.Defn.jit_apply(&Function.identity/1, [batch]) end)

{:ok, stream}
else
{:error, "no file found at #{path}"}
end
end

Expand Down
5 changes: 3 additions & 2 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ defmodule Bumblebee.MixProject do
{:axon, "~> 0.7.0"},
# {:axon, github: "elixir-nx/axon", override: true},
{:tokenizers, "~> 0.4"},
{:nx, "~> 0.9.0"},
{:nx, "~> 0.9.0", override: true},
{:exla, ">= 0.0.0", only: [:dev, :test]},
{:torchx, ">= 0.0.0", only: [:dev, :test]},
# {:nx, github: "elixir-nx/nx", sparse: "nx", override: true},
Expand All @@ -49,7 +49,8 @@ defmodule Bumblebee.MixProject do
{:stb_image, "~> 0.6.0", only: :test},
{:bypass, "~> 2.1", only: :test},
{:ex_doc, "~> 0.28", only: :dev, runtime: false},
{:nx_signal, "~> 0.2.0"}
{:nx_signal, "~> 0.2.0"},
{:xav, "~> 0.6.0"}
]
end

Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@
"torchx": {:hex, :torchx, "0.9.0", "936cbd32233f89d73700c39b7ef56f94b3f3541db03c90f8ddf6b3fe73260e28", [:mix], [{:nx, "~> 0.9.0", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "4e057d6b93fc91191957230b2c61c408861b888abdf6a900baf0db4125405505"},
"unpickler": {:hex, :unpickler, "0.1.0", "c2262c0819e6985b761e7107546cef96a485f401816be5304a65fdd200d5bd6a", [:mix], [], "hexpm", "e2b3f61e62406187ac52afead8a63bfb4e49394028993f3c4c42712743cab79e"},
"unzip": {:hex, :unzip, "0.12.0", "beed92238724732418b41eba77dcb7f51e235b707406c05b1732a3052d1c0f36", [:mix], [], "hexpm", "95655b72db368e5a84951f0bed586ac053b55ee3815fd96062fce10ce4fc998d"},
"xav": {:hex, :xav, "0.6.0", "38835d735fc3d620e41c84fe29cd7db0381436b54c9ef209ba9112255a091fc4", [:make, :mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.0", [hex: :nx, repo: "hexpm", optional: true]}], "hexpm", "ad988df9d44c0ca3ccc4053ea0f1f1702ca14d4b926047b224deb527f0866edf"},
"xla": {:hex, :xla, "0.8.0", "fef314d085dd3ee16a0816c095239938f80769150e15db16dfaa435553d7cb16", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "739c61c8d93b97e12ba0369d10e76130224c208f1a76ad293e3581f056833e57"},
}

0 comments on commit 1b6e128

Please sign in to comment.