Language detection

Signed-off-by: marcin mikołajczak <git@mkljczk.pl>
This commit is contained in:
marcin mikołajczak 2022-11-03 00:13:09 +01:00
parent 03d4e7eecc
commit 32994bb9c3
6 changed files with 144 additions and 7 deletions

View file

@ -3523,5 +3523,27 @@ config :pleroma, :config_description, [
suggestion: [100_000]
}
]
},
%{
  group: :pleroma,
  key: Pleroma.Language.LanguageDetector,
  type: :group,
  description: "Language detection providers",
  children: [
    %{
      key: :provider,
      type: :module,
      description: "Module used to detect the language of a text",
      suggestions: [
        Pleroma.Language.LanguageDetector.Fasttext
      ]
    }
  ]
},
# The fastText provider reads its model path from its own config key
# (`Pleroma.Language.LanguageDetector.Fasttext`, `:model`), so the setting is
# described as a separate group rather than as a subgroup child of
# `Pleroma.Language.LanguageDetector`, which would store it under that key.
%{
  group: :pleroma,
  key: Pleroma.Language.LanguageDetector.Fasttext,
  type: :group,
  description: "`fastText` language detection provider",
  children: [
    %{
      key: :model,
      label: "fastText language detection model",
      type: :string,
      description: "Path to the fastText language identification model file",
      suggestions: ["/usr/share/fasttext/lid.176.bin"]
    }
  ]
}
]

View file

@ -188,7 +188,27 @@ defmodule Pleroma.ApplicationRequirements do
false
end
if Enum.all?([preview_proxy_commands_status | filter_commands_statuses], & &1) do
language_detector_commands_status =
if Pleroma.Language.LanguageDetector.missing_dependencies() == [] do
true
else
Logger.error(
"The following dependencies required by the currently enabled " <>
"language detection provider are not installed: " <>
inspect(Pleroma.Language.LanguageDetector.missing_dependencies())
)
false
end
if Enum.all?(
[
preview_proxy_commands_status,
language_detector_commands_status
| filter_commands_statuses
],
& &1
) do
:ok
else
{:error,

View file

@ -0,0 +1,34 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Language.LanguageDetector do
  @moduledoc """
  Entry point for language detection.

  Delegates to the provider module configured under
  `[Pleroma.Language.LanguageDetector, :provider]`. When no provider is
  configured, detection is disabled and every function degrades gracefully.
  """

  # Texts with fewer whitespace-separated words than this are not passed to
  # the provider at all.
  @words_threshold 4

  @doc """
  Returns the external dependencies required by the configured provider that
  are not available.

  Returns an empty list when no provider is configured, so callers can compare
  the result against `[]` regardless of whether detection is enabled.
  """
  @spec missing_dependencies() :: [String.t()]
  def missing_dependencies do
    provider = get_provider()

    if provider do
      provider.missing_dependencies()
    else
      []
    end
  end

  @doc """
  Detects the language of `text`.

  HTML tags are stripped before detection. Returns `nil` when no provider is
  set, the provider is not configured, or the stripped text has fewer than
  #{@words_threshold} words; otherwise returns whatever the provider's
  `detect/1` returns.
  """
  @spec detect(String.t()) :: String.t() | nil
  def detect(text) do
    provider = get_provider()

    # Check the provider first so no sanitisation work is done while
    # detection is disabled.
    if provider && provider.configured?() do
      {:ok, plain_text} = FastSanitize.strip_tags(text)

      if word_count(plain_text) >= @words_threshold do
        provider.detect(plain_text)
      else
        nil
      end
    else
      nil
    end
  end

  # Counts whitespace-separated words. `trim: true` drops the empty strings
  # produced by leading/trailing whitespace (and by an empty text) so they are
  # not counted as words.
  defp word_count(text) do
    text
    |> String.split(~r/\s+/, trim: true)
    |> length()
  end

  defp get_provider do
    Pleroma.Config.get([__MODULE__, :provider])
  end
end

View file

@ -0,0 +1,47 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Language.LanguageDetector.Fasttext do
  @moduledoc """
  Language detection provider that shells out to the `fasttext` command-line
  tool, using the model file configured under
  `[Pleroma.Language.LanguageDetector.Fasttext, :model]`.
  """

  import Pleroma.Web.Utils.Guards, only: [not_empty_string: 1]

  alias Pleroma.Language.LanguageDetector.Provider

  @behaviour Provider

  @doc """
  Returns `["fasttext"]` when the `fasttext` executable is not available,
  `[]` otherwise.
  """
  @impl Provider
  def missing_dependencies do
    if Pleroma.Utils.command_available?("fasttext") do
      []
    else
      ["fasttext"]
    end
  end

  @doc """
  Returns `true` when a non-empty model path is configured.
  """
  @impl Provider
  def configured?, do: not_empty_string(get_model())

  @doc """
  Runs `fasttext predict` on `text` and returns the predicted label with the
  `__label__` prefix removed.

  Returns `nil` when the temporary input file cannot be written, when the
  command exits with a non-zero status, or when its output does not start
  with `__label__`.
  """
  @impl Provider
  def detect(text) do
    text_path = Path.join(System.tmp_dir!(), "fasttext-#{Ecto.UUID.generate()}")

    try do
      with :ok <- File.write(text_path, single_line(text)),
           {"__label__" <> prediction, 0} <-
             System.cmd("fasttext", ["predict", get_model(), text_path]) do
        # Keep only the first whitespace-delimited token so trailing newlines
        # (or any further output) never end up in the language code.
        prediction
        |> String.split()
        |> List.first()
      else
        _ -> nil
      end
    after
      # Always remove the temporary file, even when `System.cmd/3` raises.
      File.rm(text_path)
    end
  end

  # `fasttext predict` emits one prediction per input line; collapsing all
  # whitespace runs into single spaces makes the whole text one sample.
  defp single_line(text), do: String.replace(text, ~r/\s+/, " ")

  defp get_model do
    Pleroma.Config.get([__MODULE__, :model])
  end
end

View file

@ -0,0 +1,11 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Language.LanguageDetector.Provider do
  @moduledoc """
  Behaviour that every language detection provider implements.
  """

  @doc """
  Detects the language of `text`, returning it as a string, or `nil` when no
  language could be determined.
  """
  @callback detect(text :: String.t()) :: String.t() | nil

  @doc """
  Tells whether the provider is configured.
  """
  @callback configured?() :: boolean()

  @doc """
  Lists the dependencies the provider needs that are missing, as strings.
  """
  @callback missing_dependencies() :: [String.t()]
end

View file

@ -5,6 +5,7 @@
defmodule Pleroma.Web.CommonAPI.ActivityDraft do
alias Pleroma.Activity
alias Pleroma.Conversation.Participation
alias Pleroma.Language.LanguageDetector
alias Pleroma.Object
alias Pleroma.Web.ActivityPub.Builder
alias Pleroma.Web.ActivityPub.Visibility
@ -241,13 +242,15 @@ defmodule Pleroma.Web.CommonAPI.ActivityDraft do
end
# Sets the draft's language: the `:language` param when `good_locale_code?/1`
# accepts it, otherwise whatever `LanguageDetector.detect/1` returns for the
# draft's full payload (which may be `nil`).
defp language(draft) do
  requested = draft.params[:language]

  language =
    case good_locale_code?(requested) do
      true -> requested
      _ -> LanguageDetector.detect(draft.full_payload)
    end

  %__MODULE__{draft | language: language}
end
defp object(draft) do