Pleroma.Web.RichMedia.Parser: Remove test-specific codepaths

Also consolidate Tesla mocks into the HttpRequestMock module.

Tests were not exercising the real codepaths. The Rich Media Preview only works with https, but most of these tests were only mocking http.
This commit is contained in:
Mark Felder 2024-02-06 14:34:59 -05:00
parent e957362779
commit 6b7b443ff9
10 changed files with 144 additions and 153 deletions

View file

@ -8,10 +8,13 @@ defmodule Pleroma.Caching do
@callback put(Cachex.cache(), any(), any(), Keyword.t()) :: {Cachex.status(), boolean()}
@callback put(Cachex.cache(), any(), any()) :: {Cachex.status(), boolean()}
@callback fetch!(Cachex.cache(), any(), function() | nil) :: any()
@callback fetch(Cachex.cache(), any(), function() | nil) ::
{atom(), any()} | {atom(), any(), any()}
# @callback del(Cachex.cache(), any(), Keyword.t()) :: {Cachex.status(), boolean()}
@callback del(Cachex.cache(), any()) :: {Cachex.status(), boolean()}
@callback stream!(Cachex.cache(), any()) :: Enumerable.t()
@callback expire_at(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()}
@callback expire(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()}
@callback exists?(Cachex.cache(), any()) :: {Cachex.status(), boolean()}
@callback execute!(Cachex.cache(), function()) :: any()
@callback get_and_update(Cachex.cache(), any(), function()) ::

View file

@ -13,70 +13,65 @@ defmodule Pleroma.Web.RichMedia.Parser do
def parse(nil), do: {:error, "No URL provided"}
if Pleroma.Config.get(:env) == :test do
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
def parse(url), do: parse_url(url)
else
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
def parse(url) do
with {:ok, data} <- get_cached_or_parse(url),
{:ok, _} <- set_ttl_based_on_image(data, url) do
{:ok, data}
end
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
def parse(url) do
with {:ok, data} <- get_cached_or_parse(url),
{:ok, _} <- set_ttl_based_on_image(data, url) do
{:ok, data}
end
end
defp get_cached_or_parse(url) do
case @cachex.fetch(:rich_media_cache, url, fn ->
case parse_url(url) do
{:ok, _} = res ->
{:commit, res}
defp get_cached_or_parse(url) do
case @cachex.fetch(:rich_media_cache, url, fn ->
case parse_url(url) do
{:ok, _} = res ->
{:commit, res}
{:error, reason} = e ->
# Unfortunately we have to log errors here, instead of doing that
# along with ttl setting at the bottom. Otherwise we can get log spam
# if more than one process was waiting for the rich media card
# while it was generated. Ideally we would set ttl here as well,
# so we don't override it number_of_waiters_on_generation
# times, but one, obviously, can't set ttl for not-yet-created entry
# and Cachex doesn't support returning ttl from the fetch callback.
log_error(url, reason)
{:commit, e}
end
end) do
{action, res} when action in [:commit, :ok] ->
case res do
{:ok, _data} = res ->
res
{:error, reason} = e ->
# Unfortunately we have to log errors here, instead of doing that
# along with ttl setting at the bottom. Otherwise we can get log spam
# if more than one process was waiting for the rich media card
# while it was generated. Ideally we would set ttl here as well,
# so we don't override it number_of_waiters_on_generation
# times, but one, obviously, can't set ttl for not-yet-created entry
# and Cachex doesn't support returning ttl from the fetch callback.
log_error(url, reason)
{:commit, e}
end
end) do
{action, res} when action in [:commit, :ok] ->
case res do
{:ok, _data} = res ->
res
{:error, reason} = e ->
if action == :commit, do: set_error_ttl(url, reason)
e
end
{:error, reason} = e ->
if action == :commit, do: set_error_ttl(url, reason)
e
end
{:error, e} ->
{:error, {:cachex_error, e}}
end
{:error, e} ->
{:error, {:cachex_error, e}}
end
end
defp set_error_ttl(_url, :body_too_large), do: :ok
defp set_error_ttl(_url, {:content_type, _}), do: :ok
defp set_error_ttl(_url, :body_too_large), do: :ok
defp set_error_ttl(_url, {:content_type, _}), do: :ok
# The TTL is not set for the errors above, since they are unlikely to change
# with time
# The TTL is not set for the errors above, since they are unlikely to change
# with time
defp set_error_ttl(url, _reason) do
ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000)
@cachex.expire(:rich_media_cache, url, ttl)
:ok
end
defp set_error_ttl(url, _reason) do
ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000)
@cachex.expire(:rich_media_cache, url, ttl)
:ok
end
defp log_error(url, {:invalid_metadata, data}) do
Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end)
end
defp log_error(url, {:invalid_metadata, data}) do
Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end)
end
defp log_error(url, reason) do
Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end)
end
defp log_error(url, reason) do
Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end)
end
@doc """

View file

@ -1,3 +1,3 @@
<link rel="alternate" type="application/json+oembed"
href="http://example.com/oembed.json"
href="https://example.com/oembed.json"
title="Bacon Lollys oEmbed Profile" />

View file

@ -336,13 +336,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do
path -> Pleroma.Test.StaticConfig.get(path)
end)
Tesla.Mock.mock(fn
%{
method: :get,
url: "https://example.com/twitter-card"
} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}
Tesla.Mock.mock_global(fn
env ->
apply(HttpRequestMock, :request, [env])
end)

View file

@ -49,6 +49,7 @@ defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do
:chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"}
cache, key -> NullCache.get(cache, key)
end)
|> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end)
chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref)

View file

@ -3,7 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.HelpersTest do
use Pleroma.DataCase, async: true
use Pleroma.DataCase, async: false
alias Pleroma.StaticStubbedConfigMock, as: ConfigMock
alias Pleroma.Web.CommonAPI
@ -14,7 +14,7 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do
import Tesla.Mock
setup do
mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
ConfigMock
|> stub(:get, fn

View file

@ -3,95 +3,26 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.ParserTest do
use ExUnit.Case, async: true
use Pleroma.DataCase, async: false
alias Pleroma.Web.RichMedia.Parser
import Tesla.Mock
setup do
Tesla.Mock.mock(fn
%{
method: :get,
url: "http://example.com/ogp"
} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
%{
method: :get,
url: "http://example.com/non-ogp"
} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}
%{
method: :get,
url: "http://example.com/ogp-missing-title"
} ->
%Tesla.Env{
status: 200,
body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
}
%{
method: :get,
url: "http://example.com/twitter-card"
} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}
%{
method: :get,
url: "http://example.com/oembed"
} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}
%{
method: :get,
url: "http://example.com/oembed.json"
} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}
%{method: :get, url: "http://example.com/empty"} ->
%Tesla.Env{status: 200, body: "hello"}
%{method: :get, url: "http://example.com/malformed"} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}
%{method: :get, url: "http://example.com/error"} ->
{:error, :overload}
%{
method: :head,
url: "http://example.com/huge-page"
} ->
%Tesla.Env{
status: 200,
headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
}
%{
method: :head,
url: "http://example.com/pdf-file"
} ->
%Tesla.Env{
status: 200,
headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
}
%{method: :head} ->
%Tesla.Env{status: 404, body: "", headers: []}
end)
:ok
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
end
test "returns error when no metadata present" do
assert {:error, _} = Parser.parse("http://example.com/empty")
assert {:error, _} = Parser.parse("https://example.com/empty")
end
test "doesn't just add a title" do
assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp")
assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp")
end
test "parses ogp" do
assert Parser.parse("http://example.com/ogp") ==
assert Parser.parse("https://example.com/ogp") ==
{:ok,
%{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
@ -99,12 +30,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"type" => "video.movie",
"url" => "http://example.com/ogp"
"url" => "https://example.com/ogp"
}}
end
test "falls back to <title> when ogp:title is missing" do
assert Parser.parse("http://example.com/ogp-missing-title") ==
assert Parser.parse("https://example.com/ogp-missing-title") ==
{:ok,
%{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
@ -112,12 +43,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"type" => "video.movie",
"url" => "http://example.com/ogp-missing-title"
"url" => "https://example.com/ogp-missing-title"
}}
end
test "parses twitter card" do
assert Parser.parse("http://example.com/twitter-card") ==
assert Parser.parse("https://example.com/twitter-card") ==
{:ok,
%{
"card" => "summary",
@ -125,12 +56,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
"image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
"title" => "Small Island Developing States Photo Submission",
"description" => "View the album on Flickr.",
"url" => "http://example.com/twitter-card"
"url" => "https://example.com/twitter-card"
}}
end
test "parses OEmbed and filters HTML tags" do
assert Parser.parse("http://example.com/oembed") ==
assert Parser.parse("https://example.com/oembed") ==
{:ok,
%{
"author_name" => "\u202E\u202D\u202Cbees\u202C",
@ -150,7 +81,7 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
"thumbnail_width" => 150,
"title" => "Bacon Lollys",
"type" => "photo",
"url" => "http://example.com/oembed",
"url" => "https://example.com/oembed",
"version" => "1.0",
"web_page" => "https://www.flickr.com/photos/bees/2362225867/",
"web_page_short_url" => "https://flic.kr/p/4AK2sc",
@ -159,18 +90,18 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
end
test "rejects invalid OGP data" do
assert {:error, _} = Parser.parse("http://example.com/malformed")
assert {:error, _} = Parser.parse("https://example.com/malformed")
end
test "returns error if getting page was not successful" do
assert {:error, :overload} = Parser.parse("http://example.com/error")
assert {:error, :overload} = Parser.parse("https://example.com/error")
end
test "does a HEAD request to check if the body is too large" do
assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page")
assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page")
end
test "does a HEAD request to check if the body is html" do
assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file")
assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file")
end
end

View file

@ -26,9 +26,15 @@ defmodule Pleroma.CachexProxy do
@impl true
defdelegate fetch!(cache, key, func), to: Cachex
@impl true
defdelegate fetch(cache, key, func), to: Cachex
@impl true
defdelegate expire_at(cache, str, num), to: Cachex
@impl true
defdelegate expire(cache, str, num), to: Cachex
@impl true
defdelegate exists?(cache, key), to: Cachex

View file

@ -1059,7 +1059,7 @@ defmodule HttpRequestMock do
}}
end
def get("http://example.com/malformed", _, _, _) do
def get("https://example.com/malformed", _, _, _) do
{:ok,
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}}
end
@ -1472,6 +1472,37 @@ defmodule HttpRequestMock do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}}
end
def get("https://example.com/error", _, _, _), do: {:error, :overload}
def get("https://example.com/ogp-missing-title", _, _, _) do
{:ok,
%Tesla.Env{
status: 200,
body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
}}
end
def get("https://example.com/oembed", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}}
end
def get("https://example.com/oembed.json", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}}
end
def get("https://example.com/twitter-card", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}}
end
def get("https://example.com/non-ogp", _, _, _) do
{:ok,
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}}
end
def get("https://example.com/empty", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: "hello"}}
end
def get(url, query, body, headers) do
{:error,
"Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
@ -1545,17 +1576,41 @@ defmodule HttpRequestMock do
# Most of the rich media mocks are missing HEAD requests, so we just return 404.
@rich_media_mocks [
"https://example.com/empty",
"https://example.com/error",
"https://example.com/malformed",
"https://example.com/non-ogp",
"https://example.com/oembed",
"https://example.com/oembed.json",
"https://example.com/ogp",
"https://example.com/ogp-missing-data",
"https://example.com/ogp-missing-title",
"https://example.com/twitter-card",
"https://google.com/",
"https://yahoo.com/",
"https://pleroma.local/notice/9kCP7V"
"https://pleroma.local/notice/9kCP7V",
"https://yahoo.com/"
]
def head(url, _query, _body, _headers) when url in @rich_media_mocks do
{:ok, %Tesla.Env{status: 404, body: ""}}
end
def head("https://example.com/pdf-file", _, _, _) do
{:ok,
%Tesla.Env{
status: 200,
headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
}}
end
def head("https://example.com/huge-page", _, _, _) do
{:ok,
%Tesla.Env{
status: 200,
headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
}}
end
def head(url, query, body, headers) do
{:error,
"Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}

View file

@ -28,6 +28,9 @@ defmodule Pleroma.NullCache do
end
end
@impl true
def fetch(_, key, func), do: func.(key)
@impl true
def get_and_update(_, _, func) do
func.(nil)
@ -36,6 +39,9 @@ defmodule Pleroma.NullCache do
@impl true
def expire_at(_, _, _), do: {:ok, true}
@impl true
def expire(_, _, _), do: {:ok, true}
@impl true
def exists?(_, _), do: {:ok, false}