Generate post thumbnail/metadata in background (ref #4529) (#4564)

* Generate post thumbnail/metadata in background (ref #4529)

* fix api test

* Apply suggestions from code review

Co-authored-by: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com>

* fix test

---------

Co-authored-by: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com>
This commit is contained in:
Nutomic 2024-03-27 15:54:42 +01:00 committed by GitHub
parent a632a86852
commit a4b79ca610
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 135 additions and 112 deletions

View file

@ -17,13 +17,16 @@ import {
deleteAllImages,
delta,
epsilon,
followCommunity,
gamma,
getSite,
imageFetchLimit,
registerUser,
resolveBetaCommunity,
resolveCommunity,
resolvePost,
setupLogins,
waitForPost,
unfollows,
} from "./shared";
const downloadFileSync = require("download-file-sync");
@ -209,6 +212,11 @@ test("Images in remote post are proxied if setting enabled", async () => {
test("No image proxying if setting is disabled", async () => {
let user = await registerUser(beta, betaUrl);
let community = await createCommunity(alpha);
let betaCommunity = await resolveCommunity(
beta,
community.community_view.community.actor_id,
);
await followCommunity(beta, true, betaCommunity.community!.community.id);
const upload_form: UploadImage = {
image: Buffer.from("test"),
@ -228,15 +236,19 @@ test("No image proxying if setting is disabled", async () => {
).toBeTruthy();
expect(post.post_view.post.body).toBe("![](http://example.com/image2.png)");
let gammaPost = await resolvePost(delta, post.post_view.post);
expect(gammaPost.post).toBeDefined();
let betaPost = await waitForPost(
beta,
post.post_view.post,
res => res?.post.alt_text != null,
);
expect(betaPost.post).toBeDefined();
// remote image doesnt get proxied after federation
expect(
gammaPost.post!.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"),
betaPost.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"),
).toBeTruthy();
expect(gammaPost.post!.post.body).toBe("![](http://example.com/image2.png)");
expect(betaPost.post.body).toBe("![](http://example.com/image2.png)");
// Make sure the alt text got federated
expect(post.post_view.post.alt_text).toBe(gammaPost.post!.post.alt_text);
expect(post.post_view.post.alt_text).toBe(betaPost.post.alt_text);
});

View file

@ -55,7 +55,18 @@ afterAll(() => {
unfollows();
});
function assertPostFederation(postOne?: PostView, postTwo?: PostView) {
async function assertPostFederation(postOne: PostView, postTwo: PostView) {
// Link metadata is generated in background task and may not be ready yet at this time,
// so wait for it explicitly. For removed posts we cant refetch anything.
postOne = await waitForPost(beta, postOne.post, res => {
return res === null || res?.post.embed_title !== null;
});
postTwo = await waitForPost(
beta,
postTwo.post,
res => res === null || res?.post.embed_title !== null,
);
expect(postOne?.post.ap_id).toBe(postTwo?.post.ap_id);
expect(postOne?.post.name).toBe(postTwo?.post.name);
expect(postOne?.post.body).toBe(postTwo?.post.body);
@ -109,7 +120,7 @@ test("Create a post", async () => {
expect(betaPost?.community.local).toBe(true);
expect(betaPost?.creator.local).toBe(false);
expect(betaPost?.counts.score).toBe(1);
assertPostFederation(betaPost, postRes.post_view);
await assertPostFederation(betaPost, postRes.post_view);
// Delta only follows beta, so it should not see an alpha ap_id
await expect(
@ -157,7 +168,7 @@ test("Unlike a post", async () => {
expect(betaPost?.community.local).toBe(true);
expect(betaPost?.creator.local).toBe(false);
expect(betaPost?.counts.score).toBe(0);
assertPostFederation(betaPost, postRes.post_view);
await assertPostFederation(betaPost, postRes.post_view);
});
test("Update a post", async () => {
@ -178,7 +189,7 @@ test("Update a post", async () => {
expect(betaPost.community.local).toBe(true);
expect(betaPost.creator.local).toBe(false);
expect(betaPost.post.name).toBe(updatedName);
assertPostFederation(betaPost, updatedPost.post_view);
await assertPostFederation(betaPost, updatedPost.post_view);
// Make sure lemmy beta cannot update the post
await expect(editPost(beta, betaPost.post)).rejects.toStrictEqual(
@ -329,7 +340,7 @@ test("Delete a post", async () => {
throw "Missing beta post 2";
}
expect(betaPost2.post.deleted).toBe(false);
assertPostFederation(betaPost2, undeletedPost.post_view);
await assertPostFederation(betaPost2, undeletedPost.post_view);
// Make sure lemmy beta cannot delete the post
await expect(deletePost(beta, true, betaPost2.post)).rejects.toStrictEqual(
@ -372,7 +383,7 @@ test("Remove a post from admin and community on different instance", async () =>
// Make sure lemmy beta sees post is undeleted
let betaPost2 = (await resolvePost(beta, postRes.post_view.post)).post;
expect(betaPost2?.post.removed).toBe(false);
assertPostFederation(betaPost2, undeletedPost.post_view);
await assertPostFederation(betaPost2!, undeletedPost.post_view);
});
test("Remove a post from admin and community on same instance", async () => {
@ -403,7 +414,7 @@ test("Remove a post from admin and community on same instance", async () => {
p => p?.post_view.post.removed ?? false,
);
expect(alphaPost?.post_view.post.removed).toBe(true);
assertPostFederation(alphaPost.post_view, removePostRes.post_view);
await assertPostFederation(alphaPost.post_view, removePostRes.post_view);
// Undelete
let undeletedPost = await removePost(beta, false, betaPost.post);
@ -416,7 +427,7 @@ test("Remove a post from admin and community on same instance", async () => {
p => !!p && !p.post.removed,
);
expect(alphaPost2.post.removed).toBe(false);
assertPostFederation(alphaPost2, undeletedPost.post_view);
await assertPostFederation(alphaPost2, undeletedPost.post_view);
await unfollowRemotes(alpha);
});

View file

@ -1,17 +1,24 @@
use crate::{
context::LemmyContext,
lemmy_db_schema::traits::Crud,
post::{LinkMetadata, OpenGraphData},
utils::proxy_image_link,
send_activity::{ActivityChannel, SendActivityData},
utils::{local_site_opt_to_sensitive, proxy_image_link, proxy_image_link_opt_apub},
};
use activitypub_federation::config::Data;
use encoding::{all::encodings, DecoderTrap};
use lemmy_db_schema::{
newtypes::DbUrl,
source::images::{LocalImage, LocalImageForm},
source::{
images::{LocalImage, LocalImageForm},
local_site::LocalSite,
post::{Post, PostUpdateForm},
},
};
use lemmy_utils::{
error::{LemmyError, LemmyErrorType},
settings::structs::{PictrsImageMode, Settings},
spawn_try_task,
version::VERSION,
REQWEST_TIMEOUT,
};
@ -83,6 +90,50 @@ pub async fn fetch_link_metadata_opt(
_ => Default::default(),
}
}
/// Generate post thumbnail in background task, because some sites can be very slow to respond.
///
/// Takes a callback to generate a send activity task, so that post can be federated with metadata.
pub fn generate_post_link_metadata(
post: Post,
custom_thumbnail: Option<Url>,
send_activity: impl FnOnce(Post) -> Option<SendActivityData> + Send + 'static,
local_site: Option<LocalSite>,
context: Data<LemmyContext>,
) {
spawn_try_task(async move {
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let page_is_sensitive = post.nsfw;
let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive;
let mut thumbnail_url = custom_thumbnail.or_else(|| post.thumbnail_url.map(Into::into));
let do_generate_thumbnail = thumbnail_url.is_none() && allow_generate_thumbnail;
// Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it.
let metadata = fetch_link_metadata_opt(
post.url.map(Into::into).as_ref(),
do_generate_thumbnail,
&context,
)
.await;
if let Some(thumbnail_url_) = metadata.thumbnail {
thumbnail_url = Some(thumbnail_url_.into());
}
let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, &context).await?;
let form = PostUpdateForm {
embed_title: Some(metadata.opengraph_data.title),
embed_description: Some(metadata.opengraph_data.description),
embed_video_url: Some(metadata.opengraph_data.embed_video_url),
thumbnail_url: Some(thumbnail_url),
url_content_type: Some(metadata.content_type),
..Default::default()
};
let updated_post = Post::update(&mut context.pool(), post.id, &form).await?;
if let Some(send_activity) = send_activity(updated_post) {
ActivityChannel::submit_activity(send_activity, &context).await?;
}
Ok(())
});
}
/// Extract site metadata from HTML Opengraph attributes.
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<OpenGraphData, LemmyError> {

View file

@ -4,8 +4,8 @@ use lemmy_api_common::{
build_response::build_post_response,
context::LemmyContext,
post::{CreatePost, PostResponse},
request::fetch_link_metadata_opt,
send_activity::{ActivityChannel, SendActivityData},
request::generate_post_link_metadata,
send_activity::SendActivityData,
utils::{
check_community_user_action,
generate_local_apub_endpoint,
@ -75,6 +75,7 @@ pub async fn create_post(
is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?;
let url = proxy_image_link_opt_apub(url, &context).await?;
check_community_user_action(
&local_user_view.person,
@ -98,18 +99,6 @@ pub async fn create_post(
}
}
// Only generate the thumbnail if there's no custom thumbnail provided,
// otherwise it will save it in pictrs
let generate_thumbnail = custom_thumbnail.is_none();
// Fetch post links and pictrs cached image
let metadata = fetch_link_metadata_opt(url.as_ref(), generate_thumbnail, &context).await;
let url = proxy_image_link_opt_apub(url, &context).await?;
let thumbnail_url = proxy_image_link_opt_apub(custom_thumbnail, &context)
.await?
.map(Into::into)
.or(metadata.thumbnail);
// Only need to check if language is allowed in case user set it explicitly. When using default
// language, it already only returns allowed languages.
CommunityLanguage::is_allowed_community_language(
@ -134,18 +123,13 @@ pub async fn create_post(
let post_form = PostInsertForm::builder()
.name(data.name.trim().to_string())
.url_content_type(metadata.content_type)
.url(url)
.body(body)
.alt_text(data.alt_text.clone())
.community_id(data.community_id)
.creator_id(local_user_view.person.id)
.nsfw(data.nsfw)
.embed_title(metadata.opengraph_data.title)
.embed_description(metadata.opengraph_data.description)
.embed_video_url(metadata.opengraph_data.embed_video_url)
.language_id(language_id)
.thumbnail_url(thumbnail_url)
.build();
let inserted_post = Post::create(&mut context.pool(), &post_form)
@ -170,6 +154,14 @@ pub async fn create_post(
.await
.with_lemmy_type(LemmyErrorType::CouldntCreatePost)?;
generate_post_link_metadata(
updated_post.clone(),
custom_thumbnail,
|post| Some(SendActivityData::CreatePost(post)),
Some(local_site),
context.reset_request_count(),
);
// They like their own post by default
let person_id = local_user_view.person.id;
let post_id = inserted_post.id;
@ -183,9 +175,6 @@ pub async fn create_post(
.await
.with_lemmy_type(LemmyErrorType::CouldntLikePost)?;
ActivityChannel::submit_activity(SendActivityData::CreatePost(updated_post.clone()), &context)
.await?;
// Mark the post as read
mark_post_as_read(person_id, post_id, &mut context.pool()).await?;

View file

@ -4,8 +4,8 @@ use lemmy_api_common::{
build_response::build_post_response,
context::LemmyContext,
post::{EditPost, PostResponse},
request::fetch_link_metadata,
send_activity::{ActivityChannel, SendActivityData},
request::generate_post_link_metadata,
send_activity::SendActivityData,
utils::{
check_community_user_action,
get_url_blocklist,
@ -84,40 +84,11 @@ pub async fn update_post(
Err(LemmyErrorType::NoPostEditAllowed)?
}
// Fetch post links and thumbnail if url was updated
let (embed_title, embed_description, embed_video_url, metadata_thumbnail, metadata_content_type) =
match &url {
Some(url) => {
// Only generate the thumbnail if there's no custom thumbnail provided,
// otherwise it will save it in pictrs
let generate_thumbnail = custom_thumbnail.is_none() || orig_post.thumbnail_url.is_none();
let metadata = fetch_link_metadata(url, generate_thumbnail, &context).await?;
(
Some(metadata.opengraph_data.title),
Some(metadata.opengraph_data.description),
Some(metadata.opengraph_data.embed_video_url),
Some(metadata.thumbnail),
Some(metadata.content_type),
)
}
_ => Default::default(),
};
let url = match url {
Some(url) => Some(proxy_image_link_opt_apub(Some(url), &context).await?),
_ => Default::default(),
};
let custom_thumbnail = match custom_thumbnail {
Some(custom_thumbnail) => {
Some(proxy_image_link_opt_apub(Some(custom_thumbnail), &context).await?)
}
_ => Default::default(),
};
let thumbnail_url = custom_thumbnail.or(metadata_thumbnail);
let language_id = data.language_id;
CommunityLanguage::is_allowed_community_language(
&mut context.pool(),
@ -129,15 +100,10 @@ pub async fn update_post(
let post_form = PostUpdateForm {
name: data.name.clone(),
url,
url_content_type: metadata_content_type,
body: diesel_option_overwrite(body),
alt_text: diesel_option_overwrite(data.alt_text.clone()),
nsfw: data.nsfw,
embed_title,
embed_description,
embed_video_url,
language_id: data.language_id,
thumbnail_url,
updated: Some(Some(naive_now())),
..Default::default()
};
@ -147,7 +113,13 @@ pub async fn update_post(
.await
.with_lemmy_type(LemmyErrorType::CouldntUpdatePost)?;
ActivityChannel::submit_activity(SendActivityData::UpdatePost(updated_post), &context).await?;
generate_post_link_metadata(
updated_post.clone(),
custom_thumbnail,
|post| Some(SendActivityData::UpdatePost(post)),
Some(local_site),
context.reset_request_count(),
);
build_post_response(
context.deref(),

View file

@ -24,10 +24,9 @@ use chrono::{DateTime, Utc};
use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
use lemmy_api_common::{
context::LemmyContext,
request::fetch_link_metadata_opt,
request::generate_post_link_metadata,
utils::{
get_url_blocklist,
local_site_opt_to_sensitive,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
@ -218,6 +217,7 @@ impl Object for ApubPost {
let old_post = page.id.dereference_local(context).await;
let first_attachment = page.attachment.first();
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let form = if !page.is_mod_action(context).await? {
let url = if let Some(attachment) = first_attachment.cloned() {
@ -231,20 +231,8 @@ impl Object for ApubPost {
check_url_scheme(&url)?;
let alt_text = first_attachment.cloned().and_then(Attachment::alt_text);
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let page_is_sensitive = page.sensitive.unwrap_or(false);
let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive;
let mut thumbnail_url = page.image.map(|i| i.url);
let do_generate_thumbnail = thumbnail_url.is_none() && allow_generate_thumbnail;
// Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it.
let metadata = fetch_link_metadata_opt(url.as_ref(), do_generate_thumbnail, context).await;
if let Some(thumbnail_url_) = metadata.thumbnail {
thumbnail_url = Some(thumbnail_url_.into());
}
let url = proxy_image_link_opt_apub(url, context).await?;
let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
@ -254,30 +242,22 @@ impl Object for ApubPost {
let language_id =
LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;
PostInsertForm {
name,
url: url.map(Into::into),
body,
alt_text,
creator_id: creator.id,
community_id: community.id,
removed: None,
locked: page.comments_enabled.map(|e| !e),
published: page.published.map(Into::into),
updated: page.updated.map(Into::into),
deleted: Some(false),
nsfw: page.sensitive,
embed_title: metadata.opengraph_data.title,
embed_description: metadata.opengraph_data.description,
embed_video_url: metadata.opengraph_data.embed_video_url,
thumbnail_url,
ap_id: Some(page.id.clone().into()),
local: Some(false),
language_id,
featured_community: None,
featured_local: None,
url_content_type: metadata.content_type,
}
PostInsertForm::builder()
.name(name)
.url(url.map(Into::into))
.body(body)
.alt_text(alt_text)
.creator_id(creator.id)
.community_id(community.id)
.locked(page.comments_enabled.map(|e| !e))
.published(page.published.map(Into::into))
.updated(page.updated.map(Into::into))
.deleted(Some(false))
.nsfw(page.sensitive)
.ap_id(Some(page.id.clone().into()))
.local(Some(false))
.language_id(language_id)
.build()
} else {
// if is mod action, only update locked/stickied fields, nothing else
PostInsertForm::builder()
@ -292,6 +272,14 @@ impl Object for ApubPost {
let post = Post::create(&mut context.pool(), &form).await?;
generate_post_link_metadata(
post.clone(),
page.image.map(|i| i.url),
|_| None,
local_site,
context.reset_request_count(),
);
// write mod log entry for lock
if Page::is_locked_changed(&old_post, &page.comments_enabled) {
let form = ModLockPostForm {