From b5d45fa82601443c7d5af415e26885934c3b8d71 Mon Sep 17 00:00:00 2001 From: Dessalines Date: Thu, 19 Jan 2023 21:41:37 -0500 Subject: [PATCH 1/4] Add reddit -> lemmy importer to readme. (#2662) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b058e36d8..358bd760f 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ Each Lemmy server can set its own moderation policy; appointing site-wide admins - [lemmy-rust-client](https://github.com/LemmyNet/lemmy/tree/main/crates/api_common) - [Dart API client](https://github.com/LemmurOrg/lemmy_api_client) - [go-lemmy](https://gitea.arsenm.dev/Arsen6331/go-lemmy) +- [Reddit -> Lemmy Importer](https://github.com/rileynull/RedditLemmyImporter) ## Support / Donate From ac56504291a7ab2cf0975272d54f6cf37b99d0f1 Mon Sep 17 00:00:00 2001 From: Dessalines Date: Fri, 20 Jan 2023 12:15:07 -0500 Subject: [PATCH 2/4] Upgrade to postgres 15. (#2659) --- docker/dev/docker-compose.yml | 30 +++++++++++++++++-- docker/federation/docker-compose.yml | 10 +++---- docker/prod/docker-compose.yml | 2 +- ...pgrade.sh => postgres_12_to_15_upgrade.sh} | 17 ++++++----- 4 files changed, 44 insertions(+), 15 deletions(-) rename scripts/{postgres_12_to_14_upgrade.sh => postgres_12_to_15_upgrade.sh} (69%) diff --git a/docker/dev/docker-compose.yml b/docker/dev/docker-compose.yml index 56647a7a5..e38d0ca1d 100644 --- a/docker/dev/docker-compose.yml +++ b/docker/dev/docker-compose.yml @@ -86,10 +86,36 @@ services: restart: always postgres: - image: postgres:14-alpine + image: postgres:15-alpine # this needs to match the database host in lemmy.hson + # Tune your settings via + # https://pgtune.leopard.in.ua/#/ + # You can use this technique to add them here + # https://stackoverflow.com/a/30850095/1655478 hostname: postgres - command: ["postgres", "-c", "session_preload_libraries=auto_explain", "-c", "auto_explain.log_min_duration=5ms", "-c", "auto_explain.log_analyze=true"] + command: [ + "postgres", + "-c", "session_preload_libraries=auto_explain", + "-c", "auto_explain.log_min_duration=5ms", + "-c", "auto_explain.log_analyze=true" + # Tuning config + # "-c", "max_connections=200", + # "-c", "shared_buffers=3GB", + # "-c", "effective_cache_size=9GB", + # "-c", "maintenance_work_mem=768MB", + # "-c", "checkpoint_completion_target=0.9", + # "-c", "wal_buffers=16MB", + # "-c", "default_statistics_target=100", + # "-c", "random_page_cost=4", + # "-c", "effective_io_concurrency=2", + # "-c", "work_mem=7864kB", + # "-c", "min_wal_size=1GB", + # "-c", "max_wal_size=4GB", + # "-c", "max_worker_processes=4", + # "-c", "max_parallel_workers_per_gather=2", + # "-c", "max_parallel_workers=4", + # "-c", "max_parallel_maintenance_workers=2", + ] networks: - lemmyinternal # adding the external facing network to allow direct db access for devs diff --git a/docker/federation/docker-compose.yml b/docker/federation/docker-compose.yml index f362f0197..13e73fb98 100644 --- a/docker/federation/docker-compose.yml +++ b/docker/federation/docker-compose.yml @@ -47,7 +47,7 @@ services: ports: - "8541:8541" postgres_alpha: - image: postgres:14-alpine + image: postgres:15-alpine environment: - POSTGRES_USER=lemmy - POSTGRES_PASSWORD=password @@ -75,7 +75,7 @@ services: ports: - "8551:8551" postgres_beta: - image: postgres:14-alpine + image: postgres:15-alpine environment: - POSTGRES_USER=lemmy - POSTGRES_PASSWORD=password @@ -103,7 +103,7 @@ services: ports: - "8561:8561" postgres_gamma: - image: postgres:14-alpine + image: postgres:15-alpine environment: - POSTGRES_USER=lemmy - POSTGRES_PASSWORD=password @@ -132,7 +132,7 @@ services: ports: - "8571:8571" postgres_delta: - image: postgres:14-alpine + image: postgres:15-alpine environment: - POSTGRES_USER=lemmy - POSTGRES_PASSWORD=password @@ -161,7 +161,7 @@ services: ports: - "8581:8581" postgres_epsilon: - image: postgres:14-alpine + image: postgres:15-alpine environment: - POSTGRES_USER=lemmy - POSTGRES_PASSWORD=password diff --git a/docker/prod/docker-compose.yml b/docker/prod/docker-compose.yml index a0fe5cfeb..a1efd03f1 100644 --- a/docker/prod/docker-compose.yml +++ b/docker/prod/docker-compose.yml @@ -72,7 +72,7 @@ services: restart: always postgres: - image: postgres:14-alpine + image: postgres:15-alpine # this needs to match the database host in lemmy.hson hostname: postgres networks: diff --git a/scripts/postgres_12_to_14_upgrade.sh b/scripts/postgres_12_to_15_upgrade.sh similarity index 69% rename from scripts/postgres_12_to_14_upgrade.sh rename to scripts/postgres_12_to_15_upgrade.sh index 6f1f6d461..0850e8fca 100755 --- a/scripts/postgres_12_to_14_upgrade.sh +++ b/scripts/postgres_12_to_15_upgrade.sh @@ -3,12 +3,15 @@ set -e echo "Do not stop in the middle of this upgrade, wait until you see the message: Upgrade complete." +echo "Stopping lemmy and all services..." +sudo docker-compose stop + echo "Make sure postgres is started..." sudo docker-compose up -d postgres sleep 20s -echo "Exporting the Database to 12_14.dump.sql ..." -sudo docker-compose exec -T postgres pg_dumpall -c -U lemmy > 12_14_dump.sql +echo "Exporting the Database to 12_15.dump.sql ..." +sudo docker-compose exec -T postgres pg_dumpall -c -U lemmy > 12_15_dump.sql echo "Done." echo "Stopping postgres..." @@ -18,20 +21,20 @@ sleep 20s echo "Removing the old postgres folder" sudo rm -rf volumes/postgres -echo "Updating docker-compose to use postgres version 14." -sed -i "s/postgres:12-alpine/postgres:14-alpine/" ./docker-compose.yml +echo "Updating docker-compose to use postgres version 15." +sed -i "s/image: postgres:.*/image: postgres:15-alpine/" ./docker-compose.yml echo "Starting up new postgres..." sudo docker-compose up -d postgres sleep 20s echo "Importing the database...." -cat 12_14_dump.sql | sudo docker-compose exec -T postgres psql -U lemmy +cat 12_15_dump.sql | sudo docker-compose exec -T postgres psql -U lemmy echo "Done." POSTGRES_PASSWORD=$(grep "POSTGRES_PASSWORD" ./docker-compose.yml | cut -d"=" -f2) -echo "Fixing a weird password issue with postgres 14" +echo "Fixing a weird password issue with postgres 15" sudo docker-compose exec -T postgres psql -U lemmy -c "alter user lemmy with password '$POSTGRES_PASSWORD'" sudo docker-compose restart postgres @@ -41,5 +44,5 @@ sudo chown -R 991:991 volumes/pictrs echo "Starting up lemmy..." sudo docker-compose up -d -echo "A copy of your old database is at 12_14.dump.sql . You can delete this file if the upgrade went smoothly." +echo "A copy of your old database is at 12_15.dump.sql . You can delete this file if the upgrade went smoothly." echo "Upgrade complete." From 7e3d3839b60b52f5539699e1291c476820c67d43 Mon Sep 17 00:00:00 2001 From: Nutomic Date: Fri, 20 Jan 2023 18:43:23 +0100 Subject: [PATCH 3/4] Post creation from Mastodon (fixes #2590) (#2651) * Post creation from Mastodon (fixes #2590) * better logic for page title * add deserialize helper Co-authored-by: Dessalines --- crates/apub/assets/mastodon/objects/page.json | 53 +++++++++++++++++++ crates/apub/src/api/resolve_object.rs | 7 +-- crates/apub/src/objects/post.rs | 27 ++++++++-- crates/apub/src/protocol/objects/mod.rs | 1 + crates/apub/src/protocol/objects/page.rs | 29 +++++++++- scripts/test.sh | 2 +- 6 files changed, 105 insertions(+), 14 deletions(-) create mode 100644 crates/apub/assets/mastodon/objects/page.json diff --git a/crates/apub/assets/mastodon/objects/page.json b/crates/apub/assets/mastodon/objects/page.json new file mode 100644 index 000000000..06d9b2215 --- /dev/null +++ b/crates/apub/assets/mastodon/objects/page.json @@ -0,0 +1,53 @@ +{ + "@context": [ + "https://www.w3.org/ns/activitystreams", + { + "ostatus": "http://ostatus.org#", + "atomUri": "ostatus:atomUri", + "inReplyToAtomUri": "ostatus:inReplyToAtomUri", + "conversation": "ostatus:conversation", + "sensitive": "as:sensitive", + "toot": "http://joinmastodon.org/ns#", + "votersCount": "toot:votersCount" + } + ], + "id": "https://mastodon.madrid/users/felix/statuses/107224289116410645", + "type": "Note", + "summary": null, + "published": "2021-11-05T11:46:50Z", + "url": "https://mastodon.madrid/@felix/107224289116410645", + "attributedTo": "https://mastodon.madrid/users/felix", + "to": [ + "https://mastodon.madrid/users/felix/followers" + ], + "cc": [ + "https://www.w3.org/ns/activitystreams#Public", + "https://mamot.fr/users/retiolus" + ], + "sensitive": false, + "atomUri": "https://mastodon.madrid/users/felix/statuses/107224289116410645", + "inReplyToAtomUri": "https://mamot.fr/users/retiolus/statuses/107224244380204526", + "conversation": "tag:mamot.fr,2021-11-05:objectId=64635960:objectType=Conversation", + "content": "

@retiolus i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.

", + "contentMap": { + "en": "

@retiolus i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.

" + }, + "attachment": [], + "tag": [ + { + "type": "Mention", + "href": "https://mamot.fr/users/retiolus", + "name": "@retiolus@mamot.fr" + } + ], + "replies": { + "id": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies", + "type": "Collection", + "first": { + "type": "CollectionPage", + "next": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies?only_other_accounts=true&page=true", + "partOf": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies", + "items": [] + } + } +} \ No newline at end of file diff --git a/crates/apub/src/api/resolve_object.rs b/crates/apub/src/api/resolve_object.rs index c179ed582..dd39218bc 100644 --- a/crates/apub/src/api/resolve_object.rs +++ b/crates/apub/src/api/resolve_object.rs @@ -46,12 +46,7 @@ async fn convert_response( ) -> Result { use SearchableObjects::*; let removed_or_deleted; - let mut res = ResolveObjectResponse { - comment: None, - post: None, - community: None, - person: None, - }; + let mut res = ResolveObjectResponse::default(); match object { Person(p) => { removed_or_deleted = p.deleted; diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index e15e1b2dc..2ef6401f5 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -21,6 +21,7 @@ use activitypub_federation::{ utils::verify_domains_match, }; use activitystreams_kinds::public; +use anyhow::anyhow; use chrono::NaiveDateTime; use lemmy_api_common::{ context::LemmyContext, @@ -40,11 +41,13 @@ use lemmy_db_schema::{ }; use lemmy_utils::{ error::LemmyError, - utils::{check_slurs, convert_datetime, markdown_to_html, remove_slurs}, + utils::{check_slurs_opt, convert_datetime, markdown_to_html, remove_slurs}, }; use std::ops::Deref; use url::Url; +const MAX_TITLE_LENGTH: usize = 100; + #[derive(Clone, Debug)] pub struct ApubPost(pub(crate) Post); @@ -108,7 +111,7 @@ impl ApubObject for ApubPost { attributed_to: AttributedTo::Lemmy(ObjectId::new(creator.actor_id)), to: vec![community.actor_id.clone().into(), public()], cc: vec![], - name: self.name.clone(), + name: Some(self.name.clone()), content: self.body.as_ref().map(|b| markdown_to_html(b)), media_type: Some(MediaTypeMarkdownOrHtml::Html), source: self.body.clone().map(Source::new), @@ -121,6 +124,7 @@ impl ApubObject for ApubPost { published: Some(convert_datetime(self.published)), updated: self.updated.map(convert_datetime), audience: Some(ObjectId::new(community.actor_id)), + in_reply_to: None, }; Ok(page) } @@ -151,7 +155,7 @@ impl ApubObject for ApubPost { verify_person_in_community(&page.creator()?, &community, context, request_counter).await?; let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site); - check_slurs(&page.name, slur_regex)?; + check_slurs_opt(&page.name, slur_regex)?; verify_domains_match(page.creator()?.inner(), page.id.inner())?; verify_is_public(&page.to, &page.cc)?; @@ -169,6 +173,19 @@ impl ApubObject for ApubPost { .dereference(context, local_instance(context).await, request_counter) .await?; let community = page.community(context, request_counter).await?; + let mut name = page + .name + .clone() + .or_else(|| { + page + .content + .clone() + .and_then(|c| c.lines().next().map(ToString::to_string)) + }) + .ok_or_else(|| anyhow!("Object must have name or content"))?; + if name.chars().count() > MAX_TITLE_LENGTH { + name = name.chars().take(MAX_TITLE_LENGTH).collect(); + } let form = if !page.is_mod_action(context).await? { let first_attachment = page.attachment.into_iter().map(Attachment::url).next(); @@ -197,7 +214,7 @@ impl ApubObject for ApubPost { let language_id = LanguageTag::to_language_id_single(page.language, context.pool()).await?; PostInsertForm { - name: page.name.clone(), + name, url: url.map(Into::into), body: body_slurs_removed, creator_id: creator.id, @@ -221,7 +238,7 @@ impl ApubObject for ApubPost { } else { // if is mod action, only update locked/stickied fields, nothing else PostInsertForm::builder() - .name(page.name.clone()) + .name(name) .creator_id(creator.id) .community_id(community.id) .ap_id(Some(page.id.clone().into())) diff --git a/crates/apub/src/protocol/objects/mod.rs b/crates/apub/src/protocol/objects/mod.rs index 5a3b90bf6..2dcf1eed7 100644 --- a/crates/apub/src/protocol/objects/mod.rs +++ b/crates/apub/src/protocol/objects/mod.rs @@ -131,6 +131,7 @@ mod tests { fn test_parse_objects_mastodon() { test_json::("assets/mastodon/objects/person.json").unwrap(); test_json::("assets/mastodon/objects/note.json").unwrap(); + test_json::("assets/mastodon/objects/page.json").unwrap(); } #[test] diff --git a/crates/apub/src/protocol/objects/page.rs b/crates/apub/src/protocol/objects/page.rs index 3aadb20c1..9055b1fcc 100644 --- a/crates/apub/src/protocol/objects/page.rs +++ b/crates/apub/src/protocol/objects/page.rs @@ -23,7 +23,7 @@ use itertools::Itertools; use lemmy_api_common::context::LemmyContext; use lemmy_db_schema::newtypes::DbUrl; use lemmy_utils::error::LemmyError; -use serde::{Deserialize, Serialize}; +use serde::{de::Error, Deserialize, Deserializer, Serialize}; use serde_with::skip_serializing_none; use url::Url; @@ -46,8 +46,11 @@ pub struct Page { pub(crate) attributed_to: AttributedTo, #[serde(deserialize_with = "deserialize_one_or_many")] pub(crate) to: Vec, - pub(crate) name: String, + // If there is inReplyTo field this is actually a comment and must not be parsed + #[serde(deserialize_with = "deserialize_not_present", default)] + pub(crate) in_reply_to: Option, + pub(crate) name: Option, #[serde(deserialize_with = "deserialize_one_or_many", default)] pub(crate) cc: Vec, pub(crate) content: Option, @@ -259,3 +262,25 @@ impl InCommunity for Page { } } } + +/// Only allows deserialization if the field is missing or null. If it is present, throws an error. +pub fn deserialize_not_present<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let result: Option = Deserialize::deserialize(deserializer)?; + match result { + None => Ok(None), + Some(_) => Err(D::Error::custom("Post must not have inReplyTo property")), + } +} + +#[cfg(test)] +mod tests { + use crate::protocol::{objects::page::Page, tests::test_parse_lemmy_item}; + + #[test] + fn test_not_parsing_note_as_page() { + assert!(test_parse_lemmy_item::("assets/lemmy/objects/note.json").is_err()); + } +} diff --git a/scripts/test.sh b/scripts/test.sh index 44c40ad23..a64d99d42 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -ex +set -e PACKAGE="$1" echo "$PACKAGE" From 6eb5ed343c2f8541a2a9efaf06ac80168de9b4a1 Mon Sep 17 00:00:00 2001 From: Dessalines Date: Fri, 20 Jan 2023 12:44:03 -0500 Subject: [PATCH 4/4] Update post_aggregates indexes to account for featured_local and featured_community columns. (#2661) - Fixes #2660 --- .../down.sql | 22 ++++++++++++++ .../up.sql | 30 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/down.sql create mode 100644 migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/up.sql diff --git a/migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/down.sql b/migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/down.sql new file mode 100644 index 000000000..d91843e19 --- /dev/null +++ b/migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/down.sql @@ -0,0 +1,22 @@ +-- Drop the new indexes +drop index idx_post_aggregates_featured_local_newest_comment_time, + idx_post_aggregates_featured_community_newest_comment_time, + idx_post_aggregates_featured_local_comments, + idx_post_aggregates_featured_community_comments, + idx_post_aggregates_featured_local_hot, + idx_post_aggregates_featured_community_hot, + idx_post_aggregates_featured_local_active, + idx_post_aggregates_featured_community_active, + idx_post_aggregates_featured_local_score, + idx_post_aggregates_featured_community_score, + idx_post_aggregates_featured_local_published, + idx_post_aggregates_featured_community_published; + +-- Create the old indexes +create index idx_post_aggregates_newest_comment_time on post_aggregates (newest_comment_time desc); +create index idx_post_aggregates_comments on post_aggregates (comments desc); +create index idx_post_aggregates_hot on post_aggregates (hot_rank(score, published) desc, published desc); +create index idx_post_aggregates_active on post_aggregates (hot_rank(score, newest_comment_time) desc, newest_comment_time desc); +create index idx_post_aggregates_score on post_aggregates (score desc); +create index idx_post_aggregates_published on post_aggregates (published desc); + diff --git a/migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/up.sql b/migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/up.sql new file mode 100644 index 000000000..881089293 --- /dev/null +++ b/migrations/2023-01-17-165819_cleanup_post_aggregates_indexes/up.sql @@ -0,0 +1,30 @@ +-- Drop the old indexes +drop index idx_post_aggregates_newest_comment_time, + idx_post_aggregates_comments, + idx_post_aggregates_hot, + idx_post_aggregates_active, + idx_post_aggregates_score, + idx_post_aggregates_published; + +-- All of the post fetching queries now start with either +-- featured_local desc, or featured_community desc, then the other sorts. +-- So you now need to double these indexes + +create index idx_post_aggregates_featured_local_newest_comment_time on post_aggregates (featured_local desc, newest_comment_time desc); +create index idx_post_aggregates_featured_community_newest_comment_time on post_aggregates (featured_community desc, newest_comment_time desc); + +create index idx_post_aggregates_featured_local_comments on post_aggregates (featured_local desc, comments desc); +create index idx_post_aggregates_featured_community_comments on post_aggregates (featured_community desc, comments desc); + +create index idx_post_aggregates_featured_local_hot on post_aggregates (featured_local desc, hot_rank(score, published) desc, published desc); +create index idx_post_aggregates_featured_community_hot on post_aggregates (featured_community desc, hot_rank(score, published) desc, published desc); + +create index idx_post_aggregates_featured_local_active on post_aggregates (featured_local desc, hot_rank(score, newest_comment_time) desc, newest_comment_time desc); +create index idx_post_aggregates_featured_community_active on post_aggregates (featured_community desc, hot_rank(score, newest_comment_time) desc, newest_comment_time desc); + +create index idx_post_aggregates_featured_local_score on post_aggregates (featured_local desc, score desc); +create index idx_post_aggregates_featured_community_score on post_aggregates (featured_community desc, score desc); + +create index idx_post_aggregates_featured_local_published on post_aggregates (featured_local desc, published desc); +create index idx_post_aggregates_featured_community_published on post_aggregates (featured_community desc, published desc); +