Add controversial ranking (#3205)

* Added controversy rank property to posts and comments, and ability to sort by it

* Triggers instead of schedules tasks, integer -> double, TODO: comments don't seem to get updated with floats, divide SortTypes

* Created PersonSortType

* PersonSortType::MostComments case

* Removed unused PartialOrd trait

* Added new person sort type mappings

* SortType -> PersonSortType

* fixes

* cargo fmt

* fixes after merge with main

* Fixed bug in controversy rank trigger, removed TopX sorts from PersonSortType and added CommentScore instead

* Uncovered enum case

* clippy

* reset translation changes

* translations

* translations

* Added additional hot ordering on controversial posts and comments

* featured local and featured community added to controversy rank index, additional order_by removed (?), added post_score and post_count to PersonSortType

* Woodpecker rerun

* cargo fmt

* woodpecker rerun

* fixed controversy_rank order

* fix

* Readded migration as latest, removed second update statement for setting controversy rank
This commit is contained in:
Pavlos Smith 2023-07-26 20:07:05 +03:00 committed by GitHub
parent dc4572460e
commit c890797b37
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 232 additions and 52 deletions

View file

@ -8,7 +8,7 @@ use lemmy_api_common::{
};
use lemmy_db_schema::{
source::{community::Community, local_site::LocalSite},
utils::post_to_comment_sort_type,
utils::{post_to_comment_sort_type, post_to_person_sort_type},
SearchType,
};
use lemmy_db_views::{comment_view::CommentQuery, post_view::PostQuery};
@ -98,7 +98,7 @@ pub async fn search(
}
SearchType::Users => {
users = PersonQuery {
sort: (sort),
sort: (sort.map(post_to_person_sort_type)),
search_term: (Some(q)),
page: (page),
limit: (limit),
@ -168,7 +168,7 @@ pub async fn search(
vec![]
} else {
PersonQuery {
sort: (sort),
sort: (sort.map(post_to_person_sort_type)),
search_term: (Some(q)),
page: (page),
limit: (limit),

View file

@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "full")]
use ts_rs::TS;
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
#[derive(PartialEq, Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(Queryable, Associations, Identifiable, TS))]
#[cfg_attr(feature = "full", diesel(table_name = comment_aggregates))]
#[cfg_attr(feature = "full", diesel(belongs_to(crate::source::comment::Comment)))]
@ -28,6 +28,7 @@ pub struct CommentAggregates {
/// The total number of children in this comment branch.
pub child_count: i32,
pub hot_rank: i32,
pub controversy_rank: f64,
}
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
@ -72,7 +73,7 @@ pub struct PersonAggregates {
pub comment_score: i64,
}
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
#[derive(PartialEq, Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(Queryable, Associations, Identifiable, TS))]
#[cfg_attr(feature = "full", diesel(table_name = post_aggregates))]
#[cfg_attr(feature = "full", diesel(belongs_to(crate::source::post::Post)))]
@ -98,6 +99,7 @@ pub struct PostAggregates {
pub hot_rank_active: i32,
pub community_id: CommunityId,
pub creator_id: PersonId,
pub controversy_rank: f64,
}
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]

View file

@ -66,6 +66,7 @@ pub enum SortType {
TopThreeMonths,
TopSixMonths,
TopNineMonths,
Controversial,
}
#[derive(EnumString, Display, Debug, Serialize, Deserialize, Clone, Copy)]
@ -77,6 +78,20 @@ pub enum CommentSortType {
Top,
New,
Old,
Controversial,
}
#[derive(EnumString, Display, Debug, Serialize, Deserialize, Clone, Copy)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// The person sort types. See here for descriptions: https://join-lemmy.org/docs/en/users/03-votes-and-ranking.html
pub enum PersonSortType {
New,
Old,
MostComments,
CommentScore,
PostScore,
PostCount,
}
#[derive(EnumString, Display, Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]

View file

@ -93,6 +93,7 @@ diesel::table! {
published -> Timestamp,
child_count -> Int4,
hot_rank -> Int4,
controversy_rank -> Float8,
}
}
@ -676,6 +677,7 @@ diesel::table! {
hot_rank_active -> Int4,
community_id -> Int4,
creator_id -> Int4,
controversy_rank -> Float8,
}
}

View file

@ -3,6 +3,7 @@ use crate::{
diesel_migrations::MigrationHarness,
newtypes::DbUrl,
CommentSortType,
PersonSortType,
SortType,
};
use activitypub_federation::{fetch::object_id::ObjectId, traits::Object};
@ -347,6 +348,7 @@ pub fn post_to_comment_sort_type(sort: SortType) -> CommentSortType {
SortType::Active | SortType::Hot => CommentSortType::Hot,
SortType::New | SortType::NewComments | SortType::MostComments => CommentSortType::New,
SortType::Old => CommentSortType::Old,
SortType::Controversial => CommentSortType::Controversial,
SortType::TopHour
| SortType::TopSixHour
| SortType::TopTwelveHour
@ -361,6 +363,16 @@ pub fn post_to_comment_sort_type(sort: SortType) -> CommentSortType {
}
}
pub fn post_to_person_sort_type(sort: SortType) -> PersonSortType {
match sort {
SortType::Active | SortType::Hot | SortType::Controversial => PersonSortType::CommentScore,
SortType::New | SortType::NewComments => PersonSortType::New,
SortType::MostComments => PersonSortType::MostComments,
SortType::Old => PersonSortType::Old,
_ => PersonSortType::CommentScore,
}
}
static EMAIL_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^[a-zA-Z0-9.!#$%&*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$")
.expect("compile email regex")
@ -373,6 +385,10 @@ pub mod functions {
fn hot_rank(score: BigInt, time: Timestamp) -> Integer;
}
sql_function! {
fn controversy_rank(upvotes: BigInt, downvotes: BigInt, score: BigInt) -> Double;
}
sql_function!(fn lower(x: Text) -> Text);
}

View file

@ -478,6 +478,7 @@ mod tests {
published: agg.published,
child_count: 0,
hot_rank: 1728,
controversy_rank: 0.0,
},
my_vote: None,
resolver: None,

View file

@ -366,6 +366,9 @@ impl<'a> CommentQuery<'a> {
CommentSortType::Hot => query
.then_order_by(comment_aggregates::hot_rank.desc())
.then_order_by(comment_aggregates::score.desc()),
CommentSortType::Controversial => {
query.then_order_by(comment_aggregates::controversy_rank.desc())
}
CommentSortType::New => query.then_order_by(comment::published.desc()),
CommentSortType::Old => query.then_order_by(comment::published.asc()),
CommentSortType::Top => query.order_by(comment_aggregates::score.desc()),
@ -948,6 +951,7 @@ mod tests {
published: agg.published,
child_count: 5,
hot_rank: 1728,
controversy_rank: 0.0,
},
}
}

View file

@ -470,6 +470,7 @@ mod tests {
featured_local: false,
hot_rank: 1728,
hot_rank_active: 1728,
controversy_rank: 0.0,
community_id: inserted_post.community_id,
creator_id: inserted_post.creator_id,
},

View file

@ -422,6 +422,7 @@ impl<'a> PostQuery<'a> {
SortType::Hot => query
.then_order_by(post_aggregates::hot_rank.desc())
.then_order_by(post_aggregates::published.desc()),
SortType::Controversial => query.then_order_by(post_aggregates::controversy_rank.desc()),
SortType::New => query.then_order_by(post_aggregates::published.desc()),
SortType::Old => query.then_order_by(post_aggregates::published.asc()),
SortType::NewComments => query.then_order_by(post_aggregates::newest_comment_time.desc()),
@ -1141,6 +1142,7 @@ mod tests {
featured_local: false,
hot_rank: 1728,
hot_rank_active: 1728,
controversy_rank: 0.0,
community_id: inserted_post.community_id,
creator_id: inserted_post.creator_id,
},

View file

@ -25,7 +25,7 @@ use serde_with::skip_serializing_none;
use ts_rs::TS;
#[skip_serializing_none]
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// A comment report view.
@ -43,7 +43,7 @@ pub struct CommentReportView {
}
#[skip_serializing_none]
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// A comment view.
@ -71,7 +71,7 @@ pub struct LocalUserView {
}
#[skip_serializing_none]
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// A post report view.
@ -88,7 +88,7 @@ pub struct PostReportView {
}
#[skip_serializing_none]
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// A post view.

View file

@ -266,6 +266,9 @@ impl CommentReplyQuery {
query = match self.sort.unwrap_or(CommentSortType::New) {
CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()),
CommentSortType::Controversial => {
query.then_order_by(comment_aggregates::controversy_rank.desc())
}
CommentSortType::New => query.then_order_by(comment_reply::published.desc()),
CommentSortType::Old => query.then_order_by(comment_reply::published.asc()),
CommentSortType::Top => query.order_by(comment_aggregates::score.desc()),

View file

@ -170,7 +170,8 @@ impl<'a> CommunityQuery<'a> {
}
New => query = query.order_by(community::published.desc()),
Old => query = query.order_by(community::published.asc()),
MostComments => query = query.order_by(community_aggregates::comments.desc()),
// Controversial is temporary until a CommentSortType is created
MostComments | Controversial => query = query.order_by(community_aggregates::comments.desc()),
TopAll | TopYear | TopNineMonths => {
query = query.order_by(community_aggregates::subscribers.desc())
}

View file

@ -271,6 +271,9 @@ impl PersonMentionQuery {
query = match self.sort.unwrap_or(CommentSortType::Hot) {
CommentSortType::Hot => query.then_order_by(comment_aggregates::hot_rank.desc()),
CommentSortType::Controversial => {
query.then_order_by(comment_aggregates::controversy_rank.desc())
}
CommentSortType::New => query.then_order_by(comment::published.desc()),
CommentSortType::Old => query.then_order_by(comment::published.asc()),
CommentSortType::Top => query.order_by(comment_aggregates::score.desc()),

View file

@ -1,6 +1,6 @@
use crate::structs::PersonView;
use diesel::{
dsl::{now, IntervalDsl},
dsl::now,
result::Error,
BoolExpressionMethods,
ExpressionMethods,
@ -16,7 +16,7 @@ use lemmy_db_schema::{
source::person::Person,
traits::JoinView,
utils::{fuzzy_search, get_conn, limit_and_offset, DbPool},
SortType,
PersonSortType,
};
use std::iter::Iterator;
@ -80,7 +80,7 @@ impl PersonView {
#[derive(Default)]
pub struct PersonQuery {
pub sort: Option<SortType>,
pub sort: Option<PersonSortType>,
pub search_term: Option<String>,
pub page: Option<i64>,
pub limit: Option<i64>,
@ -101,43 +101,13 @@ impl PersonQuery {
.or_filter(person::display_name.ilike(searcher));
}
query = match self.sort.unwrap_or(SortType::Hot) {
SortType::New | SortType::NewComments => query.order_by(person::published.desc()),
SortType::Old => query.order_by(person::published.asc()),
SortType::Hot | SortType::Active | SortType::TopAll => {
query.order_by(person_aggregates::comment_score.desc())
}
SortType::MostComments => query.order_by(person_aggregates::comment_count.desc()),
SortType::TopYear => query
.filter(person::published.gt(now - 1.years()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopMonth => query
.filter(person::published.gt(now - 1.months()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopWeek => query
.filter(person::published.gt(now - 1.weeks()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopDay => query
.filter(person::published.gt(now - 1.days()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopHour => query
.filter(person::published.gt(now - 1.hours()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopSixHour => query
.filter(person::published.gt(now - 6.hours()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopTwelveHour => query
.filter(person::published.gt(now - 12.hours()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopThreeMonths => query
.filter(person::published.gt(now - 3.months()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopSixMonths => query
.filter(person::published.gt(now - 6.months()))
.order_by(person_aggregates::comment_score.desc()),
SortType::TopNineMonths => query
.filter(person::published.gt(now - 9.months()))
.order_by(person_aggregates::comment_score.desc()),
query = match self.sort.unwrap_or(PersonSortType::CommentScore) {
PersonSortType::New => query.order_by(person::published.desc()),
PersonSortType::Old => query.order_by(person::published.asc()),
PersonSortType::MostComments => query.order_by(person_aggregates::comment_count.desc()),
PersonSortType::CommentScore => query.order_by(person_aggregates::comment_score.desc()),
PersonSortType::PostScore => query.order_by(person_aggregates::post_score.desc()),
PersonSortType::PostCount => query.order_by(person_aggregates::post_count.desc()),
};
let (limit, offset) = limit_and_offset(self.page, self.limit)?;

View file

@ -70,7 +70,7 @@ pub struct PersonBlockView {
}
#[skip_serializing_none]
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// A person mention view.
@ -90,7 +90,7 @@ pub struct PersonMentionView {
}
#[skip_serializing_none]
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// A comment reply view.

View file

@ -0,0 +1,63 @@
-- Update comment_aggregates_score trigger function to exclude controversy_rank update
create or replace function comment_aggregates_score()
returns trigger language plpgsql
as $$
begin
IF (TG_OP = 'INSERT') THEN
update comment_aggregates ca
set score = score + NEW.score,
upvotes = case when NEW.score = 1 then upvotes + 1 else upvotes end,
downvotes = case when NEW.score = -1 then downvotes + 1 else downvotes end
where ca.comment_id = NEW.comment_id;
ELSIF (TG_OP = 'DELETE') THEN
-- Join to comment because that comment may not exist anymore
update comment_aggregates ca
set score = score - OLD.score,
upvotes = case when OLD.score = 1 then upvotes - 1 else upvotes end,
downvotes = case when OLD.score = -1 then downvotes - 1 else downvotes end
from comment c
where ca.comment_id = c.id
and ca.comment_id = OLD.comment_id;
END IF;
return null;
end $$;
-- Update post_aggregates_score trigger function to exclude controversy_rank update
create or replace function post_aggregates_score()
returns trigger language plpgsql
as $$
begin
IF (TG_OP = 'INSERT') THEN
update post_aggregates pa
set score = score + NEW.score,
upvotes = case when NEW.score = 1 then upvotes + 1 else upvotes end,
downvotes = case when NEW.score = -1 then downvotes + 1 else downvotes end
where pa.post_id = NEW.post_id;
ELSIF (TG_OP = 'DELETE') THEN
-- Join to post because that post may not exist anymore
update post_aggregates pa
set score = score - OLD.score,
upvotes = case when OLD.score = 1 then upvotes - 1 else upvotes end,
downvotes = case when OLD.score = -1 then downvotes - 1 else downvotes end
from post p
where pa.post_id = p.id
and pa.post_id = OLD.post_id;
END IF;
return null;
end $$;
-- Drop the indexes
drop index if exists idx_post_aggregates_featured_local_controversy;
drop index if exists idx_post_aggregates_featured_community_controversy;
drop index if exists idx_comment_aggregates_controversy;
-- Remove the added columns from the tables
alter table post_aggregates drop column controversy_rank;
alter table comment_aggregates drop column controversy_rank;
-- Remove function
drop function controversy_rank(numeric, numeric);

View file

@ -0,0 +1,97 @@
-- Need to add immutable to the controversy_rank function in order to index by it
-- Controversy Rank:
-- if downvotes <= 0 or upvotes <= 0:
-- 0
-- else:
-- (upvotes + downvotes) * min(upvotes, downvotes) / max(upvotes, downvotes)
create or replace function controversy_rank(upvotes numeric, downvotes numeric)
returns float as $$
begin
if downvotes <= 0 or upvotes <= 0 then
return 0;
else
return (upvotes + downvotes) *
case when upvotes > downvotes
then downvotes::float / upvotes::float
else upvotes::float / downvotes::float
end;
end if;
end; $$
LANGUAGE plpgsql
IMMUTABLE;
-- Aggregates
alter table post_aggregates add column controversy_rank float not null default 0;
alter table comment_aggregates add column controversy_rank float not null default 0;
-- Populate them initially
-- Note: After initial population, these are updated with vote triggers
update post_aggregates set controversy_rank = controversy_rank(upvotes::numeric, downvotes::numeric);
update comment_aggregates set controversy_rank = controversy_rank(upvotes::numeric, downvotes::numeric);
-- Create single column indexes
create index idx_post_aggregates_featured_local_controversy on post_aggregates (featured_local desc, controversy_rank desc);
create index idx_post_aggregates_featured_community_controversy on post_aggregates (featured_community desc, controversy_rank desc);
create index idx_comment_aggregates_controversy on comment_aggregates (controversy_rank desc);
-- Update post_aggregates_score trigger function to include controversy_rank update
create or replace function post_aggregates_score()
returns trigger language plpgsql
as $$
begin
IF (TG_OP = 'INSERT') THEN
update post_aggregates pa
set score = score + NEW.score,
upvotes = case when NEW.score = 1 then upvotes + 1 else upvotes end,
downvotes = case when NEW.score = -1 then downvotes + 1 else downvotes end,
controversy_rank = controversy_rank(pa.upvotes + case when NEW.score = 1 then 1 else 0 end::numeric,
pa.downvotes + case when NEW.score = -1 then 1 else 0 end::numeric)
where pa.post_id = NEW.post_id;
ELSIF (TG_OP = 'DELETE') THEN
-- Join to post because that post may not exist anymore
update post_aggregates pa
set score = score - OLD.score,
upvotes = case when OLD.score = 1 then upvotes - 1 else upvotes end,
downvotes = case when OLD.score = -1 then downvotes - 1 else downvotes end,
controversy_rank = controversy_rank(pa.upvotes + case when NEW.score = 1 then 1 else 0 end::numeric,
pa.downvotes + case when NEW.score = -1 then 1 else 0 end::numeric)
from post p
where pa.post_id = p.id
and pa.post_id = OLD.post_id;
END IF;
return null;
end $$;
-- Update comment_aggregates_score trigger function to include controversy_rank update
create or replace function comment_aggregates_score()
returns trigger language plpgsql
as $$
begin
IF (TG_OP = 'INSERT') THEN
update comment_aggregates ca
set score = score + NEW.score,
upvotes = case when NEW.score = 1 then upvotes + 1 else upvotes end,
downvotes = case when NEW.score = -1 then downvotes + 1 else downvotes end,
controversy_rank = controversy_rank(ca.upvotes + case when NEW.score = 1 then 1 else 0 end::numeric,
ca.downvotes + case when NEW.score = -1 then 1 else 0 end::numeric)
where ca.comment_id = NEW.comment_id;
ELSIF (TG_OP = 'DELETE') THEN
-- Join to comment because that comment may not exist anymore
update comment_aggregates ca
set score = score - OLD.score,
upvotes = case when OLD.score = 1 then upvotes - 1 else upvotes end,
downvotes = case when OLD.score = -1 then downvotes - 1 else downvotes end,
controversy_rank = controversy_rank(ca.upvotes + case when NEW.score = 1 then 1 else 0 end::numeric,
ca.downvotes + case when NEW.score = -1 then 1 else 0 end::numeric)
from comment c
where ca.comment_id = c.id
and ca.comment_id = OLD.comment_id;
END IF;
return null;
end $$;