Organize utils into separate files. Fixes #2295 (#2736)

* Organize utils into separate files. Fixes #2295

* Moving tests.

* Fix test.

* Fix test 2
This commit is contained in:
Dessalines 2023-02-15 23:05:14 -05:00 committed by GitHub
parent 8a27978ee1
commit 36aaa50644
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
43 changed files with 381 additions and 361 deletions

1
Cargo.lock generated
View file

@ -2401,6 +2401,7 @@ dependencies = [
"tracing",
"tracing-opentelemetry 0.17.4",
"url",
"uuid",
"webpage",
]

View file

@ -19,7 +19,7 @@ use lemmy_db_schema::{
traits::{Bannable, Crud, Followable},
};
use lemmy_db_views_actor::structs::PersonViewSafe;
use lemmy_utils::{error::LemmyError, utils::naive_from_unix, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::time::naive_from_unix, ConnectionId};
#[async_trait::async_trait(?Send)]
impl Perform for BanFromCommunity {

View file

@ -2,7 +2,7 @@ use actix_web::web::Data;
use captcha::Captcha;
use lemmy_api_common::{context::LemmyContext, utils::local_site_to_slur_regex};
use lemmy_db_schema::source::local_site::LocalSite;
use lemmy_utils::{error::LemmyError, utils::check_slurs, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::slurs::check_slurs, ConnectionId};
mod comment;
mod comment_report;

View file

@ -14,7 +14,7 @@ use lemmy_db_schema::{
traits::Crud,
};
use lemmy_db_views_actor::structs::PersonViewSafe;
use lemmy_utils::{error::LemmyError, utils::naive_from_unix, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::time::naive_from_unix, ConnectionId};
#[async_trait::async_trait(?Send)]
impl Perform for BanPerson {

View file

@ -18,7 +18,7 @@ use lemmy_db_schema::{
use lemmy_utils::{
claims::Claims,
error::LemmyError,
utils::{is_valid_display_name, is_valid_matrix_id},
utils::validation::{is_valid_display_name, is_valid_matrix_id},
ConnectionId,
};

View file

@ -47,7 +47,6 @@ tracing-opentelemetry = { workspace = true }
actix-ws = { workspace = true }
futures = { workspace = true }
background-jobs = "0.13.0"
[dev-dependencies]
uuid = { workspace = true }
actix-rt = { workspace = true }
reqwest = { workspace = true }

View file

@ -39,7 +39,7 @@ use lemmy_utils::{
location_info,
rate_limit::RateLimitConfig,
settings::structs::Settings,
utils::{build_slur_regex, generate_random_string},
utils::slurs::build_slur_regex,
};
use regex::Regex;
use reqwest_middleware::ClientWithMiddleware;
@ -360,7 +360,7 @@ pub async fn send_password_reset_email(
settings: &Settings,
) -> Result<(), LemmyError> {
// Generate a random token
let token = generate_random_string();
let token = uuid::Uuid::new_v4().to_string();
// Insert the row
let token2 = token.clone();
@ -386,7 +386,7 @@ pub async fn send_verification_email(
let form = EmailVerificationForm {
local_user_id: user.local_user.id,
email: new_email.to_string(),
verification_token: generate_random_string(),
verification_token: uuid::Uuid::new_v4().to_string(),
};
let verify_link = format!(
"{}/verify_email/{}",

View file

@ -22,7 +22,7 @@ use lemmy_db_schema::{
};
use lemmy_db_views::structs::{CommentView, LocalUserView, PostView, PrivateMessageView};
use lemmy_db_views_actor::structs::CommunityView;
use lemmy_utils::{error::LemmyError, utils::MentionData, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::mention::MentionData, ConnectionId};
#[tracing::instrument(skip_all)]
pub async fn send_post_ws_message<OP: ToString + Send + OperationType + 'static>(

View file

@ -30,7 +30,7 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{remove_slurs, scrape_text_for_mentions},
utils::{mention::scrape_text_for_mentions, slurs::remove_slurs},
ConnectionId,
};

View file

@ -26,7 +26,7 @@ use lemmy_db_schema::{
use lemmy_db_views::structs::CommentView;
use lemmy_utils::{
error::LemmyError,
utils::{remove_slurs, scrape_text_for_mentions},
utils::{mention::scrape_text_for_mentions, slurs::remove_slurs},
ConnectionId,
};

View file

@ -34,7 +34,10 @@ use lemmy_db_views::structs::SiteView;
use lemmy_db_views_actor::structs::CommunityView;
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs, check_slurs_opt, is_valid_actor_name},
utils::{
slurs::{check_slurs, check_slurs_opt},
validation::is_valid_actor_name,
},
ConnectionId,
};

View file

@ -13,7 +13,7 @@ use lemmy_db_schema::{
},
traits::Crud,
};
use lemmy_utils::{error::LemmyError, utils::naive_from_unix, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::time::naive_from_unix, ConnectionId};
#[async_trait::async_trait(?Send)]
impl PerformCrud for RemoveCommunity {

View file

@ -17,7 +17,7 @@ use lemmy_db_schema::{
utils::{diesel_option_overwrite, diesel_option_overwrite_to_url, naive_now},
};
use lemmy_db_views_actor::structs::CommunityModeratorView;
use lemmy_utils::{error::LemmyError, utils::check_slurs_opt, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::slurs::check_slurs_opt, ConnectionId};
#[async_trait::async_trait(?Send)]
impl PerformCrud for EditCommunity {

View file

@ -29,7 +29,10 @@ use lemmy_db_schema::{
use lemmy_db_views_actor::structs::CommunityView;
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs, check_slurs_opt, clean_url_params, is_valid_post_title},
utils::{
slurs::{check_slurs, check_slurs_opt},
validation::{clean_url_params, is_valid_post_title},
},
ConnectionId,
};
use tracing::{warn, Instrument};

View file

@ -18,7 +18,10 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs_opt, clean_url_params, is_valid_post_title},
utils::{
slurs::check_slurs_opt,
validation::{clean_url_params, is_valid_post_title},
},
ConnectionId,
};

View file

@ -22,7 +22,7 @@ use lemmy_db_schema::{
traits::Crud,
};
use lemmy_db_views::structs::LocalUserView;
use lemmy_utils::{error::LemmyError, utils::remove_slurs, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::slurs::remove_slurs, ConnectionId};
#[async_trait::async_trait(?Send)]
impl PerformCrud for CreatePrivateMessage {

View file

@ -14,7 +14,7 @@ use lemmy_db_schema::{
traits::Crud,
utils::naive_now,
};
use lemmy_utils::{error::LemmyError, utils::remove_slurs, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::slurs::remove_slurs, ConnectionId};
#[async_trait::async_trait(?Send)]
impl PerformCrud for EditPrivateMessage {

View file

@ -26,7 +26,7 @@ use lemmy_db_schema::{
use lemmy_db_views::structs::SiteView;
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs, check_slurs_opt},
utils::slurs::{check_slurs, check_slurs_opt},
ConnectionId,
};
use url::Url;

View file

@ -28,7 +28,7 @@ use lemmy_db_schema::{
ListingType,
};
use lemmy_db_views::structs::SiteView;
use lemmy_utils::{error::LemmyError, utils::check_slurs_opt, ConnectionId};
use lemmy_utils::{error::LemmyError, utils::slurs::check_slurs_opt, ConnectionId};
use std::str::FromStr;
#[async_trait::async_trait(?Send)]

View file

@ -30,7 +30,10 @@ use lemmy_db_views::structs::{LocalUserView, SiteView};
use lemmy_utils::{
claims::Claims,
error::LemmyError,
utils::{check_slurs, check_slurs_opt, is_valid_actor_name},
utils::{
slurs::{check_slurs, check_slurs_opt},
validation::is_valid_actor_name,
},
ConnectionId,
};

View file

@ -40,7 +40,7 @@ use lemmy_db_schema::{
},
traits::{Bannable, Crud, Followable},
};
use lemmy_utils::{error::LemmyError, utils::convert_datetime};
use lemmy_utils::{error::LemmyError, utils::time::convert_datetime};
use url::Url;
impl BlockUser {

View file

@ -21,7 +21,7 @@ use lemmy_db_schema::{
utils::DbPool,
};
use lemmy_db_views::structs::SiteView;
use lemmy_utils::{error::LemmyError, utils::naive_from_unix};
use lemmy_utils::{error::LemmyError, utils::time::naive_from_unix};
use serde::Deserialize;
use url::Url;

View file

@ -6,7 +6,7 @@ use lemmy_db_schema::{
source::{comment::Comment, post::Post},
traits::Crud,
};
use lemmy_utils::{error::LemmyError, utils::scrape_text_for_mentions};
use lemmy_utils::{error::LemmyError, utils::mention::scrape_text_for_mentions};
pub mod comment;
pub mod post;

View file

@ -13,7 +13,7 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{scrape_text_for_mentions, MentionData},
utils::mention::{scrape_text_for_mentions, MentionData},
};
use serde::{Deserialize, Serialize};
use serde_json::Value;

View file

@ -33,7 +33,7 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{convert_datetime, markdown_to_html, remove_slurs},
utils::{markdown::markdown_to_html, slurs::remove_slurs, time::convert_datetime},
};
use std::ops::Deref;
use url::Url;

View file

@ -33,7 +33,7 @@ use lemmy_db_schema::{
use lemmy_db_views_actor::structs::CommunityFollowerView;
use lemmy_utils::{
error::LemmyError,
utils::{convert_datetime, markdown_to_html},
utils::{markdown::markdown_to_html, time::convert_datetime},
};
use std::ops::Deref;
use tracing::debug;

View file

@ -30,7 +30,11 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs, check_slurs_opt, convert_datetime, markdown_to_html},
utils::{
markdown::markdown_to_html,
slurs::{check_slurs, check_slurs_opt},
time::convert_datetime,
},
};
use std::ops::Deref;
use tracing::debug;

View file

@ -32,7 +32,11 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs, check_slurs_opt, convert_datetime, markdown_to_html},
utils::{
markdown::markdown_to_html,
slurs::{check_slurs, check_slurs_opt},
time::convert_datetime,
},
};
use std::ops::Deref;
use url::Url;

View file

@ -42,7 +42,11 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs_opt, convert_datetime, markdown_to_html, remove_slurs},
utils::{
markdown::markdown_to_html,
slurs::{check_slurs_opt, remove_slurs},
time::convert_datetime,
},
};
use std::ops::Deref;
use url::Url;

View file

@ -25,7 +25,7 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{convert_datetime, markdown_to_html},
utils::{markdown::markdown_to_html, time::convert_datetime},
};
use std::ops::Deref;
use url::Url;

View file

@ -27,7 +27,7 @@ use lemmy_db_schema::{
};
use lemmy_utils::{
error::LemmyError,
utils::{check_slurs, check_slurs_opt},
utils::slurs::{check_slurs, check_slurs_opt},
};
use serde::{Deserialize, Serialize};
use serde_with::skip_serializing_none;

View file

@ -6,7 +6,6 @@ use crate::{
};
use diesel::{dsl::insert_into, result::Error, ExpressionMethods, QueryDsl};
use diesel_async::{AsyncPgConnection, RunQueryDsl};
use lemmy_utils::utils::generate_domain_url;
use url::Url;
impl Instance {
@ -28,7 +27,7 @@ impl Instance {
Self::create_conn(conn, domain).await
}
pub async fn create_from_actor_id(pool: &DbPool, actor_id: &Url) -> Result<Self, Error> {
let domain = &generate_domain_url(actor_id).expect("actor id missing a domain");
let domain = actor_id.host_str().expect("actor id missing a domain");
Self::create(pool, domain).await
}
pub async fn create_conn(conn: &mut AsyncPgConnection, domain: &str) -> Result<Self, Error> {

View file

@ -20,7 +20,7 @@ use lemmy_db_views_actor::{
person_mention_view::PersonMentionQuery,
structs::{CommentReplyView, PersonMentionView},
};
use lemmy_utils::{claims::Claims, error::LemmyError, utils::markdown_to_html};
use lemmy_utils::{claims::Claims, error::LemmyError, utils::markdown::markdown_to_html};
use once_cell::sync::Lazy;
use rss::{
extension::dublincore::DublinCoreExtensionBuilder,

View file

@ -11,8 +11,6 @@ pub mod settings;
pub mod claims;
pub mod error;
pub mod request;
#[cfg(test)]
mod test;
pub mod utils;
pub mod version;

View file

@ -1,5 +1,5 @@
use crate::{error::LemmyError, utils::get_ip, IpAddr};
use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
use crate::{error::LemmyError, IpAddr};
use actix_web::dev::{ConnectionInfo, Service, ServiceRequest, ServiceResponse, Transform};
use futures::future::{ok, Ready};
use rate_limiter::{RateLimitStorage, RateLimitType};
use serde::{Deserialize, Serialize};
@ -220,3 +220,15 @@ where
})
}
}
fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
IpAddr(
conn_info
.realip_remote_addr()
.unwrap_or("127.0.0.1:12345")
.split(':')
.next()
.unwrap_or("127.0.0.1")
.to_string(),
)
}

View file

@ -1,98 +0,0 @@
use crate::utils::{
is_valid_actor_name,
is_valid_display_name,
is_valid_matrix_id,
is_valid_post_title,
remove_slurs,
scrape_text_for_mentions,
slur_check,
slurs_vec_to_str,
};
use regex::RegexBuilder;
#[test]
fn test_mentions_regex() {
let text = "Just read a great blog post by [@tedu@honk.teduangst.com](/u/test). And another by !test_community@fish.teduangst.com . Another [@lemmy@lemmy-alpha:8540](/u/fish)";
let mentions = scrape_text_for_mentions(text);
assert_eq!(mentions[0].name, "tedu".to_string());
assert_eq!(mentions[0].domain, "honk.teduangst.com".to_string());
assert_eq!(mentions[1].domain, "lemmy-alpha:8540".to_string());
}
#[test]
fn test_valid_actor_name() {
let actor_name_max_length = 20;
assert!(is_valid_actor_name("Hello_98", actor_name_max_length));
assert!(is_valid_actor_name("ten", actor_name_max_length));
assert!(!is_valid_actor_name("Hello-98", actor_name_max_length));
assert!(!is_valid_actor_name("a", actor_name_max_length));
assert!(!is_valid_actor_name("", actor_name_max_length));
}
#[test]
fn test_valid_display_name() {
let actor_name_max_length = 20;
assert!(is_valid_display_name("hello @there", actor_name_max_length));
assert!(!is_valid_display_name(
"@hello there",
actor_name_max_length
));
// Make sure zero-space with an @ doesn't work
assert!(!is_valid_display_name(
&format!("{}@my name is", '\u{200b}'),
actor_name_max_length
));
}
#[test]
fn test_valid_post_title() {
assert!(is_valid_post_title("Post Title"));
assert!(is_valid_post_title(" POST TITLE 😃😃😃😃😃"));
assert!(!is_valid_post_title("\n \n \n \n ")); // tabs/spaces/newlines
}
#[test]
fn test_valid_matrix_id() {
assert!(is_valid_matrix_id("@dess:matrix.org"));
assert!(!is_valid_matrix_id("dess:matrix.org"));
assert!(!is_valid_matrix_id(" @dess:matrix.org"));
assert!(!is_valid_matrix_id("@dess:matrix.org t"));
}
#[test]
fn test_slur_filter() {
let slur_regex = Some(RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|ni((g{2,}|q)+|[gq]{2,})[e3r]+(s|z)?|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap());
let test =
"faggot test kike tranny cocksucker retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
let slur_free = "No slurs here";
assert_eq!(
remove_slurs(test, &slur_regex),
"*removed* test *removed* *removed* *removed* *removed*. Capitalized *removed*. This is a bunch of other safe text."
.to_string()
);
let has_slurs_vec = vec![
"Niggerz",
"cocksucker",
"faggot",
"kike",
"retardeds",
"tranny",
];
let has_slurs_err_str = "No slurs - Niggerz, cocksucker, faggot, kike, retardeds, tranny";
assert_eq!(slur_check(test, &slur_regex), Err(has_slurs_vec));
assert_eq!(slur_check(slur_free, &slur_regex), Ok(()));
if let Err(slur_vec) = slur_check(test, &slur_regex) {
assert_eq!(&slurs_vec_to_str(&slur_vec), has_slurs_err_str);
}
}
// These helped with testing
// #[test]
// fn test_send_email() {
// let result = send_email("not a subject", "test_email@gmail.com", "ur user", "<h1>HI there</h1>");
// assert!(result.is_ok());
// }

View file

@ -1,223 +0,0 @@
use crate::{error::LemmyError, location_info, IpAddr};
use actix_web::dev::ConnectionInfo;
use anyhow::Context;
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use itertools::Itertools;
use once_cell::sync::Lazy;
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use regex::{Regex, RegexBuilder};
use url::Url;
static MENTIONS_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex")
});
static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
static VALID_POST_TITLE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
});
// taken from https://en.wikipedia.org/wiki/UTM_parameters
static CLEAN_URL_PARAMS_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
.expect("compile regex")
});
pub fn naive_from_unix(time: i64) -> NaiveDateTime {
NaiveDateTime::from_timestamp_opt(time, 0).expect("convert datetime")
}
pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
DateTime::<FixedOffset>::from_utc(
datetime,
FixedOffset::east_opt(0).expect("create fixed offset"),
)
}
pub fn remove_slurs(test: &str, slur_regex: &Option<Regex>) -> String {
if let Some(slur_regex) = slur_regex {
slur_regex.replace_all(test, "*removed*").to_string()
} else {
test.to_string()
}
}
pub(crate) fn slur_check<'a>(
test: &'a str,
slur_regex: &'a Option<Regex>,
) -> Result<(), Vec<&'a str>> {
if let Some(slur_regex) = slur_regex {
let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
// Unique
matches.sort_unstable();
matches.dedup();
if matches.is_empty() {
Ok(())
} else {
Err(matches)
}
} else {
Ok(())
}
}
pub fn build_slur_regex(regex_str: Option<&str>) -> Option<Regex> {
regex_str.map(|slurs| {
RegexBuilder::new(slurs)
.case_insensitive(true)
.build()
.expect("compile regex")
})
}
pub fn check_slurs(text: &str, slur_regex: &Option<Regex>) -> Result<(), LemmyError> {
if let Err(slurs) = slur_check(text, slur_regex) {
Err(LemmyError::from_error_message(
anyhow::anyhow!("{}", slurs_vec_to_str(&slurs)),
"slurs",
))
} else {
Ok(())
}
}
pub fn check_slurs_opt(
text: &Option<String>,
slur_regex: &Option<Regex>,
) -> Result<(), LemmyError> {
match text {
Some(t) => check_slurs(t, slur_regex),
None => Ok(()),
}
}
pub(crate) fn slurs_vec_to_str(slurs: &[&str]) -> String {
let start = "No slurs - ";
let combined = &slurs.join(", ");
[start, combined].concat()
}
pub fn generate_random_string() -> String {
thread_rng()
.sample_iter(&Alphanumeric)
.map(char::from)
.take(30)
.collect()
}
pub fn markdown_to_html(text: &str) -> String {
comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
}
// TODO nothing is done with community / group webfingers yet, so just ignore those for now
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct MentionData {
pub name: String,
pub domain: String,
}
impl MentionData {
pub fn is_local(&self, hostname: &str) -> bool {
hostname.eq(&self.domain)
}
pub fn full_name(&self) -> String {
format!("@{}@{}", &self.name, &self.domain)
}
}
pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
let mut out: Vec<MentionData> = Vec::new();
for caps in MENTIONS_REGEX.captures_iter(text) {
out.push(MentionData {
name: caps["name"].to_string(),
domain: caps["domain"].to_string(),
});
}
out.into_iter().unique().collect()
}
fn has_newline(name: &str) -> bool {
name.contains('\n')
}
pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
name.chars().count() <= actor_name_max_length
&& VALID_ACTOR_NAME_REGEX.is_match(name)
&& !has_newline(name)
}
// Can't do a regex here, reverse lookarounds not supported
pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
!name.starts_with('@')
&& !name.starts_with('\u{200b}')
&& name.chars().count() >= 3
&& name.chars().count() <= actor_name_max_length
&& !has_newline(name)
}
pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id)
}
pub fn is_valid_post_title(title: &str) -> bool {
VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title)
}
pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
IpAddr(
conn_info
.realip_remote_addr()
.unwrap_or("127.0.0.1:12345")
.split(':')
.next()
.unwrap_or("127.0.0.1")
.to_string(),
)
}
pub fn clean_url_params(url: &Url) -> Url {
let mut url_out = url.clone();
if url.query().is_some() {
let new_query = url
.query_pairs()
.filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
.map(|q| format!("{}={}", q.0, q.1))
.join("&");
url_out.set_query(Some(&new_query));
}
url_out
}
pub fn generate_domain_url(actor_id: &Url) -> Result<String, LemmyError> {
Ok(actor_id.host_str().context(location_info!())?.to_string())
}
#[cfg(test)]
mod tests {
use crate::utils::{clean_url_params, is_valid_post_title};
use url::Url;
#[test]
fn test_clean_url_params() {
let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
let cleaned = clean_url_params(&url);
let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
assert_eq!(expected.to_string(), cleaned.to_string());
let url = Url::parse("https://example.com/path/123").unwrap();
let cleaned = clean_url_params(&url);
assert_eq!(url.to_string(), cleaned.to_string());
}
#[test]
fn regex_checks() {
assert!(!is_valid_post_title("hi"));
assert!(is_valid_post_title("him"));
assert!(!is_valid_post_title("n\n\n\n\nanother"));
assert!(!is_valid_post_title("hello there!\n this is a test."));
assert!(is_valid_post_title("hello there! this is a test."));
}
}

View file

@ -0,0 +1,3 @@
pub fn markdown_to_html(text: &str) -> String {
comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
}

View file

@ -0,0 +1,48 @@
use itertools::Itertools;
use once_cell::sync::Lazy;
use regex::Regex;
static MENTIONS_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex")
});
// TODO nothing is done with community / group webfingers yet, so just ignore those for now
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct MentionData {
pub name: String,
pub domain: String,
}
impl MentionData {
pub fn is_local(&self, hostname: &str) -> bool {
hostname.eq(&self.domain)
}
pub fn full_name(&self) -> String {
format!("@{}@{}", &self.name, &self.domain)
}
}
pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
let mut out: Vec<MentionData> = Vec::new();
for caps in MENTIONS_REGEX.captures_iter(text) {
out.push(MentionData {
name: caps["name"].to_string(),
domain: caps["domain"].to_string(),
});
}
out.into_iter().unique().collect()
}
#[cfg(test)]
mod test {
use crate::utils::mention::scrape_text_for_mentions;
#[test]
fn test_mentions_regex() {
let text = "Just read a great blog post by [@tedu@honk.teduangst.com](/u/test). And another by !test_community@fish.teduangst.com . Another [@lemmy@lemmy-alpha:8540](/u/fish)";
let mentions = scrape_text_for_mentions(text);
assert_eq!(mentions[0].name, "tedu".to_string());
assert_eq!(mentions[0].domain, "honk.teduangst.com".to_string());
assert_eq!(mentions[1].domain, "lemmy-alpha:8540".to_string());
}
}

View file

@ -0,0 +1,5 @@
pub mod markdown;
pub mod mention;
pub mod slurs;
pub mod time;
pub mod validation;

View file

@ -0,0 +1,109 @@
use crate::error::LemmyError;
use regex::{Regex, RegexBuilder};
pub fn remove_slurs(test: &str, slur_regex: &Option<Regex>) -> String {
if let Some(slur_regex) = slur_regex {
slur_regex.replace_all(test, "*removed*").to_string()
} else {
test.to_string()
}
}
pub(crate) fn slur_check<'a>(
test: &'a str,
slur_regex: &'a Option<Regex>,
) -> Result<(), Vec<&'a str>> {
if let Some(slur_regex) = slur_regex {
let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
// Unique
matches.sort_unstable();
matches.dedup();
if matches.is_empty() {
Ok(())
} else {
Err(matches)
}
} else {
Ok(())
}
}
pub fn build_slur_regex(regex_str: Option<&str>) -> Option<Regex> {
regex_str.map(|slurs| {
RegexBuilder::new(slurs)
.case_insensitive(true)
.build()
.expect("compile regex")
})
}
pub fn check_slurs(text: &str, slur_regex: &Option<Regex>) -> Result<(), LemmyError> {
if let Err(slurs) = slur_check(text, slur_regex) {
Err(LemmyError::from_error_message(
anyhow::anyhow!("{}", slurs_vec_to_str(&slurs)),
"slurs",
))
} else {
Ok(())
}
}
pub fn check_slurs_opt(
text: &Option<String>,
slur_regex: &Option<Regex>,
) -> Result<(), LemmyError> {
match text {
Some(t) => check_slurs(t, slur_regex),
None => Ok(()),
}
}
pub(crate) fn slurs_vec_to_str(slurs: &[&str]) -> String {
let start = "No slurs - ";
let combined = &slurs.join(", ");
[start, combined].concat()
}
#[cfg(test)]
mod test {
use crate::utils::slurs::{remove_slurs, slur_check, slurs_vec_to_str};
use regex::RegexBuilder;
#[test]
fn test_slur_filter() {
let slur_regex = Some(RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|ni((g{2,}|q)+|[gq]{2,})[e3r]+(s|z)?|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap());
let test =
"faggot test kike tranny cocksucker retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
let slur_free = "No slurs here";
assert_eq!(
remove_slurs(test, &slur_regex),
"*removed* test *removed* *removed* *removed* *removed*. Capitalized *removed*. This is a bunch of other safe text."
.to_string()
);
let has_slurs_vec = vec![
"Niggerz",
"cocksucker",
"faggot",
"kike",
"retardeds",
"tranny",
];
let has_slurs_err_str = "No slurs - Niggerz, cocksucker, faggot, kike, retardeds, tranny";
assert_eq!(slur_check(test, &slur_regex), Err(has_slurs_vec));
assert_eq!(slur_check(slur_free, &slur_regex), Ok(()));
if let Err(slur_vec) = slur_check(test, &slur_regex) {
assert_eq!(&slurs_vec_to_str(&slur_vec), has_slurs_err_str);
}
}
// These helped with testing
// #[test]
// fn test_send_email() {
// let result = send_email("not a subject", "test_email@gmail.com", "ur user", "<h1>HI there</h1>");
// assert!(result.is_ok());
// }
}

View file

@ -0,0 +1,12 @@
use chrono::{DateTime, FixedOffset, NaiveDateTime};
pub fn naive_from_unix(time: i64) -> NaiveDateTime {
NaiveDateTime::from_timestamp_opt(time, 0).expect("convert datetime")
}
pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
DateTime::<FixedOffset>::from_utc(
datetime,
FixedOffset::east_opt(0).expect("create fixed offset"),
)
}

View file

@ -0,0 +1,131 @@
use itertools::Itertools;
use once_cell::sync::Lazy;
use regex::Regex;
use url::Url;
static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
static VALID_POST_TITLE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
});
// taken from https://en.wikipedia.org/wiki/UTM_parameters
static CLEAN_URL_PARAMS_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
.expect("compile regex")
});
fn has_newline(name: &str) -> bool {
name.contains('\n')
}
pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
name.chars().count() <= actor_name_max_length
&& VALID_ACTOR_NAME_REGEX.is_match(name)
&& !has_newline(name)
}
// Can't do a regex here, reverse lookarounds not supported
pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
!name.starts_with('@')
&& !name.starts_with('\u{200b}')
&& name.chars().count() >= 3
&& name.chars().count() <= actor_name_max_length
&& !has_newline(name)
}
pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id)
}
pub fn is_valid_post_title(title: &str) -> bool {
VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title)
}
pub fn clean_url_params(url: &Url) -> Url {
let mut url_out = url.clone();
if url.query().is_some() {
let new_query = url
.query_pairs()
.filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
.map(|q| format!("{}={}", q.0, q.1))
.join("&");
url_out.set_query(Some(&new_query));
}
url_out
}
#[cfg(test)]
mod tests {
use crate::utils::validation::{
clean_url_params,
is_valid_actor_name,
is_valid_display_name,
is_valid_matrix_id,
is_valid_post_title,
};
use url::Url;
#[test]
fn test_clean_url_params() {
let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
let cleaned = clean_url_params(&url);
let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
assert_eq!(expected.to_string(), cleaned.to_string());
let url = Url::parse("https://example.com/path/123").unwrap();
let cleaned = clean_url_params(&url);
assert_eq!(url.to_string(), cleaned.to_string());
}
#[test]
fn regex_checks() {
assert!(!is_valid_post_title("hi"));
assert!(is_valid_post_title("him"));
assert!(!is_valid_post_title("n\n\n\n\nanother"));
assert!(!is_valid_post_title("hello there!\n this is a test."));
assert!(is_valid_post_title("hello there! this is a test."));
}
#[test]
fn test_valid_actor_name() {
let actor_name_max_length = 20;
assert!(is_valid_actor_name("Hello_98", actor_name_max_length));
assert!(is_valid_actor_name("ten", actor_name_max_length));
assert!(!is_valid_actor_name("Hello-98", actor_name_max_length));
assert!(!is_valid_actor_name("a", actor_name_max_length));
assert!(!is_valid_actor_name("", actor_name_max_length));
}
#[test]
fn test_valid_display_name() {
let actor_name_max_length = 20;
assert!(is_valid_display_name("hello @there", actor_name_max_length));
assert!(!is_valid_display_name(
"@hello there",
actor_name_max_length
));
// Make sure zero-space with an @ doesn't work
assert!(!is_valid_display_name(
&format!("{}@my name is", '\u{200b}'),
actor_name_max_length
));
}
#[test]
fn test_valid_post_title() {
assert!(is_valid_post_title("Post Title"));
assert!(is_valid_post_title(" POST TITLE 😃😃😃😃😃"));
assert!(!is_valid_post_title("\n \n \n \n ")); // tabs/spaces/newlines
}
#[test]
fn test_valid_matrix_id() {
assert!(is_valid_matrix_id("@dess:matrix.org"));
assert!(!is_valid_matrix_id("dess:matrix.org"));
assert!(!is_valid_matrix_id(" @dess:matrix.org"));
assert!(!is_valid_matrix_id("@dess:matrix.org t"));
}
}