Don't federate embeds, but refetch them for security (#106)

Don't federate embeds, but refetch them for security (#ref 647)

Co-authored-by: Felix Ableitner <me@nutomic.com>
Reviewed-on: https://yerbamate.dev/LemmyNet/lemmy/pulls/106
This commit is contained in:
nutomic 2020-09-24 17:43:42 +00:00 committed by dessalines
parent 4de80dc29d
commit bfed8a8be4
7 changed files with 200 additions and 258 deletions

2
Cargo.lock generated
View file

@ -1833,7 +1833,6 @@ dependencies = [
"lemmy_websocket",
"log",
"openssl",
"percent-encoding",
"rand 0.7.3",
"reqwest",
"serde 1.0.116",
@ -1984,6 +1983,7 @@ dependencies = [
"lettre_email",
"log",
"openssl",
"percent-encoding",
"rand 0.7.3",
"regex",
"reqwest",

View file

@ -31,7 +31,6 @@ strum_macros = "0.19"
jsonwebtoken = "7.0"
lazy_static = "1.3"
url = { version = "2.1", features = ["serde"] }
percent-encoding = "2.1"
openssl = "0.10"
http = "0.2"
http-signature-normalization-actix = { version = "0.4", default-features = false, features = ["sha-2"] }

View file

@ -1,6 +1,5 @@
use crate::claims::Claims;
use actix_web::{web, web::Data};
use anyhow::anyhow;
use lemmy_db::{
community::Community,
community_view::CommunityUserBanView,
@ -10,18 +9,8 @@ use lemmy_db::{
DbPool,
};
use lemmy_structs::{blocking, comment::*, community::*, post::*, site::*, user::*};
use lemmy_utils::{
apub::get_apub_protocol_string,
request::{retry, RecvError},
settings::Settings,
APIError,
ConnectionId,
LemmyError,
};
use lemmy_utils::{settings::Settings, APIError, ConnectionId, LemmyError};
use lemmy_websocket::{serialize_websocket_message, LemmyContext, UserOperation};
use log::error;
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest::Client;
use serde::Deserialize;
use std::process::Command;
use url::Url;
@ -361,179 +350,12 @@ pub(crate) fn espeak_wav_base64(text: &str) -> Result<String, LemmyError> {
Ok(base64)
}
/// JSON body of an iframely oembed reply, as deserialized by `fetch_iframely`.
///
/// Every field is optional, so a reply that omits any of them still deserializes.
#[derive(Deserialize, Debug)]
pub(crate) struct IframelyResponse {
/// Returned to callers of `fetch_iframely_and_pictrs_data` as the embed title.
title: Option<String>,
/// Returned to callers of `fetch_iframely_and_pictrs_data` as the embed description.
description: Option<String>,
/// Image URL that `fetch_iframely_and_pictrs_data` hands to `fetch_pictrs`.
thumbnail_url: Option<String>,
/// Returned to callers of `fetch_iframely_and_pictrs_data` as the embed html.
html: Option<String>,
}
pub(crate) async fn fetch_iframely(
client: &Client,
url: &str,
) -> Result<IframelyResponse, LemmyError> {
let fetch_url = format!("http://iframely/oembed?url={}", url);
let response = retry(|| client.get(&fetch_url).send()).await?;
let res: IframelyResponse = response
.json()
.await
.map_err(|e| RecvError(e.to_string()))?;
Ok(res)
}
/// JSON body of the pictrs `/image/download` reply, as deserialized by `fetch_pictrs`.
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct PictrsResponse {
/// Stored files; `fetch_iframely_and_pictrs_data` reads the first entry.
files: Vec<PictrsFile>,
/// Status text; `fetch_pictrs` treats anything other than "ok" as an error message.
msg: String,
}
/// One entry of `PictrsResponse::files`.
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct PictrsFile {
/// Identifier appended to `/pictrs/image/` when the thumbnail URL is built.
file: String,
/// NOTE(review): deserialized but not read anywhere in the visible code.
delete_token: String,
}
/// Asks the pictrs service to download `image_url` and returns its reply.
///
/// The URL is first checked with `is_image_content_type`; nothing is sent to
/// pictrs when that check fails.
///
/// # Errors
///
/// Fails when the content-type check fails, when the request issued through
/// `retry` fails, when the body cannot be deserialized (`RecvError`), or when
/// pictrs answers with a `msg` other than "ok" (the `msg` becomes the error).
pub(crate) async fn fetch_pictrs(
  client: &Client,
  image_url: &str,
) -> Result<PictrsResponse, LemmyError> {
  is_image_content_type(client, image_url).await?;

  // TODO the percent-encoding might not be needed
  let encoded_image_url = utf8_percent_encode(image_url, NON_ALPHANUMERIC);
  let download_url = format!(
    "http://pictrs:8080/image/download?url={}",
    encoded_image_url
  );

  let raw_response = retry(|| client.get(&download_url).send()).await?;
  let pictrs_response: PictrsResponse = raw_response
    .json()
    .await
    .map_err(|e| RecvError(e.to_string()))?;

  if pictrs_response.msg != "ok" {
    return Err(anyhow!("{}", &pictrs_response.msg).into());
  }
  Ok(pictrs_response)
}
async fn fetch_iframely_and_pictrs_data(
client: &Client,
url: Option<String>,
) -> (
Option<String>,
Option<String>,
Option<String>,
Option<String>,
) {
match &url {
Some(url) => {
// Fetch iframely data
let (iframely_title, iframely_description, iframely_thumbnail_url, iframely_html) =
match fetch_iframely(client, url).await {
Ok(res) => (res.title, res.description, res.thumbnail_url, res.html),
Err(e) => {
error!("iframely err: {}", e);
(None, None, None, None)
}
};
// Fetch pictrs thumbnail
let pictrs_hash = match iframely_thumbnail_url {
Some(iframely_thumbnail_url) => match fetch_pictrs(client, &iframely_thumbnail_url).await {
Ok(res) => Some(res.files[0].file.to_owned()),
Err(e) => {
error!("pictrs err: {}", e);
None
}
},
// Try to generate a small thumbnail if iframely is not supported
None => match fetch_pictrs(client, &url).await {
Ok(res) => Some(res.files[0].file.to_owned()),
Err(e) => {
error!("pictrs err: {}", e);
None
}
},
};
// The full urls are necessary for federation
let pictrs_thumbnail = if let Some(pictrs_hash) = pictrs_hash {
Some(format!(
"{}://{}/pictrs/image/{}",
get_apub_protocol_string(),
Settings::get().hostname,
pictrs_hash
))
} else {
None
};
(
iframely_title,
iframely_description,
iframely_html,
pictrs_thumbnail,
)
}
None => (None, None, None, None),
}
}
/// Requests `test` and succeeds only if the reply's `Content-Type` header
/// starts with `image/`.
///
/// # Errors
///
/// Fails when the request issued through `retry` fails, when the header is
/// missing ("No Content-Type header"), when its value cannot be converted with
/// `to_str`, or when it does not start with `image/` ("Not an image type.").
pub(crate) async fn is_image_content_type(client: &Client, test: &str) -> Result<(), LemmyError> {
  let response = retry(|| client.get(test).send()).await?;

  let content_type = response
    .headers()
    .get("Content-Type")
    .ok_or_else(|| anyhow!("No Content-Type header"))?
    .to_str()?;

  if !content_type.starts_with("image/") {
    return Err(anyhow!("Not an image type.").into());
  }
  Ok(())
}
// Unit tests for the helpers in this file.
// NOTE(review): `captcha_espeak_wav_base64` is imported twice in this module
// (here and before `test_espeak`); this looks like the old and the new side of
// a diff shown together — confirm which `use` line is the current one.
#[cfg(test)]
mod tests {
use crate::{captcha_espeak_wav_base64, is_image_content_type};
// Expects the first URL to pass `is_image_content_type` and the second to
// fail it. Both calls perform real HTTP requests.
#[test]
fn test_image() {
actix_rt::System::new("tset_image").block_on(async move {
let client = reqwest::Client::default();
assert!(is_image_content_type(&client, "https://1734811051.rsc.cdn77.org/data/images/full/365645/as-virus-kills-navajos-in-their-homes-tribal-women-provide-lifeline.jpg?w=600?w=650").await.is_ok());
assert!(is_image_content_type(&client,
"https://twitter.com/BenjaminNorton/status/1259922424272957440?s=20"
)
.await.is_err()
);
});
}
use crate::captcha_espeak_wav_base64;
// Expects `captcha_espeak_wav_base64` to return `Ok` for a short sample text.
#[test]
fn test_espeak() {
assert!(captcha_espeak_wav_base64("WxRt2l").is_ok())
}
// These helped with testing
// NOTE(review): the disabled tests below call `fetch_pictshare`, which is not
// defined in the visible code.
// #[test]
// fn test_iframely() {
// let res = fetch_iframely(client, "https://www.redspark.nu/?p=15341").await;
// assert!(res.is_ok());
// }
// #[test]
// fn test_pictshare() {
// let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
// assert!(res.is_ok());
// let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
// assert!(res_other.is_err());
// }
}

View file

@ -1,6 +1,5 @@
use crate::{
check_community_ban,
fetch_iframely_and_pictrs_data,
get_user_from_jwt,
get_user_from_jwt_opt,
is_mod_or_admin,
@ -25,6 +24,7 @@ use lemmy_db::{
use lemmy_structs::{blocking, post::*};
use lemmy_utils::{
apub::{make_apub_endpoint, EndpointType},
request::fetch_iframely_and_pictrs_data,
utils::{check_slurs, check_slurs_opt, is_valid_post_title},
APIError,
ConnectionId,

View file

@ -24,7 +24,7 @@ use activitystreams::{
Undo,
Update,
},
object::{kind::PageType, Image, Object, Page, Tombstone},
object::{kind::PageType, Image, Page, Tombstone},
prelude::*,
public,
};
@ -41,6 +41,7 @@ use lemmy_db::{
use lemmy_structs::blocking;
use lemmy_utils::{
location_info,
request::fetch_iframely_and_pictrs_data,
utils::{check_slurs, convert_datetime, remove_slurs},
LemmyError,
};
@ -104,24 +105,6 @@ impl ToApub for Post {
let url = self.url.as_ref().filter(|u| !u.is_empty());
if let Some(u) = url {
page.set_url(u.to_owned());
// Embeds
let mut page_preview = Page::new();
page_preview.set_url(u.to_owned());
if let Some(embed_title) = &self.embed_title {
page_preview.set_name(embed_title.to_owned());
}
if let Some(embed_description) = &self.embed_description {
page_preview.set_summary(embed_description.to_owned());
}
if let Some(embed_html) = &self.embed_html {
page_preview.set_content(embed_html.to_owned());
}
page.set_preview(page_preview.into_any_base()?);
}
if let Some(thumbnail_url) = &self.thumbnail_url {
@ -147,50 +130,6 @@ impl ToApub for Post {
}
}
/// Embed metadata read from the preview of a federated page by
/// `extract_embed_from_apub`.
struct EmbedType {
/// Taken from the preview page's `name`.
title: Option<String>,
/// Taken from the preview page's `summary`.
description: Option<String>,
/// Taken from the preview page's `content`.
html: Option<String>,
}
/// Reads embed title, description and html from the `preview` of `page`.
///
/// Returns an `EmbedType` with all fields `None` when the page has no preview.
///
/// # Errors
///
/// Fails when a preview is present but has no single entry, or when that entry
/// cannot be converted into a `Page`.
fn extract_embed_from_apub(
  page: &Ext1<Object<PageType>, PageExtension>,
) -> Result<EmbedType, LemmyError> {
  let preview = match page.inner.preview() {
    Some(preview) => preview,
    None => {
      return Ok(EmbedType {
        title: None,
        description: None,
        html: None,
      });
    }
  };

  let preview_page = Page::from_any_base(preview.one().context(location_info!())?.to_owned())?
    .context(location_info!())?;

  let title = preview_page
    .name()
    .and_then(|n| n.one())
    .and_then(|s| s.as_xsd_string())
    .map(|s| s.to_string());
  let description = preview_page
    .summary()
    .and_then(|s| s.as_single_xsd_string())
    .map(|s| s.to_string());
  let html = preview_page
    .content()
    .and_then(|c| c.as_single_xsd_string())
    .map(|s| s.to_string());

  Ok(EmbedType {
    title,
    description,
    html,
  })
}
#[async_trait::async_trait(?Send)]
impl FromApub for PostForm {
type ApubType = PageExt;
@ -237,8 +176,19 @@ impl FromApub for PostForm {
.map(|u| u.to_string()),
None => None,
};
let url = page
.inner
.url()
.map(|u| u.as_single_xsd_any_uri())
.flatten()
.map(|s| s.to_string());
let embed = extract_embed_from_apub(page)?;
let (iframely_title, iframely_description, iframely_html, pictrs_thumbnail) =
if let Some(url) = &url {
fetch_iframely_and_pictrs_data(context.client(), Some(url.to_owned())).await
} else {
(None, None, None, thumbnail_url)
};
let name = page
.inner
@ -248,12 +198,6 @@ impl FromApub for PostForm {
.as_single_xsd_string()
.context(location_info!())?
.to_string();
let url = page
.inner
.url()
.map(|u| u.as_single_xsd_any_uri())
.flatten()
.map(|s| s.to_string());
let body = page
.inner
.content()
@ -284,10 +228,10 @@ impl FromApub for PostForm {
deleted: None,
nsfw: ext.sensitive,
stickied: Some(ext.stickied),
embed_title: embed.title,
embed_description: embed.description,
embed_html: embed.html,
thumbnail_url,
embed_title: iframely_title,
embed_description: iframely_description,
embed_html: iframely_html,
thumbnail_url: pictrs_thumbnail,
ap_id: Some(check_actor_domain(page, expected_domain)?),
local: false,
})

View file

@ -16,6 +16,7 @@ lettre_email = "0.9"
log = "0.4"
itertools = "0.9"
rand = "0.7"
percent-encoding = "2.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", features = ["preserve_order"]}
thiserror = "1.0"

View file

@ -1,5 +1,9 @@
use crate::LemmyError;
use crate::{apub::get_apub_protocol_string, settings::Settings, LemmyError};
use anyhow::anyhow;
use log::error;
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest::Client;
use serde::Deserialize;
use std::future::Future;
use thiserror::Error;
@ -41,3 +45,175 @@ where
response
}
/// JSON body of an iframely oembed reply, as deserialized by `fetch_iframely`.
///
/// Every field is optional, so a reply that omits any of them still deserializes.
#[derive(Deserialize, Debug)]
pub(crate) struct IframelyResponse {
/// Returned to callers of `fetch_iframely_and_pictrs_data` as the embed title.
title: Option<String>,
/// Returned to callers of `fetch_iframely_and_pictrs_data` as the embed description.
description: Option<String>,
/// Image URL that `fetch_iframely_and_pictrs_data` hands to `fetch_pictrs`.
thumbnail_url: Option<String>,
/// Returned to callers of `fetch_iframely_and_pictrs_data` as the embed html.
html: Option<String>,
}
pub(crate) async fn fetch_iframely(
client: &Client,
url: &str,
) -> Result<IframelyResponse, LemmyError> {
let fetch_url = format!("http://iframely/oembed?url={}", url);
let response = retry(|| client.get(&fetch_url).send()).await?;
let res: IframelyResponse = response
.json()
.await
.map_err(|e| RecvError(e.to_string()))?;
Ok(res)
}
/// JSON body of the pictrs `/image/download` reply, as deserialized by `fetch_pictrs`.
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct PictrsResponse {
/// Stored files; `fetch_iframely_and_pictrs_data` reads the first entry.
files: Vec<PictrsFile>,
/// Status text; `fetch_pictrs` treats anything other than "ok" as an error message.
msg: String,
}
/// One entry of `PictrsResponse::files`.
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct PictrsFile {
/// Identifier appended to `/pictrs/image/` when the thumbnail URL is built.
file: String,
/// NOTE(review): deserialized but not read anywhere in the visible code.
delete_token: String,
}
/// Asks the pictrs service to download `image_url` and returns its reply.
///
/// The URL is first checked with `is_image_content_type`; nothing is sent to
/// pictrs when that check fails.
///
/// # Errors
///
/// Fails when the content-type check fails, when the request issued through
/// `retry` fails, when the body cannot be deserialized (`RecvError`), or when
/// pictrs answers with a `msg` other than "ok" (the `msg` becomes the error).
pub(crate) async fn fetch_pictrs(
  client: &Client,
  image_url: &str,
) -> Result<PictrsResponse, LemmyError> {
  is_image_content_type(client, image_url).await?;

  // TODO the percent-encoding might not be needed
  let encoded_image_url = utf8_percent_encode(image_url, NON_ALPHANUMERIC);
  let download_url = format!(
    "http://pictrs:8080/image/download?url={}",
    encoded_image_url
  );

  let raw_response = retry(|| client.get(&download_url).send()).await?;
  let pictrs_response: PictrsResponse = raw_response
    .json()
    .await
    .map_err(|e| RecvError(e.to_string()))?;

  if pictrs_response.msg != "ok" {
    return Err(anyhow!("{}", &pictrs_response.msg).into());
  }
  Ok(pictrs_response)
}
pub async fn fetch_iframely_and_pictrs_data(
client: &Client,
url: Option<String>,
) -> (
Option<String>,
Option<String>,
Option<String>,
Option<String>,
) {
match &url {
Some(url) => {
// Fetch iframely data
let (iframely_title, iframely_description, iframely_thumbnail_url, iframely_html) =
match fetch_iframely(client, url).await {
Ok(res) => (res.title, res.description, res.thumbnail_url, res.html),
Err(e) => {
error!("iframely err: {}", e);
(None, None, None, None)
}
};
// Fetch pictrs thumbnail
let pictrs_hash = match iframely_thumbnail_url {
Some(iframely_thumbnail_url) => match fetch_pictrs(client, &iframely_thumbnail_url).await {
Ok(res) => Some(res.files[0].file.to_owned()),
Err(e) => {
error!("pictrs err: {}", e);
None
}
},
// Try to generate a small thumbnail if iframely is not supported
None => match fetch_pictrs(client, &url).await {
Ok(res) => Some(res.files[0].file.to_owned()),
Err(e) => {
error!("pictrs err: {}", e);
None
}
},
};
// The full urls are necessary for federation
let pictrs_thumbnail = if let Some(pictrs_hash) = pictrs_hash {
Some(format!(
"{}://{}/pictrs/image/{}",
get_apub_protocol_string(),
Settings::get().hostname,
pictrs_hash
))
} else {
None
};
(
iframely_title,
iframely_description,
iframely_html,
pictrs_thumbnail,
)
}
None => (None, None, None, None),
}
}
/// Requests `test` and succeeds only if the reply's `Content-Type` header
/// starts with `image/`.
///
/// # Errors
///
/// Fails when the request issued through `retry` fails, when the header is
/// missing ("No Content-Type header"), when its value cannot be converted with
/// `to_str`, or when it does not start with `image/` ("Not an image type.").
async fn is_image_content_type(client: &Client, test: &str) -> Result<(), LemmyError> {
  let response = retry(|| client.get(test).send()).await?;

  let content_type = response
    .headers()
    .get("Content-Type")
    .ok_or_else(|| anyhow!("No Content-Type header"))?
    .to_str()?;

  if !content_type.starts_with("image/") {
    return Err(anyhow!("Not an image type.").into());
  }
  Ok(())
}
#[cfg(test)]
mod tests {
  use crate::request::is_image_content_type;

  /// Expects an image URL to pass `is_image_content_type` and a non-image URL
  /// to fail it. Both checks perform real HTTP requests.
  #[test]
  fn test_image() {
    actix_rt::System::new("tset_image").block_on(async move {
      let client = reqwest::Client::default();

      let image_url = "https://1734811051.rsc.cdn77.org/data/images/full/365645/as-virus-kills-navajos-in-their-homes-tribal-women-provide-lifeline.jpg?w=600?w=650";
      let image_check = is_image_content_type(&client, image_url).await;
      assert!(image_check.is_ok());

      let non_image_url = "https://twitter.com/BenjaminNorton/status/1259922424272957440?s=20";
      let non_image_check = is_image_content_type(&client, non_image_url).await;
      assert!(non_image_check.is_err());
    });
  }

  // These helped with testing

  // #[test]
  // fn test_iframely() {
  //   let res = fetch_iframely(client, "https://www.redspark.nu/?p=15341").await;
  //   assert!(res.is_ok());
  // }

  // #[test]
  // fn test_pictshare() {
  //   let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
  //   assert!(res.is_ok());
  //   let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
  //   assert!(res_other.is_err());
  // }
}