bookwyrm/bookwyrm/models/bookwyrm_import_job.py

463 lines
15 KiB
Python
Raw Normal View History

2023-10-22 05:52:29 +00:00
"""Import a user from another Bookwyrm instance"""
import json
import logging
from django.db.models import FileField, JSONField, CharField
from django.utils import timezone
from django.utils.html import strip_tags
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
from bookwyrm import activitypub
from bookwyrm import models
from bookwyrm.tasks import app, IMPORTS
2023-10-22 05:52:29 +00:00
from bookwyrm.models.job import ParentJob, ParentTask, SubTask
from bookwyrm.utils.tar import BookwyrmTarFile
logger = logging.getLogger(__name__)
class BookwyrmImportJob(ParentJob):
    """A single request to import a BookWyrm user backup archive."""

    # the backup archive; the import task opens it as a gzipped tar
    archive_file = FileField(null=True, blank=True)
    # filled in by the import task with the JSON document read from the archive
    import_data = JSONField(null=True)
    # the "include_*" option names selecting which parts of the backup to import
    required = DjangoArrayField(
        CharField(max_length=50, blank=True),
        blank=True,
    )

    def start_job(self):
        """Queue the import task for this job."""
        start_import_task.delay(job_id=self.id, no_children=True)
@app.task(queue=IMPORTS, base=ParentTask)
def start_import_task(**kwargs):
    """Run every requested part of a user import for one job.

    Expects ``job_id`` in ``kwargs``. The job's archive is opened as a
    gzipped tar, the member whose name starts with "archive" is parsed as
    JSON into ``job.import_data``, and each import step whose option name is
    listed in ``job.required`` is run. Books are always processed.

    The job status is set to "complete" on success and "failed" if any step
    raises; the exception is logged rather than propagated.
    """
    job = BookwyrmImportJob.objects.get(id=kwargs["job_id"])
    archive_file = job.archive_file

    # don't start the job if it was stopped from the UI
    if job.complete:
        return

    try:
        archive_file.open("rb")
        with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar:
            json_filename = next(
                filter(lambda n: n.startswith("archive"), tar.getnames())
            )
            job.import_data = json.loads(tar.read(json_filename).decode("utf-8"))

            if "include_user_profile" in job.required:
                update_user_profile(job.user, tar, job.import_data)
            if "include_user_settings" in job.required:
                update_user_settings(job.user, job.import_data)
            if "include_goals" in job.required:
                update_goals(job.user, job.import_data.get("goals", []))
            if "include_saved_lists" in job.required:
                upsert_saved_lists(job.user, job.import_data.get("saved_lists", []))
            if "include_follows" in job.required:
                upsert_follows(job.user, job.import_data.get("follows", []))
            if "include_blocks" in job.required:
                upsert_user_blocks(job.user, job.import_data.get("blocks", []))

            process_books(job, tar)

            job.set_status("complete")

    except Exception as err:  # pylint: disable=broad-except
        logger.exception("User Import Job %s Failed with error: %s", job.id, err)
        job.set_status("failed")

    finally:
        # release the archive handle on the failure path too, not only on success
        archive_file.close()
def process_books(job, tar):
    """
    Process user import data related to books
    We always import the books even if not assigning
    them to shelves, lists etc

    Reads the "books" entries from ``job.import_data``, finds or creates an
    edition for each one, then runs the per-book import steps whose option
    name is listed in ``job.required``. A missing or null "books" entry, or
    a missing or null per-book collection, is treated as empty.
    """
    required = job.required

    for data in job.import_data.get("books") or []:
        book = get_or_create_edition(data, tar)

        if "include_shelves" in required:
            upsert_shelves(book, job.user, data)

        if "include_readthroughs" in required:
            upsert_readthroughs(data.get("readthroughs") or [], job.user, book.id)

        if "include_comments" in required:
            upsert_statuses(
                job.user, models.Comment, data.get("comments") or [], book.remote_id
            )

        if "include_quotations" in required:
            upsert_statuses(
                job.user,
                models.Quotation,
                data.get("quotations") or [],
                book.remote_id,
            )

        if "include_reviews" in required:
            upsert_statuses(
                job.user, models.Review, data.get("reviews") or [], book.remote_id
            )

        if "include_lists" in required:
            upsert_lists(job.user, data.get("lists") or [], book.id)
def get_or_create_edition(book_data, tar):
    """Take a JSON string of work and edition data,
    find or create the edition and work in the database and
    return an edition instance

    ``book_data`` is read for its "edition", "authors" and "work" entries.
    The "edition" and "work" dicts are modified in place. ``tar`` is only
    used to write the cover image when the edition JSON carries a cover url.
    """
    edition = book_data.get("edition")
    existing = models.Edition.find_existing(edition)
    if existing:
        return existing

    # make sure we have the authors in the local DB
    # replace the old author ids in the edition JSON
    edition["authors"] = []
    # a missing or null "authors" entry is treated as no authors
    for author in book_data.get("authors") or []:
        parsed_author = activitypub.parse(author)
        instance = parsed_author.to_model(
            model=models.Author, save=True, overwrite=True
        )
        edition["authors"].append(instance.remote_id)

    # we will add the cover later from the tar
    # don't try to load it from the old server
    # `or {}` also covers a cover key that is present but null
    cover = edition.get("cover") or {}
    cover_path = cover.get("url", None)
    edition["cover"] = {}

    # first we need the parent work to exist
    work = book_data.get("work")
    work["editions"] = []
    parsed_work = activitypub.parse(work)
    work_instance = parsed_work.to_model(model=models.Work, save=True, overwrite=True)

    # now we have a work we can add it to the edition
    # and create the edition model instance
    edition["work"] = work_instance.remote_id
    parsed_edition = activitypub.parse(edition)
    book = parsed_edition.to_model(model=models.Edition, save=True, overwrite=True)

    # set the cover image from the tar
    if cover_path:
        tar.write_image_to_file(cover_path, book.cover)

    return book
def upsert_readthroughs(data, user, book_id):
    """Create a ReadThrough for each exported entry that has no exact match.

    Each entry in ``data`` must provide every copied field. An entry is
    skipped when a ReadThrough with the same field values already exists
    for this user and book.
    """
    copied_fields = (
        "progress_mode",
        "start_date",
        "finish_date",
        "stopped_date",
        "is_active",
    )

    for entry in data:
        lookup = {name: entry[name] for name in copied_fields}
        lookup["user_id"] = user.id
        lookup["book_id"] = book_id

        if models.ReadThrough.objects.filter(**lookup).first():
            continue
        models.ReadThrough.objects.create(**lookup)
def upsert_statuses(user, cls, data, book_remote_id):
    """Take a JSON string of a status and
    find or create the instances in the database

    ``cls`` is the status model passed to ``to_model``. Each status dict in
    ``data`` is modified in place: it is re-attributed to ``user``, its
    follower addresses are rewritten, its replies are cleared and it is
    pointed at ``book_remote_id``. Statuses whose original author does not
    pass ``is_alias`` are skipped with a warning.
    """
    position_fields = (
        "progress",
        "progress_mode",
        "position",
        "endposition",
        "position_mode",
    )

    for status in data:
        # don't let l33t hax0rs steal other people's posts
        if not is_alias(user, status["attributedTo"]):
            logger.warning("User does not have permission to import statuses")
            continue

        # update ids and remove replies
        status["attributedTo"] = user.remote_id
        status["to"] = update_followers_address(user, status["to"])
        status["cc"] = update_followers_address(user, status["cc"])
        # this parses incorrectly but we can't set it without knowing the new id
        status["replies"] = {}
        status["inReplyToBook"] = book_remote_id

        parsed = activitypub.parse(status)
        # don't duplicate posts on multiple import
        if status_already_exists(user, parsed):
            continue

        instance = parsed.to_model(model=cls, save=True, overwrite=True)

        for field_name in position_fields:
            if status.get(field_name):
                # set the named field; plain attribute assignment would
                # only ever write an attribute literally called "val"
                setattr(instance, field_name, status[field_name])

        instance.remote_id = instance.get_remote_id()  # update the remote_id
        instance.save()  # save and broadcast
def upsert_lists(user, lists, book_id):
    """Add the book to each exported list, creating the list when needed.

    Every entry in ``lists`` holds a list and its list item as AP objects.
    New IDs are being created, so an exported id can't be trusted to exist
    or be accurate: a list is matched on name and owner, and ``to_model``
    is only used once no match was found.
    """
    book = models.Edition.objects.get(id=book_id)

    for list_data in lists:
        target = models.List.objects.filter(
            name=list_data["name"], user=user
        ).first()

        if not target:
            list_data["owner"] = user.remote_id
            target = activitypub.parse(list_data).to_model(
                model=models.List, save=True, overwrite=True
            )
            target.privacy = list_data["privacy"]
            target.save()

        if models.ListItem.objects.filter(book=book, book_list=target).exists():
            continue

        # new items are appended after the books already on the list
        next_position = target.books.count() + 1
        item_data = list_data["list_item"]
        models.ListItem.objects.create(
            book=book,
            book_list=target,
            user=user,
            notes=item_data["notes"],
            approved=item_data["approved"],
            order=next_position,
        )
def upsert_shelves(book, user, book_data):
    """Take shelf JSON objects and create
    DB entries if they don't already exist

    Shelves are matched on name and owner. A missing or null "shelves"
    entry in ``book_data`` is treated as no shelves.
    """
    for shelf in book_data.get("shelves") or []:
        shelf_name = shelf["name"]
        book_shelf = models.Shelf.objects.filter(name=shelf_name, user=user).first()
        if not book_shelf:
            book_shelf = models.Shelf.objects.create(name=shelf_name, user=user)

        # add the book as a ShelfBook if needed
        already_shelved = models.ShelfBook.objects.filter(
            book=book, shelf=book_shelf, user=user
        ).exists()
        if not already_shelved:
            models.ShelfBook.objects.create(
                book=book, shelf=book_shelf, user=user, shelved_date=timezone.now()
            )
def update_user_profile(user, tar, data):
    """update the user's profile from import data

    Sets ``user.name`` (falling back to "preferredUsername" when "name" is
    empty) and ``user.summary`` with tags stripped, then saves those two
    fields. When the data has an icon url, the first tar member whose name
    starts with "avatar" is written to ``user.avatar``.
    """
    name = data.get("name")
    user.name = name if name else data.get("preferredUsername")

    # strip_tags() coerces its argument to str, so a missing summary would
    # otherwise be stored as the literal text "None"
    summary = data.get("summary")
    user.summary = strip_tags(summary) if summary is not None else None
    user.save(update_fields=["name", "summary"])

    # the icon may be absent or null when the exported user had no avatar
    icon = data.get("icon") or {}
    if not icon.get("url"):
        return

    avatar_filename = next(
        (n for n in tar.getnames() if n.startswith("avatar")), None
    )
    if avatar_filename is None:
        logger.warning("User import archive has an icon url but no avatar file")
        return
    tar.write_image_to_file(avatar_filename, user.avatar)
def update_user_settings(user, data):
    """Copy the exported settings onto the user and save those fields.

    Three values are read from the top level of ``data`` under their
    ActivityPub names; the rest are read from ``data["settings"]`` under
    the same name as the user attribute they are written to.
    """
    # ActivityPub key -> user attribute
    activity_to_attribute = {
        "manuallyApprovesFollowers": "manually_approves_followers",
        "hideFollows": "hide_follows",
        "discoverable": "discoverable",
    }
    for activity_key, attribute in activity_to_attribute.items():
        setattr(user, attribute, data[activity_key])

    settings_attributes = (
        "show_goal",
        "show_suggested_users",
        "default_post_privacy",
        "preferred_timezone",
    )
    for attribute in settings_attributes:
        setattr(user, attribute, data["settings"][attribute])

    user.save(
        update_fields=[*activity_to_attribute.values(), *settings_attributes]
    )
@app.task(queue=IMPORTS, base=SubTask)
def update_user_settings_task(job_id):
    """wrapper task for user's settings import"""
    parent_job = BookwyrmImportJob.objects.get(id=job_id)

    # update_user_settings reads its keys from the top level of the import
    # data, which is also what start_import_task passes to it
    return update_user_settings(parent_job.user, parent_job.import_data)
def update_goals(user, data):
    """Create or overwrite the user's annual goals from import data.

    A goal is matched on its year. When the user already has a goal for
    that year, every key of the imported goal is written onto it;
    otherwise a new goal is created from the imported dict, which gets a
    "user" key added to it.
    """
    for goal in data:
        current = models.AnnualGoal.objects.filter(
            year=goal["year"], user=user
        ).first()

        if not current:
            goal["user"] = user
            models.AnnualGoal.objects.create(**goal)
            continue

        # edit the existing goal
        for field_name, value in goal.items():
            setattr(current, field_name, value)
        current.save()
@app.task(queue=IMPORTS, base=SubTask)
def update_goals_task(job_id):
    """wrapper task for user's goals import"""
    parent_job = BookwyrmImportJob.objects.get(id=job_id)

    # default to an empty list, as start_import_task does, so an archive
    # without goals imports nothing instead of failing on None
    return update_goals(parent_job.user, parent_job.import_data.get("goals", []))
def upsert_saved_lists(user, values):
    """Add every list that can be resolved from ``values`` to the user's saved lists.

    ``values`` holds remote ids; ids that do not resolve to a list are skipped.
    """
    for remote_id in values:
        resolved = activitypub.resolve_remote_id(remote_id, models.List)
        if not resolved:
            continue
        user.saved_lists.add(resolved)
@app.task(queue=IMPORTS, base=SubTask)
def upsert_saved_lists_task(job_id):
    """wrapper task for user's saved lists import"""
    parent_job = BookwyrmImportJob.objects.get(id=job_id)

    # default to an empty list, as start_import_task does, so an archive
    # without saved lists imports nothing instead of failing on None
    return upsert_saved_lists(
        parent_job.user, parent_job.import_data.get("saved_lists", [])
    )
def upsert_follows(user, values):
    """Request to follow every user that can be resolved from ``values``.

    ``values`` holds remote ids; ids that do not resolve to a user are skipped.
    """
    for remote_id in values:
        followee = activitypub.resolve_remote_id(remote_id, models.User)
        if not followee:
            continue

        follow_request, created = models.UserFollowRequest.objects.get_or_create(
            user_subject=user,
            user_object=followee,
        )
        if created:
            continue

        # this request probably failed to connect with the remote
        # and should save to trigger a re-broadcast
        follow_request.save()
@app.task(queue=IMPORTS, base=SubTask)
def upsert_follows_task(job_id):
    """wrapper task for user's follows import"""
    parent_job = BookwyrmImportJob.objects.get(id=job_id)

    # default to an empty list, as start_import_task does, so an archive
    # without follows imports nothing instead of failing on None
    return upsert_follows(parent_job.user, parent_job.import_data.get("follows", []))
def upsert_user_blocks(user, user_ids):
    """Block every user that can be resolved from ``user_ids``.

    Ids that do not resolve to a user, and users that are already blocked,
    are skipped.
    """
    for remote_id in user_ids:
        blocked = activitypub.resolve_remote_id(remote_id, models.User)
        if not blocked:
            continue

        already_blocked = models.UserBlocks.objects.filter(
            user_subject=user, user_object=blocked
        ).exists()
        if already_blocked:
            continue

        models.UserBlocks.objects.create(user_subject=user, user_object=blocked)
        # remove the blocked users's lists from the groups
        models.List.remove_from_group(user, blocked)
        # remove the blocked user from all blocker's owned groups
        models.GroupMember.remove(user, blocked)
@app.task(queue=IMPORTS, base=SubTask)
def upsert_user_blocks_task(job_id):
    """wrapper task for user's blocks import"""
    parent_job = BookwyrmImportJob.objects.get(id=job_id)

    # read the same "blocks" key, with the same empty default, that
    # start_import_task uses for this import step
    return upsert_user_blocks(
        parent_job.user, parent_job.import_data.get("blocks", [])
    )
def update_followers_address(user, field):
    """Point any followers address in ``field`` at the local user.

    An address counts as a followers address when the text after its last
    "/" is "followers". ``field`` is modified in place and returned.
    """
    for index, address in enumerate(field):
        last_segment = address.rpartition("/")[2]
        if last_segment == "followers":
            field[index] = user.followers_url
    return field
def is_alias(user, remote_id):
    """Report whether the remote user's profile points at ``user``.

    When the remote user has ``moved_to`` set, only that value is compared
    with the user's remote id. Otherwise the user must be among the remote
    user's ``also_known_as`` entries. An unresolvable remote id gives False.
    """
    remote_user = activitypub.resolve_remote_id(
        remote_id=remote_id, model=models.User, save=False
    )
    if not remote_user:
        return False

    if remote_user.moved_to:
        return user.remote_id == remote_user.moved_to

    if remote_user.also_known_as:
        return user in remote_user.also_known_as.all()

    return False
def status_already_exists(user, status):
    """Report whether this user already has a status with the same content
    and published date.

    to_model() can't be relied on for this because it only matches on
    remote_id, which has to change *after* saving since it needs the
    primary key (id).
    """
    matching = models.Status.objects.filter(
        user=user,
        content=status.content,
        published_date=status.published,
    )
    return matching.exists()