diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index dde7d133c..c2d897ce3 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -1,13 +1,14 @@ """ PROCEED WITH CAUTION: uses deduplication fields to permanently merge book data objects """ + from django.core.management.base import BaseCommand from django.db.models import Count from bookwyrm import models -from bookwyrm.management.merge import merge_objects -def dedupe_model(model): +def dedupe_model(model, dry_run=False): """combine duplicate editions and update related models""" + print(f"deduplicating {model.__name__}:") fields = model._meta.get_fields() dedupe_fields = [ f for f in fields if hasattr(f, "deduplication_field") and f.deduplication_field @@ -16,30 +17,42 @@ def dedupe_model(model): dupes = ( model.objects.values(field.name) .annotate(Count(field.name)) - .filter(**{"%s__count__gt" % field.name: 1}) + .filter(**{f"{field.name}__count__gt": 1}) + .exclude(**{field.name: ""}) + .exclude(**{f"{field.name}__isnull": True}) ) for dupe in dupes: value = dupe[field.name] - if not value or value == "": - continue print("----------") - print(dupe) objs = model.objects.filter(**{field.name: value}).order_by("id") canonical = objs.first() - print("keeping", canonical.remote_id) + action = "would merge" if dry_run else "merging" + print( + f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:" + ) for obj in objs[1:]: - print(obj.remote_id) - merge_objects(canonical, obj) + print(f"- {obj.remote_id}") + absorbed_fields = obj.merge_into(canonical, dry_run=dry_run) + print(f" absorbed fields: {absorbed_fields}") class Command(BaseCommand): """deduplicate allllll the book data models""" help = "merges duplicate book data" + + def add_arguments(self, parser): + """add the arguments for this command""" + parser.add_argument( + "--dry_run", + action="store_true", + help="don't actually merge, only print what would happen", + ) + # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): """run deduplications""" - dedupe_model(models.Edition) - dedupe_model(models.Work) - dedupe_model(models.Author) + dedupe_model(models.Edition, dry_run=options["dry_run"]) + dedupe_model(models.Work, dry_run=options["dry_run"]) + dedupe_model(models.Author, dry_run=options["dry_run"]) diff --git a/bookwyrm/management/merge.py b/bookwyrm/management/merge.py deleted file mode 100644 index f55229f18..000000000 --- a/bookwyrm/management/merge.py +++ /dev/null @@ -1,50 +0,0 @@ -from django.db.models import ManyToManyField - - -def update_related(canonical, obj): - """update all the models with fk to the object being removed""" - # move related models to canonical - related_models = [ - (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects - ] - for (related_field, related_model) in related_models: - # Skip the ManyToMany fields that aren’t auto-created. These - # should have a corresponding OneToMany field in the model for - # the linking table anyway. If we update it through that model - # instead then we won’t lose the extra fields in the linking - # table. - related_field_obj = related_model._meta.get_field(related_field) - if isinstance(related_field_obj, ManyToManyField): - through = related_field_obj.remote_field.through - if not through._meta.auto_created: - continue - related_objs = related_model.objects.filter(**{related_field: obj}) - for related_obj in related_objs: - print("replacing in", related_model.__name__, related_field, related_obj.id) - try: - setattr(related_obj, related_field, canonical) - related_obj.save() - except TypeError: - getattr(related_obj, related_field).add(canonical) - getattr(related_obj, related_field).remove(obj) - - -def copy_data(canonical, obj): - """try to get the most data possible""" - for data_field in obj._meta.get_fields(): - if not hasattr(data_field, "activitypub_field"): - continue - data_value = getattr(obj, data_field.name) - if not data_value: - continue - if not getattr(canonical, data_field.name): - print("setting data field", data_field.name, data_value) - setattr(canonical, data_field.name, data_value) - canonical.save() - - -def merge_objects(canonical, obj): - copy_data(canonical, obj) - update_related(canonical, obj) - # remove the outdated entry - obj.delete() diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py index 805dc73fa..66e60814a 100644 --- a/bookwyrm/management/merge_command.py +++ b/bookwyrm/management/merge_command.py @@ -1,4 +1,3 @@ -from bookwyrm.management.merge import merge_objects from django.core.management.base import BaseCommand @@ -9,6 +8,11 @@ class MergeCommand(BaseCommand): """add the arguments for this command""" parser.add_argument("--canonical", type=int, required=True) parser.add_argument("--other", type=int, required=True) + parser.add_argument( + "--dry_run", + action="store_true", + help="don't actually merge, only print what would happen", + ) # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): @@ -26,4 +30,8 @@ class MergeCommand(BaseCommand): print("other book doesn’t exist!") return - merge_objects(canonical, other) + absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"]) + + action = "would be" if options["dry_run"] else "has been" + print(f"{other.remote_id} {action} merged into {canonical.remote_id}") + print(f"absorbed fields: {absorbed_fields}") diff --git a/bookwyrm/migrations/0197_mergedauthor_mergedbook.py b/bookwyrm/migrations/0197_mergedauthor_mergedbook.py new file mode 100644 index 000000000..23ca38ab2 --- /dev/null +++ b/bookwyrm/migrations/0197_mergedauthor_mergedbook.py @@ -0,0 +1,48 @@ +# Generated by Django 3.2.24 on 2024-02-28 21:30 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0196_merge_pr3134_into_main"), + ] + + operations = [ + migrations.CreateModel( + name="MergedBook", + fields=[ + ("deleted_id", models.IntegerField(primary_key=True, serialize=False)), + ( + "merged_into", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="absorbed", + to="bookwyrm.book", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="MergedAuthor", + fields=[ + ("deleted_id", models.IntegerField(primary_key=True, serialize=False)), + ( + "merged_into", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="absorbed", + to="bookwyrm.author", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py index 9dc3962ad..70f85b3c8 100644 --- a/bookwyrm/models/author.py +++ b/bookwyrm/models/author.py @@ -1,4 +1,5 @@ """ database schema for info about authors """ + import re from typing import Tuple, Any @@ -10,13 +11,15 @@ from bookwyrm import activitypub from bookwyrm.settings import DOMAIN from bookwyrm.utils.db import format_trigger -from .book import BookDataModel +from .book import BookDataModel, MergedAuthor from . import fields class Author(BookDataModel): """basic biographic info""" + merged_model = MergedAuthor + wikipedia_link = fields.CharField( max_length=255, blank=True, null=True, deduplication_field=True ) diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 5dba6532f..6075d2c92 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -1,13 +1,15 @@ """ database schema for books and shelves """ + from itertools import chain import re -from typing import Any +from typing import Any, Dict +from typing_extensions import Self from django.contrib.postgres.search import SearchVectorField from django.contrib.postgres.indexes import GinIndex from django.core.cache import cache from django.db import models, transaction -from django.db.models import Prefetch +from django.db.models import Prefetch, ManyToManyField from django.dispatch import receiver from django.utils.translation import gettext_lazy as _ from model_utils import FieldTracker @@ -108,10 +110,115 @@ class BookDataModel(ObjectMixin, BookWyrmModel): """only send book data updates to other bookwyrm instances""" super().broadcast(activity, sender, software=software, **kwargs) + def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]: + """merge this entity into another entity""" + if canonical.id == self.id: + raise ValueError(f"Cannot merge {self} into itself") + + absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run) + + if dry_run: + return absorbed_fields + + canonical.save() + + self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical) + + # move related models to canonical + related_models = [ + (r.remote_field.name, r.related_model) for r in self._meta.related_objects + ] + # pylint: disable=protected-access + for related_field, related_model in related_models: + # Skip the ManyToMany fields that aren’t auto-created. These + # should have a corresponding OneToMany field in the model for + # the linking table anyway. If we update it through that model + # instead then we won’t lose the extra fields in the linking + # table. + # pylint: disable=protected-access + related_field_obj = related_model._meta.get_field(related_field) + if isinstance(related_field_obj, ManyToManyField): + through = related_field_obj.remote_field.through + if not through._meta.auto_created: + continue + related_objs = related_model.objects.filter(**{related_field: self}) + for related_obj in related_objs: + try: + setattr(related_obj, related_field, canonical) + related_obj.save() + except TypeError: + getattr(related_obj, related_field).add(canonical) + getattr(related_obj, related_field).remove(self) + + self.delete() + return absorbed_fields + + def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]: + """fill empty fields with values from another entity""" + absorbed_fields = {} + for data_field in self._meta.get_fields(): + if not hasattr(data_field, "activitypub_field"): + continue + canonical_value = getattr(self, data_field.name) + other_value = getattr(other, data_field.name) + if not other_value: + continue + if isinstance(data_field, fields.ArrayField): + if new_values := list(set(other_value) - set(canonical_value)): + # append at the end (in no particular order) + if not dry_run: + setattr(self, data_field.name, canonical_value + new_values) + absorbed_fields[data_field.name] = new_values + elif isinstance(data_field, fields.PartialDateField): + if ( + (not canonical_value) + or (other_value.has_day and not canonical_value.has_day) + or (other_value.has_month and not canonical_value.has_month) + ): + if not dry_run: + setattr(self, data_field.name, other_value) + absorbed_fields[data_field.name] = other_value + else: + if not canonical_value: + if not dry_run: + setattr(self, data_field.name, other_value) + absorbed_fields[data_field.name] = other_value + return absorbed_fields + + +class MergedBookDataModel(models.Model): + """a BookDataModel instance that has been merged into another instance. kept + to be able to redirect old URLs""" + + deleted_id = models.IntegerField(primary_key=True) + + class Meta: + """abstract just like BookDataModel""" + + abstract = True + + +class MergedBook(MergedBookDataModel): + """an Book that has been merged into another one""" + + merged_into = models.ForeignKey( + "Book", on_delete=models.PROTECT, related_name="absorbed" + ) + + +class MergedAuthor(MergedBookDataModel): + """an Author that has been merged into another one""" + + merged_into = models.ForeignKey( + "Author", on_delete=models.PROTECT, related_name="absorbed" + ) + class Book(BookDataModel): """a generic book, which can mean either an edition or a work""" + merged_model = MergedBook + connector = models.ForeignKey("Connector", on_delete=models.PROTECT, null=True) # book/work metadata @@ -192,9 +299,13 @@ class Book(BookDataModel): """properties of this edition, as a string""" items = [ self.physical_format if hasattr(self, "physical_format") else None, - f"{self.languages[0]} language" - if self.languages and self.languages[0] and self.languages[0] != "English" - else None, + ( + f"{self.languages[0]} language" + if self.languages + and self.languages[0] + and self.languages[0] != "English" + else None + ), str(self.published_date.year) if self.published_date else None, ", ".join(self.publishers) if hasattr(self, "publishers") else None, ] diff --git a/bookwyrm/tests/test_merge.py b/bookwyrm/tests/test_merge.py new file mode 100644 index 000000000..933751832 --- /dev/null +++ b/bookwyrm/tests/test_merge.py @@ -0,0 +1,97 @@ +"""test merging Authors, Works and Editions""" + +from django.test import TestCase +from django.test.client import Client + +from bookwyrm import models + + +class MergeBookDataModel(TestCase): + """test merging of subclasses of BookDataModel""" + + @classmethod + def setUpTestData(cls): # pylint: disable=invalid-name + """shared data""" + models.SiteSettings.objects.create() + + cls.jrr_tolkien = models.Author.objects.create( + name="J.R.R. Tolkien", + aliases=["JRR Tolkien", "Tolkien"], + bio="This guy wrote about hobbits and stuff.", + openlibrary_key="OL26320A", + isni="0000000121441970", + ) + cls.jrr_tolkien_2 = models.Author.objects.create( + name="J.R.R. Tolkien", + aliases=["JRR Tolkien", "John Ronald Reuel Tolkien"], + openlibrary_key="OL26320A", + isni="wrong", + wikidata="Q892", + ) + cls.jrr_tolkien_2_id = cls.jrr_tolkien_2.id + + # perform merges + cls.jrr_tolkien_absorbed_fields = cls.jrr_tolkien_2.merge_into(cls.jrr_tolkien) + + def test_merged_author(self): + """verify merged author after merge""" + self.assertEqual(self.jrr_tolkien_2.id, None, msg="duplicate should be deleted") + + def test_canonical_author(self): + """verify canonical author data after merge""" + + self.assertFalse( + self.jrr_tolkien.id is None, msg="canonical should not be deleted" + ) + + # identical in canonical and duplicate; should be unchanged + self.assertEqual(self.jrr_tolkien.name, "J.R.R. Tolkien") + self.assertEqual(self.jrr_tolkien.openlibrary_key, "OL26320A") + + # present in canonical and absent in duplicate; should be unchanged + self.assertEqual( + self.jrr_tolkien.bio, "This guy wrote about hobbits and stuff." + ) + + # absent in canonical and present in duplicate; should be absorbed + self.assertEqual(self.jrr_tolkien.wikidata, "Q892") + + # scalar value that is different in canonical and duplicate; should be unchanged + self.assertEqual(self.jrr_tolkien.isni, "0000000121441970") + + # set value with both matching and non-matching elements; should be the + # union of canonical and duplicate + self.assertEqual( + self.jrr_tolkien.aliases, + [ + "JRR Tolkien", + "Tolkien", + "John Ronald Reuel Tolkien", + ], + ) + + def test_merged_author_redirect(self): + """a web request for a merged author should redirect to the canonical author""" + client = Client() + response = client.get( + f"/author/{self.jrr_tolkien_2_id}/s/jrr-tolkien", follow=True + ) + self.assertEqual(response.redirect_chain, [(self.jrr_tolkien.local_path, 301)]) + + def test_merged_author_activitypub(self): + """an activitypub request for a merged author should return the data for + the canonical author (including the canonical id)""" + client = Client(HTTP_ACCEPT="application/json") + response = client.get(f"/author/{self.jrr_tolkien_2_id}") + self.assertEqual(response.status_code, 200) + self.assertEqual(response.json(), self.jrr_tolkien.to_activity()) + + def test_absorbed_fields(self): + """reported absorbed_fields should be accurate for --dry_run""" + self.assertEqual( + self.jrr_tolkien_absorbed_fields, + { + "aliases": ["John Ronald Reuel Tolkien"], + "wikidata": "Q892", + }, + ) diff --git a/bookwyrm/utils/partial_date.py b/bookwyrm/utils/partial_date.py index 40b89c838..4c9391476 100644 --- a/bookwyrm/utils/partial_date.py +++ b/bookwyrm/utils/partial_date.py @@ -67,6 +67,14 @@ class PartialDate(datetime): # current_timezone and default_timezone. return cls.from_datetime(datetime(year, month, day, tzinfo=_westmost_tz)) + def __eq__(self, other: object) -> bool: + if not isinstance(other, PartialDate): + return NotImplemented + return self.partial_isoformat() == other.partial_isoformat() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} object: {self.partial_isoformat()}>" + class MonthParts(PartialDate): """a date bound into month precision""" diff --git a/bookwyrm/views/author.py b/bookwyrm/views/author.py index 4dcf4c447..56977622f 100644 --- a/bookwyrm/views/author.py +++ b/bookwyrm/views/author.py @@ -1,4 +1,5 @@ """ the good people stuff! the authors! """ + from django.contrib.auth.decorators import login_required, permission_required from django.core.paginator import Paginator from django.shortcuts import get_object_or_404, redirect @@ -11,7 +12,11 @@ from bookwyrm import forms, models from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.connectors import connector_manager from bookwyrm.settings import PAGE_LENGTH -from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path +from bookwyrm.views.helpers import ( + is_api_request, + get_mergeable_object_or_404, + maybe_redirect_local_path, +) # pylint: disable= no-self-use @@ -21,7 +26,7 @@ class Author(View): # pylint: disable=unused-argument def get(self, request, author_id, slug=None): """landing page for an author""" - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) if is_api_request(request): return ActivitypubResponse(author.to_activity()) @@ -56,13 +61,13 @@ class EditAuthor(View): def get(self, request, author_id): """info about a book""" - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) data = {"author": author, "form": forms.AuthorForm(instance=author)} return TemplateResponse(request, "author/edit_author.html", data) def post(self, request, author_id): """edit a author cool""" - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) form = forms.AuthorForm(request.POST, request.FILES, instance=author) if not form.is_valid(): @@ -82,7 +87,7 @@ def update_author_from_remote(request, author_id, connector_identifier): connector = connector_manager.load_connector( get_object_or_404(models.Connector, identifier=connector_identifier) ) - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) connector.update_author_from_remote(author) diff --git a/bookwyrm/views/books/books.py b/bookwyrm/views/books/books.py index 565220b6e..bbf041850 100644 --- a/bookwyrm/views/books/books.py +++ b/bookwyrm/views/books/books.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from uuid import uuid4 from django.contrib.auth.decorators import login_required, permission_required @@ -15,7 +16,11 @@ from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.connectors import connector_manager, ConnectorException from bookwyrm.connectors.abstract_connector import get_image from bookwyrm.settings import PAGE_LENGTH -from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path +from bookwyrm.views.helpers import ( + is_api_request, + maybe_redirect_local_path, + get_mergeable_object_or_404, +) # pylint: disable=no-self-use @@ -40,7 +45,11 @@ class Book(View): # table, so they never have clashing IDs book = ( models.Edition.viewer_aware_objects(request.user) - .filter(Q(id=book_id) | Q(parent_work__id=book_id)) + .filter( + Q(id=book_id) + | Q(parent_work__id=book_id) + | Q(absorbed__deleted_id=book_id) + ) .order_by("-edition_rank") .select_related("parent_work") .prefetch_related("authors", "file_links") @@ -82,11 +91,13 @@ class Book(View): "book": book, "statuses": paginated.get_page(request.GET.get("page")), "review_count": reviews.count(), - "ratings": reviews.filter( - Q(content__isnull=True) | Q(content="") - ).select_related("user") - if not user_statuses - else None, + "ratings": ( + reviews.filter(Q(content__isnull=True) | Q(content="")).select_related( + "user" + ) + if not user_statuses + else None + ), "rating": reviews.aggregate(Avg("rating"))["rating__avg"], "lists": lists, "update_error": kwargs.get("update_error", False), @@ -130,7 +141,7 @@ class Book(View): @require_POST def upload_cover(request, book_id): """upload a new cover""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) book.last_edited_by = request.user url = request.POST.get("cover-url") @@ -168,7 +179,7 @@ def set_cover_from_url(url): @permission_required("bookwyrm.edit_book", raise_exception=True) def add_description(request, book_id): """upload a new cover""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) description = request.POST.get("description") @@ -199,7 +210,9 @@ def update_book_from_remote(request, book_id, connector_identifier): connector = connector_manager.load_connector( get_object_or_404(models.Connector, identifier=connector_identifier) ) - book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id) + book = get_mergeable_object_or_404( + models.Book.objects.select_subclasses(), id=book_id + ) try: connector.update_book_from_remote(book) diff --git a/bookwyrm/views/books/edit_book.py b/bookwyrm/views/books/edit_book.py index ae492374f..b8ceece13 100644 --- a/bookwyrm/views/books/edit_book.py +++ b/bookwyrm/views/books/edit_book.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from re import sub, findall from django.contrib.auth.decorators import login_required, permission_required from django.contrib.postgres.search import SearchRank, SearchVector @@ -18,9 +19,10 @@ from bookwyrm.utils.isni import ( build_author_from_isni, augment_author_metadata, ) -from bookwyrm.views.helpers import get_edition +from bookwyrm.views.helpers import get_edition, get_mergeable_object_or_404 from .books import set_cover_from_url + # pylint: disable=no-self-use @method_decorator(login_required, name="dispatch") @method_decorator( @@ -42,7 +44,7 @@ class EditBook(View): def post(self, request, book_id): """edit a book cool""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) form = forms.EditionForm(request.POST, request.FILES, instance=book) @@ -130,7 +132,7 @@ class CreateBook(View): with transaction.atomic(): book = form.save(request) - parent_work = get_object_or_404(models.Work, id=parent_work_id) + parent_work = get_mergeable_object_or_404(models.Work, id=parent_work_id) book.parent_work = parent_work if authors: @@ -295,7 +297,7 @@ class ConfirmEditBook(View): if not book.parent_work: work_match = request.POST.get("parent_work") if work_match and work_match != "0": - work = get_object_or_404(models.Work, id=work_match) + work = get_mergeable_object_or_404(models.Work, id=work_match) else: work = models.Work.objects.create(title=form.cleaned_data["title"]) work.authors.set(book.authors.all()) diff --git a/bookwyrm/views/books/editions.py b/bookwyrm/views/books/editions.py index a3167fac4..538ff6377 100644 --- a/bookwyrm/views/books/editions.py +++ b/bookwyrm/views/books/editions.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from functools import reduce import operator @@ -7,7 +8,7 @@ from django.core.cache import cache as django_cache from django.core.paginator import Paginator from django.db import transaction from django.db.models import Q -from django.shortcuts import get_object_or_404, redirect +from django.shortcuts import redirect from django.template.response import TemplateResponse from django.views import View from django.views.decorators.http import require_POST @@ -15,7 +16,7 @@ from django.views.decorators.http import require_POST from bookwyrm import forms, models from bookwyrm.activitypub import ActivitypubResponse from bookwyrm.settings import PAGE_LENGTH -from bookwyrm.views.helpers import is_api_request +from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404 # pylint: disable=no-self-use @@ -24,7 +25,7 @@ class Editions(View): def get(self, request, book_id): """list of editions of a book""" - work = get_object_or_404(models.Work, id=book_id) + work = get_mergeable_object_or_404(models.Work, id=book_id) if is_api_request(request): return ActivitypubResponse(work.to_edition_list(**request.GET)) @@ -83,7 +84,7 @@ class Editions(View): def switch_edition(request): """switch your copy of a book to a different edition""" edition_id = request.POST.get("edition") - new_edition = get_object_or_404(models.Edition, id=edition_id) + new_edition = get_mergeable_object_or_404(models.Edition, id=edition_id) shelfbooks = models.ShelfBook.objects.filter( book__parent_work=new_edition.parent_work, shelf__user=request.user ) diff --git a/bookwyrm/views/books/links.py b/bookwyrm/views/books/links.py index 70b91f2d9..4793c6019 100644 --- a/bookwyrm/views/books/links.py +++ b/bookwyrm/views/books/links.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + from django.contrib.auth.decorators import login_required, permission_required from django.db import transaction from django.shortcuts import get_object_or_404, redirect @@ -8,6 +9,7 @@ from django.utils.decorators import method_decorator from django.views.decorators.http import require_POST from bookwyrm import forms, models +from bookwyrm.views.helpers import get_mergeable_object_or_404 # pylint: disable=no-self-use @@ -20,7 +22,7 @@ class BookFileLinks(View): def get(self, request, book_id): """view links""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) annotated_links = get_annotated_links(book) data = {"book": book, "links": annotated_links} @@ -36,7 +38,7 @@ class BookFileLinks(View): # this form shouldn't ever really get here, since it's just a dropdown # get the data again rather than redirecting - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) annotated_links = get_annotated_links(book, form=form) data = {"book": book, "links": annotated_links} @@ -75,7 +77,7 @@ class AddFileLink(View): def get(self, request, book_id): """Create link form""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) data = { "file_link_form": forms.FileLinkForm(), "book": book, @@ -85,7 +87,9 @@ class AddFileLink(View): @transaction.atomic def post(self, request, book_id, link_id=None): """Add a link to a copy of the book you can read""" - book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id) + book = get_mergeable_object_or_404( + models.Book.objects.select_subclasses(), id=book_id + ) link = get_object_or_404(models.FileLink, id=link_id) if link_id else None form = forms.FileLinkForm(request.POST, instance=link) if not form.is_valid(): diff --git a/bookwyrm/views/books/series.py b/bookwyrm/views/books/series.py index bdc8dccab..eb3a2a04f 100644 --- a/bookwyrm/views/books/series.py +++ b/bookwyrm/views/books/series.py @@ -1,10 +1,10 @@ """ books belonging to the same series """ + from sys import float_info from django.views import View -from django.shortcuts import get_object_or_404 from django.template.response import TemplateResponse -from bookwyrm.views.helpers import is_api_request +from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404 from bookwyrm import models @@ -27,7 +27,7 @@ class BookSeriesBy(View): if is_api_request(request): pass - author = get_object_or_404(models.Author, id=author_id) + author = get_mergeable_object_or_404(models.Author, id=author_id) results = models.Edition.objects.filter(authors=author, series=series_name) @@ -56,9 +56,11 @@ class BookSeriesBy(View): sorted(numbered_books, key=sort_by_series) + sorted( dated_books, - key=lambda book: book.first_published_date - if book.first_published_date - else book.published_date, + key=lambda book: ( + book.first_published_date + if book.first_published_date + else book.published_date + ), ) + sorted( unsortable_books, diff --git a/bookwyrm/views/get_started.py b/bookwyrm/views/get_started.py index 511a886ca..9a28dfbca 100644 --- a/bookwyrm/views/get_started.py +++ b/bookwyrm/views/get_started.py @@ -1,4 +1,5 @@ """ Helping new users figure out the lay of the land """ + import re from django.contrib.auth.decorators import login_required @@ -13,6 +14,7 @@ from django.views import View from bookwyrm import book_search, forms, models from bookwyrm.settings import INSTANCE_ACTOR_USERNAME from bookwyrm.suggested_users import suggested_users +from bookwyrm.views.helpers import get_mergeable_object_or_404 from .preferences.edit_user import save_user_form @@ -80,8 +82,8 @@ class GetStartedBooks(View): for k, v in request.POST.items() if re.match(r"\d+", k) and re.match(r"\d+", v) ] - for (book_id, shelf_id) in shelve_actions: - book = get_object_or_404(models.Edition, id=book_id) + for book_id, shelf_id in shelve_actions: + book = get_mergeable_object_or_404(models.Edition, id=book_id) shelf = get_object_or_404(models.Shelf, id=shelf_id) models.ShelfBook.objects.create(book=book, shelf=shelf, user=request.user) diff --git a/bookwyrm/views/helpers.py b/bookwyrm/views/helpers.py index 60d950354..5bbb05033 100644 --- a/bookwyrm/views/helpers.py +++ b/bookwyrm/views/helpers.py @@ -1,4 +1,5 @@ """ helper functions used in various views """ + import re from datetime import datetime, timedelta import dateutil.parser @@ -8,7 +9,7 @@ from dateutil.parser import ParserError from requests import HTTPError from django.db.models import Q from django.conf import settings as django_settings -from django.shortcuts import redirect +from django.shortcuts import redirect, _get_queryset from django.http import Http404 from django.utils import translation @@ -232,3 +233,19 @@ def redirect_to_referer(request, *args, **kwargs): # if not, use the args passed you'd normally pass to redirect() return redirect(*args or "/", **kwargs) + + +# pylint: disable=redefined-builtin,invalid-name +def get_mergeable_object_or_404(klass, id): + """variant of get_object_or_404 that also redirects if id has been merged + into another object""" + queryset = _get_queryset(klass) + try: + return queryset.get(pk=id) + except queryset.model.DoesNotExist: + try: + return queryset.get(absorbed__deleted_id=id) + except queryset.model.DoesNotExist: + pass + + raise Http404(f"No {queryset.model} with ID {id} exists") diff --git a/bookwyrm/views/reading.py b/bookwyrm/views/reading.py index 2ce59b096..478d27990 100644 --- a/bookwyrm/views/reading.py +++ b/bookwyrm/views/reading.py @@ -1,4 +1,5 @@ """ the good stuff! the books! """ + import logging from django.contrib.auth.decorators import login_required from django.core.cache import cache @@ -11,6 +12,7 @@ from django.views import View from django.views.decorators.http import require_POST from bookwyrm import forms, models +from bookwyrm.views.helpers import get_mergeable_object_or_404 from bookwyrm.views.shelf.shelf_actions import unshelve from .status import CreateStatus from .helpers import get_edition, handle_reading_status, is_api_request @@ -130,7 +132,7 @@ class ReadThrough(View): def get(self, request, book_id, readthrough_id=None): """standalone form in case of errors""" - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) form = forms.ReadThroughForm() data = {"form": form, "book": book} if readthrough_id: @@ -152,7 +154,7 @@ class ReadThrough(View): ) form = forms.ReadThroughForm(request.POST) if not form.is_valid(): - book = get_object_or_404(models.Edition, id=book_id) + book = get_mergeable_object_or_404(models.Edition, id=book_id) data = {"form": form, "book": book} if request.POST.get("id"): data["readthrough"] = get_object_or_404( diff --git a/bookwyrm/views/shelf/shelf_actions.py b/bookwyrm/views/shelf/shelf_actions.py index f0f5fa159..d68ea4219 100644 --- a/bookwyrm/views/shelf/shelf_actions.py +++ b/bookwyrm/views/shelf/shelf_actions.py @@ -1,11 +1,12 @@ """ shelf views """ + from django.db import IntegrityError, transaction from django.contrib.auth.decorators import login_required from django.shortcuts import get_object_or_404, redirect from django.views.decorators.http import require_POST from bookwyrm import forms, models -from bookwyrm.views.helpers import redirect_to_referer +from bookwyrm.views.helpers import redirect_to_referer, get_mergeable_object_or_404 @login_required @@ -36,7 +37,7 @@ def delete_shelf(request, shelf_id): @transaction.atomic def shelve(request): """put a book on a user's shelf""" - book = get_object_or_404(models.Edition, id=request.POST.get("book")) + book = get_mergeable_object_or_404(models.Edition, id=request.POST.get("book")) desired_shelf = get_object_or_404( request.user.shelf_set, identifier=request.POST.get("shelf") ) @@ -97,7 +98,7 @@ def shelve(request): def unshelve(request, book_id=False): """remove a book from a user's shelf""" identity = book_id if book_id else request.POST.get("book") - book = get_object_or_404(models.Edition, id=identity) + book = get_mergeable_object_or_404(models.Edition, id=identity) shelf_book = get_object_or_404( models.ShelfBook, book=book, shelf__id=request.POST["shelf"] ) diff --git a/bookwyrm/views/status.py b/bookwyrm/views/status.py index 34b62d0b4..f2f03405f 100644 --- a/bookwyrm/views/status.py +++ b/bookwyrm/views/status.py @@ -1,4 +1,5 @@ """ what are we here for if not for posting """ + import re import logging @@ -19,6 +20,7 @@ from markdown import markdown from bookwyrm import forms, models from bookwyrm.models.report import DELETE_ITEM from bookwyrm.utils import regex, sanitizer +from bookwyrm.views.helpers import get_mergeable_object_or_404 from .helpers import handle_remote_webfinger, is_api_request from .helpers import load_date_in_user_tz_as_utc, redirect_to_referer @@ -52,7 +54,7 @@ class CreateStatus(View): def get(self, request, status_type): # pylint: disable=unused-argument """compose view (...not used?)""" - book = get_object_or_404(models.Edition, id=request.GET.get("book")) + book = get_mergeable_object_or_404(models.Edition, id=request.GET.get("book")) data = {"book": book} return TemplateResponse(request, "compose.html", data) @@ -98,7 +100,7 @@ class CreateStatus(View): # inspect the text for user tags content = status.content mentions = find_mentions(request.user, content) - for (_, mention_user) in mentions.items(): + for _, mention_user in mentions.items(): # add them to status mentions fk status.mention_users.add(mention_user) content = format_mentions(content, mentions) @@ -109,7 +111,7 @@ class CreateStatus(View): # inspect the text for hashtags hashtags = find_or_create_hashtags(content) - for (_, mention_hashtag) in hashtags.items(): + for _, mention_hashtag in hashtags.items(): # add them to status mentions fk status.mention_hashtags.add(mention_hashtag) content = format_hashtags(content, hashtags) @@ -140,7 +142,7 @@ class CreateStatus(View): def format_mentions(content, mentions): """Detect @mentions and make them links""" - for (mention_text, mention_user) in mentions.items(): + for mention_text, mention_user in mentions.items(): # turn the mention into a link content = re.sub( rf"(?