Merge pull request #3299 from Minnozz/absorb

Track which Author/Work/Edition a duplicate has been merged into
This commit is contained in:
Mouse Reeve 2024-04-09 05:55:44 -05:00 committed by GitHub
commit 73630331d1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 405 additions and 116 deletions

View file

@ -1,13 +1,14 @@
""" PROCEED WITH CAUTION: uses deduplication fields to permanently
merge book data objects """
from django.core.management.base import BaseCommand
from django.db.models import Count
from bookwyrm import models
from bookwyrm.management.merge import merge_objects
def dedupe_model(model):
def dedupe_model(model, dry_run=False):
"""combine duplicate editions and update related models"""
print(f"deduplicating {model.__name__}:")
fields = model._meta.get_fields()
dedupe_fields = [
f for f in fields if hasattr(f, "deduplication_field") and f.deduplication_field
@ -16,30 +17,42 @@ def dedupe_model(model):
dupes = (
model.objects.values(field.name)
.annotate(Count(field.name))
.filter(**{"%s__count__gt" % field.name: 1})
.filter(**{f"{field.name}__count__gt": 1})
.exclude(**{field.name: ""})
.exclude(**{f"{field.name}__isnull": True})
)
for dupe in dupes:
value = dupe[field.name]
if not value or value == "":
continue
print("----------")
print(dupe)
objs = model.objects.filter(**{field.name: value}).order_by("id")
canonical = objs.first()
print("keeping", canonical.remote_id)
action = "would merge" if dry_run else "merging"
print(
f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:"
)
for obj in objs[1:]:
print(obj.remote_id)
merge_objects(canonical, obj)
print(f"- {obj.remote_id}")
absorbed_fields = obj.merge_into(canonical, dry_run=dry_run)
print(f" absorbed fields: {absorbed_fields}")
class Command(BaseCommand):
"""deduplicate allllll the book data models"""
help = "merges duplicate book data"
def add_arguments(self, parser):
"""add the arguments for this command"""
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)
# pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options):
"""run deduplications"""
dedupe_model(models.Edition)
dedupe_model(models.Work)
dedupe_model(models.Author)
dedupe_model(models.Edition, dry_run=options["dry_run"])
dedupe_model(models.Work, dry_run=options["dry_run"])
dedupe_model(models.Author, dry_run=options["dry_run"])

View file

@ -1,50 +0,0 @@
from django.db.models import ManyToManyField
def update_related(canonical, obj):
"""update all the models with fk to the object being removed"""
# move related models to canonical
related_models = [
(r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
]
for (related_field, related_model) in related_models:
# Skip the ManyToMany fields that arent auto-created. These
# should have a corresponding OneToMany field in the model for
# the linking table anyway. If we update it through that model
# instead then we wont lose the extra fields in the linking
# table.
related_field_obj = related_model._meta.get_field(related_field)
if isinstance(related_field_obj, ManyToManyField):
through = related_field_obj.remote_field.through
if not through._meta.auto_created:
continue
related_objs = related_model.objects.filter(**{related_field: obj})
for related_obj in related_objs:
print("replacing in", related_model.__name__, related_field, related_obj.id)
try:
setattr(related_obj, related_field, canonical)
related_obj.save()
except TypeError:
getattr(related_obj, related_field).add(canonical)
getattr(related_obj, related_field).remove(obj)
def copy_data(canonical, obj):
"""try to get the most data possible"""
for data_field in obj._meta.get_fields():
if not hasattr(data_field, "activitypub_field"):
continue
data_value = getattr(obj, data_field.name)
if not data_value:
continue
if not getattr(canonical, data_field.name):
print("setting data field", data_field.name, data_value)
setattr(canonical, data_field.name, data_value)
canonical.save()
def merge_objects(canonical, obj):
copy_data(canonical, obj)
update_related(canonical, obj)
# remove the outdated entry
obj.delete()

View file

@ -1,4 +1,3 @@
from bookwyrm.management.merge import merge_objects
from django.core.management.base import BaseCommand
@ -9,6 +8,11 @@ class MergeCommand(BaseCommand):
"""add the arguments for this command"""
parser.add_argument("--canonical", type=int, required=True)
parser.add_argument("--other", type=int, required=True)
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)
# pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options):
@ -26,4 +30,8 @@ class MergeCommand(BaseCommand):
print("other book doesnt exist!")
return
merge_objects(canonical, other)
absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"])
action = "would be" if options["dry_run"] else "has been"
print(f"{other.remote_id} {action} merged into {canonical.remote_id}")
print(f"absorbed fields: {absorbed_fields}")

View file

@ -0,0 +1,48 @@
# Generated by Django 3.2.24 on 2024-02-28 21:30
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0196_merge_pr3134_into_main"),
]
operations = [
migrations.CreateModel(
name="MergedBook",
fields=[
("deleted_id", models.IntegerField(primary_key=True, serialize=False)),
(
"merged_into",
models.ForeignKey(
on_delete=django.db.models.deletion.PROTECT,
related_name="absorbed",
to="bookwyrm.book",
),
),
],
options={
"abstract": False,
},
),
migrations.CreateModel(
name="MergedAuthor",
fields=[
("deleted_id", models.IntegerField(primary_key=True, serialize=False)),
(
"merged_into",
models.ForeignKey(
on_delete=django.db.models.deletion.PROTECT,
related_name="absorbed",
to="bookwyrm.author",
),
),
],
options={
"abstract": False,
},
),
]

View file

@ -1,4 +1,5 @@
""" database schema for info about authors """
import re
from typing import Tuple, Any
@ -10,13 +11,15 @@ from bookwyrm import activitypub
from bookwyrm.settings import DOMAIN
from bookwyrm.utils.db import format_trigger
from .book import BookDataModel
from .book import BookDataModel, MergedAuthor
from . import fields
class Author(BookDataModel):
"""basic biographic info"""
merged_model = MergedAuthor
wikipedia_link = fields.CharField(
max_length=255, blank=True, null=True, deduplication_field=True
)

View file

@ -1,13 +1,15 @@
""" database schema for books and shelves """
from itertools import chain
import re
from typing import Any
from typing import Any, Dict
from typing_extensions import Self
from django.contrib.postgres.search import SearchVectorField
from django.contrib.postgres.indexes import GinIndex
from django.core.cache import cache
from django.db import models, transaction
from django.db.models import Prefetch
from django.db.models import Prefetch, ManyToManyField
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from model_utils import FieldTracker
@ -108,10 +110,115 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
"""only send book data updates to other bookwyrm instances"""
super().broadcast(activity, sender, software=software, **kwargs)
def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]:
"""merge this entity into another entity"""
if canonical.id == self.id:
raise ValueError(f"Cannot merge {self} into itself")
absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run)
if dry_run:
return absorbed_fields
canonical.save()
self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical)
# move related models to canonical
related_models = [
(r.remote_field.name, r.related_model) for r in self._meta.related_objects
]
# pylint: disable=protected-access
for related_field, related_model in related_models:
# Skip the ManyToMany fields that arent auto-created. These
# should have a corresponding OneToMany field in the model for
# the linking table anyway. If we update it through that model
# instead then we wont lose the extra fields in the linking
# table.
# pylint: disable=protected-access
related_field_obj = related_model._meta.get_field(related_field)
if isinstance(related_field_obj, ManyToManyField):
through = related_field_obj.remote_field.through
if not through._meta.auto_created:
continue
related_objs = related_model.objects.filter(**{related_field: self})
for related_obj in related_objs:
try:
setattr(related_obj, related_field, canonical)
related_obj.save()
except TypeError:
getattr(related_obj, related_field).add(canonical)
getattr(related_obj, related_field).remove(self)
self.delete()
return absorbed_fields
def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]:
"""fill empty fields with values from another entity"""
absorbed_fields = {}
for data_field in self._meta.get_fields():
if not hasattr(data_field, "activitypub_field"):
continue
canonical_value = getattr(self, data_field.name)
other_value = getattr(other, data_field.name)
if not other_value:
continue
if isinstance(data_field, fields.ArrayField):
if new_values := list(set(other_value) - set(canonical_value)):
# append at the end (in no particular order)
if not dry_run:
setattr(self, data_field.name, canonical_value + new_values)
absorbed_fields[data_field.name] = new_values
elif isinstance(data_field, fields.PartialDateField):
if (
(not canonical_value)
or (other_value.has_day and not canonical_value.has_day)
or (other_value.has_month and not canonical_value.has_month)
):
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
else:
if not canonical_value:
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
return absorbed_fields
class MergedBookDataModel(models.Model):
"""a BookDataModel instance that has been merged into another instance. kept
to be able to redirect old URLs"""
deleted_id = models.IntegerField(primary_key=True)
class Meta:
"""abstract just like BookDataModel"""
abstract = True
class MergedBook(MergedBookDataModel):
"""an Book that has been merged into another one"""
merged_into = models.ForeignKey(
"Book", on_delete=models.PROTECT, related_name="absorbed"
)
class MergedAuthor(MergedBookDataModel):
"""an Author that has been merged into another one"""
merged_into = models.ForeignKey(
"Author", on_delete=models.PROTECT, related_name="absorbed"
)
class Book(BookDataModel):
"""a generic book, which can mean either an edition or a work"""
merged_model = MergedBook
connector = models.ForeignKey("Connector", on_delete=models.PROTECT, null=True)
# book/work metadata
@ -192,9 +299,13 @@ class Book(BookDataModel):
"""properties of this edition, as a string"""
items = [
self.physical_format if hasattr(self, "physical_format") else None,
f"{self.languages[0]} language"
if self.languages and self.languages[0] and self.languages[0] != "English"
else None,
(
f"{self.languages[0]} language"
if self.languages
and self.languages[0]
and self.languages[0] != "English"
else None
),
str(self.published_date.year) if self.published_date else None,
", ".join(self.publishers) if hasattr(self, "publishers") else None,
]

View file

@ -0,0 +1,97 @@
"""test merging Authors, Works and Editions"""
from django.test import TestCase
from django.test.client import Client
from bookwyrm import models
class MergeBookDataModel(TestCase):
"""test merging of subclasses of BookDataModel"""
@classmethod
def setUpTestData(cls): # pylint: disable=invalid-name
"""shared data"""
models.SiteSettings.objects.create()
cls.jrr_tolkien = models.Author.objects.create(
name="J.R.R. Tolkien",
aliases=["JRR Tolkien", "Tolkien"],
bio="This guy wrote about hobbits and stuff.",
openlibrary_key="OL26320A",
isni="0000000121441970",
)
cls.jrr_tolkien_2 = models.Author.objects.create(
name="J.R.R. Tolkien",
aliases=["JRR Tolkien", "John Ronald Reuel Tolkien"],
openlibrary_key="OL26320A",
isni="wrong",
wikidata="Q892",
)
cls.jrr_tolkien_2_id = cls.jrr_tolkien_2.id
# perform merges
cls.jrr_tolkien_absorbed_fields = cls.jrr_tolkien_2.merge_into(cls.jrr_tolkien)
def test_merged_author(self):
"""verify merged author after merge"""
self.assertEqual(self.jrr_tolkien_2.id, None, msg="duplicate should be deleted")
def test_canonical_author(self):
"""verify canonical author data after merge"""
self.assertFalse(
self.jrr_tolkien.id is None, msg="canonical should not be deleted"
)
# identical in canonical and duplicate; should be unchanged
self.assertEqual(self.jrr_tolkien.name, "J.R.R. Tolkien")
self.assertEqual(self.jrr_tolkien.openlibrary_key, "OL26320A")
# present in canonical and absent in duplicate; should be unchanged
self.assertEqual(
self.jrr_tolkien.bio, "This guy wrote about hobbits and stuff."
)
# absent in canonical and present in duplicate; should be absorbed
self.assertEqual(self.jrr_tolkien.wikidata, "Q892")
# scalar value that is different in canonical and duplicate; should be unchanged
self.assertEqual(self.jrr_tolkien.isni, "0000000121441970")
# set value with both matching and non-matching elements; should be the
# union of canonical and duplicate
self.assertEqual(
self.jrr_tolkien.aliases,
[
"JRR Tolkien",
"Tolkien",
"John Ronald Reuel Tolkien",
],
)
def test_merged_author_redirect(self):
"""a web request for a merged author should redirect to the canonical author"""
client = Client()
response = client.get(
f"/author/{self.jrr_tolkien_2_id}/s/jrr-tolkien", follow=True
)
self.assertEqual(response.redirect_chain, [(self.jrr_tolkien.local_path, 301)])
def test_merged_author_activitypub(self):
"""an activitypub request for a merged author should return the data for
the canonical author (including the canonical id)"""
client = Client(HTTP_ACCEPT="application/json")
response = client.get(f"/author/{self.jrr_tolkien_2_id}")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json(), self.jrr_tolkien.to_activity())
def test_absorbed_fields(self):
"""reported absorbed_fields should be accurate for --dry_run"""
self.assertEqual(
self.jrr_tolkien_absorbed_fields,
{
"aliases": ["John Ronald Reuel Tolkien"],
"wikidata": "Q892",
},
)

View file

@ -67,6 +67,14 @@ class PartialDate(datetime):
# current_timezone and default_timezone.
return cls.from_datetime(datetime(year, month, day, tzinfo=_westmost_tz))
def __eq__(self, other: object) -> bool:
if not isinstance(other, PartialDate):
return NotImplemented
return self.partial_isoformat() == other.partial_isoformat()
def __repr__(self) -> str:
return f"<{self.__class__.__name__} object: {self.partial_isoformat()}>"
class MonthParts(PartialDate):
"""a date bound into month precision"""

View file

@ -1,4 +1,5 @@
""" the good people stuff! the authors! """
from django.contrib.auth.decorators import login_required, permission_required
from django.core.paginator import Paginator
from django.shortcuts import get_object_or_404, redirect
@ -11,7 +12,11 @@ from bookwyrm import forms, models
from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.connectors import connector_manager
from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path
from bookwyrm.views.helpers import (
is_api_request,
get_mergeable_object_or_404,
maybe_redirect_local_path,
)
# pylint: disable= no-self-use
@ -21,7 +26,7 @@ class Author(View):
# pylint: disable=unused-argument
def get(self, request, author_id, slug=None):
"""landing page for an author"""
author = get_object_or_404(models.Author, id=author_id)
author = get_mergeable_object_or_404(models.Author, id=author_id)
if is_api_request(request):
return ActivitypubResponse(author.to_activity())
@ -56,13 +61,13 @@ class EditAuthor(View):
def get(self, request, author_id):
"""info about a book"""
author = get_object_or_404(models.Author, id=author_id)
author = get_mergeable_object_or_404(models.Author, id=author_id)
data = {"author": author, "form": forms.AuthorForm(instance=author)}
return TemplateResponse(request, "author/edit_author.html", data)
def post(self, request, author_id):
"""edit a author cool"""
author = get_object_or_404(models.Author, id=author_id)
author = get_mergeable_object_or_404(models.Author, id=author_id)
form = forms.AuthorForm(request.POST, request.FILES, instance=author)
if not form.is_valid():
@ -82,7 +87,7 @@ def update_author_from_remote(request, author_id, connector_identifier):
connector = connector_manager.load_connector(
get_object_or_404(models.Connector, identifier=connector_identifier)
)
author = get_object_or_404(models.Author, id=author_id)
author = get_mergeable_object_or_404(models.Author, id=author_id)
connector.update_author_from_remote(author)

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """
from uuid import uuid4
from django.contrib.auth.decorators import login_required, permission_required
@ -15,7 +16,11 @@ from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.connectors import connector_manager, ConnectorException
from bookwyrm.connectors.abstract_connector import get_image
from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.views.helpers import is_api_request, maybe_redirect_local_path
from bookwyrm.views.helpers import (
is_api_request,
maybe_redirect_local_path,
get_mergeable_object_or_404,
)
# pylint: disable=no-self-use
@ -40,7 +45,11 @@ class Book(View):
# table, so they never have clashing IDs
book = (
models.Edition.viewer_aware_objects(request.user)
.filter(Q(id=book_id) | Q(parent_work__id=book_id))
.filter(
Q(id=book_id)
| Q(parent_work__id=book_id)
| Q(absorbed__deleted_id=book_id)
)
.order_by("-edition_rank")
.select_related("parent_work")
.prefetch_related("authors", "file_links")
@ -82,11 +91,13 @@ class Book(View):
"book": book,
"statuses": paginated.get_page(request.GET.get("page")),
"review_count": reviews.count(),
"ratings": reviews.filter(
Q(content__isnull=True) | Q(content="")
).select_related("user")
if not user_statuses
else None,
"ratings": (
reviews.filter(Q(content__isnull=True) | Q(content="")).select_related(
"user"
)
if not user_statuses
else None
),
"rating": reviews.aggregate(Avg("rating"))["rating__avg"],
"lists": lists,
"update_error": kwargs.get("update_error", False),
@ -130,7 +141,7 @@ class Book(View):
@require_POST
def upload_cover(request, book_id):
"""upload a new cover"""
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
book.last_edited_by = request.user
url = request.POST.get("cover-url")
@ -168,7 +179,7 @@ def set_cover_from_url(url):
@permission_required("bookwyrm.edit_book", raise_exception=True)
def add_description(request, book_id):
"""upload a new cover"""
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
description = request.POST.get("description")
@ -199,7 +210,9 @@ def update_book_from_remote(request, book_id, connector_identifier):
connector = connector_manager.load_connector(
get_object_or_404(models.Connector, identifier=connector_identifier)
)
book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id)
book = get_mergeable_object_or_404(
models.Book.objects.select_subclasses(), id=book_id
)
try:
connector.update_book_from_remote(book)

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """
from re import sub, findall
from django.contrib.auth.decorators import login_required, permission_required
from django.contrib.postgres.search import SearchRank, SearchVector
@ -18,9 +19,10 @@ from bookwyrm.utils.isni import (
build_author_from_isni,
augment_author_metadata,
)
from bookwyrm.views.helpers import get_edition
from bookwyrm.views.helpers import get_edition, get_mergeable_object_or_404
from .books import set_cover_from_url
# pylint: disable=no-self-use
@method_decorator(login_required, name="dispatch")
@method_decorator(
@ -42,7 +44,7 @@ class EditBook(View):
def post(self, request, book_id):
"""edit a book cool"""
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
form = forms.EditionForm(request.POST, request.FILES, instance=book)
@ -130,7 +132,7 @@ class CreateBook(View):
with transaction.atomic():
book = form.save(request)
parent_work = get_object_or_404(models.Work, id=parent_work_id)
parent_work = get_mergeable_object_or_404(models.Work, id=parent_work_id)
book.parent_work = parent_work
if authors:
@ -295,7 +297,7 @@ class ConfirmEditBook(View):
if not book.parent_work:
work_match = request.POST.get("parent_work")
if work_match and work_match != "0":
work = get_object_or_404(models.Work, id=work_match)
work = get_mergeable_object_or_404(models.Work, id=work_match)
else:
work = models.Work.objects.create(title=form.cleaned_data["title"])
work.authors.set(book.authors.all())

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """
from functools import reduce
import operator
@ -7,7 +8,7 @@ from django.core.cache import cache as django_cache
from django.core.paginator import Paginator
from django.db import transaction
from django.db.models import Q
from django.shortcuts import get_object_or_404, redirect
from django.shortcuts import redirect
from django.template.response import TemplateResponse
from django.views import View
from django.views.decorators.http import require_POST
@ -15,7 +16,7 @@ from django.views.decorators.http import require_POST
from bookwyrm import forms, models
from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.views.helpers import is_api_request
from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404
# pylint: disable=no-self-use
@ -24,7 +25,7 @@ class Editions(View):
def get(self, request, book_id):
"""list of editions of a book"""
work = get_object_or_404(models.Work, id=book_id)
work = get_mergeable_object_or_404(models.Work, id=book_id)
if is_api_request(request):
return ActivitypubResponse(work.to_edition_list(**request.GET))
@ -83,7 +84,7 @@ class Editions(View):
def switch_edition(request):
"""switch your copy of a book to a different edition"""
edition_id = request.POST.get("edition")
new_edition = get_object_or_404(models.Edition, id=edition_id)
new_edition = get_mergeable_object_or_404(models.Edition, id=edition_id)
shelfbooks = models.ShelfBook.objects.filter(
book__parent_work=new_edition.parent_work, shelf__user=request.user
)

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """
from django.contrib.auth.decorators import login_required, permission_required
from django.db import transaction
from django.shortcuts import get_object_or_404, redirect
@ -8,6 +9,7 @@ from django.utils.decorators import method_decorator
from django.views.decorators.http import require_POST
from bookwyrm import forms, models
from bookwyrm.views.helpers import get_mergeable_object_or_404
# pylint: disable=no-self-use
@ -20,7 +22,7 @@ class BookFileLinks(View):
def get(self, request, book_id):
"""view links"""
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
annotated_links = get_annotated_links(book)
data = {"book": book, "links": annotated_links}
@ -36,7 +38,7 @@ class BookFileLinks(View):
# this form shouldn't ever really get here, since it's just a dropdown
# get the data again rather than redirecting
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
annotated_links = get_annotated_links(book, form=form)
data = {"book": book, "links": annotated_links}
@ -75,7 +77,7 @@ class AddFileLink(View):
def get(self, request, book_id):
"""Create link form"""
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
data = {
"file_link_form": forms.FileLinkForm(),
"book": book,
@ -85,7 +87,9 @@ class AddFileLink(View):
@transaction.atomic
def post(self, request, book_id, link_id=None):
"""Add a link to a copy of the book you can read"""
book = get_object_or_404(models.Book.objects.select_subclasses(), id=book_id)
book = get_mergeable_object_or_404(
models.Book.objects.select_subclasses(), id=book_id
)
link = get_object_or_404(models.FileLink, id=link_id) if link_id else None
form = forms.FileLinkForm(request.POST, instance=link)
if not form.is_valid():

View file

@ -1,10 +1,10 @@
""" books belonging to the same series """
from sys import float_info
from django.views import View
from django.shortcuts import get_object_or_404
from django.template.response import TemplateResponse
from bookwyrm.views.helpers import is_api_request
from bookwyrm.views.helpers import is_api_request, get_mergeable_object_or_404
from bookwyrm import models
@ -27,7 +27,7 @@ class BookSeriesBy(View):
if is_api_request(request):
pass
author = get_object_or_404(models.Author, id=author_id)
author = get_mergeable_object_or_404(models.Author, id=author_id)
results = models.Edition.objects.filter(authors=author, series=series_name)
@ -56,9 +56,11 @@ class BookSeriesBy(View):
sorted(numbered_books, key=sort_by_series)
+ sorted(
dated_books,
key=lambda book: book.first_published_date
if book.first_published_date
else book.published_date,
key=lambda book: (
book.first_published_date
if book.first_published_date
else book.published_date
),
)
+ sorted(
unsortable_books,

View file

@ -1,4 +1,5 @@
""" Helping new users figure out the lay of the land """
import re
from django.contrib.auth.decorators import login_required
@ -13,6 +14,7 @@ from django.views import View
from bookwyrm import book_search, forms, models
from bookwyrm.settings import INSTANCE_ACTOR_USERNAME
from bookwyrm.suggested_users import suggested_users
from bookwyrm.views.helpers import get_mergeable_object_or_404
from .preferences.edit_user import save_user_form
@ -80,8 +82,8 @@ class GetStartedBooks(View):
for k, v in request.POST.items()
if re.match(r"\d+", k) and re.match(r"\d+", v)
]
for (book_id, shelf_id) in shelve_actions:
book = get_object_or_404(models.Edition, id=book_id)
for book_id, shelf_id in shelve_actions:
book = get_mergeable_object_or_404(models.Edition, id=book_id)
shelf = get_object_or_404(models.Shelf, id=shelf_id)
models.ShelfBook.objects.create(book=book, shelf=shelf, user=request.user)

View file

@ -1,4 +1,5 @@
""" helper functions used in various views """
import re
from datetime import datetime, timedelta
import dateutil.parser
@ -8,7 +9,7 @@ from dateutil.parser import ParserError
from requests import HTTPError
from django.db.models import Q
from django.conf import settings as django_settings
from django.shortcuts import redirect
from django.shortcuts import redirect, _get_queryset
from django.http import Http404
from django.utils import translation
@ -232,3 +233,19 @@ def redirect_to_referer(request, *args, **kwargs):
# if not, use the args passed you'd normally pass to redirect()
return redirect(*args or "/", **kwargs)
# pylint: disable=redefined-builtin,invalid-name
def get_mergeable_object_or_404(klass, id):
"""variant of get_object_or_404 that also redirects if id has been merged
into another object"""
queryset = _get_queryset(klass)
try:
return queryset.get(pk=id)
except queryset.model.DoesNotExist:
try:
return queryset.get(absorbed__deleted_id=id)
except queryset.model.DoesNotExist:
pass
raise Http404(f"No {queryset.model} with ID {id} exists")

View file

@ -1,4 +1,5 @@
""" the good stuff! the books! """
import logging
from django.contrib.auth.decorators import login_required
from django.core.cache import cache
@ -11,6 +12,7 @@ from django.views import View
from django.views.decorators.http import require_POST
from bookwyrm import forms, models
from bookwyrm.views.helpers import get_mergeable_object_or_404
from bookwyrm.views.shelf.shelf_actions import unshelve
from .status import CreateStatus
from .helpers import get_edition, handle_reading_status, is_api_request
@ -130,7 +132,7 @@ class ReadThrough(View):
def get(self, request, book_id, readthrough_id=None):
"""standalone form in case of errors"""
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
form = forms.ReadThroughForm()
data = {"form": form, "book": book}
if readthrough_id:
@ -152,7 +154,7 @@ class ReadThrough(View):
)
form = forms.ReadThroughForm(request.POST)
if not form.is_valid():
book = get_object_or_404(models.Edition, id=book_id)
book = get_mergeable_object_or_404(models.Edition, id=book_id)
data = {"form": form, "book": book}
if request.POST.get("id"):
data["readthrough"] = get_object_or_404(

View file

@ -1,11 +1,12 @@
""" shelf views """
from django.db import IntegrityError, transaction
from django.contrib.auth.decorators import login_required
from django.shortcuts import get_object_or_404, redirect
from django.views.decorators.http import require_POST
from bookwyrm import forms, models
from bookwyrm.views.helpers import redirect_to_referer
from bookwyrm.views.helpers import redirect_to_referer, get_mergeable_object_or_404
@login_required
@ -36,7 +37,7 @@ def delete_shelf(request, shelf_id):
@transaction.atomic
def shelve(request):
"""put a book on a user's shelf"""
book = get_object_or_404(models.Edition, id=request.POST.get("book"))
book = get_mergeable_object_or_404(models.Edition, id=request.POST.get("book"))
desired_shelf = get_object_or_404(
request.user.shelf_set, identifier=request.POST.get("shelf")
)
@ -97,7 +98,7 @@ def shelve(request):
def unshelve(request, book_id=False):
"""remove a book from a user's shelf"""
identity = book_id if book_id else request.POST.get("book")
book = get_object_or_404(models.Edition, id=identity)
book = get_mergeable_object_or_404(models.Edition, id=identity)
shelf_book = get_object_or_404(
models.ShelfBook, book=book, shelf__id=request.POST["shelf"]
)

View file

@ -1,4 +1,5 @@
""" what are we here for if not for posting """
import re
import logging
@ -19,6 +20,7 @@ from markdown import markdown
from bookwyrm import forms, models
from bookwyrm.models.report import DELETE_ITEM
from bookwyrm.utils import regex, sanitizer
from bookwyrm.views.helpers import get_mergeable_object_or_404
from .helpers import handle_remote_webfinger, is_api_request
from .helpers import load_date_in_user_tz_as_utc, redirect_to_referer
@ -52,7 +54,7 @@ class CreateStatus(View):
def get(self, request, status_type): # pylint: disable=unused-argument
"""compose view (...not used?)"""
book = get_object_or_404(models.Edition, id=request.GET.get("book"))
book = get_mergeable_object_or_404(models.Edition, id=request.GET.get("book"))
data = {"book": book}
return TemplateResponse(request, "compose.html", data)
@ -98,7 +100,7 @@ class CreateStatus(View):
# inspect the text for user tags
content = status.content
mentions = find_mentions(request.user, content)
for (_, mention_user) in mentions.items():
for _, mention_user in mentions.items():
# add them to status mentions fk
status.mention_users.add(mention_user)
content = format_mentions(content, mentions)
@ -109,7 +111,7 @@ class CreateStatus(View):
# inspect the text for hashtags
hashtags = find_or_create_hashtags(content)
for (_, mention_hashtag) in hashtags.items():
for _, mention_hashtag in hashtags.items():
# add them to status mentions fk
status.mention_hashtags.add(mention_hashtag)
content = format_hashtags(content, hashtags)
@ -140,7 +142,7 @@ class CreateStatus(View):
def format_mentions(content, mentions):
"""Detect @mentions and make them links"""
for (mention_text, mention_user) in mentions.items():
for mention_text, mention_user in mentions.items():
# turn the mention into a link
content = re.sub(
rf"(?<!/)\B{mention_text}\b(?!@)",
@ -152,7 +154,7 @@ def format_mentions(content, mentions):
def format_hashtags(content, hashtags):
"""Detect #hashtags and make them links"""
for (mention_text, mention_hashtag) in hashtags.items():
for mention_text, mention_hashtag in hashtags.items():
# turn the mention into a link
content = re.sub(
rf"(?<!/)\B{mention_text}\b(?!@)",