Merge pull request #2821 from bpeel/merge-commands

Add management commands to merge a pair of editions or authors
This commit is contained in:
Mouse Reeve 2023-04-25 16:27:12 -07:00 committed by GitHub
commit 8fa89f5ece
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 105 additions and 36 deletions

View file

@ -3,38 +3,7 @@ merge book data objects """
from django.core.management.base import BaseCommand
from django.db.models import Count
from bookwyrm import models
def update_related(canonical, obj):
    """point everything that references ``obj`` at ``canonical`` instead

    Walks the reverse relations listed in ``canonical._meta.related_objects``
    and, for each related model, rewrites every row that currently refers to
    ``obj`` so that it refers to ``canonical``.
    """
    # collect (field name on the related model, related model) pairs up front
    targets = [
        (relation.remote_field.name, relation.related_model)
        for relation in canonical._meta.related_objects
    ]
    for field_name, model in targets:
        lookup = {field_name: obj}
        for row in model.objects.filter(**lookup):
            print("replacing in", model.__name__, field_name, row.id)
            try:
                setattr(row, field_name, canonical)
                row.save()
            except TypeError:
                # plain assignment was rejected (presumably a many-to-many
                # field), so swap the members of the relation instead
                members = getattr(row, field_name)
                members.add(canonical)
                members = getattr(row, field_name)
                members.remove(obj)
def copy_data(canonical, obj):
    """fill the gaps in ``canonical`` with whatever ``obj`` knows

    Only fields that carry an ``activitypub_field`` attribute are considered.
    A value is copied when ``obj`` has a truthy value and ``canonical`` has a
    falsy one; values already present on ``canonical`` are never overwritten.
    ``canonical`` is saved once at the end.
    """
    candidates = (
        field
        for field in obj._meta.get_fields()
        if hasattr(field, "activitypub_field")
    )
    for field in candidates:
        incoming = getattr(obj, field.name)
        if incoming and not getattr(canonical, field.name):
            print("setting data field", field.name, incoming)
            setattr(canonical, field.name, incoming)
    canonical.save()
from bookwyrm.management.merge import merge_objects
def dedupe_model(model):
@ -61,10 +30,7 @@ def dedupe_model(model):
print("keeping", canonical.remote_id)
for obj in objs[1:]:
print(obj.remote_id)
copy_data(canonical, obj)
update_related(canonical, obj)
# remove the outdated entry
obj.delete()
merge_objects(canonical, obj)
class Command(BaseCommand):

View file

@ -0,0 +1,12 @@
""" PROCEED WITH CAUTION: uses deduplication fields to permanently
merge author data objects """
from bookwyrm import models
from bookwyrm.management.merge_command import MergeCommand
class Command(MergeCommand):
    """management command that merges one author into another, chosen by ID"""

    # model whose objects the inherited merge logic looks up and merges
    MODEL = models.Author
    help = "merges specified authors into one"

View file

@ -0,0 +1,12 @@
""" PROCEED WITH CAUTION: uses deduplication fields to permanently
merge edition data objects """
from bookwyrm import models
from bookwyrm.management.merge_command import MergeCommand
class Command(MergeCommand):
    """management command that merges one edition into another, chosen by ID"""

    # model whose objects the inherited merge logic looks up and merges
    MODEL = models.Edition
    help = "merges specified editions into one"

View file

@ -0,0 +1,50 @@
from django.db.models import ManyToManyField
def update_related(canonical, obj):
    """update all the models with fk to the object being removed

    For every reverse relation listed in ``canonical._meta.related_objects``,
    finds the rows of the related model that refer to ``obj`` and makes them
    refer to ``canonical`` instead.

    Args:
        canonical: the object that is being kept.
        obj: the object that is about to be removed.
    """
    # move related models to canonical
    related_models = [
        (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects
    ]
    for related_field, related_model in related_models:
        # Skip the ManyToMany fields that aren't auto-created. These
        # should have a corresponding OneToMany field in the model for
        # the linking table anyway. If we update it through that model
        # instead then we won't lose the extra fields in the linking
        # table.
        related_field_obj = related_model._meta.get_field(related_field)
        if isinstance(related_field_obj, ManyToManyField):
            through = related_field_obj.remote_field.through
            if not through._meta.auto_created:
                continue
        related_objs = related_model.objects.filter(**{related_field: obj})
        for related_obj in related_objs:
            print("replacing in", related_model.__name__, related_field, related_obj.id)
            try:
                setattr(related_obj, related_field, canonical)
            except TypeError:
                # Direct assignment was rejected, which is how a many-to-many
                # field responds; swap the members of the relation instead.
                relation = getattr(related_obj, related_field)
                relation.add(canonical)
                relation.remove(obj)
            else:
                # Only the assignment is guarded: a TypeError raised while
                # saving is a real error and must not be mistaken for the
                # many-to-many case above.
                related_obj.save()
def copy_data(canonical, obj):
    """copy over any data ``canonical`` is missing but ``obj`` has

    Looks only at fields that have an ``activitypub_field`` attribute. Existing
    (truthy) values on ``canonical`` are kept; empty ones are filled from
    ``obj`` when ``obj`` has something truthy to offer. ``canonical`` is saved
    once, after all fields have been examined.
    """
    for field in obj._meta.get_fields():
        if hasattr(field, "activitypub_field"):
            donor_value = getattr(obj, field.name)
            if donor_value and not getattr(canonical, field.name):
                print("setting data field", field.name, donor_value)
                setattr(canonical, field.name, donor_value)
    canonical.save()
def merge_objects(canonical, obj):
    """merge ``obj`` into ``canonical`` and then delete ``obj``

    Copies missing data onto ``canonical``, re-points related objects from
    ``obj`` to ``canonical``, and finally deletes ``obj``.

    Args:
        canonical: the object to keep.
        obj: the duplicate to fold into ``canonical``; deleted on success.

    Raises:
        ValueError: if ``canonical`` and ``obj`` compare equal. Merging a
            record into itself would end with the only copy being deleted.
    """
    # Django model instances compare equal when they are the same model with
    # the same primary key, so this also catches two separately loaded copies
    # of one row.
    if canonical == obj:
        raise ValueError("cannot merge an object into itself")
    copy_data(canonical, obj)
    update_related(canonical, obj)
    # remove the outdated entry
    obj.delete()

View file

@ -0,0 +1,29 @@
from bookwyrm.management.merge import merge_objects
from django.core.management.base import BaseCommand
class MergeCommand(BaseCommand):
    """base class for merge commands

    Subclasses must set ``MODEL`` to the model class whose objects are to be
    merged; ``handle`` looks both objects up on that model by ID.
    """

    def add_arguments(self, parser):
        """add the arguments for this command"""
        parser.add_argument("--canonical", type=int, required=True)
        parser.add_argument("--other", type=int, required=True)

    # pylint: disable=unused-argument
    def handle(self, *args, **options):
        """merge the object given by --other into the one given by --canonical

        Prints a message and returns without changing anything when the two
        IDs are identical or when either object cannot be found.
        """
        model = self.MODEL
        # name the actual model in messages; this command is not only for books
        label = model.__name__

        # the merge deletes the "other" object at the end, so merging an
        # object into itself would simply destroy it
        if options["canonical"] == options["other"]:
            print(f"cannot merge a {label} into itself!")
            return

        try:
            canonical = model.objects.get(id=options["canonical"])
        except model.DoesNotExist:
            print(f"canonical {label} doesn't exist!")
            return
        try:
            other = model.objects.get(id=options["other"])
        except model.DoesNotExist:
            print(f"other {label} doesn't exist!")
            return

        merge_objects(canonical, other)