Merge pull request #2333 from bookwyrm-social/cancel-imports

Let users and admins cancel imports
This commit is contained in:
Mouse Reeve 2022-11-07 10:35:37 -08:00 committed by GitHub
commit 4e1da6a759
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 354 additions and 175 deletions

View file

@ -1,15 +1,7 @@
""" handle reading a csv from an external service, defaults are from Goodreads """
import csv
import logging
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from bookwyrm import models
from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.tasks import app, LOW
logger = logging.getLogger(__name__)
class Importer:
@ -118,127 +110,3 @@ class Importer:
# this will re-normalize the raw data
self.create_item(job, item.index, item.data)
return job
def start_import(self, job):  # pylint: disable=no-self-use
    """Queue the task that starts a csv import job.

    Dispatches ``start_import_task`` with the job's id, stores the id of the
    returned task on ``job.task_id`` and saves the job.
    """
    async_result = start_import_task.delay(job.id)
    job.task_id = async_result.id
    job.save()
@app.task(queue="low_priority")
def start_import_task(job_id):
    """Queue one ``import_item_task`` for every item of the given import job."""
    import_job = ImportJob.objects.get(id=job_id)
    item_ids = import_job.items.values_list("id", flat=True)
    # each row gets its own sub-task so that one big task doesn't use up all
    # the memory in celery
    for item_id in item_ids:
        import_item_task.delay(item_id)
@app.task(queue="low_priority")
def import_item_task(item_id):
    """Resolve one import row into a book and process it.

    If resolving raises, the failure reason is stored on the item, the job is
    notified and the exception is re-raised. If resolving finds no book, the
    item is saved with a "no match" failure reason. Otherwise the book is
    handed to ``handle_imported_book``. In the last two cases the job is
    notified afterwards.
    """
    import_item = models.ImportItem.objects.get(id=item_id)
    try:
        import_item.resolve()
    except Exception as err:  # pylint: disable=broad-except
        import_item.fail_reason = _("Error loading book")
        import_item.save()
        import_item.update_job()
        raise err

    if not import_item.book:
        import_item.fail_reason = _("Could not find a match for book")
        import_item.save()
    else:
        # shelves book and handles reviews
        handle_imported_book(import_item)

    import_item.update_job()
def handle_imported_book(item):
"""Shelve an imported item's book and record its reads and review.

Works on behalf of ``item.job.user``:

* normalises ``item.book`` to an edition, recording "Error loading book"
  on the item and returning early when no book is left;
* shelves the book on the shelf named by ``item.shelf`` unless the user
  already has a ShelfBook for it;
* saves every entry of ``item.reads`` that has no ReadThrough with the same
  user, book and dates;
* when the job includes reviews, the item has a rating or review text and
  no review is linked yet, looks up or creates a Review / ReviewRating and
  links it to the item.

NOTE(review): indentation is not visible in this view, so the exact nesting
of the statements in the review section should be confirmed against the
real file.
"""
job = item.job
user = job.user
# a Work is swapped for its default edition
if isinstance(item.book, models.Work):
item.book = item.book.default_edition
# nothing usable to import: record the failure on the item and stop
if not item.book:
item.fail_reason = _("Error loading book")
item.save()
return
# any other non-Edition object is narrowed through its `edition` attribute
if not isinstance(item.book, models.Edition):
item.book = item.book.edition
# true when the user already has a ShelfBook for this book (shelf is not part
# of the filter)
existing_shelf = models.ShelfBook.objects.filter(book=item.book, user=user).exists()
# shelve the book if it hasn't been shelved already
if item.shelf and not existing_shelf:
desired_shelf = models.Shelf.objects.get(identifier=item.shelf, user=user)
# fall back to the current time when the item has no date added
shelved_date = item.date_added or timezone.now()
models.ShelfBook(
book=item.book, shelf=desired_shelf, user=user, shelved_date=shelved_date
).save(priority=LOW)
for read in item.reads:
# check for an existing readthrough with the same dates
if models.ReadThrough.objects.filter(
user=user,
book=item.book,
start_date=read.start_date,
finish_date=read.finish_date,
).exists():
continue
read.book = item.book
read.user = user
read.save()
# skipped when the job excludes reviews, the item carries neither a rating nor
# review text, or a review is already linked to the item
if job.include_reviews and (item.rating or item.review) and not item.linked_review:
# we don't know the publication date of the review,
# but "now" is a bad guess
published_date_guess = item.date_read or item.date_added
if item.review:
# pylint: disable=consider-using-f-string
review_title = "Review of {!r} on {!r}".format(
item.book.title,
job.source,
)
# look for a review with the same title, rating and date before creating one
review = models.Review.objects.filter(
user=user,
book=item.book,
name=review_title,
rating=item.rating,
published_date=published_date_guess,
).first()
if not review:
review = models.Review(
user=user,
book=item.book,
name=review_title,
content=item.review,
rating=item.rating,
published_date=published_date_guess,
privacy=job.privacy,
)
# NOTE(review): presumably only the newly created review is saved here - confirm
review.save(software="bookwyrm", priority=LOW)
else:
# just a rating
review = models.ReviewRating.objects.filter(
user=user,
book=item.book,
published_date=published_date_guess,
rating=item.rating,
).first()
if not review:
review = models.ReviewRating(
user=user,
book=item.book,
rating=item.rating,
published_date=published_date_guess,
privacy=job.privacy,
)
# NOTE(review): presumably only the newly created rating is saved here - confirm
review.save(software="bookwyrm", priority=LOW)
# only broadcast this review to other bookwyrm instances
item.linked_review = review
item.save()

View file

@ -0,0 +1,32 @@
# Generated by Django 3.2.15 on 2022-11-05 20:30
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add ``task_id`` to import items and a nullable ``status`` to import jobs."""

    dependencies = [("bookwyrm", "0159_auto_20220924_0634")]

    operations = [
        migrations.AddField(
            model_name="importitem",
            name="task_id",
            field=models.CharField(null=True, blank=True, max_length=200),
        ),
        migrations.AddField(
            model_name="importjob",
            name="status",
            field=models.CharField(
                null=True,
                max_length=50,
                choices=[
                    ("pending", "Pending"),
                    ("active", "Active"),
                    ("complete", "Complete"),
                    ("stopped", "Stopped"),
                ],
            ),
        ),
    ]

View file

@ -0,0 +1,28 @@
# Generated by Django 3.2.15 on 2022-11-05 20:40
from django.db import migrations, models
class Migration(migrations.Migration):
    """Give the import job ``status`` field a default of "pending"."""

    dependencies = [("bookwyrm", "0160_auto_20221105_2030")]

    operations = [
        migrations.AlterField(
            model_name="importjob",
            name="status",
            field=models.CharField(
                null=True,
                default="pending",
                max_length=50,
                choices=[
                    ("pending", "Pending"),
                    ("active", "Active"),
                    ("complete", "Complete"),
                    ("stopped", "Stopped"),
                ],
            ),
        ),
    ]

View file

@ -0,0 +1,18 @@
# Generated by Django 3.2.15 on 2022-11-05 22:28
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a nullable ``task_id`` column to import jobs."""

    dependencies = [("bookwyrm", "0161_alter_importjob_status")]

    operations = [
        migrations.AddField(
            model_name="importjob",
            name="task_id",
            field=models.CharField(null=True, blank=True, max_length=200),
        ),
    ]

View file

@ -5,9 +5,21 @@ import dateutil.parser
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from bookwyrm.connectors import connector_manager
from bookwyrm.models import ReadThrough, User, Book, Edition
from bookwyrm.models import (
User,
Book,
Edition,
Work,
ShelfBook,
Shelf,
ReadThrough,
Review,
ReviewRating,
)
from bookwyrm.tasks import app, LOW
from .fields import PrivacyLevels
@ -31,6 +43,14 @@ def construct_search_term(title, author):
return " ".join([title, author])
# choices for ImportJob.status: (stored value, translated display label) pairs
ImportStatuses = [
("pending", _("Pending")),
("active", _("Active")),
("complete", _("Complete")),
("stopped", _("Stopped")),
]
class ImportJob(models.Model):
"""entry for a specific request for book data import"""
@ -39,10 +59,44 @@ class ImportJob(models.Model):
updated_date = models.DateTimeField(default=timezone.now)
include_reviews = models.BooleanField(default=True)
mappings = models.JSONField()
complete = models.BooleanField(default=False)
source = models.CharField(max_length=100)
privacy = models.CharField(max_length=255, default="public", choices=PrivacyLevels)
retry = models.BooleanField(default=False)
task_id = models.CharField(max_length=200, null=True, blank=True)
complete = models.BooleanField(default=False)
status = models.CharField(
max_length=50, choices=ImportStatuses, default="pending", null=True
)
def start_job(self):
    """Queue the import task for this job and mark the job as active.

    The id of the queued task is kept in ``task_id``. Only ``status`` and
    ``task_id`` are written back to the database.
    """
    async_result = start_import_task.delay(self.id)
    self.status = "active"
    self.task_id = async_result.id
    self.save(update_fields=["status", "task_id"])
def complete_job(self):
    """Mark the job as complete.

    Sets ``status`` to "complete" and the ``complete`` flag to True. Any
    items still pending at that point are given the "Import stopped" fail
    reason. Only ``status`` and ``complete`` are saved on the job itself.
    """
    self.complete = True
    self.status = "complete"
    self.pending_items.update(fail_reason=_("Import stopped"))
    self.save(update_fields=["status", "complete"])
def stop_job(self):
    """Stop the job and revoke its outstanding tasks.

    The job is saved as stopped and complete first, so tasks that are
    already running see ``complete`` and bail out. Pending items are then
    given the "Import stopped" fail reason, and both the job's own task and
    the tasks queued for those items are revoked.
    """
    self.status = "stopped"
    self.complete = True
    self.save(update_fields=["status", "complete"])

    # Read the item task ids *before* marking the items as failed.
    # pending_items is defined outside this block; if it filters on
    # fail_reason, querying it after the update would return nothing and no
    # item task would ever be revoked.
    item_task_ids = list(
        self.pending_items.filter(task_id__isnull=False).values_list(
            "task_id", flat=True
        )
    )
    self.pending_items.update(fail_reason=_("Import stopped"))

    # stop the task that may still be queueing item tasks; task_id is
    # nullable, so there may be nothing to revoke
    if self.task_id:
        app.control.revoke(self.task_id, terminate=True)
    # avoid broadcasting a revoke for an empty list of tasks
    if item_task_ids:
        app.control.revoke(item_task_ids)
@property
def pending_items(self):
@ -97,15 +151,18 @@ class ImportItem(models.Model):
linked_review = models.ForeignKey(
"Review", on_delete=models.SET_NULL, null=True, blank=True
)
task_id = models.CharField(max_length=200, null=True, blank=True)
def update_job(self):
"""let the job know when the items get work done"""
job = self.job
if job.complete:
return
job.updated_date = timezone.now()
job.save()
if not job.pending_items.exists() and not job.complete:
job.complete = True
job.save(update_fields=["complete"])
job.complete_job()
def resolve(self):
"""try various ways to lookup a book"""
@ -269,3 +326,136 @@ class ImportItem(models.Model):
return "{} by {}".format(
self.normalized_data.get("title"), self.normalized_data.get("authors")
)
@app.task(queue="low_priority")
def start_import_task(job_id):
    """Queue one ``import_item_task`` per item of the job.

    Does nothing when the job is already marked complete. The id of each
    queued task is stored on its item, and the job is marked active once
    every item has been queued.
    """
    job = ImportJob.objects.get(id=job_id)

    # don't start the job if it was stopped from the UI
    if job.complete:
        return

    # these are sub-tasks so that one big task doesn't use up all the memory in celery
    for item in job.items.all():
        task = import_item_task.delay(item.id)
        item.task_id = task.id
        # The item task may already be running and saving its own results, so
        # write task_id only instead of the whole (possibly stale) row.
        item.save(update_fields=["task_id"])

    # The job may have been stopped while items were being queued. Saving the
    # instance loaded above would reset `complete` and `status`, so update the
    # row only if it is still incomplete.
    ImportJob.objects.filter(id=job_id, complete=False).update(status="active")
@app.task(queue="low_priority")
def import_item_task(item_id):
    """Resolve one import row into a book and process it.

    Returns immediately when the item's job is marked complete. If resolving
    raises, the failure reason is stored on the item, the job is notified and
    the exception is re-raised. If resolving finds no book, the item is saved
    with a "no match" failure reason. Otherwise the book is handed to
    ``handle_imported_book``. In the last two cases the job is notified
    afterwards.
    """
    import_item = ImportItem.objects.get(id=item_id)

    # make sure the job has not been stopped
    if import_item.job.complete:
        return

    try:
        import_item.resolve()
    except Exception as err:  # pylint: disable=broad-except
        import_item.fail_reason = _("Error loading book")
        import_item.save()
        import_item.update_job()
        raise err

    if not import_item.book:
        import_item.fail_reason = _("Could not find a match for book")
        import_item.save()
    else:
        # shelves book and handles reviews
        handle_imported_book(import_item)

    import_item.update_job()
def handle_imported_book(item):
"""Shelve an imported item's book and record its reads and review.

Returns without doing anything when the item's job is marked complete.
Otherwise works on behalf of ``item.job.user``:

* normalises ``item.book`` to an edition, recording "Error loading book"
  on the item and returning early when no book is left;
* shelves the book on the shelf named by ``item.shelf`` unless the user
  already has a ShelfBook for it;
* saves every entry of ``item.reads`` that has no ReadThrough with the same
  user, book and dates;
* when the job includes reviews, the item has a rating or review text and
  no review is linked yet, looks up or creates a Review / ReviewRating and
  links it to the item.

NOTE(review): indentation is not visible in this view, so the exact nesting
of the statements in the review section should be confirmed against the
real file.
"""
job = item.job
# the job was stopped or has finished: leave the item untouched
if job.complete:
return
user = job.user
# a Work is swapped for its default edition
if isinstance(item.book, Work):
item.book = item.book.default_edition
# nothing usable to import: record the failure on the item and stop
if not item.book:
item.fail_reason = _("Error loading book")
item.save()
return
# any other non-Edition object is narrowed through its `edition` attribute
if not isinstance(item.book, Edition):
item.book = item.book.edition
# true when the user already has a ShelfBook for this book (shelf is not part
# of the filter)
existing_shelf = ShelfBook.objects.filter(book=item.book, user=user).exists()
# shelve the book if it hasn't been shelved already
if item.shelf and not existing_shelf:
desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user)
# fall back to the current time when the item has no date added
shelved_date = item.date_added or timezone.now()
ShelfBook(
book=item.book, shelf=desired_shelf, user=user, shelved_date=shelved_date
).save(priority=LOW)
for read in item.reads:
# check for an existing readthrough with the same dates
if ReadThrough.objects.filter(
user=user,
book=item.book,
start_date=read.start_date,
finish_date=read.finish_date,
).exists():
continue
read.book = item.book
read.user = user
read.save()
# skipped when the job excludes reviews, the item carries neither a rating nor
# review text, or a review is already linked to the item
if job.include_reviews and (item.rating or item.review) and not item.linked_review:
# we don't know the publication date of the review,
# but "now" is a bad guess
published_date_guess = item.date_read or item.date_added
if item.review:
# pylint: disable=consider-using-f-string
review_title = "Review of {!r} on {!r}".format(
item.book.title,
job.source,
)
# look for a review with the same title, rating and date before creating one
review = Review.objects.filter(
user=user,
book=item.book,
name=review_title,
rating=item.rating,
published_date=published_date_guess,
).first()
if not review:
review = Review(
user=user,
book=item.book,
name=review_title,
content=item.review,
rating=item.rating,
published_date=published_date_guess,
privacy=job.privacy,
)
# NOTE(review): presumably only the newly created review is saved here - confirm
review.save(software="bookwyrm", priority=LOW)
else:
# just a rating
review = ReviewRating.objects.filter(
user=user,
book=item.book,
published_date=published_date_guess,
rating=item.rating,
).first()
if not review:
review = ReviewRating(
user=user,
book=item.book,
rating=item.rating,
published_date=published_date_guess,
privacy=job.privacy,
)
# NOTE(review): presumably only the newly created rating is saved here - confirm
review.save(software="bookwyrm", priority=LOW)
# only broadcast this review to other bookwyrm instances
item.linked_review = review
item.save()

View file

@ -120,17 +120,26 @@
<td>{{ job.updated_date }}</td>
<td>{{ job.item_count|intcomma }}</td>
<td>
{% if job.complete %}
<span class="tag is-success">
{% trans "Completed" %}
<span
{% if job.status == "stopped" %}
class="tag is-danger"
{% elif job.status == "pending" %}
class="tag is-warning"
{% elif job.complete %}
class="tag"
{% else %}
class="tag is-success"
{% endif %}
>
{% if job.status %}
{{ job.status }}
{{ job.status_display }}
{% elif job.complete %}
{% trans "Complete" %}
{% else %}
{% trans "Active" %}
{% endif %}
</span>
{% else %}
<span class="tag is-warning">
{% blocktrans trimmed with percent=job.percent_complete %}
Active, {{ percent }}% complete
{% endblocktrans %}
</span>
{% endif %}
</td>
</tr>
{% endfor %}

View file

@ -66,6 +66,13 @@
</div>
{% endif %}
{% if not job.complete %}
<form name="stop-import" action="{% url 'import-stop' job.id %}" method="POST">
{% csrf_token %}
<button class="button is-danger" type="submit">{% trans "Stop import" %}</button>
</form>
{% endif %}
{% if manual_review_count and not legacy %}
<div class="notification">
{% blocktrans trimmed count counter=manual_review_count with display_counter=manual_review_count|intcomma %}

View file

@ -1,7 +1,7 @@
{% extends 'components/modal.html' %}
{% load i18n %}
{% block modal-title %}{% trans "Mark import as complete?" %}{% endblock %}
{% block modal-title %}{% trans "Stop import?" %}{% endblock %}
{% block modal-body %}
{% trans "This action cannot be un-done" %}

View file

@ -27,11 +27,6 @@
</div>
<div class="table-container block content">
{% if status == "active" %}
<div class="notification is-warning">
<p>{% trans "Marking an import as complete will <em>not</em> stop it." %}</p>
</div>
{% endif %}
<table class="table is-striped is-fullwidth">
<tr>
{% url 'settings-imports' as url %}
@ -82,7 +77,7 @@
{% if status == "active" %}
<td>
{% join "complete" import.id as modal_id %}
<button type="button" data-modal-open="{{ modal_id }}" class="button is-danger">{% trans "Mark as complete" %}</button>
<button type="button" data-modal-open="{{ modal_id }}" class="button is-danger">{% trans "Stop import" %}</button>
{% include "settings/imports/complete_import_modal.html" with id=modal_id %}
</td>
{% endif %}
@ -91,7 +86,7 @@
{% if not imports %}
<tr>
<td colspan="6">
<em>{% trans "No matching imports founds." %} </em>
<em>{% trans "No matching imports found." %} </em>
</td>
</tr>
{% endif %}

View file

@ -6,7 +6,7 @@ from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import CalibreImporter
from bookwyrm.importers.importer import handle_imported_book
from bookwyrm.models.import_job import handle_imported_book
# pylint: disable=consider-using-with
@ -16,6 +16,7 @@ from bookwyrm.importers.importer import handle_imported_book
class CalibreImport(TestCase):
"""importing from Calibre csv"""
# pylint: disable=invalid-name
def setUp(self):
"""use a test csv"""
self.importer = CalibreImporter()

View file

@ -8,7 +8,7 @@ from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import GoodreadsImporter
from bookwyrm.importers.importer import handle_imported_book
from bookwyrm.models.import_job import handle_imported_book
def make_date(*args):
@ -23,6 +23,7 @@ def make_date(*args):
class GoodreadsImport(TestCase):
"""importing from goodreads csv"""
# pylint: disable=invalid-name
def setUp(self):
"""use a test csv"""
self.importer = GoodreadsImporter()

View file

@ -1,4 +1,5 @@
""" testing import """
from collections import namedtuple
import pathlib
from unittest.mock import patch
import datetime
@ -9,8 +10,8 @@ import responses
from bookwyrm import models
from bookwyrm.importers import Importer
from bookwyrm.importers.importer import start_import_task, import_item_task
from bookwyrm.importers.importer import handle_imported_book
from bookwyrm.models.import_job import start_import_task, import_item_task
from bookwyrm.models.import_job import handle_imported_book
def make_date(*args):
@ -25,6 +26,7 @@ def make_date(*args):
class GenericImporter(TestCase):
"""importing from csv"""
# pylint: disable=invalid-name
def setUp(self):
"""use a test csv"""
@ -103,9 +105,13 @@ class GenericImporter(TestCase):
import_job = self.importer.create_job(
self.local_user, self.csv, False, "unlisted"
)
with patch("bookwyrm.importers.importer.start_import_task.delay") as mock:
self.importer.start_import(import_job)
MockTask = namedtuple("Task", ("id"))
with patch("bookwyrm.models.import_job.start_import_task.delay") as mock:
mock.return_value = MockTask(123)
import_job.start_job()
self.assertEqual(mock.call_count, 1)
import_job.refresh_from_db()
self.assertEqual(import_job.task_id, "123")
@responses.activate
def test_start_import_task(self, *_):
@ -114,7 +120,9 @@ class GenericImporter(TestCase):
self.local_user, self.csv, False, "unlisted"
)
with patch("bookwyrm.importers.importer.import_item_task.delay") as mock:
MockTask = namedtuple("Task", ("id"))
with patch("bookwyrm.models.import_job.import_item_task.delay") as mock:
mock.return_value = MockTask(123)
start_import_task(import_job.id)
self.assertEqual(mock.call_count, 4)

View file

@ -8,7 +8,7 @@ from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import LibrarythingImporter
from bookwyrm.importers.importer import handle_imported_book
from bookwyrm.models.import_job import handle_imported_book
def make_date(*args):
@ -23,6 +23,7 @@ def make_date(*args):
class LibrarythingImport(TestCase):
"""importing from librarything tsv"""
# pylint: disable=invalid-name
def setUp(self):
"""use a test tsv"""
self.importer = LibrarythingImporter()

View file

@ -8,7 +8,7 @@ from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import OpenLibraryImporter
from bookwyrm.importers.importer import handle_imported_book
from bookwyrm.models.import_job import handle_imported_book
def make_date(*args):
@ -23,6 +23,7 @@ def make_date(*args):
class OpenLibraryImport(TestCase):
"""importing from openlibrary csv"""
# pylint: disable=invalid-name
def setUp(self):
"""use a test csv"""
self.importer = OpenLibraryImporter()

View file

@ -8,7 +8,7 @@ from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import StorygraphImporter
from bookwyrm.importers.importer import handle_imported_book
from bookwyrm.models.import_job import handle_imported_book
def make_date(*args):
@ -23,6 +23,7 @@ def make_date(*args):
class StorygraphImport(TestCase):
"""importing from storygraph csv"""
# pylint: disable=invalid-name
def setUp(self):
"""use a test csv"""
self.importer = StorygraphImporter()

View file

@ -14,6 +14,7 @@ from bookwyrm.tests.validate_html import validate_html
class ImportViews(TestCase):
"""goodreads import views"""
# pylint: disable=invalid-name
def setUp(self):
"""we need basic test data and mocks"""
self.factory = RequestFactory()
@ -84,7 +85,7 @@ class ImportViews(TestCase):
request = self.factory.post("", form.data)
request.user = self.local_user
with patch("bookwyrm.importers.Importer.start_import"):
with patch("bookwyrm.models.import_job.ImportJob.start_job"):
view(request)
job = models.ImportJob.objects.get()
self.assertFalse(job.include_reviews)
@ -102,6 +103,6 @@ class ImportViews(TestCase):
)
request = self.factory.post("")
request.user = self.local_user
with patch("bookwyrm.importers.importer.import_item_task.delay") as mock:
with patch("bookwyrm.models.import_job.import_item_task.delay") as mock:
views.retry_item(request, job.id, item.id)
self.assertEqual(mock.call_count, 1)

View file

@ -11,6 +11,7 @@ from bookwyrm import models, views
class ImportManualReviewViews(TestCase):
"""goodreads import views"""
# pylint: disable=invalid-name
def setUp(self):
"""we need basic test data and mocks"""
self.factory = RequestFactory()
@ -59,7 +60,7 @@ class ImportManualReviewViews(TestCase):
request = self.factory.post("")
request.user = self.local_user
with patch("bookwyrm.importers.importer.import_item_task.delay") as mock:
with patch("bookwyrm.models.import_job.import_item_task.delay") as mock:
views.approve_import_item(request, self.job.id, import_item.id)
self.assertEqual(mock.call_count, 1)
import_item.refresh_from_db()

View file

@ -1,4 +1,5 @@
""" test for app action functionality """
from collections import namedtuple
from unittest.mock import patch
from django.template.response import TemplateResponse
from django.test import TestCase
@ -11,6 +12,7 @@ from bookwyrm import models, views
class ImportTroubleshootViews(TestCase):
"""goodreads import views"""
# pylint: disable=invalid-name
def setUp(self):
"""we need basic test data and mocks"""
self.factory = RequestFactory()
@ -48,7 +50,9 @@ class ImportTroubleshootViews(TestCase):
request = self.factory.post("")
request.user = self.local_user
with patch("bookwyrm.importers.Importer.start_import"):
MockTask = namedtuple("Task", ("id"))
with patch("bookwyrm.models.import_job.start_import_task.delay") as mock:
mock.return_value = MockTask(123)
view(request, import_job.id)
self.assertEqual(models.ImportJob.objects.count(), 2)

View file

@ -353,6 +353,11 @@ urlpatterns = [
views.ImportStatus.as_view(),
name="import-status",
),
re_path(
r"^import/(?P<job_id>\d+)/stop/?$",
views.stop_import,
name="import-stop",
),
re_path(
r"^import/(?P<job_id>\d+)/retry/(?P<item_id>\d+)/?$",
views.retry_item,

View file

@ -74,7 +74,7 @@ from .shelf.shelf_actions import shelve, unshelve
# csv import
from .imports.import_data import Import
from .imports.import_status import ImportStatus, retry_item
from .imports.import_status import ImportStatus, retry_item, stop_import
from .imports.troubleshoot import ImportTroubleshoot
from .imports.manually_review import (
ImportManualReview,

View file

@ -40,6 +40,5 @@ class ImportList(View):
def post(self, request, import_id):
"""Mark an import as complete"""
import_job = get_object_or_404(models.ImportJob, id=import_id)
import_job.complete = True
import_job.save()
import_job.stop_job()
return redirect("settings-imports")

View file

@ -97,6 +97,6 @@ class Import(View):
except (UnicodeDecodeError, ValueError, KeyError):
return HttpResponseBadRequest(_("Not a valid csv file"))
importer.start_import(job)
job.start_job()
return redirect(f"/import/{job.id}")

View file

@ -11,7 +11,7 @@ from django.views.decorators.http import require_POST
from bookwyrm import models
from bookwyrm.importers import GoodreadsImporter
from bookwyrm.importers.importer import import_item_task
from bookwyrm.models.import_job import import_item_task
from bookwyrm.settings import PAGE_LENGTH
# pylint: disable= no-self-use
@ -74,3 +74,12 @@ def retry_item(request, job_id, item_id):
)
import_item_task.delay(item.id)
return redirect("import-status", job_id)
@login_required
@require_POST
def stop_import(request, job_id):
    """Stop one of the requesting user's imports, then show its status page.

    The job is looked up by id and owner, so a job that does not exist or
    belongs to someone else results in a 404.
    """
    import_job = get_object_or_404(models.ImportJob, id=job_id, user=request.user)
    import_job.stop_job()
    return redirect("import-status", job_id)

View file

@ -9,7 +9,7 @@ from django.views import View
from django.views.decorators.http import require_POST
from bookwyrm import models
from bookwyrm.importers.importer import import_item_task
from bookwyrm.models.import_job import import_item_task
from bookwyrm.settings import PAGE_LENGTH
# pylint: disable= no-self-use

View file

@ -52,5 +52,5 @@ class ImportTroubleshoot(View):
job,
items,
)
importer.start_import(job)
job.start_job()
return redirect(f"/import/{job.id}")