From d149e57494757143ed905802e5a0d24930d22d2f Mon Sep 17 00:00:00 2001
From: Mouse Reeve
Date: Tue, 31 May 2022 12:41:57 -0700
Subject: [PATCH] Split expand book data task into per-edition tasks

Loading every edition in one task takes ages, and produces a large task
that clogs up the queue. This will create more, smaller tasks that will
finish more quickly.
---
 bookwyrm/connectors/connector_manager.py |  9 +++++++++
 bookwyrm/connectors/inventaire.py        | 18 +++++++++++-------
 bookwyrm/connectors/openlibrary.py       |  4 ++--
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py
index 86774af56..37b093aa9 100644
--- a/bookwyrm/connectors/connector_manager.py
+++ b/bookwyrm/connectors/connector_manager.py
@@ -152,6 +152,15 @@ def load_more_data(connector_id, book_id):
     connector.expand_book_data(book)
 
 
+@app.task(queue="low_priority")
+def create_edition_task(connector_id, work_id, data):
+    """separate task for each of the 10,000 editions of LoTR"""
+    connector_info = models.Connector.objects.get(id=connector_id)
+    connector = load_connector(connector_info)
+    work = models.Work.objects.select_subclasses().get(id=work_id)
+    connector.create_edition_from_data(work, data)
+
+
 def load_connector(connector_info):
     """instantiate the connector class"""
     connector = importlib.import_module(
diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py
index c13f4e3e6..3d5f913bd 100644
--- a/bookwyrm/connectors/inventaire.py
+++ b/bookwyrm/connectors/inventaire.py
@@ -5,7 +5,7 @@ from bookwyrm import models
 from bookwyrm.book_search import SearchResult
 from .abstract_connector import AbstractConnector, Mapping
 from .abstract_connector import get_data
-from .connector_manager import ConnectorException
+from .connector_manager import ConnectorException, create_edition_task
 
 
 class Connector(AbstractConnector):
@@ -156,12 +156,16 @@ class Connector(AbstractConnector):
 
         for edition_uri in edition_options.get("uris"):
             remote_id = self.get_remote_id(edition_uri)
-            try:
-                data = self.get_book_data(remote_id)
-            except ConnectorException:
-                # who, indeed, knows
-                continue
-            self.create_edition_from_data(work, data)
+            create_edition_task.delay(self.connector.id, work.id, remote_id)
+
+    def create_edition_from_data(self, work, edition_data, instance=None):
+        """pass in the url as data and then call the version in abstract connector"""
+        try:
+            data = self.get_book_data(edition_data)
+        except ConnectorException:
+            # who, indeed, knows
+            return
+        super().create_edition_from_data(work, data, instance=instance)
 
     def get_cover_url(self, cover_blob, *_):
         """format the relative cover url into an absolute one:
diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py
index 2b625dffc..5288cc2be 100644
--- a/bookwyrm/connectors/openlibrary.py
+++ b/bookwyrm/connectors/openlibrary.py
@@ -5,7 +5,7 @@ from bookwyrm import models
 from bookwyrm.book_search import SearchResult
 from .abstract_connector import AbstractConnector, Mapping
 from .abstract_connector import get_data, infer_physical_format, unique_physical_format
-from .connector_manager import ConnectorException
+from .connector_manager import ConnectorException, create_edition_task
 from .openlibrary_languages import languages
 
 
@@ -204,7 +204,7 @@ class Connector(AbstractConnector):
             # does this edition have ANY interesting data?
             if ignore_edition(edition_data):
                 continue
-            self.create_edition_from_data(work, edition_data)
+            create_edition_task.delay(self.connector.id, work.id, edition_data)
 
 
 def ignore_edition(edition_data):