diff --git a/bookwyrm/book_search.py b/bookwyrm/book_search.py index e42a6d8c3..4b0a6eab9 100644 --- a/bookwyrm/book_search.py +++ b/bookwyrm/book_search.py @@ -148,8 +148,8 @@ class SearchResult: def __repr__(self): # pylint: disable=consider-using-f-string - return "".format( - self.key, self.title, self.author + return "".format( + self.key, self.title, self.author, self.confidence ) def json(self): diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 56e273886..dc4be4b3d 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,9 +1,8 @@ """ functionality outline for a book data connector """ from abc import ABC, abstractmethod import imghdr -import ipaddress import logging -from urllib.parse import urlparse +import re from django.core.files.base import ContentFile from django.db import transaction @@ -11,7 +10,7 @@ import requests from requests.exceptions import RequestException from bookwyrm import activitypub, models, settings -from .connector_manager import load_more_data, ConnectorException +from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url from .format_mappings import format_mappings @@ -39,62 +38,34 @@ class AbstractMinimalConnector(ABC): for field in self_fields: setattr(self, field, getattr(info, field)) - def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT): - """free text search""" - params = {} - if min_confidence: - params["min_confidence"] = min_confidence + def get_search_url(self, query): + """format the query url""" + # Check if the query resembles an ISBN + if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "": + return f"{self.isbn_search_url}{query}" - data = self.get_search_data( - f"{self.search_url}{query}", - params=params, - timeout=timeout, - ) - results = [] + # NOTE: previously, we tried searching isbn and if that produces no results, + # searched as free text. This, instead, only searches isbn if it's isbn-y + return f"{self.search_url}{query}" - for doc in self.parse_search_data(data)[:10]: - results.append(self.format_search_result(doc)) - return results - - def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT): - """isbn search""" - params = {} - data = self.get_search_data( - f"{self.isbn_search_url}{query}", - params=params, - timeout=timeout, - ) - results = [] - - # this shouldn't be returning mutliple results, but just in case - for doc in self.parse_isbn_search_data(data)[:10]: - results.append(self.format_isbn_search_result(doc)) - return results - - def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use - """this allows connectors to override the default behavior""" - return get_data(remote_id, **kwargs) + def process_search_response(self, query, data, min_confidence): + """Format the search results based on the formt of the query""" + if maybe_isbn(query): + return list(self.parse_isbn_search_data(data))[:10] + return list(self.parse_search_data(data, min_confidence))[:10] @abstractmethod def get_or_create_book(self, remote_id): """pull up a book record by whatever means possible""" @abstractmethod - def parse_search_data(self, data): + def parse_search_data(self, data, min_confidence): """turn the result json from a search into a list""" - @abstractmethod - def format_search_result(self, search_result): - """create a SearchResult obj from json""" - @abstractmethod def parse_isbn_search_data(self, data): """turn the result json from a search into a list""" - @abstractmethod - def format_isbn_search_result(self, search_result): - """create a SearchResult obj from json""" - class AbstractConnector(AbstractMinimalConnector): """generic book data connector""" @@ -254,9 +225,6 @@ def get_data(url, params=None, timeout=10): # check if the url is blocked raise_not_valid_url(url) - if models.FederatedServer.is_blocked(url): - raise ConnectorException(f"Attempting to load data from blocked url: {url}") - try: resp = requests.get( url, @@ -311,20 +279,6 @@ def get_image(url, timeout=10): return image_content, extension -def raise_not_valid_url(url): - """do some basic reality checks on the url""" - parsed = urlparse(url) - if not parsed.scheme in ["http", "https"]: - raise ConnectorException("Invalid scheme: ", url) - - try: - ipaddress.ip_address(parsed.netloc) - raise ConnectorException("Provided url is an IP address: ", url) - except ValueError: - # it's not an IP address, which is good - pass - - class Mapping: """associate a local database field with a field in an external dataset""" @@ -366,3 +320,9 @@ def unique_physical_format(format_text): # try a direct match, so saving this would be redundant return None return format_text + + +def maybe_isbn(query): + """check if a query looks like an isbn""" + isbn = re.sub(r"[\W_]", "", query) # removes filler characters + return len(isbn) in [10, 13] # ISBN10 or ISBN13 diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index 6dcba7c31..e07a0b281 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -10,15 +10,12 @@ class Connector(AbstractMinimalConnector): def get_or_create_book(self, remote_id): return activitypub.resolve_remote_id(remote_id, model=models.Edition) - def parse_search_data(self, data): - return data - - def format_search_result(self, search_result): - search_result["connector"] = self - return SearchResult(**search_result) + def parse_search_data(self, data, min_confidence): + for search_result in data: + search_result["connector"] = self + yield SearchResult(**search_result) def parse_isbn_search_data(self, data): - return data - - def format_isbn_search_result(self, search_result): - return self.format_search_result(search_result) + for search_result in data: + search_result["connector"] = self + yield SearchResult(**search_result) diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py index 14bb702cb..86774af56 100644 --- a/bookwyrm/connectors/connector_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -1,17 +1,18 @@ """ interface with whatever connectors the app has """ -from datetime import datetime +import asyncio import importlib +import ipaddress import logging -import re from urllib.parse import urlparse +import aiohttp from django.dispatch import receiver from django.db.models import signals from requests import HTTPError from bookwyrm import book_search, models -from bookwyrm.settings import SEARCH_TIMEOUT +from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT from bookwyrm.tasks import app logger = logging.getLogger(__name__) @@ -21,53 +22,85 @@ class ConnectorException(HTTPError): """when the connector can't do what was asked""" +async def get_results(session, url, min_confidence, query, connector): + """try this specific connector""" + # pylint: disable=line-too-long + headers = { + "Accept": ( + 'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8' + ), + "User-Agent": USER_AGENT, + } + params = {"min_confidence": min_confidence} + try: + async with session.get(url, headers=headers, params=params) as response: + if not response.ok: + logger.info("Unable to connect to %s: %s", url, response.reason) + return + + try: + raw_data = await response.json() + except aiohttp.client_exceptions.ContentTypeError as err: + logger.exception(err) + return + + return { + "connector": connector, + "results": connector.process_search_response( + query, raw_data, min_confidence + ), + } + except asyncio.TimeoutError: + logger.info("Connection timed out for url: %s", url) + except aiohttp.ClientError as err: + logger.exception(err) + + +async def async_connector_search(query, items, min_confidence): + """Try a number of requests simultaneously""" + timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT) + async with aiohttp.ClientSession(timeout=timeout) as session: + tasks = [] + for url, connector in items: + tasks.append( + asyncio.ensure_future( + get_results(session, url, min_confidence, query, connector) + ) + ) + + results = await asyncio.gather(*tasks) + return results + + def search(query, min_confidence=0.1, return_first=False): """find books based on arbitary keywords""" if not query: return [] results = [] - # Have we got a ISBN ? - isbn = re.sub(r"[\W_]", "", query) - maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13 - - start_time = datetime.now() + items = [] for connector in get_connectors(): - result_set = None - if maybe_isbn and connector.isbn_search_url and connector.isbn_search_url != "": - # Search on ISBN - try: - result_set = connector.isbn_search(isbn) - except Exception as err: # pylint: disable=broad-except - logger.info(err) - # if this fails, we can still try regular search + # get the search url from the connector before sending + url = connector.get_search_url(query) + try: + raise_not_valid_url(url) + except ConnectorException: + # if this URL is invalid we should skip it and move on + logger.info("Request denied to blocked domain: %s", url) + continue + items.append((url, connector)) - # if no isbn search results, we fallback to generic search - if not result_set: - try: - result_set = connector.search(query, min_confidence=min_confidence) - except Exception as err: # pylint: disable=broad-except - # we don't want *any* error to crash the whole search page - logger.info(err) - continue - - if return_first and result_set: - # if we found anything, return it - return result_set[0] - - if result_set: - results.append( - { - "connector": connector, - "results": result_set, - } - ) - if (datetime.now() - start_time).seconds >= SEARCH_TIMEOUT: - break + # load as many results as we can + results = asyncio.run(async_connector_search(query, items, min_confidence)) + results = [r for r in results if r] if return_first: - return None + # find the best result from all the responses and return that + all_results = [r for con in results for r in con["results"]] + all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True) + return all_results[0] if all_results else None + # failed requests will return None, so filter those out return results @@ -133,3 +166,20 @@ def create_connector(sender, instance, created, *args, **kwargs): """create a connector to an external bookwyrm server""" if instance.application_type == "bookwyrm": get_or_create_connector(f"https://{instance.server_name}") + + +def raise_not_valid_url(url): + """do some basic reality checks on the url""" + parsed = urlparse(url) + if not parsed.scheme in ["http", "https"]: + raise ConnectorException("Invalid scheme: ", url) + + try: + ipaddress.ip_address(parsed.netloc) + raise ConnectorException("Provided url is an IP address: ", url) + except ValueError: + # it's not an IP address, which is good + pass + + if models.FederatedServer.is_blocked(url): + raise ConnectorException(f"Attempting to load data from blocked url: {url}") diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py index a9aeb94f9..c13f4e3e6 100644 --- a/bookwyrm/connectors/inventaire.py +++ b/bookwyrm/connectors/inventaire.py @@ -77,53 +77,42 @@ class Connector(AbstractConnector): **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]}, } - def search(self, query, min_confidence=None): # pylint: disable=arguments-differ - """overrides default search function with confidence ranking""" - results = super().search(query) - if min_confidence: - # filter the search results after the fact - return [r for r in results if r.confidence >= min_confidence] - return results - - def parse_search_data(self, data): - return data.get("results") - - def format_search_result(self, search_result): - images = search_result.get("image") - cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None - # a deeply messy translation of inventaire's scores - confidence = float(search_result.get("_score", 0.1)) - confidence = 0.1 if confidence < 150 else 0.999 - return SearchResult( - title=search_result.get("label"), - key=self.get_remote_id(search_result.get("uri")), - author=search_result.get("description"), - view_link=f"{self.base_url}/entity/{search_result.get('uri')}", - cover=cover, - confidence=confidence, - connector=self, - ) + def parse_search_data(self, data, min_confidence): + for search_result in data.get("results", []): + images = search_result.get("image") + cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None + # a deeply messy translation of inventaire's scores + confidence = float(search_result.get("_score", 0.1)) + confidence = 0.1 if confidence < 150 else 0.999 + if confidence < min_confidence: + continue + yield SearchResult( + title=search_result.get("label"), + key=self.get_remote_id(search_result.get("uri")), + author=search_result.get("description"), + view_link=f"{self.base_url}/entity/{search_result.get('uri')}", + cover=cover, + confidence=confidence, + connector=self, + ) def parse_isbn_search_data(self, data): """got some daaaata""" results = data.get("entities") if not results: - return [] - return list(results.values()) - - def format_isbn_search_result(self, search_result): - """totally different format than a regular search result""" - title = search_result.get("claims", {}).get("wdt:P1476", []) - if not title: - return None - return SearchResult( - title=title[0], - key=self.get_remote_id(search_result.get("uri")), - author=search_result.get("description"), - view_link=f"{self.base_url}/entity/{search_result.get('uri')}", - cover=self.get_cover_url(search_result.get("image")), - connector=self, - ) + return + for search_result in list(results.values()): + title = search_result.get("claims", {}).get("wdt:P1476", []) + if not title: + continue + yield SearchResult( + title=title[0], + key=self.get_remote_id(search_result.get("uri")), + author=search_result.get("description"), + view_link=f"{self.base_url}/entity/{search_result.get('uri')}", + cover=self.get_cover_url(search_result.get("image")), + connector=self, + ) def is_work_data(self, data): return data.get("type") == "work" diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 118222a16..2b625dffc 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -152,39 +152,35 @@ class Connector(AbstractConnector): image_name = f"{cover_id}-{size}.jpg" return f"{self.covers_url}/b/id/{image_name}" - def parse_search_data(self, data): - return data.get("docs") - - def format_search_result(self, search_result): - # build the remote id from the openlibrary key - key = self.books_url + search_result["key"] - author = search_result.get("author_name") or ["Unknown"] - cover_blob = search_result.get("cover_i") - cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None - return SearchResult( - title=search_result.get("title"), - key=key, - author=", ".join(author), - connector=self, - year=search_result.get("first_publish_year"), - cover=cover, - ) + def parse_search_data(self, data, min_confidence): + for search_result in data.get("docs"): + # build the remote id from the openlibrary key + key = self.books_url + search_result["key"] + author = search_result.get("author_name") or ["Unknown"] + cover_blob = search_result.get("cover_i") + cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None + yield SearchResult( + title=search_result.get("title"), + key=key, + author=", ".join(author), + connector=self, + year=search_result.get("first_publish_year"), + cover=cover, + ) def parse_isbn_search_data(self, data): - return list(data.values()) - - def format_isbn_search_result(self, search_result): - # build the remote id from the openlibrary key - key = self.books_url + search_result["key"] - authors = search_result.get("authors") or [{"name": "Unknown"}] - author_names = [author.get("name") for author in authors] - return SearchResult( - title=search_result.get("title"), - key=key, - author=", ".join(author_names), - connector=self, - year=search_result.get("publish_date"), - ) + for search_result in list(data.values()): + # build the remote id from the openlibrary key + key = self.books_url + search_result["key"] + authors = search_result.get("authors") or [{"name": "Unknown"}] + author_names = [author.get("name") for author in authors] + yield SearchResult( + title=search_result.get("title"), + key=key, + author=", ".join(author_names), + connector=self, + year=search_result.get("publish_date"), + ) def load_edition_data(self, olkey): """query openlibrary for editions of a work""" diff --git a/bookwyrm/management/commands/initdb.py b/bookwyrm/management/commands/initdb.py index 160502ca0..23020a0a6 100644 --- a/bookwyrm/management/commands/initdb.py +++ b/bookwyrm/management/commands/initdb.py @@ -89,7 +89,7 @@ def init_connectors(): covers_url="https://inventaire.io", search_url="https://inventaire.io/api/search?types=works&types=works&search=", isbn_search_url="https://inventaire.io/api/entities?action=by-uris&uris=isbn%3A", - priority=3, + priority=1, ) models.Connector.objects.create( @@ -101,7 +101,7 @@ def init_connectors(): covers_url="https://covers.openlibrary.org", search_url="https://openlibrary.org/search?q=", isbn_search_url="https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:", - priority=3, + priority=1, ) diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index 1e31de774..f37076b5c 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -374,7 +374,7 @@ class Review(BookStatus): def save(self, *args, **kwargs): """clear rating caches""" if self.book.parent_work: - cache.delete(f"book-rating-{self.book.parent_work.id}-*") + cache.delete(f"book-rating-{self.book.parent_work.id}") super().save(*args, **kwargs) diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 07cfcfd07..5c0128458 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -216,7 +216,7 @@ STREAMS = [ # Search configuration # total time in seconds that the instance will spend searching connectors -SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 15)) +SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 8)) # timeout for a query to an individual connector QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5)) diff --git a/bookwyrm/templates/search/book.html b/bookwyrm/templates/search/book.html index 7096a55de..d2828eabf 100644 --- a/bookwyrm/templates/search/book.html +++ b/bookwyrm/templates/search/book.html @@ -36,7 +36,7 @@ {% if result_set.results %}
{% if not result_set.connector.local %} -
+
{% endif %} {% if not result_set.connector.local %} diff --git a/bookwyrm/templatetags/rating_tags.py b/bookwyrm/templatetags/rating_tags.py index 670599e25..cc23d3308 100644 --- a/bookwyrm/templatetags/rating_tags.py +++ b/bookwyrm/templatetags/rating_tags.py @@ -13,10 +13,10 @@ register = template.Library() def get_rating(book, user): """get the overall rating of a book""" return cache.get_or_set( - f"book-rating-{book.parent_work.id}-{user.id}", - lambda u, b: models.Review.privacy_filter(u) - .filter(book__parent_work__editions=b, rating__gt=0) - .aggregate(Avg("rating"))["rating__avg"] + f"book-rating-{book.parent_work.id}", + lambda u, b: models.Review.objects.filter( + book__parent_work__editions=b, rating__gt=0 + ).aggregate(Avg("rating"))["rating__avg"] or 0, user, book, diff --git a/bookwyrm/tests/connectors/test_abstract_connector.py b/bookwyrm/tests/connectors/test_abstract_connector.py index 26742c05e..02ac5c66a 100644 --- a/bookwyrm/tests/connectors/test_abstract_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_connector.py @@ -42,15 +42,9 @@ class AbstractConnector(TestCase): generated_remote_link_field = "openlibrary_link" - def format_search_result(self, search_result): - return search_result - - def parse_search_data(self, data): + def parse_search_data(self, data, min_confidence): return data - def format_isbn_search_result(self, search_result): - return search_result - def parse_isbn_search_data(self, data): return data diff --git a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py index a90ce0c7e..119ca3581 100644 --- a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py @@ -1,6 +1,5 @@ """ testing book data connectors """ from django.test import TestCase -import responses from bookwyrm import models from bookwyrm.connectors import abstract_connector @@ -25,18 +24,12 @@ class AbstractConnector(TestCase): class TestConnector(abstract_connector.AbstractMinimalConnector): """nothing added here""" - def format_search_result(self, search_result): - return search_result - def get_or_create_book(self, remote_id): pass - def parse_search_data(self, data): + def parse_search_data(self, data, min_confidence): return data - def format_isbn_search_result(self, search_result): - return search_result - def parse_isbn_search_data(self, data): return data @@ -54,45 +47,6 @@ class AbstractConnector(TestCase): self.assertIsNone(connector.name) self.assertEqual(connector.identifier, "example.com") - @responses.activate - def test_search(self): - """makes an http request to the outside service""" - responses.add( - responses.GET, - "https://example.com/search?q=a%20book%20title", - json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"], - status=200, - ) - results = self.test_connector.search("a book title") - self.assertEqual(len(results), 10) - self.assertEqual(results[0], "a") - self.assertEqual(results[1], "b") - self.assertEqual(results[2], "c") - - @responses.activate - def test_search_min_confidence(self): - """makes an http request to the outside service""" - responses.add( - responses.GET, - "https://example.com/search?q=a%20book%20title&min_confidence=1", - json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"], - status=200, - ) - results = self.test_connector.search("a book title", min_confidence=1) - self.assertEqual(len(results), 10) - - @responses.activate - def test_isbn_search(self): - """makes an http request to the outside service""" - responses.add( - responses.GET, - "https://example.com/isbn?q=123456", - json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"], - status=200, - ) - results = self.test_connector.isbn_search("123456") - self.assertEqual(len(results), 10) - def test_create_mapping(self): """maps remote fields for book data to bookwyrm activitypub fields""" mapping = Mapping("isbn") diff --git a/bookwyrm/tests/connectors/test_bookwyrm_connector.py b/bookwyrm/tests/connectors/test_bookwyrm_connector.py index 585080e66..1e369ca01 100644 --- a/bookwyrm/tests/connectors/test_bookwyrm_connector.py +++ b/bookwyrm/tests/connectors/test_bookwyrm_connector.py @@ -30,14 +30,11 @@ class BookWyrmConnector(TestCase): result = self.connector.get_or_create_book(book.remote_id) self.assertEqual(book, result) - def test_format_search_result(self): + def test_parse_search_data(self): """create a SearchResult object from search response json""" datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json") search_data = json.loads(datafile.read_bytes()) - results = self.connector.parse_search_data(search_data) - self.assertIsInstance(results, list) - - result = self.connector.format_search_result(results[0]) + result = list(self.connector.parse_search_data(search_data, 0))[0] self.assertIsInstance(result, SearchResult) self.assertEqual(result.title, "Jonathan Strange and Mr Norrell") self.assertEqual(result.key, "https://example.com/book/122") @@ -45,10 +42,9 @@ class BookWyrmConnector(TestCase): self.assertEqual(result.year, 2017) self.assertEqual(result.connector, self.connector) - def test_format_isbn_search_result(self): + def test_parse_isbn_search_data(self): """just gotta attach the connector""" datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json") search_data = json.loads(datafile.read_bytes()) - results = self.connector.parse_isbn_search_data(search_data) - result = self.connector.format_isbn_search_result(results[0]) + result = list(self.connector.parse_isbn_search_data(search_data))[0] self.assertEqual(result.connector, self.connector) diff --git a/bookwyrm/tests/connectors/test_connector_manager.py b/bookwyrm/tests/connectors/test_connector_manager.py index c88a8036a..c0c09147e 100644 --- a/bookwyrm/tests/connectors/test_connector_manager.py +++ b/bookwyrm/tests/connectors/test_connector_manager.py @@ -49,39 +49,11 @@ class ConnectorManager(TestCase): self.assertEqual(len(connectors), 1) self.assertIsInstance(connectors[0], BookWyrmConnector) - @responses.activate - def test_search_plaintext(self): - """search all connectors""" - responses.add( - responses.GET, - "http://fake.ciom/search/Example?min_confidence=0.1", - json=[{"title": "Hello", "key": "https://www.example.com/search/1"}], - ) - results = connector_manager.search("Example") - self.assertEqual(len(results), 1) - self.assertEqual(len(results[0]["results"]), 1) - self.assertEqual(results[0]["connector"].identifier, "test_connector_remote") - self.assertEqual(results[0]["results"][0].title, "Hello") - def test_search_empty_query(self): """don't panic on empty queries""" results = connector_manager.search("") self.assertEqual(results, []) - @responses.activate - def test_search_isbn(self): - """special handling if a query resembles an isbn""" - responses.add( - responses.GET, - "http://fake.ciom/isbn/0000000000", - json=[{"title": "Hello", "key": "https://www.example.com/search/1"}], - ) - results = connector_manager.search("0000000000") - self.assertEqual(len(results), 1) - self.assertEqual(len(results[0]["results"]), 1) - self.assertEqual(results[0]["connector"].identifier, "test_connector_remote") - self.assertEqual(results[0]["results"][0].title, "Hello") - def test_first_search_result(self): """only get one search result""" result = connector_manager.first_search_result("Example") diff --git a/bookwyrm/tests/connectors/test_inventaire_connector.py b/bookwyrm/tests/connectors/test_inventaire_connector.py index 55727a600..3bba9ece3 100644 --- a/bookwyrm/tests/connectors/test_inventaire_connector.py +++ b/bookwyrm/tests/connectors/test_inventaire_connector.py @@ -66,38 +66,14 @@ class Inventaire(TestCase): with self.assertRaises(ConnectorException): self.connector.get_book_data("https://test.url/ok") - @responses.activate - def test_search(self): - """min confidence filtering""" - responses.add( - responses.GET, - "https://inventaire.io/search?q=hi", - json={ - "results": [ - { - "_score": 200, - "label": "hello", - }, - { - "_score": 100, - "label": "hi", - }, - ], - }, - ) - results = self.connector.search("hi", min_confidence=0.5) - self.assertEqual(len(results), 1) - self.assertEqual(results[0].title, "hello") - - def test_format_search_result(self): + def test_parse_search_data(self): """json to search result objs""" search_file = pathlib.Path(__file__).parent.joinpath( "../data/inventaire_search.json" ) search_results = json.loads(search_file.read_bytes()) - results = self.connector.parse_search_data(search_results) - formatted = self.connector.format_search_result(results[0]) + formatted = list(self.connector.parse_search_data(search_results, 0))[0] self.assertEqual(formatted.title, "The Stories of Vladimir Nabokov") self.assertEqual( @@ -178,15 +154,14 @@ class Inventaire(TestCase): result = self.connector.resolve_keys(keys) self.assertEqual(result, ["epistolary novel", "crime novel"]) - def test_isbn_search(self): + def test_pase_isbn_search_data(self): """another search type""" search_file = pathlib.Path(__file__).parent.joinpath( "../data/inventaire_isbn_search.json" ) search_results = json.loads(search_file.read_bytes()) - results = self.connector.parse_isbn_search_data(search_results) - formatted = self.connector.format_isbn_search_result(results[0]) + formatted = list(self.connector.parse_isbn_search_data(search_results))[0] self.assertEqual(formatted.title, "L'homme aux cercles bleus") self.assertEqual( @@ -198,25 +173,12 @@ class Inventaire(TestCase): "https://covers.inventaire.io/img/entities/12345", ) - def test_isbn_search_empty(self): + def test_parse_isbn_search_data_empty(self): """another search type""" search_results = {} - results = self.connector.parse_isbn_search_data(search_results) + results = list(self.connector.parse_isbn_search_data(search_results)) self.assertEqual(results, []) - def test_isbn_search_no_title(self): - """another search type""" - search_file = pathlib.Path(__file__).parent.joinpath( - "../data/inventaire_isbn_search.json" - ) - search_results = json.loads(search_file.read_bytes()) - search_results["entities"]["isbn:9782290349229"]["claims"]["wdt:P1476"] = None - - result = self.connector.format_isbn_search_result( - search_results.get("entities") - ) - self.assertIsNone(result) - def test_is_work_data(self): """is it a work""" work_file = pathlib.Path(__file__).parent.joinpath( diff --git a/bookwyrm/tests/connectors/test_openlibrary_connector.py b/bookwyrm/tests/connectors/test_openlibrary_connector.py index c05eb1a94..69e4f847c 100644 --- a/bookwyrm/tests/connectors/test_openlibrary_connector.py +++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py @@ -122,21 +122,11 @@ class Openlibrary(TestCase): self.assertEqual(result, "https://covers.openlibrary.org/b/id/image-L.jpg") def test_parse_search_result(self): - """extract the results from the search json response""" - datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json") - search_data = json.loads(datafile.read_bytes()) - result = self.connector.parse_search_data(search_data) - self.assertIsInstance(result, list) - self.assertEqual(len(result), 2) - - def test_format_search_result(self): """translate json from openlibrary into SearchResult""" datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json") search_data = json.loads(datafile.read_bytes()) - results = self.connector.parse_search_data(search_data) - self.assertIsInstance(results, list) + result = list(self.connector.parse_search_data(search_data, 0))[0] - result = self.connector.format_search_result(results[0]) self.assertIsInstance(result, SearchResult) self.assertEqual(result.title, "This Is How You Lose the Time War") self.assertEqual(result.key, "https://openlibrary.org/works/OL20639540W") @@ -148,18 +138,10 @@ class Openlibrary(TestCase): """extract the results from the search json response""" datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json") search_data = json.loads(datafile.read_bytes()) - result = self.connector.parse_isbn_search_data(search_data) - self.assertIsInstance(result, list) + result = list(self.connector.parse_isbn_search_data(search_data)) self.assertEqual(len(result), 1) - def test_format_isbn_search_result(self): - """translate json from openlibrary into SearchResult""" - datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json") - search_data = json.loads(datafile.read_bytes()) - results = self.connector.parse_isbn_search_data(search_data) - self.assertIsInstance(results, list) - - result = self.connector.format_isbn_search_result(results[0]) + result = result[0] self.assertIsInstance(result, SearchResult) self.assertEqual(result.title, "Les ombres errantes") self.assertEqual(result.key, "https://openlibrary.org/books/OL16262504M") diff --git a/bookwyrm/tests/templatetags/test_rating_tags.py b/bookwyrm/tests/templatetags/test_rating_tags.py index c00f20726..a06ee9402 100644 --- a/bookwyrm/tests/templatetags/test_rating_tags.py +++ b/bookwyrm/tests/templatetags/test_rating_tags.py @@ -40,7 +40,8 @@ class RatingTags(TestCase): @patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async") def test_get_rating(self, *_): - """privacy filtered rating""" + """privacy filtered rating. Commented versions are how it ought to work with + subjective ratings, which are currenly not used for performance reasons.""" # follows-only: not included models.ReviewRating.objects.create( user=self.remote_user, @@ -48,7 +49,8 @@ class RatingTags(TestCase): book=self.book, privacy="followers", ) - self.assertEqual(rating_tags.get_rating(self.book, self.local_user), 0) + # self.assertEqual(rating_tags.get_rating(self.book, self.local_user), 0) + self.assertEqual(rating_tags.get_rating(self.book, self.local_user), 5) # public: included models.ReviewRating.objects.create( diff --git a/bookwyrm/tests/test_book_search.py b/bookwyrm/tests/test_book_search.py index 16435ffff..7274cb49c 100644 --- a/bookwyrm/tests/test_book_search.py +++ b/bookwyrm/tests/test_book_search.py @@ -102,18 +102,12 @@ class BookSearch(TestCase): class TestConnector(AbstractMinimalConnector): """nothing added here""" - def format_search_result(self, search_result): - return search_result - def get_or_create_book(self, remote_id): pass - def parse_search_data(self, data): + def parse_search_data(self, data, min_confidence): return data - def format_isbn_search_result(self, search_result): - return search_result - def parse_isbn_search_data(self, data): return data diff --git a/bookwyrm/tests/views/test_search.py b/bookwyrm/tests/views/test_search.py index 2df04f588..51166f2fa 100644 --- a/bookwyrm/tests/views/test_search.py +++ b/bookwyrm/tests/views/test_search.py @@ -1,6 +1,5 @@ """ test for app action functionality """ import json -import pathlib from unittest.mock import patch from django.contrib.auth.models import AnonymousUser @@ -8,9 +7,9 @@ from django.http import JsonResponse from django.template.response import TemplateResponse from django.test import TestCase from django.test.client import RequestFactory -import responses from bookwyrm import models, views +from bookwyrm.book_search import SearchResult from bookwyrm.settings import DOMAIN from bookwyrm.tests.validate_html import validate_html @@ -65,12 +64,11 @@ class Views(TestCase): self.assertIsInstance(response, TemplateResponse) validate_html(response.render()) - @responses.activate def test_search_books(self): """searches remote connectors""" view = views.Search.as_view() - models.Connector.objects.create( + connector = models.Connector.objects.create( identifier="example.com", connector_file="openlibrary", base_url="https://example.com", @@ -78,26 +76,24 @@ class Views(TestCase): covers_url="https://example.com/covers", search_url="https://example.com/search?q=", ) - datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json") - search_data = json.loads(datafile.read_bytes()) - responses.add( - responses.GET, "https://example.com/search?q=Test%20Book", json=search_data - ) + mock_result = SearchResult(title="Mock Book", connector=connector, key="hello") request = self.factory.get("", {"q": "Test Book", "remote": True}) request.user = self.local_user with patch("bookwyrm.views.search.is_api_request") as is_api: is_api.return_value = False - response = view(request) + with patch("bookwyrm.connectors.connector_manager.search") as remote_search: + remote_search.return_value = [ + {"results": [mock_result], "connector": connector} + ] + response = view(request) + self.assertIsInstance(response, TemplateResponse) validate_html(response.render()) connector_results = response.context_data["results"] self.assertEqual(len(connector_results), 2) self.assertEqual(connector_results[0]["results"][0].title, "Test Book") - self.assertEqual( - connector_results[1]["results"][0].title, - "This Is How You Lose the Time War", - ) + self.assertEqual(connector_results[1]["results"][0].title, "Mock Book") # don't search remote request = self.factory.get("", {"q": "Test Book", "remote": True}) @@ -106,7 +102,11 @@ class Views(TestCase): request.user = anonymous_user with patch("bookwyrm.views.search.is_api_request") as is_api: is_api.return_value = False - response = view(request) + with patch("bookwyrm.connectors.connector_manager.search") as remote_search: + remote_search.return_value = [ + {"results": [mock_result], "connector": connector} + ] + response = view(request) self.assertIsInstance(response, TemplateResponse) validate_html(response.render()) connector_results = response.context_data["results"] diff --git a/requirements.txt b/requirements.txt index 43294104f..b28f708ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +aiohttp==3.8.1 celery==5.2.2 colorthief==0.2.1 Django==3.2.13