Merge pull request #2935 from jderuiter/markdown-import

Convert description from Markdown when importing from Open Library
This commit is contained in:
Mouse Reeve 2023-08-06 16:14:25 -07:00 committed by GitHub
commit 861d3b1500
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 121 additions and 6 deletions

View file

@ -2,8 +2,11 @@
import re
from typing import Any, Optional, Union, Iterator, Iterable
from markdown import markdown
from bookwyrm import models
from bookwyrm.book_search import SearchResult
from bookwyrm.utils.sanitizer import clean
from .abstract_connector import AbstractConnector, Mapping, JsonDict
from .abstract_connector import get_data, infer_physical_format, unique_physical_format
from .connector_manager import ConnectorException, create_edition_task
@ -235,11 +238,22 @@ def ignore_edition(edition_data: JsonDict) -> bool:
return True
def get_description(description_blob: Union[JsonDict, str]) -> str:
    """Render an Open Library description as sanitized HTML.

    The description arrives either as a plain string or as a dict carrying
    the text under its ``"value"`` key. In both cases the text is run through
    ``markdown`` and the result is passed through ``clean`` before it is
    returned.

    When the rendered output is exactly one ``<p>...</p>`` element, the
    wrapping tags are dropped so a simple one-line description stays plain
    text instead of gaining markup.
    """
    if isinstance(description_blob, dict):
        raw_text = description_blob.get("value")
    else:
        raw_text = description_blob

    # A missing key or an explicit null would otherwise hand None to
    # markdown(), which expects a string.
    description = markdown(raw_text or "")

    if (
        description.startswith("<p>")
        and description.endswith("</p>")
        and description.count("<p>") == 1
    ):
        # If there is just one <p> tag and it is around the text remove it
        description = description[len("<p>") : -len("</p>")].strip()

    # Sanitize on every path: the text comes from a remote source and inline
    # raw HTML survives Markdown rendering, so the unwrapped single-paragraph
    # case must not bypass the sanitizer.
    return clean(description)
def get_openlibrary_key(key: str) -> str:

View file

@ -14,7 +14,7 @@ from bookwyrm.connectors.openlibrary import get_languages, get_description
from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key
from bookwyrm.connectors.connector_manager import ConnectorException
# pylint: disable=too-many-public-methods
class Openlibrary(TestCase):
"""test loading data from openlibrary.org"""
@ -34,11 +34,15 @@ class Openlibrary(TestCase):
work_file = pathlib.Path(__file__).parent.joinpath("../data/ol_work.json")
edition_file = pathlib.Path(__file__).parent.joinpath("../data/ol_edition.json")
edition_md_file = pathlib.Path(__file__).parent.joinpath(
"../data/ol_edition_markdown.json"
)
edition_list_file = pathlib.Path(__file__).parent.joinpath(
"../data/ol_edition_list.json"
)
self.work_data = json.loads(work_file.read_bytes())
self.edition_data = json.loads(edition_file.read_bytes())
self.edition_md_data = json.loads(edition_md_file.read_bytes())
self.edition_list_data = json.loads(edition_list_file.read_bytes())
def test_get_remote_id_from_data(self):
@ -185,6 +189,18 @@ class Openlibrary(TestCase):
expected = "First in the Old Kingdom/Abhorsen series."
self.assertEqual(description, expected)
def test_get_description_markdown_paragraphs(self):
    """two Markdown paragraphs come back as two separate <p> elements"""
    rendered = get_description("Paragraph 1\n\nParagraph 2")
    self.assertEqual(rendered, "<p>Paragraph 1</p>\n<p>Paragraph 2</p>")
def test_get_description_markdown_blockquote(self):
    """a Markdown quote followed by a paragraph renders as <blockquote> plus <p>"""
    rendered = get_description("> Quote\n\nParagraph 2")
    self.assertEqual(
        rendered,
        "<blockquote>\n<p>Quote</p>\n</blockquote>\n<p>Paragraph 2</p>",
    )
def test_get_openlibrary_key(self):
"""extracts the uuid"""
key = get_openlibrary_key("/books/OL27320736M")
@ -218,13 +234,44 @@ class Openlibrary(TestCase):
self.assertEqual(result.parent_work, work)
self.assertEqual(result.title, "Sabriel")
self.assertEqual(result.isbn_10, "0060273224")
self.assertIsNotNone(result.description)
self.assertEqual(result.description, self.edition_data["description"]["value"])
self.assertEqual(result.languages[0], "English")
self.assertEqual(result.publishers[0], "Harper Trophy")
self.assertEqual(result.pages, 491)
self.assertEqual(result.subjects[0], "Fantasy.")
self.assertEqual(result.physical_format, "Hardcover")
@responses.activate
def test_create_edition_markdown_from_data(self):
    """an edition with a Markdown description gets that description as HTML"""
    parent_work = models.Work.objects.create(title="Hello")
    responses.add(
        responses.GET,
        "https://openlibrary.org/authors/OL10183984A",
        json={"hi": "there"},
        status=200,
    )

    # The fixture's description holds a quote, emphasis and a one-item link
    # list; this is the HTML expected for it.
    expected_description = (
        '<blockquote>\n<p>"She didn\'t choose her garden" opens this chapbook '
        "exploring Black womanhood, mental and physical health, spirituality, and "
        "ancestral roots. It is an investigation of how to locate a self amidst "
        "complex racial history and how to forge an authentic way forward. There's "
        "internal slippage as the subject weaves between the presence and spirits "
        "of others, as well as a reckoning with the toll of navigating this world "
        "as a Black woman. Yet, we also see hopefulness: a refuge in becoming part "
        "of the collective, beyond individuality. <em>The Stars With You</em> "
        "gives us a speculative yearning for what is to come and probes what is "
        "required to reach it.</p>\n</blockquote>\n<ul>\n<li><a "
        'href="https://store.cooperdillon.com/product/the-stars-with-you-by-'
        'stefani-cox">publisher</a></li>\n</ul>'
    )

    with patch(
        "bookwyrm.connectors.openlibrary.Connector.get_authors_from_data"
    ) as get_authors:
        # Author lookup is stubbed to return no authors.
        get_authors.return_value = []
        edition = self.connector.create_edition_from_data(
            parent_work, self.edition_md_data
        )

    self.assertEqual(edition.description, expected_description)
def test_ignore_edition(self):
"""skip editions with poor metadata"""
self.assertFalse(ignore_edition({"isbn_13": "hi"}))

View file

@ -0,0 +1,54 @@
{
"type": {
"key": "/type/edition"
},
"authors": [
{
"key": "/authors/OL10183984A"
}
],
"languages": [
{
"key": "/languages/eng"
}
],
"publish_date": "2022",
"publishers": [
"Cooper Dillon Books"
],
"source_records": [
"bwb:9781943899159"
],
"subjects": [
"Poetry (poetic works by one author)",
"Poetry, collections"
],
"title": "The Stars with You",
"description": {
"type": "/type/text",
"value": ">\"She didn't choose her garden\" opens this chapbook exploring Black womanhood, mental and physical health, spirituality, and ancestral roots. It is an investigation of how to locate a self amidst complex racial history and how to forge an authentic way forward. There's internal slippage as the subject weaves between the presence and spirits of others, as well as a reckoning with the toll of navigating this world as a Black woman. Yet, we also see hopefulness: a refuge in becoming part of the collective, beyond individuality. *The Stars With You* gives us a speculative yearning for what is to come and probes what is required to reach it.\r\n\r\n- [publisher](https://store.cooperdillon.com/product/the-stars-with-you-by-stefani-cox)"
},
"works": [
{
"key": "/works/OL27172905W"
}
],
"key": "/books/OL36884359M",
"identifiers": {},
"isbn_13": [
"9781943899159"
],
"classifications": {},
"physical_format": "Paperback",
"number_of_pages": 36,
"latest_revision": 3,
"revision": 3,
"created": {
"type": "/type/datetime",
"value": "2022-01-28T19:20:08.156459"
},
"last_modified": {
"type": "/type/datetime",
"value": "2023-07-30T23:42:51.589566"
}
}

View file

@ -2,7 +2,7 @@
import bleach
def clean(input_text):
def clean(input_text: str) -> str:
"""Run through "bleach" """
return bleach.clean(
input_text,