From 9661dbeaaca00205e52da1f6f700180f944345d1 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 1 May 2024 18:25:22 +0200 Subject: [PATCH] [fix] update wikidata units - remove URL prefix from Q-name Sometimes the URL prefix switches from http to https; this patch hardens the code that removes the URL prefix from the Wikidata Q-name. The issue has been reported in [1]. [1] https://github.com/searxng/searxng/pull/3437#issuecomment-2082121730 Signed-off-by: Markus Heiser --- searx/data/wikidata_units.json | 134 +++++++++++++++++- searxng_extra/update/update_wikidata_units.py | 10 +- 2 files changed, 138 insertions(+), 6 deletions(-) diff --git a/searx/data/wikidata_units.json b/searx/data/wikidata_units.json index d46772e47..51a412cfe 100644 --- a/searx/data/wikidata_units.json +++ b/searx/data/wikidata_units.json @@ -1404,6 +1404,136 @@ "symbol": "cm H₂O", "to_si_factor": 98.0665 }, + "Q125387265": { + "si_name": "Q11574", + "symbol": "qs", + "to_si_factor": 1e-30 + }, + "Q125387281": { + "si_name": "Q11574", + "symbol": "rs", + "to_si_factor": 1e-27 + }, + "Q125389370": { + "si_name": "Q11579", + "symbol": "rK", + "to_si_factor": 1e-27 + }, + "Q125389387": { + "si_name": "Q11579", + "symbol": "qK", + "to_si_factor": 1e-30 + }, + "Q125389519": { + "si_name": "Q11579", + "symbol": "RK", + "to_si_factor": 1e+27 + }, + "Q125389534": { + "si_name": "Q11579", + "symbol": "QK", + "to_si_factor": 1e+30 + }, + "Q125390959": { + "si_name": "Q41509", + "symbol": "rmol", + "to_si_factor": 1e-27 + }, + "Q125390987": { + "si_name": "Q41509", + "symbol": "qmol", + "to_si_factor": 1e-30 + }, + "Q125392001": { + "si_name": "Q41509", + "symbol": "Rmol", + "to_si_factor": 1e+27 + }, + "Q125392014": { + "si_name": "Q41509", + "symbol": "Qmol", + "to_si_factor": 1e+30 + }, + "Q125470272": { + "si_name": "Q102573", + "symbol": "rBq", + "to_si_factor": 1e-27 + }, + "Q125470277": { + "si_name": "Q102573", + "symbol": "qBq", + "to_si_factor": 1e-30 + }, + "Q125470426": { + "si_name": 
"Q102573", + "symbol": "RBq", + "to_si_factor": 1e+27 + }, + "Q125470445": { + "si_name": "Q102573", + "symbol": "QBq", + "to_si_factor": 1e+30 + }, + "Q125470704": { + "si_name": "Q25406", + "symbol": "rC", + "to_si_factor": 1e-27 + }, + "Q125470716": { + "si_name": "Q25406", + "symbol": "qC", + "to_si_factor": 1e-30 + }, + "Q125471094": { + "si_name": "Q25406", + "symbol": "RC", + "to_si_factor": 1e+27 + }, + "Q125471109": { + "si_name": "Q25406", + "symbol": "QC", + "to_si_factor": 1e+30 + }, + "Q125471199": { + "si_name": null, + "symbol": "r°C", + "to_si_factor": null + }, + "Q125471200": { + "si_name": null, + "symbol": "q°C", + "to_si_factor": null + }, + "Q125471246": { + "si_name": null, + "symbol": "R°C", + "to_si_factor": null + }, + "Q125471247": { + "si_name": null, + "symbol": "Q°C", + "to_si_factor": null + }, + "Q125471334": { + "si_name": "Q131255", + "symbol": "rF", + "to_si_factor": 1e-27 + }, + "Q125471344": { + "si_name": "Q131255", + "symbol": "qF", + "to_si_factor": 1e-30 + }, + "Q125471409": { + "si_name": "Q131255", + "symbol": "RF", + "to_si_factor": 1e+27 + }, + "Q125471423": { + "si_name": "Q131255", + "symbol": "QF", + "to_si_factor": 1e+30 + }, "Q12714022": { "si_name": "Q11570", "symbol": "cwt", @@ -4506,7 +4636,7 @@ }, "Q829073": { "si_name": "Q33680", - "symbol": null, + "symbol": "\"", "to_si_factor": 4.84813681109536e-06 }, "Q83216": { @@ -6274,4 +6404,4 @@ "symbol": "m Hg", "to_si_factor": 133322.0 } -} +} \ No newline at end of file diff --git a/searxng_extra/update/update_wikidata_units.py b/searxng_extra/update/update_wikidata_units.py index f384df749..96326874a 100755 --- a/searxng_extra/update/update_wikidata_units.py +++ b/searxng_extra/update/update_wikidata_units.py @@ -51,16 +51,18 @@ WHERE ORDER BY ?item DESC(?rank) ?symbol """ -_wikidata_url = "https://www.wikidata.org/entity/" - def get_data(): results = collections.OrderedDict() response = wikidata.send_wikidata_query(SARQL_REQUEST) for unit in 
response['results']['bindings']: - name = unit['item']['value'].replace(_wikidata_url, '') + symbol = unit['symbol']['value'] - si_name = unit.get('tosiUnit', {}).get('value', '').replace(_wikidata_url, '') + name = unit['item']['value'].rsplit('/', 1)[1] + si_name = unit.get('tosiUnit', {}).get('value', '') + if si_name: + si_name = si_name.rsplit('/', 1)[1] + to_si_factor = unit.get('tosi', {}).get('value', '') if name not in results: # ignore duplicate: always use the first one