From 9deefbbc849c01ff3010dfc15072d962e4727c97 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Sun, 11 Dec 2022 00:45:14 -0500 Subject: [PATCH 1/4] Builds/runs natively on Apple M1 --- app/__init__.py | 10 ------- app/detect.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ app/language.py | 2 +- requirements.txt | 4 +-- 4 files changed, 74 insertions(+), 14 deletions(-) create mode 100644 app/detect.py diff --git a/app/__init__.py b/app/__init__.py index 208da59..982fb0b 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,13 +1,3 @@ import os - -# override polyglot path -import polyglot -from appdirs import user_data_dir - -polyglot.polyglot_path = os.path.join( - user_data_dir(appname="LibreTranslate", appauthor="uav4geo"), "polyglot_data" -) - - from .main import main from .manage import manage diff --git a/app/detect.py b/app/detect.py new file mode 100644 index 0000000..1b43087 --- /dev/null +++ b/app/detect.py @@ -0,0 +1,72 @@ +# Originally adapted from https://github.com/aboSamoor/polyglot/blob/master/polyglot/base.py + +import pycld2 as cld2 + +class UnknownLanguage(Exception): + pass + +class Language(object): + def __init__(self, choice): + name, code, confidence, bytesize = choice + self.code = code + self.name = name + self.confidence = float(confidence) + self.read_bytes = int(bytesize) + + def __str__(self): + return ("name: {:<12}code: {:<9}confidence: {:>5.1f} " + "read bytes:{:>6}".format(self.name, self.code, + self.confidence, self.read_bytes)) + + @staticmethod + def from_code(code): + return Language(("", code, 100, 0)) + + +class Detector(object): + """ Detect the language used in a snippet of text.""" + + def __init__(self, text, quiet=False): + """ Detector of the language used in `text`. + Args: + text (string): unicode string. + """ + self.__text = text + self.reliable = True + """False if the detector used Best Effort strategy in detection.""" + self.quiet = quiet + """If true, exceptions will be silenced.""" + self.detect(text) + + @staticmethod + def supported_languages(): + """Returns a list of the languages that can be detected by pycld2.""" + return [name.capitalize() for name,code in cld2.LANGUAGES if not name.startswith("X_")] + + def detect(self, text): + """Decide which language is used to write the text. + The method tries first to detect the language with high reliability. If + that is not possible, the method switches to best effort strategy. + Args: + text (string): A snippet of text, the longer it is the more reliable we + can detect the language used to write the text. + """ + reliable, index, top_3_choices = cld2.detect(text, bestEffort=False) + + if not reliable: + self.reliable = False + reliable, index, top_3_choices = cld2.detect(text, bestEffort=True) + + if not self.quiet: + if not reliable: + raise UnknownLanguage("Try passing a longer snippet of text") + + self.languages = [Language(x) for x in top_3_choices] + self.language = self.languages[0] + return self.language + + def __str__(self): + text = "Prediction is reliable: {}\n".format(self.reliable) + text += u"\n".join(["Language {}: {}".format(i+1, str(l)) + for i,l in enumerate(self.languages)]) + return text \ No newline at end of file diff --git a/app/language.py b/app/language.py index 2e358c2..05696b6 100644 --- a/app/language.py +++ b/app/language.py @@ -1,7 +1,7 @@ import string from argostranslate import translate -from polyglot.detect.base import Detector, UnknownLanguage +from app.detect import Detector, UnknownLanguage __languages = None diff --git a/requirements.txt b/requirements.txt index 39a300f..062976f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,10 +5,8 @@ flask-swagger-ui==4.11.1 Flask-Limiter==2.6.3 waitress==2.1.2 expiringdict==1.2.2 -pyicu>=2.8 -pycld2==0.41 +LTpycld2==0.42 morfessor==2.0.6 -polyglot==16.7.4 appdirs==1.4.4 APScheduler==3.9.1 translatehtml==1.5.2 From 9629cb88889cb85497221052f8f5405c38dba851 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Sun, 11 Dec 2022 00:55:27 -0500 Subject: [PATCH 2/4] Enable multi-platform docker images --- .github/workflows/publish-docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml index 8e0e1ad..3fcc54e 100644 --- a/.github/workflows/publish-docker.yml +++ b/.github/workflows/publish-docker.yml @@ -42,7 +42,7 @@ jobs: - name: Build and push Image uses: docker/build-push-action@v2 with: - platforms: linux/amd64 + platforms: linux/amd64,linux/arm64 tags: | ${{ steps.get-variables.outputs.gh-username-lower }}/libretranslate:${{ env.TAG }}, ghcr.io/${{ steps.get-variables.outputs.gh-username-lower }}/libretranslate:${{ env.TAG }} From c21fedc6bb5de25256fc0985206cc7672327fd32 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Sun, 11 Dec 2022 01:04:12 -0500 Subject: [PATCH 3/4] Remove libicu dep --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index aea119a..a51883d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ WORKDIR /app ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update -qq \ - && apt-get -qqq install --no-install-recommends -y libicu-dev pkg-config gcc g++ \ + && apt-get -qqq install --no-install-recommends -y pkg-config gcc g++ \ && apt-get clean \ && rm -rf /var/lib/apt @@ -25,7 +25,6 @@ ARG with_models=false ARG models= RUN addgroup --system --gid 1032 libretranslate && adduser --system --uid 1032 libretranslate -RUN apt-get update -qq && apt-get -qqq install --no-install-recommends -y libicu67 && apt-get clean && rm -rf /var/lib/apt USER libretranslate COPY --from=builder --chown=1032:1032 /app /app From 5974c022b6fabbc17e3f5a3cabf545d01043f252 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Sun, 11 Dec 2022 01:09:22 -0500 Subject: [PATCH 4/4] Simplify mirrors table in readme --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 40e335b..4867a97 100644 --- a/README.md +++ b/README.md @@ -301,15 +301,15 @@ Then issue `./launcher rebuild app`. From the Discourse's admin panel then selec This is a list of public LibreTranslate instances, some require an API key. If you want to add a new URL, please open a pull request. -URL |API Key Required|Payment Link|Cost ---- | --- | --- | --- -[libretranslate.com](https://libretranslate.com)|:heavy_check_mark:|[Buy](https://buy.stripe.com/3cs4j3a4u4c8d3i289)| [$19 / month](https://buy.stripe.com/3cs4j3a4u4c8d3i289), 80 requests / minute limit -[libretranslate.de](https://libretranslate.de)|-|- -[translate.argosopentech.com](https://translate.argosopentech.com/)|-|- -[translate.api.skitzen.com](https://translate.api.skitzen.com/)|-|- -[translate.fortytwo-it.com](https://translate.fortytwo-it.com/)|-|- -[translate.terraprint.co](https://translate.terraprint.co/)|-|- -[lt.vern.cc](https://lt.vern.cc)|-|- +URL |API Key Required | Links +--- | --- | --- +[libretranslate.com](https://libretranslate.com)|:heavy_check_mark:|[Get API Key](https://buy.stripe.com/3cs4j3a4u4c8d3i289) +[libretranslate.de](https://libretranslate.de)|- +[translate.argosopentech.com](https://translate.argosopentech.com/)|- +[translate.api.skitzen.com](https://translate.api.skitzen.com/)|- +[translate.fortytwo-it.com](https://translate.fortytwo-it.com/)|- +[translate.terraprint.co](https://translate.terraprint.co/)|- +[lt.vern.cc](https://lt.vern.cc)|- ## TOR/i2p Mirrors