IceCubesApp/Packages/StatusKit/Sources/StatusKit/LanguageDetection/LanguageDetection.swift
Thomas Ricouard 1f858414d8 format .
2024-02-14 12:48:14 +01:00

35 lines
951 B
Swift

import Foundation
import NaturalLanguage
/// Removes tokens that carry no natural-language signal from `inText`.
///
/// Three kinds of tokens are removed, in this order:
/// 1. hashtags (`#` followed by word characters),
/// 2. custom-emoji shortcodes (word characters wrapped in colons, e.g. `:tada:`),
/// 3. mentions, both local (`@user`) and federated (`@user@example.social`).
///
/// Matches are deleted without inserting a replacement, so whitespace that
/// surrounded a removed token is kept as-is. Leading and trailing whitespace
/// and newlines are trimmed from the final result.
///
/// - Parameter inText: The raw text to clean.
/// - Returns: `inText` without hashtags, shortcodes and mentions, trimmed.
private func stripToPureLanguage(inText: String) -> String {
  // The patterns are constant literals, so a failure to compile one is a
  // programmer error; `try!` is deliberate here.
  let hashtagRegex = try! Regex("#[\\w]*")
  let emojiRegex = try! Regex(":\\w*:")
  // The optional group consumes the `@domain.tld` part of a federated handle.
  // Without it, `@user@mastodon.social` would leave `.social` behind and
  // pollute the text used for language detection. A dot is only consumed when
  // it is followed by another label, so sentence-ending punctuation is kept.
  let mentionRegex = try! Regex("@\\w*(?:@[\\w\\-]+(?:\\.[\\w\\-]+)*)?")

  var strippedText = inText
  for regex in [hashtagRegex, emojiRegex, mentionRegex] {
    strippedText = strippedText.replacing(regex, with: "")
  }
  return strippedText.trimmingCharacters(in: .whitespacesAndNewlines)
}
/// Guesses the dominant language of `text`.
///
/// The text is first passed through `stripToPureLanguage(inText:)` and the
/// result is handed to an `NLLanguageRecognizer`. Only the single most likely
/// hypothesis is requested.
///
/// - Parameter text: The text whose language should be detected.
/// - Returns: The `rawValue` of the recognized `NLLanguage` when the
///   recognizer reports a confidence of at least 0.85; otherwise `nil`
///   (including when the recognizer produces no hypothesis at all).
func detectLanguage(text: String) -> String? {
  // Use the detected language only with >= 85 % confidence.
  let minimumConfidence = 0.85

  let languageRecognizer = NLLanguageRecognizer()
  languageRecognizer.processString(stripToPureLanguage(inText: text))

  guard let bestGuess = languageRecognizer.languageHypotheses(withMaximum: 1).first,
        bestGuess.value >= minimumConfidence
  else {
    return nil
  }
  return bestGuess.key.rawValue
}