From e84a7e8c2a9a26040d77f5e33382cfa470cbaf0a Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 30 Sep 2023 15:51:53 +0000 Subject: [PATCH] Additional languages / new hook for additional language detection --- doc/Addons.md | 8 ++++++++ src/Core/L10n.php | 30 ++++++++++++++++++++++++------ src/Model/Item.php | 14 ++++++++++++-- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/doc/Addons.md b/doc/Addons.md index 567e5b2756..497717e6f7 100644 --- a/doc/Addons.md +++ b/doc/Addons.md @@ -221,6 +221,13 @@ Please note: body contents are bbcode - not HTML Called when receiving a post from another source. This may also be used to post local activity or system generated messages. `$b` is the item array of information to be stored in the database and the item body is bbcode. +### get_language +Called after the language detection. This can be used for alternative language detection methods. +`$data` is an array: + +- **text**: The text that is analyzed. +- **detected**: The array with the original language detection. Can be overwritten via an addon. + ### addon_settings Called when generating the HTML for the addon settings page. `$data` is an array containing: @@ -800,6 +807,7 @@ Here is a complete list of all hook callbacks with file locations (as of 24-Sep- ### src/Model/Item.php + Hook::callAll('get_language', $data); Hook::callAll('post_local', $item); Hook::callAll('post_remote', $item); Hook::callAll('post_local_end', $posted_item); diff --git a/src/Core/L10n.php b/src/Core/L10n.php index 636cb11908..7fd7fc4e87 100644 --- a/src/Core/L10n.php +++ b/src/Core/L10n.php @@ -397,13 +397,31 @@ class L10n // See https://github.com/friendica/friendica/issues/10511 // Persian is manually added to language detection until a persian translation is provided for the interface, at // which point it will be automatically available through `getAvailableLanguages()` and this should be removed. 
- // Additionally Portuguese, Ukrainian, traditional Chinese and Welsh are added to that list. + // Additionally some more languages are added to that list that are used in the Fediverse. $additional_langs = [ - 'cy' => 'Cymraeg', - 'uk' => 'Українська', - 'pt-PT' => 'Português', - 'zh-hant' => '繁體', - 'fa' => 'فارسی' + 'af' => 'Afrikaans', + 'cy' => 'Cymraeg', + 'el-monoton' => 'Ελληνικά', + 'eu' => 'euskara', + 'fa' => 'فارسی', + 'gl' => 'Galego', + 'hi' => 'हिन्दी', + 'hr' => 'Hrvatski', + 'id' => 'bahasa Indonesia', + 'ko' => '한국인', + 'lt' => 'lietuvių', + 'lv' => 'latviešu', + 'sk' => 'slovenský', + 'sl' => 'Slovenščina', + 'sw' => 'Kiswahili', + 'th' => 'แบบไทย', + 'tl' => 'Wikang Tagalog', + 'tr' => 'Türkçe', + 'pt-PT' => 'Português', + 'uk' => 'Українська', + 'uz' => 'Ўзбек', + 'vi' => 'Tiếng Việt', + 'zh-hant' => '繁體', ]; $langs = array_merge($additional_langs, $langs); ksort($langs); diff --git a/src/Model/Item.php b/src/Model/Item.php index 41aae04134..c529c2c166 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -2017,7 +2017,7 @@ class Item $naked_body = BBCode::toPlaintext($naked_body); // Remove possibly remaining links - $naked_body = preg_replace(Strings::autoLinkRegEx(), '', $naked_body); + $naked_body = trim(preg_replace(Strings::autoLinkRegEx(), '', $naked_body)); if (empty($naked_body)) { return []; @@ -2029,7 +2029,17 @@ class Item $availableLanguages = DI::l10n()->convertForLanguageDetection($availableLanguages); $ld = new Language(array_keys($availableLanguages)); - return $ld->detect($naked_body)->limit(0, $count)->close() ?: []; + $languages = $ld->detect($naked_body)->limit(0, $count)->close() ?: []; + + $data = [ + 'text' => $naked_body, + 'detected' => $languages, + ]; + + Hook::callAll('get_language', $data); + $languages = $data['detected']; + + return $languages; } /**