From 2cd54320e4521fbad276fc08a8b4ed744da7a121 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 3 Oct 2020 15:42:21 +0000 Subject: [PATCH 1/3] New language detection --- composer.json | 4 +- composer.lock | 105 +++++++++++++++++++++++---------------------- src/Model/Item.php | 15 ++++--- 3 files changed, 64 insertions(+), 60 deletions(-) diff --git a/composer.json b/composer.json index 71b7499dbd..f103ce2fe0 100644 --- a/composer.json +++ b/composer.json @@ -40,7 +40,6 @@ "nikic/fast-route": "^1.3", "paragonie/hidden-string": "^1.0", "pear/console_table": "^1.3", - "pear/text_languagedetect": "1.*", "pragmarx/google2fa": "^5.0", "pragmarx/recovery": "^0.1.0", "psr/container": "^1.0", @@ -64,7 +63,8 @@ "npm-asset/perfect-scrollbar": "0.6.16", "npm-asset/textcomplete": "^0.18.2", "npm-asset/typeahead.js": "^0.11.1", - "phpseclib/phpseclib": "^2.0" + "phpseclib/phpseclib": "^2.0", + "patrickschur/language-detection": "^3.4" }, "repositories": [ { diff --git a/composer.lock b/composer.lock index 26412f83a2..de1a1bdc0b 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "ed9aa898eaf8a1f8a807f3be9eecc3d7", + "content-hash": "2cd2011a7e93f6f64524d1663651c016", "packages": [ { "name": "asika/simple-console", @@ -460,7 +460,6 @@ "jsonld.php" ] }, - "notification-url": "https://packagist.org/downloads/", "license": [ "BSD-3-Clause" ], @@ -478,11 +477,11 @@ "description": "A JSON-LD Processor and API implementation in PHP.", "homepage": "https://git.friendi.ca/friendica/php-json-ld", "keywords": [ + "JSON", "JSON-LD", "Linked Data", "RDF", "Semantic Web", - "json", "jsonld" ], "time": "2018-10-08T20:41:00+00:00" @@ -2379,6 +2378,52 @@ ], "time": "2020-03-20T21:48:09+00:00" }, + { + "name": "patrickschur/language-detection", + "version": "v3.4.0", + "source": { + "type": "git", + "url": "https://github.com/patrickschur/language-detection.git", + "reference": "95b55109177d5c4bd6b1bec6e8835cd0df36ef5f" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/patrickschur/language-detection/zipball/95b55109177d5c4bd6b1bec6e8835cd0df36ef5f", + "reference": "95b55109177d5c4bd6b1bec6e8835cd0df36ef5f", + "shasum": "" + }, + "require": { + "ext-mbstring": "*", + "php": "^7" + }, + "require-dev": { + "phpunit/phpunit": "^6" + }, + "type": "library", + "autoload": { + "psr-4": { + "LanguageDetection\\": "src/LanguageDetection" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Patrick Schur", + "email": "patrick_schur@outlook.de" + } + ], + "description": "A language detection library for PHP. Detects the language from a given text string.", + "homepage": "https://github.com/patrickschur/language-detection", + "keywords": [ + "detect", + "detection", + "language" + ], + "time": "2018-09-19T21:45:51+00:00" + }, { "name": "pear/console_table", "version": "v1.3.1", @@ -2434,50 +2479,6 @@ ], "time": "2018-01-25T20:47:17+00:00" }, - { - "name": "pear/text_languagedetect", - "version": "v1.0.1", - "source": { - "type": "git", - "url": "https://github.com/pear/Text_LanguageDetect.git", - "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/pear/Text_LanguageDetect/zipball/9e253f26cef9a9066f53f200cc3e0684018cb5b5", - "reference": "9e253f26cef9a9066f53f200cc3e0684018cb5b5", - "shasum": "" - }, - "require-dev": { - "phpunit/phpunit": "8.*|9.*" - }, - "suggest": { - "ext-mbstring": "May require the mbstring PHP extension" - }, - "type": "library", - "autoload": { - "psr-0": { - "Text": "./" - } - }, - "notification-url": "https://packagist.org/downloads/", - "include-path": [ - "./" - ], - "license": [ - "BSD-2-Clause" - ], - "authors": [ - { - "name": "Nicholas Pisarro", - "email": "taak@php.net", - "role": "Lead" - } - ], - "description": "Identify human languages from text samples", - "homepage": "http://pear.php.net/package/Text_LanguageDetect", - "time": "2020-05-17T12:19:40+00:00" - }, { "name": "phpseclib/phpseclib", "version": "2.0.29", @@ -4450,7 +4451,7 @@ } ], "description": "Provides the functionality to compare PHP values for equality", - "homepage": "https://github.com/sebastianbergmann/comparator", + "homepage": "http://www.github.com/sebastianbergmann/comparator", "keywords": [ "comparator", "compare", @@ -4552,7 +4553,7 @@ } ], "description": "Provides functionality to handle HHVM/PHP environments", - "homepage": "https://github.com/sebastianbergmann/environment", + "homepage": "http://www.github.com/sebastianbergmann/environment", "keywords": [ "Xdebug", "environment", @@ -4620,7 +4621,7 @@ } ], "description": "Provides the functionality to export PHP variables for visualization", - "homepage": "https://github.com/sebastianbergmann/exporter", + "homepage": "http://www.github.com/sebastianbergmann/exporter", "keywords": [ "export", "exporter" @@ -4672,7 +4673,7 @@ } ], "description": "Snapshotting of global state", - "homepage": "https://github.com/sebastianbergmann/global-state", + "homepage": "http://www.github.com/sebastianbergmann/global-state", "keywords": [ "global state" ], @@ -4774,7 +4775,7 @@ } ], "description": "Provides functionality to recursively process PHP variables", - "homepage": "https://github.com/sebastianbergmann/recursion-context", + "homepage": "http://www.github.com/sebastianbergmann/recursion-context", "time": "2016-11-19T07:33:16+00:00" }, { diff --git a/src/Model/Item.php b/src/Model/Item.php index c384eb4f94..e8ebe5671c 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -43,8 +43,8 @@ use Friendica\Util\Map; use Friendica\Util\Network; use Friendica\Util\Strings; use Friendica\Worker\Delivery; -use Text_LanguageDetect; use Friendica\Repository\PermissionSet as RepPermissionSet; +use LanguageDetection\Language; class Item { @@ -1699,10 +1699,10 @@ class Item $item['plink'] = ($item['plink'] ?? '') ?: DI::baseUrl() . '/display/' . urlencode($item['guid']); - $item['language'] = self::getLanguage($item); - $item['gravity'] = self::getGravity($item); + $item['language'] = self::getLanguage($item); + $default = ['url' => $item['author-link'], 'name' => $item['author-name'], 'photo' => $item['author-avatar'], 'network' => $item['network']]; $item['author-id'] = ($item['author-id'] ?? 0) ?: Contact::getIdForURL($item['author-link'], 0, null, $default); @@ -2472,11 +2472,14 @@ class Item */ private static function getLanguage(array $item) { + if (!in_array($item['gravity'], [GRAVITY_PARENT, GRAVITY_COMMENT])) { + return ''; + } + $naked_body = BBCode::toPlaintext($item['body'], false); - $ld = new Text_LanguageDetect(); - $ld->setNameMode(2); - $languages = $ld->detect($naked_body, 3); + $ld = new Language; + $languages = $ld->detect($naked_body)->limit(0, 3)->close(); if (is_array($languages)) { return json_encode($languages); } From 26e9efc746cdb75e5317881c1f1e46df467f319b Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 3 Oct 2020 16:17:18 +0000 Subject: [PATCH 2/3] Added brackets --- src/Model/Item.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Model/Item.php b/src/Model/Item.php index e8ebe5671c..8af14e8243 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -2478,7 +2478,7 @@ class Item $naked_body = BBCode::toPlaintext($item['body'], false); - $ld = new Language; + $ld = new Language(); $languages = $ld->detect($naked_body)->limit(0, 3)->close(); if (is_array($languages)) { return json_encode($languages); From 8edfe893aca3d8c3379f1219e370a9f466a11032 Mon Sep 17 00:00:00 2001 From: Philipp Date: Sat, 3 Oct 2020 18:20:28 +0200 Subject: [PATCH 3/3] Sort composer dependencies --- composer.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/composer.json b/composer.json index f103ce2fe0..838d0018c6 100644 --- a/composer.json +++ b/composer.json @@ -39,7 +39,9 @@ "monolog/monolog": "^1.25", "nikic/fast-route": "^1.3", "paragonie/hidden-string": "^1.0", + "patrickschur/language-detection": "^3.4", "pear/console_table": "^1.3", + "phpseclib/phpseclib": "^2.0", "pragmarx/google2fa": "^5.0", "pragmarx/recovery": "^0.1.0", "psr/container": "^1.0", @@ -62,9 +64,7 @@ "npm-asset/moment": "^2.24", "npm-asset/perfect-scrollbar": "0.6.16", "npm-asset/textcomplete": "^0.18.2", - "npm-asset/typeahead.js": "^0.11.1", - "phpseclib/phpseclib": "^2.0", - "patrickschur/language-detection": "^3.4" + "npm-asset/typeahead.js": "^0.11.1" }, "repositories": [ {