Merge pull request #13611 from annando/languages

Use the post language for the language detection / config for quality
This commit is contained in:
Hypolite Petovan 2023-11-05 16:23:57 -08:00 committed by GitHub
commit 58e5f0d9c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 101 additions and 25 deletions

View File

@ -117,7 +117,7 @@ class Item
const DELIVER_FIELDLIST = [ const DELIVER_FIELDLIST = [
'uid', 'id', 'parent', 'uri-id', 'uri', 'thr-parent', 'parent-uri', 'guid', 'uid', 'id', 'parent', 'uri-id', 'uri', 'thr-parent', 'parent-uri', 'guid',
'parent-guid', 'conversation', 'received', 'created', 'edited', 'verb', 'object-type', 'object', 'target', 'parent-guid', 'conversation', 'received', 'created', 'edited', 'verb', 'object-type', 'object', 'target',
'private', 'title', 'body', 'raw-body', 'location', 'coord', 'app', 'private', 'title', 'body', 'raw-body', 'language', 'location', 'coord', 'app',
'inform', 'deleted', 'extid', 'post-type', 'post-reason', 'gravity', 'inform', 'deleted', 'extid', 'post-type', 'post-reason', 'gravity',
'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid', 'allow_cid', 'allow_gid', 'deny_cid', 'deny_gid',
'author-id', 'author-addr', 'author-link', 'author-name', 'author-avatar', 'owner-id', 'owner-link', 'contact-uid', 'author-id', 'author-addr', 'author-link', 'author-name', 'author-avatar', 'owner-id', 'owner-link', 'contact-uid',
@ -1484,6 +1484,10 @@ class Item
*/ */
private static function setOwnerforResharedItem(array $item) private static function setOwnerforResharedItem(array $item)
{ {
if ($item['uid'] == 0) {
return;
}
$parent = Post::selectFirst( $parent = Post::selectFirst(
['id', 'causer-id', 'owner-id', 'author-id', 'author-link', 'origin', 'post-reason'], ['id', 'causer-id', 'owner-id', 'author-id', 'author-link', 'origin', 'post-reason'],
['uri-id' => $item['thr-parent-id'], 'uid' => $item['uid']] ['uri-id' => $item['thr-parent-id'], 'uid' => $item['uid']]

View File

@ -586,7 +586,14 @@ class User
$languages = []; $languages = [];
$uids = []; $uids = [];
$users = DBA::select('user', ['uid', 'language'], ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `uid` > ?", 0]); $condition = ["`verified` AND NOT `blocked` AND NOT `account_removed` AND NOT `account_expired` AND `uid` > ?", 0];
$abandon_days = intval(DI::config()->get('system', 'account_abandon_days'));
if (!empty($abandon_days)) {
$condition = DBA::mergeConditions($condition, ["`last-activity` > ?", DateTimeFormat::utc('now - ' . $abandon_days . ' days')]);
}
$users = DBA::select('user', ['uid', 'language'], $condition);
while ($user = DBA::fetch($users)) { while ($user = DBA::fetch($users)) {
$uids[] = $user['uid']; $uids[] = $user['uid'];
$code = DI::l10n()->toISO6391($user['language']); $code = DI::l10n()->toISO6391($user['language']);
@ -612,6 +619,7 @@ class User
} }
DBA::close($channels); DBA::close($channels);
ksort($languages);
return array_keys($languages); return array_keys($languages);
} }

View File

@ -1673,7 +1673,39 @@ class Processor
} }
} }
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0); $languages = self::getPostLanguages($activity);
return Relay::isSolicitedPost($messageTags, $content, $authorid, $id, Protocol::ACTIVITYPUB, $activity['thread-completion'] ?? 0, $languages);
}
/**
* Collect the language codes declared on the content of an activity's object.
*
* Reads the 'as:content' element of $activity['as:object'] together with its
* '@language' entries, converts every entry with DI::l10n()->toISO6391() and
* keeps only the codes that are listed in the ISO 639 language table.
*
* @param array $activity Activity data; only the 'as:object' entry is read here
* @return array List of converted language codes, empty when no '@language' entry is found
*/
private static function getPostLanguages(array $activity): array
{
// Plain content value, only used below to recognise entries that are not language codes
$content = JsonLD::fetchElement($activity['as:object'], 'as:content') ?? '';
$languages = JsonLD::fetchElementArray($activity['as:object'], 'as:content', '@language') ?? [];
if (empty($languages)) {
return [];
}
$iso639 = new \Matriphe\ISO639\ISO639;
$result = [];
foreach ($languages as $language) {
// Skip entries that equal the content itself - presumably fetchElementArray()
// yields the raw value when an entry carries no '@language'; TODO confirm
if ($language == $content) {
continue;
}
$language = DI::l10n()->toISO6391($language);
// Accept only codes found in column 0 of the ISO 639 table
// (presumably the two-letter ISO 639-1 codes - confirm against the library).
// NOTE(review): this column list is rebuilt on every iteration and could be hoisted out of the loop.
if (!in_array($language, array_column($iso639->allLanguages(), 0))) {
continue;
}
$result[] = $language;
}
return $result;
} }
/** /**

View File

@ -895,6 +895,19 @@ class Transmitter
*/ */
public static function getReceiversForUriId(int $uri_id, bool $blindcopy) public static function getReceiversForUriId(int $uri_id, bool $blindcopy)
{ {
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
if (empty($tags)) {
Logger::debug('No receivers found', ['uri-id' => $uri_id]);
$post = Post::selectFirst([Item::DELIVER_FIELDLIST], ['uri-id' => $uri_id, 'origin' => true]);
if (!empty($post)) {
ActivityPub\Transmitter::storeReceiversForItem($post);
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
Logger::debug('Receivers are created', ['uri-id' => $uri_id, 'receivers' => count($tags)]);
} else {
Logger::debug('Origin item not found', ['uri-id' => $uri_id]);
}
}
$receivers = [ $receivers = [
'to' => [], 'to' => [],
'cc' => [], 'cc' => [],
@ -902,7 +915,7 @@ class Transmitter
'audience' => [], 'audience' => [],
]; ];
foreach (Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]) as $receiver) { foreach ($tags as $receiver) {
switch ($receiver['type']) { switch ($receiver['type']) {
case Tag::TO: case Tag::TO:
$receivers['to'][] = $receiver['url']; $receivers['to'][] = $receiver['url'];
@ -1884,7 +1897,7 @@ class Transmitter
if (!empty($item['language'])) { if (!empty($item['language'])) {
$languages = array_keys(json_decode($item['language'], true)); $languages = array_keys(json_decode($item['language'], true));
if (!empty($languages[0])) { if (!empty($languages[0])) {
return $languages[0]; return DI::l10n()->toISO6391($languages[0]);
} }
} }
@ -1892,12 +1905,12 @@ class Transmitter
if (!empty($item['uid'])) { if (!empty($item['uid'])) {
$user = DBA::selectFirst('user', ['language'], ['uid' => $item['uid']]); $user = DBA::selectFirst('user', ['language'], ['uid' => $item['uid']]);
if (!empty($user['language'])) { if (!empty($user['language'])) {
return $user['language']; return DI::l10n()->toISO6391($user['language']);
} }
} }
// And finally just use the system language // And finally just use the system language
return DI::config()->get('system', 'language'); return DI::l10n()->toISO6391(DI::config()->get('system', 'language'));
} }
/** /**

View File

@ -57,9 +57,12 @@ class Relay
* @param string $body * @param string $body
* @param int $authorid * @param int $authorid
* @param string $url * @param string $url
* @param string $network
* @param int $causerid
* @param array $languages
* @return boolean "true" if the post is wanted by the system * @return boolean "true" if the post is wanted by the system
*/ */
public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0): bool public static function isSolicitedPost(array $tags, string $body, int $authorid, string $url, string $network = '', int $causerid = 0, array $languages = []): bool
{ {
$config = DI::config(); $config = DI::config();
@ -128,7 +131,7 @@ class Relay
} }
} }
if (!self::isWantedLanguage($body, 0, $authorid)) { if (!self::isWantedLanguage($body, 0, $authorid, $languages)) {
Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 'tags' => $tags]); Logger::info('Unwanted or Undetected language found - rejected', ['network' => $network, 'url' => $url, 'causer' => $causer, 'tags' => $tags]);
return false; return false;
} }
@ -171,37 +174,45 @@ class Relay
* @param string $body * @param string $body
* @param int $uri_id * @param int $uri_id
* @param int $author_id * @param int $author_id
* @param array $languages
* @return boolean * @return boolean
*/ */
public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0) public static function isWantedLanguage(string $body, int $uri_id = 0, int $author_id = 0, array $languages = [])
{ {
if (empty($body) || Smilies::isEmojiPost($body)) { $detected = [];
$quality = DI::config()->get('system', 'relay_language_quality');
foreach (Item::getLanguageArray($body, DI::config()->get('system', 'relay_languages'), $uri_id, $author_id) as $language => $reliability) {
if (($reliability >= $quality) && ($quality > 0)) {
$detected[] = $language;
}
}
if (empty($languages) && empty($detected) && (empty($body) || Smilies::isEmojiPost($body))) {
Logger::debug('Empty body or only emojis', ['body' => $body]); Logger::debug('Empty body or only emojis', ['body' => $body]);
return true; return true;
} }
$languages = []; if (!empty($languages) || !empty($detected)) {
foreach (Item::getLanguageArray($body, 10, $uri_id, $author_id) as $language => $reliability) {
if ($reliability > 0) {
$languages[] = $language;
}
}
if (!empty($languages)) {
$cachekey = 'relay:isWantedLanguage'; $cachekey = 'relay:isWantedLanguage';
$user_languages = DI::cache()->get($cachekey); $user_languages = DI::cache()->get($cachekey);
if (is_null($user_languages)) { if (is_null($user_languages)) {
$user_languages = User::getLanguages(); $user_languages = User::getLanguages();
DI::cache()->set($cachekey, $user_languages, Duration::HALF_HOUR); DI::cache()->set($cachekey, $user_languages);
} }
foreach ($languages as $language) { foreach ($detected as $language) {
if (in_array($language, $user_languages)) { if (in_array($language, $user_languages)) {
Logger::debug('Wanted language found', ['language' => $language, 'languages' => $languages, 'userlang' => $user_languages, 'body' => $body]); Logger::debug('Wanted language found in detected languages', ['language' => $language, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
return true; return true;
} }
} }
Logger::debug('No wanted language found', ['languages' => $languages, 'userlang' => $user_languages, 'body' => $body]); foreach ($languages as $language) {
if (in_array($language, $user_languages)) {
Logger::debug('Wanted language found in defined languages', ['language' => $language, 'languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
return true;
}
}
Logger::debug('No wanted language found', ['languages' => $languages, 'detected' => $detected, 'userlang' => $user_languages, 'body' => $body]);
return false; return false;
} elseif (DI::config()->get('system', 'relay_deny_undetected_language')) { } elseif (DI::config()->get('system', 'relay_deny_undetected_language')) {
Logger::info('Undetected language found', ['body' => $body]); Logger::info('Undetected language found', ['body' => $body]);

View File

@ -562,6 +562,14 @@ return [
// Deny undetected languages // Deny undetected languages
'relay_deny_undetected_language' => false, 'relay_deny_undetected_language' => false,
// relay_language_quality (Float)
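// Minimum value for the language detection quality for relay posts. The value must be between 0 and 1.
// With the default of 0 no detected language is accepted, so only the languages declared by the post itself are checked.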
'relay_language_quality' => 0,
// relay_languages (Integer)
// Number of languages that are used per post to check for acceptable posts.
'relay_languages' => 10,
// session_handler (database|cache|native) // session_handler (database|cache|native)
// Whether to use Cache to store session data or to use PHP native session storage. // Whether to use Cache to store session data or to use PHP native session storage.
'session_handler' => 'database', 'session_handler' => 'database',