From fc22a3e83f7c9e2217b3416b9330970978becca2 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 1 Feb 2024 23:08:53 +0000 Subject: [PATCH 1/4] Unify searchindex table with engagement table --- database.sql | 15 ++++++++++----- doc/database/db_post-engagement.md | 2 +- doc/database/db_post-searchindex.md | 19 ++++++++++++------- src/Database/PostUpdate.php | 28 +++++++++++++++++++--------- src/Model/Item.php | 2 +- src/Model/Post/Content.php | 8 ++++---- src/Model/Post/Engagement.php | 11 ++++++----- src/Model/Post/SearchIndex.php | 18 ++++++++++++------ src/Module/Api/Mastodon/Search.php | 2 +- src/Module/Conversation/Timeline.php | 4 ++-- static/dbstructure.config.php | 18 +++++++++++------- 11 files changed, 79 insertions(+), 48 deletions(-) diff --git a/database.sql b/database.sql index da167e2c41..871651bdb7 100644 --- a/database.sql +++ b/database.sql @@ -1,6 +1,6 @@ -- ------------------------------------------ -- Friendica 2024.03-dev (Yellow Archangel) --- DB_UPDATE_VERSION 1549 +-- DB_UPDATE_VERSION 1550 -- ------------------------------------------ @@ -1346,7 +1346,7 @@ CREATE TABLE IF NOT EXISTS `post-engagement` ( `owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner', `contact-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Person, organisation, news, community, relay', `media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio', - `language` varchar(128) COMMENT 'Language information about this post', + `iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', `searchtext` mediumtext COMMENT 'Simplified text for the full text search', `size` int unsigned COMMENT 'Body size', `created` datetime COMMENT '', @@ -1467,14 +1467,19 @@ CREATE TABLE IF NOT EXISTS `post-question-option` ( -- CREATE TABLE IF NOT EXISTS `post-searchindex` ( `uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri', - `network` char(4) COMMENT '', - 
`private` tinyint unsigned COMMENT '0=public, 1=private, 2=unlisted', + `owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner', + `media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio', + `iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', `searchtext` mediumtext COMMENT 'Simplified text for the full text search', + `size` int unsigned COMMENT 'Body size', `created` datetime COMMENT '', + `restricted` boolean NOT NULL DEFAULT '0' COMMENT 'If true, this post is either unlisted or not from a federated network', PRIMARY KEY(`uri-id`), + INDEX `owner-id` (`owner-id`), INDEX `created` (`created`), FULLTEXT INDEX `searchtext` (`searchtext`), - FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE + FOREIGN KEY (`uri-id`) REFERENCES `item-uri` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE, + FOREIGN KEY (`owner-id`) REFERENCES `contact` (`id`) ON UPDATE RESTRICT ON DELETE CASCADE ) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Content for all posts'; -- diff --git a/doc/database/db_post-engagement.md b/doc/database/db_post-engagement.md index 027ae56a79..2166702040 100644 --- a/doc/database/db_post-engagement.md +++ b/doc/database/db_post-engagement.md @@ -12,7 +12,7 @@ Fields | owner-id | Item owner | int unsigned | NO | | 0 | | | contact-type | Person, organisation, news, community, relay | tinyint | NO | | 0 | | | media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | | -| language | Language information about this post | varchar(128) | YES | | NULL | | +| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | | searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | | size | Body size | int unsigned | YES | | NULL | | | created | | datetime | YES | | NULL | | diff --git a/doc/database/db_post-searchindex.md 
b/doc/database/db_post-searchindex.md index a6f5879854..18efd607fc 100644 --- a/doc/database/db_post-searchindex.md +++ b/doc/database/db_post-searchindex.md @@ -6,13 +6,16 @@ Content for all posts Fields ------ -| Field | Description | Type | Null | Key | Default | Extra | -| ---------- | --------------------------------------------------------- | ---------------- | ---- | --- | ------- | ----- | -| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | | -| network | | char(4) | YES | | NULL | | -| private | 0=public, 1=private, 2=unlisted | tinyint unsigned | YES | | NULL | | -| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | -| created | | datetime | YES | | NULL | | +| Field | Description | Type | Null | Key | Default | Extra | +| ---------- | --------------------------------------------------------------------- | ------------ | ---- | --- | ------- | ----- | +| uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | | +| owner-id | Item owner | int unsigned | NO | | 0 | | +| media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | | +| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | +| searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | +| size | Body size | int unsigned | YES | | NULL | | +| created | | datetime | YES | | NULL | | +| restricted | If true, this post is either unlisted or not from a federated network | boolean | NO | | 0 | | Indexes ------------ @@ -20,6 +23,7 @@ Indexes | Name | Fields | | ---------- | -------------------- | | PRIMARY | uri-id | +| owner-id | owner-id | | created | created | | searchtext | FULLTEXT, searchtext | @@ -29,5 +33,6 @@ Foreign Keys | Field | Target Table | Target Field | |-------|--------------|--------------| | uri-id | [item-uri](help/database/db_item-uri) | 
id | +| owner-id | [contact](help/database/db_contact) | id | Return to [database documentation](help/database) diff --git a/src/Database/PostUpdate.php b/src/Database/PostUpdate.php index 09c97708d2..bad13ad4a9 100644 --- a/src/Database/PostUpdate.php +++ b/src/Database/PostUpdate.php @@ -52,7 +52,7 @@ class PostUpdate // Needed for the helper function to read from the legacy term table const OBJECT_TYPE_POST = 1; - const VERSION = 1547; + const VERSION = 1550; /** * Calls the post update functions @@ -128,7 +128,7 @@ class PostUpdate if (!self::update1544()) { return false; } - if (!self::update1547()) { + if (!self::update1550()) { return false; } return true; @@ -1369,14 +1369,24 @@ class PostUpdate * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \ImagickException */ - private static function update1547() + private static function update1550() { // Was the script completed? - if (DI::keyValue()->get('post_update_version') >= 1547) { + if (DI::keyValue()->get('post_update_version') >= 1550) { return true; } - $id = (int)(DI::keyValue()->get('post_update_version_1547_id') ?? 0); + $engagements = DBA::select('post-engagement', ['uri-id'], ["`iso-639-1` IS NULL"], ['order' => ['uri-id' => true], 'limit' => 1000]); + while ($engagement = DBA::fetch($engagements)) { + $item = Post::selectFirst([], ['uri-id' => $engagement['uri-id']]); + if (empty($item)) { + continue; + } + Post\Engagement::storeFromItem($item); + } + DBA::close($engagements); + + $id = (int)(DI::keyValue()->get('post_update_version_1550_id') ?? 0); if ($id == 0) { $post = Post::selectFirstPost(['uri-id'], [], ['order' => ['uri-id' => true]]); $id = (int)($post['uri-id'] ?? 
0); @@ -1393,7 +1403,7 @@ class PostUpdate DBA::mergeConditions($condition, ["`created` > ?", $limit]); } - $posts = Post::selectPosts(['uri-id', 'network', 'private', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]); + $posts = Post::selectPosts(['uri-id', 'created'], $condition, ['order' => ['uri-id' => true], 'limit' => 1000]); if (DBA::errorNo() != 0) { Logger::error('Database error', ['no' => DBA::errorNo(), 'message' => DBA::errorMessage()]); @@ -1402,17 +1412,17 @@ class PostUpdate while ($post = Post::fetch($posts)) { $id = $post['uri-id']; - Post\SearchIndex::insert($post['uri-id'], $post['network'], $post['private'], $post['created'], true); + Post\SearchIndex::insert($post['uri-id'], $post['created'], true); ++$rows; } DBA::close($posts); - DI::keyValue()->set('post_update_version_1547_id', $id); + DI::keyValue()->set('post_update_version_1550_id', $id); Logger::info('Processed', ['rows' => $rows, 'last' => $id]); if ($rows <= 100) { - DI::keyValue()->set('post_update_version', 1547); + DI::keyValue()->set('post_update_version', 1550); Logger::info('Done'); return true; } diff --git a/src/Model/Item.php b/src/Model/Item.php index 9134bc149f..3d35618396 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -1450,7 +1450,7 @@ class Item $engagement_uri_id = Post\Engagement::storeFromItem($posted_item); if (in_array($posted_item['gravity'], [self::GRAVITY_PARENT, self::GRAVITY_COMMENT])) { - Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['network'], $posted_item['private'], $posted_item['created']); + Post\SearchIndex::insert($posted_item['uri-id'], $posted_item['created']); } elseif ($posted_item['verb'] == Activity::ANNOUNCE) { Post\SearchIndex::update($posted_item['thr-parent-id']); } diff --git a/src/Model/Post/Content.php b/src/Model/Post/Content.php index 74cfbfe6f5..2f735bbb87 100644 --- a/src/Model/Post/Content.php +++ b/src/Model/Post/Content.php @@ -110,9 +110,9 @@ class Content { $search = 
Post\Engagement::escapeKeywords($search); if ($uid != 0) { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $search, $uid]; } else { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND NOT `restricted`", $search]; } if (!empty($last_uriid)) { @@ -139,9 +139,9 @@ class Content { $search = Post\Engagement::escapeKeywords($search); if ($uid != 0) { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $search, Item::PUBLIC, $uid]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $search, $uid]; } else { - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and private = ?", $search, Item::PUBLIC]; + $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) AND NOT `restricted`", $search]; } return DBA::count('post-searchindex', $condition); } diff --git a/src/Model/Post/Engagement.php b/src/Model/Post/Engagement.php index a460ba85b3..3841a55bd9 100644 --- a/src/Model/Post/Engagement.php +++ b/src/Model/Post/Engagement.php @@ -22,6 +22,7 @@ namespace Friendica\Model\Post; use Friendica\Content\Text\BBCode; +use Friendica\Core\L10n; use Friendica\Core\Logger; use Friendica\Core\Protocol; use Friendica\Database\DBA; @@ -93,9 +94,9 @@ class Engagement } $searchtext = self::getSearchTextForItem($parent); + $language = !empty($parent['language']) ? (array_key_first(json_decode($parent['language'], true)) ?? 
L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE; if (!$store) { - $language = !empty($parent['language']) ? (array_key_first(json_decode($parent['language'], true)) ?? '') : ''; - $store = DI::userDefinedChannel()->match($searchtext, $language); + $store = DI::userDefinedChannel()->match($searchtext, $language); } $engagement = [ @@ -103,7 +104,7 @@ class Engagement 'owner-id' => $parent['owner-id'], 'contact-type' => $parent['contact-contact-type'], 'media-type' => $mediatype, - 'language' => $parent['language'], + 'iso-639-1' => $language, 'searchtext' => $searchtext, 'size' => self::getContentSize($parent), 'created' => $parent['created'], @@ -130,7 +131,7 @@ class Engagement return ($ret && !$exists) ? $engagement['uri-id'] : 0; } - private static function getContentSize(array $item): int + public static function getContentSize(array $item): int { $body = ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . $item['body']; $body = BBCode::removeAttachment($body); @@ -315,7 +316,7 @@ class Engagement return $text; } - private static function getMediaType(int $uri_id): int + public static function getMediaType(int $uri_id): int { $media = Post\Media::getByURIId($uri_id); $type = 0; diff --git a/src/Model/Post/SearchIndex.php b/src/Model/Post/SearchIndex.php index 67d6c91d12..042f800d46 100644 --- a/src/Model/Post/SearchIndex.php +++ b/src/Model/Post/SearchIndex.php @@ -21,10 +21,13 @@ namespace Friendica\Model\Post; +use Friendica\Core\L10n; use Friendica\Core\Logger; +use Friendica\Core\Protocol; use Friendica\Database\Database; use Friendica\Database\DBA; use Friendica\DI; +use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Util\DateTimeFormat; @@ -34,24 +37,27 @@ class SearchIndex * Insert a post-searchindex entry * * @param int $uri_id - * @param string $network - * @param int $private * @param string $created * @param bool $refresh */ - public static function insert(int $uri_id, string $network, int $private, string 
$created, bool $refresh = false) + public static function insert(int $uri_id, string $created, bool $refresh = false) { $limit = self::searchAgeDateLimit(); if (!empty($limit) && (strtotime($created) < strtotime($limit))) { return; } + $item = Post::selectFirstPost(['created', 'owner-id', 'private', 'language', 'network', 'title', 'content-warning', 'body'], ['uri-id' => $uri_id]); + $search = [ 'uri-id' => $uri_id, - 'network' => $network, - 'private' => $private, - 'created' => $created, + 'owner-id' => $item['owner-id'], + 'media-type' => Engagement::getMediaType($uri_id), + 'iso-639-1' => !empty($item['language']) ? (array_key_first(json_decode($item['language'], true)) ?? L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE, 'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, $refresh), + 'size' => Engagement::getContentSize($item), + 'created' => $item['created'], + 'restricted' => !in_array($item['network'], Protocol::FEDERATED) || ($item['private'] != Item::PUBLIC), ]; return DBA::insert('post-searchindex', $search, Database::INSERT_UPDATE); } diff --git a/src/Module/Api/Mastodon/Search.php b/src/Module/Api/Mastodon/Search.php index 23f920dc3e..25db39950a 100644 --- a/src/Module/Api/Mastodon/Search.php +++ b/src/Module/Api/Mastodon/Search.php @@ -154,7 +154,7 @@ class Search extends BaseApi $table = 'tag-search-view'; } else { $q = Post\Engagement::escapeKeywords($q); - $condition = ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE) and (private = ? OR `uri-id` in (SELECT `uri-id` FROM `post-user` where `uid` = ?))", $q, Item::PUBLIC, $uid]; + $condition = ["MATCH (`searchtext`) AGAINST (? 
IN BOOLEAN MODE) AND (NOT `restricted` OR `uri-id` IN (SELECT `uri-id` FROM `post-user` WHERE `uid` = ?))", $q, $uid]; $table = 'post-searchindex'; } diff --git a/src/Module/Conversation/Timeline.php b/src/Module/Conversation/Timeline.php index d0f8f09736..311a48a204 100644 --- a/src/Module/Conversation/Timeline.php +++ b/src/Module/Conversation/Timeline.php @@ -324,7 +324,7 @@ class Timeline extends BaseModule } elseif ($this->selectedTab == ChannelEntity::AUDIO) { $condition = ["`media-type` & ?", 4]; } elseif ($this->selectedTab == ChannelEntity::LANGUAGE) { - $condition = ["JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?", User::getLanguageCode($uid)]; + $condition = ["`iso-639-1` = ?", User::getLanguageCode($uid)]; } elseif (is_numeric($this->selectedTab)) { $condition = $this->getUserChannelConditions($this->selectedTab, $uid); } @@ -450,7 +450,7 @@ class Timeline extends BaseModule $conditions = []; $languages = $languages ?: User::getWantedLanguages($uid); foreach ($languages as $language) { - $conditions[] = "JSON_EXTRACT(JSON_KEYS(language), '$[0]') = ?"; + $conditions[] = "`iso-639-1` = ?"; $condition[] = $language; } if (!empty($conditions)) { diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index a75403fcd9..3dc1d4d38c 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -56,7 +56,7 @@ use Friendica\Database\DBA; // This file is required several times during the test in DbaDefinition which justifies this condition if (!defined('DB_UPDATE_VERSION')) { - define('DB_UPDATE_VERSION', 1549); + define('DB_UPDATE_VERSION', 1550); } return [ @@ -1245,7 +1245,7 @@ return [ "post-activity" => [ "comment" => "Original remote activity", "fields" => [ - "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", 
"foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "activity" => ["type" => "mediumtext", "comment" => "Original activity"], "received" => ["type" => "datetime", "comment" => ""], ], @@ -1256,7 +1256,7 @@ return [ "post-category" => [ "comment" => "post relation to categories", "fields" => [ - "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "uid" => ["type" => "mediumint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "foreign" => ["user" => "uid"], "comment" => "User id"], "type" => ["type" => "tinyint unsigned", "not null" => "1", "default" => "0", "primary" => "1", "comment" => ""], "tid" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "primary" => "1", "foreign" => ["tag" => "id", "on delete" => "restrict"], "comment" => ""], @@ -1363,11 +1363,11 @@ return [ "post-engagement" => [ "comment" => "Engagement data per post", "fields" => [ - "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], + "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"], "contact-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Person, organisation, news, community, relay"], "media-type" => ["type" => "tinyint", "not null" => "1", "default" => 
"0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"], - "language" => ["type" => "varchar(128)", "comment" => "Language information about this post"], + "iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"], "searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"], "size" => ["type" => "int unsigned", "comment" => "Body size"], "created" => ["type" => "datetime", "comment" => ""], @@ -1486,13 +1486,17 @@ return [ "comment" => "Content for all posts", "fields" => [ "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], - "network" => ["type" => "char(4)", "comment" => ""], - "private" => ["type" => "tinyint unsigned", "comment" => "0=public, 1=private, 2=unlisted"], + "owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"], + "media-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"], + "iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"], "searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"], + "size" => ["type" => "int unsigned", "comment" => "Body size"], "created" => ["type" => "datetime", "comment" => ""], + "restricted" => ["type" => "boolean", "not null" => "1", "default" => "0", "comment" => "If true, this post is either unlisted or not from a federated network"], ], "indexes" => [ "PRIMARY" => ["uri-id"], + "owner-id" => ["owner-id"], "created" => ["created"], "searchtext" => ["FULLTEXT", "searchtext"], ] From 7faa42882b32bbce311e23f5e38e1c6d5a1ba6d2 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 2 Feb 2024 07:05:39 
+0000 Subject: [PATCH 2/4] language field renamed to "language" --- database.sql | 4 ++-- doc/database/db_post-engagement.md | 2 +- doc/database/db_post-searchindex.md | 2 +- src/Database/PostUpdate.php | 2 +- src/Model/Post/Engagement.php | 2 +- src/Model/Post/SearchIndex.php | 2 +- src/Module/Conversation/Timeline.php | 4 ++-- static/dbstructure.config.php | 4 ++-- update.php | 13 ++++++++++++- 9 files changed, 23 insertions(+), 12 deletions(-) diff --git a/database.sql b/database.sql index 871651bdb7..c8ab483b44 100644 --- a/database.sql +++ b/database.sql @@ -1346,7 +1346,7 @@ CREATE TABLE IF NOT EXISTS `post-engagement` ( `owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner', `contact-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Person, organisation, news, community, relay', `media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio', - `iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', + `language` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', `searchtext` mediumtext COMMENT 'Simplified text for the full text search', `size` int unsigned COMMENT 'Body size', `created` datetime COMMENT '', @@ -1469,7 +1469,7 @@ CREATE TABLE IF NOT EXISTS `post-searchindex` ( `uri-id` int unsigned NOT NULL COMMENT 'Id of the item-uri table entry that contains the item uri', `owner-id` int unsigned NOT NULL DEFAULT 0 COMMENT 'Item owner', `media-type` tinyint NOT NULL DEFAULT 0 COMMENT 'Type of media in a bit array (1 = image, 2 = video, 4 = audio', - `iso-639-1` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', + `language` char(2) COMMENT 'Language information about this post in the ISO 639-1 format', `searchtext` mediumtext COMMENT 'Simplified text for the full text search', `size` int unsigned COMMENT 'Body size', `created` datetime COMMENT '', diff --git a/doc/database/db_post-engagement.md 
b/doc/database/db_post-engagement.md index 2166702040..c82c62b863 100644 --- a/doc/database/db_post-engagement.md +++ b/doc/database/db_post-engagement.md @@ -12,7 +12,7 @@ Fields | owner-id | Item owner | int unsigned | NO | | 0 | | | contact-type | Person, organisation, news, community, relay | tinyint | NO | | 0 | | | media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | | -| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | +| language | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | | searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | | size | Body size | int unsigned | YES | | NULL | | | created | | datetime | YES | | NULL | | diff --git a/doc/database/db_post-searchindex.md b/doc/database/db_post-searchindex.md index 18efd607fc..c6504a7ed3 100644 --- a/doc/database/db_post-searchindex.md +++ b/doc/database/db_post-searchindex.md @@ -11,7 +11,7 @@ Fields | uri-id | Id of the item-uri table entry that contains the item uri | int unsigned | NO | PRI | NULL | | | owner-id | Item owner | int unsigned | NO | | 0 | | | media-type | Type of media in a bit array (1 = image, 2 = video, 4 = audio | tinyint | NO | | 0 | | -| iso-639-1 | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | +| language | Language information about this post in the ISO 639-1 format | char(2) | YES | | NULL | | | searchtext | Simplified text for the full text search | mediumtext | YES | | NULL | | | size | Body size | int unsigned | YES | | NULL | | | created | | datetime | YES | | NULL | | diff --git a/src/Database/PostUpdate.php b/src/Database/PostUpdate.php index bad13ad4a9..c82fd090c8 100644 --- a/src/Database/PostUpdate.php +++ b/src/Database/PostUpdate.php @@ -1376,7 +1376,7 @@ class PostUpdate return true; } - $engagements = DBA::select('post-engagement', ['uri-id'], 
["`iso-639-1` IS NULL"], ['order' => ['uri-id' => true], 'limit' => 1000]); + $engagements = DBA::select('post-engagement', ['uri-id'], ["`language` IS NULL"], ['order' => ['uri-id' => true], 'limit' => 1000]); while ($engagement = DBA::fetch($engagements)) { $item = Post::selectFirst([], ['uri-id' => $engagement['uri-id']]); if (empty($item)) { diff --git a/src/Model/Post/Engagement.php b/src/Model/Post/Engagement.php index 3841a55bd9..213550f9c6 100644 --- a/src/Model/Post/Engagement.php +++ b/src/Model/Post/Engagement.php @@ -104,7 +104,7 @@ class Engagement 'owner-id' => $parent['owner-id'], 'contact-type' => $parent['contact-contact-type'], 'media-type' => $mediatype, - 'iso-639-1' => $language, + 'language' => $language, 'searchtext' => $searchtext, 'size' => self::getContentSize($parent), 'created' => $parent['created'], diff --git a/src/Model/Post/SearchIndex.php b/src/Model/Post/SearchIndex.php index 042f800d46..98a82cae63 100644 --- a/src/Model/Post/SearchIndex.php +++ b/src/Model/Post/SearchIndex.php @@ -53,7 +53,7 @@ class SearchIndex 'uri-id' => $uri_id, 'owner-id' => $item['owner-id'], 'media-type' => Engagement::getMediaType($uri_id), - 'iso-639-1' => !empty($item['language']) ? (array_key_first(json_decode($item['language'], true)) ?? L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE, + 'language' => !empty($item['language']) ? (array_key_first(json_decode($item['language'], true)) ?? 
L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE, 'searchtext' => Post\Engagement::getSearchTextForUriId($uri_id, $refresh), 'size' => Engagement::getContentSize($item), 'created' => $item['created'], diff --git a/src/Module/Conversation/Timeline.php b/src/Module/Conversation/Timeline.php index 311a48a204..eb14b2ffc4 100644 --- a/src/Module/Conversation/Timeline.php +++ b/src/Module/Conversation/Timeline.php @@ -324,7 +324,7 @@ class Timeline extends BaseModule } elseif ($this->selectedTab == ChannelEntity::AUDIO) { $condition = ["`media-type` & ?", 4]; } elseif ($this->selectedTab == ChannelEntity::LANGUAGE) { - $condition = ["`iso-639-1` = ?", User::getLanguageCode($uid)]; + $condition = ["`language` = ?", User::getLanguageCode($uid)]; } elseif (is_numeric($this->selectedTab)) { $condition = $this->getUserChannelConditions($this->selectedTab, $uid); } @@ -450,7 +450,7 @@ class Timeline extends BaseModule $conditions = []; $languages = $languages ?: User::getWantedLanguages($uid); foreach ($languages as $language) { - $conditions[] = "`iso-639-1` = ?"; + $conditions[] = "`language` = ?"; $condition[] = $language; } if (!empty($conditions)) { diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index 3dc1d4d38c..e9082efc66 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -1367,7 +1367,7 @@ return [ "owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"], "contact-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Person, organisation, news, community, relay"], "media-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"], - "iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"], + "language" => ["type" => "char(2)", "comment" => "Language 
information about this post in the ISO 639-1 format"], "searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"], "size" => ["type" => "int unsigned", "comment" => "Body size"], "created" => ["type" => "datetime", "comment" => ""], @@ -1488,7 +1488,7 @@ return [ "uri-id" => ["type" => "int unsigned", "not null" => "1", "primary" => "1", "foreign" => ["item-uri" => "id"], "comment" => "Id of the item-uri table entry that contains the item uri"], "owner-id" => ["type" => "int unsigned", "not null" => "1", "default" => "0", "foreign" => ["contact" => "id"], "comment" => "Item owner"], "media-type" => ["type" => "tinyint", "not null" => "1", "default" => "0", "comment" => "Type of media in a bit array (1 = image, 2 = video, 4 = audio"], - "iso-639-1" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"], + "language" => ["type" => "char(2)", "comment" => "Language information about this post in the ISO 639-1 format"], "searchtext" => ["type" => "mediumtext", "comment" => "Simplified text for the full text search"], "size" => ["type" => "int unsigned", "comment" => "Body size"], "created" => ["type" => "datetime", "comment" => ""], diff --git a/update.php b/update.php index 0e655e83db..74a8ad7015 100644 --- a/update.php +++ b/update.php @@ -1410,4 +1410,15 @@ function update_1539() DBA::close($users); return Update::SUCCESS; -} \ No newline at end of file +} + +function pre_update_1550() +{ + if (DBStructure::existsTable('post-engagement') && DBStructure::existsColumn('post-engagement', ['language'])) { + DBA::e("ALTER TABLE `post-engagement` DROP `language`"); + } + if (DBStructure::existsTable('post-searchindex') && DBStructure::existsColumn('post-searchindex', ['network'])) { + DBA::e("ALTER TABLE `post-searchindex` DROP `network`, DROP `private`"); + } + return Update::SUCCESS; +} From fc05daefb5b23e7c120ea74fb4302165b694c8ad Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 2 
Feb 2024 10:46:20 +0000 Subject: [PATCH 3/4] "media" is added to the search text --- doc/Channels.md | 4 ++++ src/Model/Post/Engagement.php | 31 +++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/doc/Channels.md b/doc/Channels.md index 3f9e474918..2e00e1a1c6 100644 --- a/doc/Channels.md +++ b/doc/Channels.md @@ -71,6 +71,10 @@ Alternatives are presented with "|". * source:service | source:news - The posts originates from a service account. This source type is often used to mark bot accounts. * source:application | source:relay - The post is created by an application. This is most likely unused in the fediverse for post creation. * tag - Use "tag:tagname" to search for a specific tag. +* media - With this keyword you can search for attached media. + * media:image | media:photo | media:picture - The post contains an image + * media:video - The post contains a video + * media:audio - The post contains audio * network | net - Use this to include or exclude some networks from your channel. * network:apub | network:activitypub - ActivityPub (Used by the systems in the Fediverse) * network:dfrn | network:friendica - Legacy Friendica protocol. Nowayday Friendica mostly uses ActivityPub. 
diff --git a/src/Model/Post/Engagement.php b/src/Model/Post/Engagement.php index 213550f9c6..e4647f43ed 100644 --- a/src/Model/Post/Engagement.php +++ b/src/Model/Post/Engagement.php @@ -39,10 +39,12 @@ use Friendica\Util\DateTimeFormat; class Engagement { - const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'application', 'tag', 'network', 'platform', 'visibility', 'language']; + const KEYWORDS = ['source', 'server', 'from', 'to', 'group', 'application', 'tag', 'network', 'platform', 'visibility', 'language', 'media']; const SHORTCUTS = ['lang' => 'language', 'net' => 'network', 'relay' => 'application']; const ALTERNATIVES = ['source:news' => 'source:service', 'source:relay' => 'source:application', - 'network:activitypub' => 'network:apub', 'network:friendica' => 'network:dfrn', 'network:diaspora' => 'network:dspr', 'network:ostatus' => 'network:stat', + 'media:picture' => 'media:image', 'media:photo' => 'media:image', + 'network:activitypub' => 'network:apub', 'network:friendica' => 'network:dfrn', + 'network:diaspora' => 'network:dspr', 'network:ostatus' => 'network:stat', 'network:discourse' => 'network:dscs', 'network:tumblr' => 'network:tmbl', 'network:bluesky' => 'network:bsky']; /** @@ -93,7 +95,7 @@ class Engagement $store = !empty($mediatype); } - $searchtext = self::getSearchTextForItem($parent); + $searchtext = self::getSearchTextForItem($parent, $mediatype); $language = !empty($parent['language']) ? (array_key_first(json_decode($parent['language'], true)) ?? 
L10n::UNDETERMINED_LANGUAGE) : L10n::UNDETERMINED_LANGUAGE; if (!$store) { $store = DI::userDefinedChannel()->match($searchtext, $language); @@ -172,7 +174,7 @@ class Engagement } } - return self::getSearchText($item, $receivers, $tags); + return self::getSearchText($item, $receivers, $tags, 0); } public static function getSearchTextForUriId(int $uri_id, bool $refresh = false): string @@ -190,17 +192,18 @@ class Engagement if (empty($post['uri-id'])) { return ''; } - return self::getSearchTextForItem($post); + $mediatype = self::getMediaType($uri_id); + return self::getSearchTextForItem($post, $mediatype); } - private static function getSearchTextForItem(array $item): string + private static function getSearchTextForItem(array $item, int $mediatype): string { $receivers = array_column(Tag::getByURIId($item['uri-id'], [Tag::MENTION, Tag::IMPLICIT_MENTION, Tag::EXCLUSIVE_MENTION, Tag::AUDIENCE]), 'url'); $tags = array_column(Tag::getByURIId($item['uri-id'], [Tag::HASHTAG]), 'name'); - return self::getSearchText($item, $receivers, $tags); + return self::getSearchText($item, $receivers, $tags, $mediatype); } - private static function getSearchText(array $item, array $receivers, array $tags): string + private static function getSearchText(array $item, array $receivers, array $tags, int $mediatype): string { $body = '[nosmile]network_' . $item['network']; @@ -286,6 +289,18 @@ class Engagement $body .= ' language_' . array_key_first($languages); } + if ($mediatype & 1) { + $body .= ' media_image'; + } + + if ($mediatype & 2) { + $body .= ' media_video'; + } + + if ($mediatype & 4) { + $body .= ' media_audio'; + } + $body .= ' ' . $item['title'] . ' ' . $item['content-warning'] . ' ' . 
$item['body']; return BBCode::toSearchText($body, $item['uri-id']); From 885b3a12b9b021602dd3c9ff763c040ea23c17ae Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 2 Feb 2024 16:05:24 +0000 Subject: [PATCH 4/4] Search for tags and media in full text when doing full text searches --- src/Module/Conversation/Timeline.php | 110 ++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 18 deletions(-) diff --git a/src/Module/Conversation/Timeline.php b/src/Module/Conversation/Timeline.php index eb14b2ffc4..a8db2d85df 100644 --- a/src/Module/Conversation/Timeline.php +++ b/src/Module/Conversation/Timeline.php @@ -412,19 +412,50 @@ class Timeline extends BaseModule } if (!empty($channel->fullTextSearch)) { - $condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", Engagement::escapeKeywords($channel->fullTextSearch)]); - } + if (!empty($channel->includeTags)) { + $additional = self:: addIncludeTags($channel->includeTags); + } else { + $additional = ''; + } - if (!empty($channel->includeTags)) { - $search = explode(',', mb_strtolower($channel->includeTags)); - $placeholders = substr(str_repeat("?, ", count($search)), 0, -2); - $condition = DBA::mergeConditions($condition, array_merge(["`uri-id` IN (SELECT `uri-id` FROM `post-tag` INNER JOIN `tag` ON `tag`.`id` = `post-tag`.`tid` WHERE `post-tag`.`type` = 1 AND `name` IN (" . $placeholders . "))"], $search)); - } + if (!empty($channel->excludeTags)) { + foreach (explode(',', mb_strtolower($channel->excludeTags)) as $tag) { + $additional .= ' -tag:' . $tag; + } + } - if (!empty($channel->excludeTags)) { - $search = explode(',', mb_strtolower($channel->excludeTags)); - $placeholders = substr(str_repeat("?, ", count($search)), 0, -2); - $condition = DBA::mergeConditions($condition, array_merge(["NOT `uri-id` IN (SELECT `uri-id` FROM `post-tag` INNER JOIN `tag` ON `tag`.`id` = `post-tag`.`tid` WHERE `post-tag`.`type` = 1 AND `name` IN (" . $placeholders . 
"))"], $search)); + if (!empty($channel->mediaType)) { + $additional .= self::addMediaTerms($channel->mediaType); + } + + $additional .= self::addLanguageSearchTerms($uid, $channel->languages); + + if ($additional) { + $searchterms = '+(' . trim($channel->fullTextSearch) . ')' . $additional; + } else { + $searchterms = $channel->fullTextSearch; + } + + $condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", Engagement::escapeKeywords($searchterms)]); + } else { + if (!empty($channel->includeTags)) { + $search = explode(',', mb_strtolower($channel->includeTags)); + $placeholders = substr(str_repeat("?, ", count($search)), 0, -2); + $condition = DBA::mergeConditions($condition, array_merge(["`uri-id` IN (SELECT `uri-id` FROM `post-tag` INNER JOIN `tag` ON `tag`.`id` = `post-tag`.`tid` WHERE `post-tag`.`type` = 1 AND `name` IN (" . $placeholders . "))"], $search)); + } + + if (!empty($channel->excludeTags)) { + $search = explode(',', mb_strtolower($channel->excludeTags)); + $placeholders = substr(str_repeat("?, ", count($search)), 0, -2); + $condition = DBA::mergeConditions($condition, array_merge(["NOT `uri-id` IN (SELECT `uri-id` FROM `post-tag` INNER JOIN `tag` ON `tag`.`id` = `post-tag`.`tid` WHERE `post-tag`.`type` = 1 AND `name` IN (" . $placeholders . 
"))"], $search)); + } + + if (!empty($channel->mediaType)) { + $condition = DBA::mergeConditions($condition, ["`media-type` & ?", $channel->mediaType]); + } + + // For "addLanguageCondition" to work, the condition must not be empty + $condition = $this->addLanguageCondition($uid, $condition ?: ["true"], $channel->languages); } if (!is_null($channel->minSize)) { @@ -435,24 +466,67 @@ class Timeline extends BaseModule $condition = DBA::mergeConditions($condition, ["`size` <= ?", $channel->maxSize]); } - if (!empty($channel->mediaType)) { - $condition = DBA::mergeConditions($condition, ["`media-type` & ?", $channel->mediaType]); + return $condition; + } + + private function addIncludeTags(string $includeTags): string + { + $tagterms = ''; + foreach (explode(',', mb_strtolower($includeTags)) as $tag) { + $tagterms .= ' tag:' . $tag; } - // For "addLanguageCondition" to work, the condition must not be empty - $condition = $this->addLanguageCondition($uid, $condition ?: ["true"], $channel->languages); + if ($tagterms) { + return ' +(' . trim($tagterms) . ')'; + } else { + return ''; + } + } - return $condition; + private function addMediaTerms(int $mediaType): string + { + $mediaterms = ''; + if ($mediaType & 1) { + $mediaterms .= ' media:image'; + } + + if ($mediaType & 2) { + $mediaterms .= ' media:video'; + } + + if ($mediaType & 4) { + $mediaterms .= ' media:audio'; + } + + if ($mediaterms) { + return ' +(' . trim($mediaterms) . ')'; + } else { + return ''; + } + } + + private function addLanguageSearchTerms(int $uid, $languages = null): string + { + $langterms = ''; + foreach ($languages ?: User::getWantedLanguages($uid) as $language) { + $langterms .= ' language:' . $language; + } + + if ($langterms) { + return ' +(' . trim($langterms) . 
')'; + } else { + return ''; + } } private function addLanguageCondition(int $uid, array $condition, $languages = null): array { $conditions = []; - $languages = $languages ?: User::getWantedLanguages($uid); - foreach ($languages as $language) { + foreach ($languages ?: User::getWantedLanguages($uid) as $language) { $conditions[] = "`language` = ?"; $condition[] = $language; } + if (!empty($conditions)) { $condition[0] .= " AND (" . implode(' OR ', $conditions) . ")"; }