Use full text search

This commit is contained in:
Michael
2023-09-21 23:27:05 +00:00
parent fac76a33df
commit 508d84b2b7
10 changed files with 173 additions and 121 deletions

View File

@@ -230,6 +230,7 @@ class BBCode
{
DI::profiler()->startRecording('rendering');
// Remove pictures in advance to avoid unneeded proxy calls
$text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", ' ', $text);
$text = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", ' $2 ', $text);
$text = preg_replace("/\[img.*?\[\/img\]/ism", ' ', $text);

View File

@@ -21,6 +21,7 @@
namespace Friendica\Model\Post;
use Friendica\Content\Text\BBCode;
use Friendica\Core\Logger;
use Friendica\Core\Protocol;
use Friendica\Database\Database;
@@ -52,7 +53,7 @@ class Engagement
return;
}
$parent = Post::selectFirst(['created', 'owner-id', 'uid', 'private', 'contact-contact-type', 'language'], ['uri-id' => $item['parent-uri-id']]);
$parent = Post::selectFirst(['uri-id', 'created', 'owner-id', 'uid', 'private', 'contact-contact-type', 'language', 'title', 'content-warning', 'body'], ['uri-id' => $item['parent-uri-id']]);
if ($parent['created'] < DateTimeFormat::utc('now - ' . DI::config()->get('channel', 'engagement_hours') . ' hour')) {
Logger::debug('Post is too old', ['uri-id' => $item['uri-id'], 'parent-uri-id' => $item['parent-uri-id'], 'created' => $parent['created']]);
@@ -87,6 +88,7 @@ class Engagement
'contact-type' => $parent['contact-contact-type'],
'media-type' => $mediatype,
'language' => $parent['language'],
'searchtext' => self::getSearchText($parent),
'created' => $parent['created'],
'restricted' => !in_array($item['network'], Protocol::FEDERATED) || ($parent['private'] != Item::PUBLIC),
'comments' => DBA::count('post', ['parent-uri-id' => $item['parent-uri-id'], 'gravity' => Item::GRAVITY_COMMENT]),
@@ -104,6 +106,20 @@ class Engagement
Logger::debug('Engagement stored', ['fields' => $engagement, 'ret' => $ret]);
}
private static function getSearchText(array $item): string
{
$body = $item['title'] . "\n" . $item['content-warning'] . "\n" . $item['body'] . "\n";
$body = Post\Media::addAttachmentsToBody($item['uri-id'], $body);
$text = BBCode::toPlaintext($body, false);
do {
$oldtext = $text;
$text = str_replace([' ', "\n", "\r"], ' ', $text);
} while ($oldtext != $text);
return $text;
}
private static function getMediaType(int $uri_id): int
{
$media = Post\Media::getByURIId($uri_id);

View File

@@ -376,8 +376,7 @@ class Timeline extends BaseModule
$condition = [];
if (!empty($channel->fullTextSearch)) {
$first = $this->database->selectFirst('post-engagement', ['uri-id']);
$condition = DBA::mergeConditions($condition, ["`uri-id` IN (SELECT `uri-id` FROM `post-content` WHERE `uri-id` >= ? AND MATCH (`title`, `content-warning`, `body`) AGAINST (? IN BOOLEAN MODE))", $first['uri-id'], $channel->fullTextSearch]);
$condition = DBA::mergeConditions($condition, ["MATCH (`searchtext`) AGAINST (? IN BOOLEAN MODE)", $channel->fullTextSearch]);
}
if (!empty($channel->includeTags)) {

View File

@@ -69,7 +69,7 @@ class Channels extends BaseSettings
'uid' => $uid,
'include-tags' => $this->cleanTags($request['new_include_tags']),
'exclude-tags' => $this->cleanTags($request['new_exclude_tags']),
'full-text-search' => null, // Currently not supported for performance reasons
'full-text-search' => $this->cleanTags($request['new_text_search']),
'media-type' => ($request['new_image'] ? 1 : 0) | ($request['new_video'] ? 2 : 0) | ($request['new_audio'] ? 4 : 0),
]);
$saved = $this->channel->save($channel);
@@ -92,7 +92,7 @@ class Channels extends BaseSettings
'uid' => $uid,
'include-tags' => $this->cleanTags($request['include_tags'][$id]),
'exclude-tags' => $this->cleanTags($request['exclude_tags'][$id]),
'full-text-search' => null, // Currently not supported for performance reasons
'full-text-search' => $this->cleanTags($request['text_search'][$id]),
'media-type' => ($request['image'][$id] ? 1 : 0) | ($request['video'][$id] ? 2 : 0) | ($request['audio'][$id] ? 4 : 0),
]);
$saved = $this->channel->save($channel);
@@ -119,6 +119,7 @@ class Channels extends BaseSettings
'access_key' => ["access_key[$channel->code]", $this->t("Access Key"), $channel->accessKey],
'include_tags' => ["include_tags[$channel->code]", $this->t("Include Tags"), $channel->includeTags],
'exclude_tags' => ["exclude_tags[$channel->code]", $this->t("Exclude Tags"), $channel->excludeTags],
'text_search' => ["text_search[$channel->code]", $this->t("Full Text Search"), $channel->fullTextSearch],
'image' => ["image[$channel->code]", $this->t("Images"), $channel->mediaType & 1],
'video' => ["video[$channel->code]", $this->t("Videos"), $channel->mediaType & 2],
'audio' => ["audio[$channel->code]", $this->t("Audio"), $channel->mediaType & 4],
@@ -133,6 +134,7 @@ class Channels extends BaseSettings
'access_key' => ["new_access_key", $this->t("Access Key"), '', $this->t('When you want to access this channel via an access key, you can define it here. Pay attentioon to not use an already used one.')],
'include_tags' => ["new_include_tags", $this->t("Include Tags"), '', $this->t('Comma separated list of tags. A post will be used when it contains any of the listed tags.')],
'exclude_tags' => ["new_exclude_tags", $this->t("Exclude Tags"), '', $this->t('Comma separated list of tags. If a post contain any of these tags, then it will not be part of nthis channel.')],
'text_search' => ["new_text_search", $this->t("Full Text Search"), '', $this->t('Search terms for the body.')],
'image' => ['new_image', $this->t("Images"), false, $this->t("Check to display images in the channel.")],
'video' => ["new_video", $this->t("Videos"), false, $this->t("Check to display videos in the channel.")],
'audio' => ["new_audio", $this->t("Audio"), false, $this->t("Check to display audio in the channel.")],