. * */ namespace Friendica\Model; use Friendica\Content\Text\BBCode; use Friendica\Core\Cache\Enum\Duration; use Friendica\Core\Logger; use Friendica\Core\Protocol; use Friendica\Core\System; use Friendica\Database\Database; use Friendica\Database\DBA; use Friendica\DI; use Friendica\Protocol\ActivityPub; use Friendica\Util\DateTimeFormat; use Friendica\Util\Strings; /** * Class Tag * * This Model class handles tag table interactions. * This tables stores relevant tags related to posts, like hashtags and mentions. */ class Tag { const UNKNOWN = 0; const HASHTAG = 1; const MENTION = 2; /** * An implicit mention is a mention in a comment body that is redundant with the threading information. */ const IMPLICIT_MENTION = 8; /** * An exclusive mention transmits the post only to the target account without transmitting it to the followers, usually a forum. */ const EXCLUSIVE_MENTION = 9; const TO = 10; const CC = 11; const BTO = 12; const BCC = 13; const ACCOUNT = 1; const GENERAL_COLLECTION = 2; const FOLLOWER_COLLECTION = 3; const PUBLIC_COLLECTION = 4; const TAG_CHARACTER = [ self::HASHTAG => '#', self::MENTION => '@', self::EXCLUSIVE_MENTION => '!', self::IMPLICIT_MENTION => '%', ]; /** * Store tag/mention elements * * @param integer $uriid URI id * @param integer $type Type * @param string $name Name * @param string $url URL * @param integer $target Target (default: null) * @return void */ public static function store(int $uriid, int $type, string $name, string $url = '', int $target = null) { if ($type == self::HASHTAG) { // Trim Unicode non-word characters $name = preg_replace('/(^\W+)|(\W+$)/us', '', $name); $tags = explode(self::TAG_CHARACTER[self::HASHTAG], $name); if (count($tags) > 1) { foreach ($tags as $tag) { self::store($uriid, $type, $tag, $url); } return; } } if (empty($name)) { return; } $cid = 0; $tagid = 0; if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION, self::TO, self::CC, self::BTO, self::BCC])) { if (empty($url)) { // No mention without a contact url return; } if ((substr($url, 0, 7) == 'https//') || (substr($url, 0, 6) == 'http//')) { Logger::notice('Wrong scheme in url', ['url' => $url, 'callstack' => System::callstack(20)]); } $cid = Contact::getIdForURL($url, 0, false); Logger::debug('Got id for contact', ['cid' => $cid, 'url' => $url]); if (empty($cid)) { $tag = DBA::selectFirst('tag', ['name', 'type'], ['url' => $url]); if (!empty($tag)) { if ($tag['name'] != substr($name, 0, 96)) { DBA::update('tag', ['name' => substr($name, 0, 96)], ['url' => $url]); } if (!empty($target) && ($tag['type'] != $target)) { DBA::update('tag', ['type' => $target], ['url' => $url]); } } } } if (empty($cid)) { if (!in_array($type, [self::TO, self::CC, self::BTO, self::BCC])) { if (($type != self::HASHTAG) && !empty($url) && ($url != $name)) { $url = strtolower($url); } else { $url = ''; } } $tagid = self::getID($name, $url, $target); if (empty($tagid)) { return; } } $fields = ['uri-id' => $uriid, 'type' => $type, 'tid' => $tagid, 'cid' => $cid]; if (in_array($type, [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION])) { $condition = $fields; $condition['type'] = [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]; if (DBA::exists('post-tag', $condition)) { Logger::info('Tag already exists', $fields); return; } } DBA::insert('post-tag', $fields, Database::INSERT_IGNORE); Logger::info('Stored tag/mention', ['uri-id' => $uriid, 'tag-id' => $tagid, 'contact-id' => $cid, 'name' => $name, 'type' => $type, 'callstack' => System::callstack(8)]); } /** * Fetch the target type for the given url * * @param string $url * @param bool $fetch Fetch information via network operations * @return null|int */ public static function getTargetType(string $url, bool $fetch = true) { $target = null; if (empty($url)) { return $target; } $tag = DBA::selectFirst('tag', ['url', 'type'], ['url' => $url]); if (!empty($tag['type'])) { $target = $tag['type']; if ($target != self::GENERAL_COLLECTION) { Logger::debug('Found existing type', ['type' => $tag['type'], 'url' => $url]); return $target; } } if ($url == ActivityPub::PUBLIC_COLLECTION) { $target = self::PUBLIC_COLLECTION; Logger::debug('Public collection', ['url' => $url]); } else { if (DBA::exists('apcontact', ['followers' => $url])) { $target = self::FOLLOWER_COLLECTION; Logger::debug('Found collection via existing apcontact', ['url' => $url]); } elseif (Contact::getIdForURL($url, 0, $fetch ? null : false)) { $target = self::ACCOUNT; Logger::debug('URL is an account', ['url' => $url]); } elseif ($fetch && ($target != self::GENERAL_COLLECTION)) { $content = ActivityPub::fetchContent($url); if (!empty($content['type']) && ($content['type'] == 'OrderedCollection')) { $target = self::GENERAL_COLLECTION; Logger::debug('URL is an ordered collection', ['url' => $url]); } } } if (!empty($target) && !empty($tag['url']) && ($tag['type'] != $target)) { DBA::update('tag', ['type' => $target], ['url' => $url]); } if (empty($target)) { Logger::debug('No type could be detected', ['url' => $url]); } return $target; } /** * Get a tag id for a given tag name and url * * @param string $name * @param string $url * @param int $type * @return void */ public static function getID(string $name, string $url = '', int $type = null) { $fields = ['name' => substr($name, 0, 96), 'url' => $url]; $tag = DBA::selectFirst('tag', ['id', 'type'], $fields); if (DBA::isResult($tag)) { if (empty($tag['type']) && !empty($type)) { DBA::update('tag', ['type' => $type], $fields); } return $tag['id']; } if (!empty($type)) { $fields['type'] = $type; } DBA::insert('tag', $fields, Database::INSERT_IGNORE); $tid = DBA::lastInsertId(); if (!empty($tid)) { return $tid; } Logger::error('No tag id created', $fields); return 0; } /** * Store tag/mention elements * * @param integer $uriid URI id * @param string $hash Hash * @param string $name Name * @param string $url URL * @param boolean $probing Whether probing is active * @return void */ public static function storeByHash(int $uriid, string $hash, string $name, string $url = '', bool $probing = true) { $type = self::getTypeForHash($hash); if ($type == self::UNKNOWN) { return; } self::store($uriid, $type, $name, $url, $probing); } /** * Get tags and mentions from the body * * @param string $body Body of the post * @param string $tags Accepted tags * * @return array Tag list */ public static function getFromBody(string $body, string $tags = null) { if (is_null($tags)) { $tags = self::TAG_CHARACTER[self::HASHTAG] . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]; } if (!preg_match_all("/([" . $tags . "])\[url\=([^\[\]]*)\]([^\[\]]*)\[\/url\]/ism", $body, $result, PREG_SET_ORDER)) { return []; } return $result; } /** * Store tags and mentions from the body * * @param integer $uriid URI-Id * @param string $body Body of the post * @param string $tags Accepted tags * @param boolean $probing Perform a probing for contacts, adding them if needed * @return void */ public static function storeFromBody(int $uriid, string $body, string $tags = null, bool $probing = true) { Logger::info('Check for tags', ['uri-id' => $uriid, 'hash' => $tags, 'callstack' => System::callstack()]); if (is_null($tags)) { $tags = self::TAG_CHARACTER[self::HASHTAG] . self::TAG_CHARACTER[self::MENTION] . self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]; } // Only remove the shared data from "real" reshares $shared = BBCode::fetchShareAttributes($body); if (!empty($shared['guid'])) { if (preg_match("/\s*\[share .*?\](.*?)\[\/share\]\s*/ism", $body, $matches)) { $share_body = $matches[1]; } $body = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body); } foreach (self::getFromBody($body, $tags) as $tag) { self::storeByHash($uriid, $tag[1], $tag[3], $tag[2], $probing); } // Search for hashtags in the shared body (but only if hashtags are wanted) if (!empty($share_body) && (strpos($tags, self::TAG_CHARACTER[self::HASHTAG]) !== false)) { foreach (self::getFromBody($share_body, self::TAG_CHARACTER[self::HASHTAG]) as $tag) { self::storeByHash($uriid, $tag[1], $tag[3], $tag[2], $probing); } } } /** * Store raw tags (not encapsulated in links) from the body * This function is needed in the intermediate phase. * Later we can call item::setHashtags in advance to have all tags converted. * * @param integer $uriid URI-Id * @param string $body Body of the post * @return void */ public static function storeRawTagsFromBody(int $uriid, string $body) { Logger::info('Check for tags', ['uri-id' => $uriid, 'callstack' => System::callstack()]); $result = BBCode::getTags($body); if (empty($result)) { return; } Logger::info('Found tags', ['uri-id' => $uriid, 'result' => $result]); foreach ($result as $tag) { if (substr($tag, 0, 1) != self::TAG_CHARACTER[self::HASHTAG]) { continue; } self::storeByHash($uriid, substr($tag, 0, 1), substr($tag, 1)); } } /** * Checks for stored hashtags and mentions for the given post * * @param integer $uriid * @return bool */ public static function existsForPost(int $uriid) { return DBA::exists('post-tag', ['uri-id' => $uriid, 'type' => [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); } /** * Remove tag/mention * * @param integer $uriid URI id * @param integer $type Type * @param string $name Name * @param string $url URL * @return void */ public static function remove(int $uriid, int $type, string $name, string $url = '') { $condition = ['uri-id' => $uriid, 'type' => $type, 'url' => $url]; if ($type == self::HASHTAG) { $condition['name'] = $name; } $tag = DBA::selectFirst('tag-view', ['tid', 'cid'], $condition); if (!DBA::isResult($tag)) { return; } Logger::info('Removing tag/mention', ['uri-id' => $uriid, 'tid' => $tag['tid'], 'name' => $name, 'url' => $url, 'callstack' => System::callstack(8)]); DBA::delete('post-tag', ['uri-id' => $uriid, 'type' => $type, 'tid' => $tag['tid'], 'cid' => $tag['cid']]); } /** * Remove tag/mention * * @param integer $uriid * @param string $hash * @param string $name * @param string $url */ public static function removeByHash(int $uriid, string $hash, string $name, string $url = '') { $type = self::getTypeForHash($hash); if ($type == self::UNKNOWN) { return; } self::remove($uriid, $type, $name, $url); } /** * Get the type for the given hash * * @param string $hash * @return integer type */ private static function getTypeForHash(string $hash) { if ($hash == self::TAG_CHARACTER[self::MENTION]) { return self::MENTION; } elseif ($hash == self::TAG_CHARACTER[self::EXCLUSIVE_MENTION]) { return self::EXCLUSIVE_MENTION; } elseif ($hash == self::TAG_CHARACTER[self::IMPLICIT_MENTION]) { return self::IMPLICIT_MENTION; } elseif ($hash == self::TAG_CHARACTER[self::HASHTAG]) { return self::HASHTAG; } else { return self::UNKNOWN; } } /** * Create implicit mentions for a given post * * @param integer $uri_id * @param integer $parent_uri_id */ public static function createImplicitMentions(int $uri_id, int $parent_uri_id) { // Always mention the direct parent author $parent = Post::selectFirst(['author-link', 'author-name'], ['uri-id' => $parent_uri_id]); self::store($uri_id, self::IMPLICIT_MENTION, $parent['author-name'], $parent['author-link']); if (DI::config()->get('system', 'disable_implicit_mentions')) { return; } $tags = DBA::select('tag-view', ['name', 'url'], ['uri-id' => $parent_uri_id, 'type' => [self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); while ($tag = DBA::fetch($tags)) { self::store($uri_id, self::IMPLICIT_MENTION, $tag['name'], $tag['url']); } DBA::close($tags); } /** * Retrieves the terms from the provided type(s) associated with the provided item ID. * * @param int $item_id * @param int|array $type * @return array * @throws \Exception */ public static function getByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]) { $condition = ['uri-id' => $uri_id, 'type' => $type]; return DBA::selectToArray('tag-view', ['type', 'name', 'url', 'tag-type'], $condition); } /** * Return a string with all tags and mentions * * @param integer $uri_id * @param array $type * @return string tags and mentions * @throws \Exception */ public static function getCSVByURIId(int $uri_id, array $type = [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]) { $tag_list = []; $tags = self::getByURIId($uri_id, $type); foreach ($tags as $tag) { $tag_list[] = self::TAG_CHARACTER[$tag['type']] . '[url=' . $tag['url'] . ']' . $tag['name'] . '[/url]'; } return implode(',', $tag_list); } /** * Sorts an item's tags into mentions, hashtags and other tags. Generate personalized URLs by user and modify the * provided item's body with them. * * @param array $item * @return array * @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \ImagickException */ public static function populateFromItem(&$item) { $return = [ 'tags' => [], 'hashtags' => [], 'mentions' => [], 'implicit_mentions' => [], ]; $searchpath = DI::baseUrl() . "/search?tag="; $taglist = DBA::select('tag-view', ['type', 'name', 'url', 'cid'], ['uri-id' => $item['uri-id'], 'type' => [self::HASHTAG, self::MENTION, self::EXCLUSIVE_MENTION, self::IMPLICIT_MENTION]]); while ($tag = DBA::fetch($taglist)) { if ($tag['url'] == '') { $tag['url'] = $searchpath . rawurlencode($tag['name']); } $orig_tag = $tag['url']; $prefix = self::TAG_CHARACTER[$tag['type']]; switch($tag['type']) { case self::HASHTAG: if ($orig_tag != $tag['url']) { $item['body'] = str_replace($orig_tag, $tag['url'], $item['body']); } $return['hashtags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; $return['tags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; break; case self::MENTION: case self::EXCLUSIVE_MENTION: if (!empty($tag['cid'])) { $tag['url'] = Contact::magicLinkById($tag['cid']); } else { $tag['url'] = Contact::magicLink($tag['url']); } $return['mentions'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; $return['tags'][] = '' . $prefix . '' . htmlspecialchars($tag['name']) . ''; break; case self::IMPLICIT_MENTION: $return['implicit_mentions'][] = $prefix . $tag['name']; break; } } DBA::close($taglist); return $return; } /** * Counts posts for given tag * * @param string $search * @param integer $uid * @return integer number of posts */ public static function countByTag(string $search, int $uid = 0) { $condition = ["`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", $search, 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; return DBA::count('tag-search-view', $condition); } /** * Search posts for given tag * * @param string $search * @param integer $uid * @param integer $start * @param integer $limit * @param integer $last_uriid * @return array with URI-ID */ public static function getURIIdListByTag(string $search, int $uid = 0, int $start = 0, int $limit = 100, int $last_uriid = 0) { $condition = ["`name` = ? AND (`uid` = ? OR (`uid` = ? AND NOT `global`)) AND (`network` IN (?, ?, ?, ?) OR (`uid` = ? AND `uid` != ?))", $search, 0, $uid, Protocol::ACTIVITYPUB, Protocol::DFRN, Protocol::DIASPORA, Protocol::OSTATUS, $uid, 0]; if (!empty($last_uriid)) { $condition = DBA::mergeConditions($condition, ["`uri-id` < ?", $last_uriid]); } $params = [ 'order' => ['uri-id' => true], 'limit' => [$start, $limit] ]; $tags = DBA::select('tag-search-view', ['uri-id'], $condition, $params); $uriids = []; while ($tag = DBA::fetch($tags)) { $uriids[] = $tag['uri-id']; } DBA::close($tags); return $uriids; } /** * Returns a list of the most frequent global hashtags over the given period * * @param int $period Period in hours to consider posts * @param int $limit Number of returned tags * @return array * @throws \Exception */ public static function getGlobalTrendingHashtags(int $period, $limit = 10) { $tags = DI::cache()->get('global_trending_tags-' . $period . '-' . $limit); if (!empty($tags)) { return $tags; } else { return self::setGlobalTrendingHashtags($period, $limit); } } /** * Fetch the blocked tags as SQL * * @return string */ private static function getBlockedSQL() { $blocked_txt = DI::config()->get('system', 'blocked_tags'); if (empty($blocked_txt)) { return ''; } $blocked = explode(',', $blocked_txt); array_walk($blocked, function(&$value) { $value = "'" . DBA::escape(trim($value)) . "'";}); return " AND NOT `name` IN (" . implode(',', $blocked) . ")"; } /** * Creates a list of the most frequent global hashtags over the given period * * @param int $period Period in hours to consider posts * @param int $limit Number of returned tags * @return array * @throws \Exception */ public static function setGlobalTrendingHashtags(int $period, int $limit = 10) { // Get a uri-id that is at least X hours old. // We use the uri-id in the query for the hash tags since this is much faster $post = Post::selectFirstThread(['uri-id'], ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')], ['order' => ['received' => true]]); if (empty($post['uri-id'])) { return []; } $block_sql = self::getBlockedSQL(); $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors` FROM `tag-search-view` WHERE `private` = ? AND `uid` = ? AND `uri-id` > ? $block_sql GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?", Item::PUBLIC, 0, $post['uri-id'], $limit); if (DBA::isResult($tagsStmt)) { $tags = DBA::toArray($tagsStmt); DI::cache()->set('global_trending_tags-' . $period . '-' . $limit, $tags, Duration::DAY); return $tags; } return []; } /** * Returns a list of the most frequent local hashtags over the given period * * @param int $period Period in hours to consider posts * @param int $limit Number of returned tags * @return array * @throws \Exception */ public static function getLocalTrendingHashtags(int $period, $limit = 10) { $tags = DI::cache()->get('local_trending_tags-' . $period . '-' . $limit); if (!empty($tags)) { return $tags; } else { return self::setLocalTrendingHashtags($period, $limit); } } /** * Returns a list of the most frequent local hashtags over the given period * * @param int $period Period in hours to consider posts * @param int $limit Number of returned tags * @return array * @throws \Exception */ public static function setLocalTrendingHashtags(int $period, int $limit = 10) { // Get a uri-id that is at least X hours old. // We use the uri-id in the query for the hash tags since this is much faster $post = Post::selectFirstThread(['uri-id'], ["`uid` = ? AND `received` < ?", 0, DateTimeFormat::utc('now - ' . $period . ' hour')], ['order' => ['received' => true]]); if (empty($post['uri-id'])) { return []; } $block_sql = self::getBlockedSQL(); $tagsStmt = DBA::p("SELECT `name` AS `term`, COUNT(*) AS `score`, COUNT(DISTINCT(`author-id`)) as `authors` FROM `tag-search-view` WHERE `private` = ? AND `wall` AND `origin` AND `uri-id` > ? $block_sql GROUP BY `term` ORDER BY `authors` DESC, `score` DESC LIMIT ?", Item::PUBLIC, $post['uri-id'], $limit); if (DBA::isResult($tagsStmt)) { $tags = DBA::toArray($tagsStmt); DI::cache()->set('local_trending_tags-' . $period . '-' . $limit, $tags, Duration::DAY); return $tags; } return []; } /** * Check if the provided tag is of one of the provided term types. * * @param string $tag * @param int ...$types * @return bool */ public static function isType($tag, ...$types) { $tag_chars = []; foreach ($types as $type) { if (array_key_exists($type, self::TAG_CHARACTER)) { $tag_chars[] = self::TAG_CHARACTER[$type]; } } return Strings::startsWithChars($tag, $tag_chars); } /** * Fetch user who subscribed to the given tag * * @param string $tag * @return array User list */ private static function getUIDListByTag(string $tag) { $uids = []; $searches = DBA::select('search', ['uid'], ['term' => $tag]); while ($search = DBA::fetch($searches)) { $uids[] = $search['uid']; } DBA::close($searches); return $uids; } /** * Fetch user who subscribed to the tags of the given item * * @param integer $uri_id * @return array User list */ public static function getUIDListByURIId(int $uri_id) { $uids = []; $tags = self::getByURIId($uri_id, [self::HASHTAG]); foreach ($tags as $tag) { $uids = array_merge($uids, self::getUIDListByTag(self::TAG_CHARACTER[self::HASHTAG] . $tag['name'])); } return array_unique($uids); } }