Require whitespace around smilies and normalize federating text
This commit is contained in:
@@ -21,6 +21,7 @@
|
||||
|
||||
namespace Friendica\Content;
|
||||
|
||||
use Friendica\Content\Text\BBCode;
|
||||
use Friendica\Core\Hook;
|
||||
use Friendica\DI;
|
||||
use Friendica\Util\Strings;
|
||||
@@ -67,7 +68,7 @@ class Smilies
|
||||
*/
|
||||
public static function getList(): array
|
||||
{
|
||||
$texts = [
|
||||
$texts = [
|
||||
'<3',
|
||||
'</3',
|
||||
'<\\3',
|
||||
@@ -153,34 +154,129 @@ class Smilies
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds all used smilies (like :heart: or :p) in the provided text.
|
||||
* Normalizes smiley shortcodes into texts with no special symbols.
|
||||
*
|
||||
* @param string $text that might contain smilie usages (denoted by a starting colon)
|
||||
* @param bool $extract_url whether to further extract image urls
|
||||
* @return array with smilie codes (colon included) as the keys, the smilie images as values
|
||||
* @return array
|
||||
* 'texts' => smilie shortcut
|
||||
* 'icons' => icon url or an empty string
|
||||
* 'norms' => normalized shortcut
|
||||
*/
|
||||
public static function extractUsedSmilies(string $text, bool $extract_url = false): array
|
||||
/**
 * Returns the smiley list extended with a normalized name for every shortcode.
 *
 * A normalized name contains no special symbols. It is derived, in this order, from
 *  - the shortcode itself once whitespace and the characters - : # ~ are stripped,
 *    if the remainder is purely alphanumeric,
 *  - the file name of the icon, if its URL matches ".../smiley-<name>.gif",
 *  - the fallback 'smiley' followed by the position of the shortcode in the list.
 *
 * @return array the array returned by getList() plus one additional entry:
 *    'texts' => smilie shortcut
 *    'icons' => icon entry exactly as provided by getList() (not reduced to a URL;
 *               extractUsedSmilies() still looks for src="..." in these entries)
 *    'norms' => normalized shortcut, in the same order as 'texts'
 */
public static function getNormalizedList(): array
{
    $smilies = self::getList();
    $norms   = [];

    foreach ($smilies['texts'] as $i => $shortcode) {
        // The image URL is only needed locally to derive a name from the file name.
        // A shortcode without an icon entry is treated like one without a URL.
        $url = '';
        if (preg_match('/src="(.+?)"/', $smilies['icons'][$i] ?? '', $match)) {
            $url = $match[1];
        }

        // Normalize name
        $norm = preg_replace('/[\s\-:#~]/', '', $shortcode);
        if (ctype_alnum($norm)) {
            $norms[] = $norm;
        } elseif (preg_match('#/smiley-(\w+)\.gif#', $url, $match)) {
            $norms[] = $match[1];
        } else {
            $norms[] = 'smiley' . $i;
        }
    }

    $smilies['norms'] = $norms;

    return $smilies;
}
|
||||
|
||||
/**
|
||||
* Finds all used smilies (denoted by quoting colons like :heart:) in the provided text and normalizes their usages.
|
||||
*
|
||||
* @param string $text that might contain smiley usages
|
||||
* @return array with smilie codes (colon included) as the keys, their image urls as values;
|
||||
* the normalized string is put under the '' (empty string) key
|
||||
*/
|
||||
public static function extractUsedSmilies(string $text): array
|
||||
{
|
||||
$emojis = [];
|
||||
|
||||
$smilies = self::getList();
|
||||
$icons = $smilies['icons'];
|
||||
foreach ($smilies['texts'] as $i => $name) {
|
||||
if (strstr($text, $name)) {
|
||||
$image = $icons[$i];
|
||||
if ($extract_url) {
|
||||
if (preg_match('/src="(.+?)"/', $image, $match)) {
|
||||
$image = $match[1];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
$emojis[''] = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) {
|
||||
return BBCode::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use (&$emojis) {
|
||||
if (strpos($text, '[nosmile]') !== false || self::noSmilies()) {
|
||||
return $text;
|
||||
}
|
||||
$emojis[$name] = $image;
|
||||
}
|
||||
}
|
||||
$smilies = self::getNormalizedList();
|
||||
$normalized = array_combine($smilies['texts'], $smilies['norms']);
|
||||
return self::performForEachWordMatch(
|
||||
array_combine($smilies['texts'], $smilies['icons']),
|
||||
$text,
|
||||
function (string $name, string $image) use($normalized, &$emojis) {
|
||||
$name = $normalized[$name];
|
||||
if (preg_match('/src="(.+?)"/', $image, $match)) {
|
||||
$image = $match[1];
|
||||
$emojis[$name] = $image;
|
||||
}
|
||||
return ':' . $name . ':';
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
return $emojis;
|
||||
}
|
||||
|
||||
/**
 * Similar to strtr but matches only whole words and replaces texts with $callback.
 *
 * A word is a run of bytes delimited by whitespace (as matched by \s) or by the
 * start/end of $subject. A word is replaced only if it is equal to one of the keys
 * of $words; occurrences embedded in a longer word are left untouched. All
 * whitespace and all non-matching words are copied to the result unchanged.
 *
 * @param array    $words    Map of word => value; the keys are the words to look for
 * @param string   $subject  Text to search in
 * @param callable $callback Invoked as ($word, $value) for every matching word, where
 *                           $value is $words[$word]; its return value replaces the word
 * @return string
 */
private static function performForEachWordMatch(array $words, string $subject, callable $callback): string
{
    // Cheap pre-filter learned from PHP's strtr implementation: remember which word
    // lengths (up to 31) and which first bytes (modulo 32) occur among the keys, so
    // that most words can be rejected without a hash lookup.
    $length_bitset = 0;
    $ord_bitset    = 0;
    foreach ($words as $key => $_) {
        // Numeric-looking keys are converted to integers by PHP, turn them back into strings
        $key    = (string) $key;
        $length = strlen($key);
        if ($length <= 31) {
            $length_bitset |= 1 << $length;
        }
        $ord_bitset |= 1 << (ord($key) & 31);
    }

    $result    = '';
    $processed = 0; // everything before this byte offset has been appended to $result
    $offset    = 0; // byte offset where the current word starts
    $end       = strlen($subject);

    // Each match is the next run of whitespace, i.e. the separator terminating the current word
    while ($offset < $end && preg_match('/\s+?(?=\S|$)/', $subject, $matches, PREG_OFFSET_CAPTURE, $offset)) {
        [$whitespaces, $next] = $matches[0];

        $word   = substr($subject, $offset, $next - $offset);
        $length = strlen($word);
        if (($length > 31 || ($length_bitset & (1 << $length)))
            && ($ord_bitset & (1 << (ord($word) & 31)))
            && array_key_exists($word, $words)
        ) {
            // Flush the untouched text preceding the word, then the replacement
            $result   .= substr($subject, $processed, $offset - $processed);
            $result   .= $callback($word, $words[$word]);
            $processed = $offset + $length;
        }

        $offset = $next + strlen($whitespaces);
    }

    // The last word is not followed by whitespace and therefore not handled by the loop
    $word = substr($subject, $offset);
    if (array_key_exists($word, $words)) {
        $result .= substr($subject, $processed, $offset - $processed);
        $result .= $callback($word, $words[$word]);
    } else {
        $result .= substr($subject, $processed);
    }

    return $result;
}
|
||||
|
||||
/**
|
||||
* Copied from http://php.net/manual/en/function.str-replace.php#88569
|
||||
* Modified for camel caps: renamed stro_replace -> strOrigReplace
|
||||
@@ -198,7 +294,13 @@ class Smilies
|
||||
*/
|
||||
private static function strOrigReplace(array $search, array $replace, string $subject): string
|
||||
{
|
||||
return strtr($subject, array_combine($search, $replace));
|
||||
return self::performForEachWordMatch(
|
||||
array_combine($search, $replace),
|
||||
$subject,
|
||||
function (string $_, string $value) {
|
||||
return $value;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -227,6 +329,12 @@ class Smilies
|
||||
return $s;
|
||||
}
|
||||
|
||||
/**
 * Tells whether smiley replacement is switched off.
 *
 * That is the case when the 'system' / 'no_smilies' setting is set either in the
 * node configuration or in the personal configuration of the local user.
 *
 * @return bool
 */
private static function noSmilies(): bool
{
    // Node-wide switch
    if (intval(DI::config()->get('system', 'no_smilies'))) {
        return true;
    }

    // Without a local user there is no personal setting to consult
    $uid = DI::userSession()->getLocalUserId();
    if (!$uid) {
        return false;
    }

    return (bool) intval(DI::pConfig()->get($uid, 'system', 'no_smilies'));
}
|
||||
|
||||
/**
|
||||
* Replaces emoji shortcodes in a string from a structured array of searches and replaces.
|
||||
*
|
||||
@@ -240,9 +348,7 @@ class Smilies
|
||||
*/
|
||||
public static function replaceFromArray(string $text, array $smilies, bool $no_images = false): string
|
||||
{
|
||||
if (intval(DI::config()->get('system', 'no_smilies'))
|
||||
|| (DI::userSession()->getLocalUserId() && intval(DI::pConfig()->get(DI::userSession()->getLocalUserId(), 'system', 'no_smilies')))
|
||||
) {
|
||||
if (self::noSmilies()) {
|
||||
return $text;
|
||||
}
|
||||
|
||||
@@ -261,7 +367,7 @@ class Smilies
|
||||
$smilies = $cleaned;
|
||||
}
|
||||
|
||||
$text = preg_replace_callback('/<(3+)/', [self::class, 'heartReplaceCallback'], $text);
|
||||
$text = preg_replace_callback('/\B<3+?\b/', [self::class, 'heartReplaceCallback'], $text);
|
||||
$text = self::strOrigReplace($smilies['texts'], $smilies['icons'], $text);
|
||||
|
||||
$text = preg_replace_callback('/<(code)>(.*?)<\/code>/ism', [self::class, 'decode'], $text);
|
||||
@@ -302,16 +408,7 @@ class Smilies
|
||||
*/
|
||||
private static function heartReplaceCallback(array $matches): string
|
||||
{
|
||||
if (strlen($matches[1]) == 1) {
|
||||
return $matches[0];
|
||||
}
|
||||
|
||||
$t = '';
|
||||
for ($cnt = 0; $cnt < strlen($matches[1]); $cnt ++) {
|
||||
$t .= '❤';
|
||||
}
|
||||
|
||||
return str_replace($matches[0], $t, $matches[0]);
|
||||
return str_repeat('❤', strlen($matches[0]) - 4);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -34,28 +34,18 @@ class Emoji extends BaseFactory
|
||||
/**
|
||||
* Creates an emoji collection from shortcode => image mappings.
|
||||
*
|
||||
* Only emojis with shortcodes of the form of ':shortcode:' are passed in the collection.
|
||||
*
|
||||
* @param array $smilies
|
||||
* @param bool $extract_url
|
||||
*
|
||||
* @return Emojis
|
||||
*/
|
||||
public function createCollectionFromArray(array $smilies, bool $extract_url = true): Emojis
|
||||
public function createCollectionFromArray(array $smilies): Emojis
|
||||
{
|
||||
$prototype = null;
|
||||
|
||||
$emojis = [];
|
||||
|
||||
foreach ($smilies as $shortcode => $url) {
|
||||
if (substr($shortcode, 0, 1) == ':' && substr($shortcode, -1) == ':') {
|
||||
if ($extract_url) {
|
||||
if (preg_match('/src="(.+?)"/', $url, $matches)) {
|
||||
$url = $matches[1];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if ($shortcode !== '' && $url !== '') {
|
||||
$shortcode = trim($shortcode, ':');
|
||||
|
||||
if ($prototype === null) {
|
||||
@@ -71,12 +61,20 @@ class Emoji extends BaseFactory
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $smilies
|
||||
* @param array $smilies as is returned by Smilies::getList()
|
||||
*
|
||||
* @return Emojis
|
||||
*/
|
||||
public function createCollectionFromSmilies(array $smilies): Emojis
|
||||
{
|
||||
return self::createCollectionFromArray(array_combine($smilies['texts'], $smilies['icons']));
|
||||
$emojis = [];
|
||||
$icons = $smilies['icons'];
|
||||
foreach ($smilies['texts'] as $i => $name) {
|
||||
$url = $icons[$i];
|
||||
if (preg_match('/src="(.+?)"/', $url, $matches)) {
|
||||
$emojis[$name] = $matches[1];
|
||||
}
|
||||
}
|
||||
return self::createCollectionFromArray($emojis);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -290,11 +290,18 @@ class Status extends BaseFactory
|
||||
|
||||
$emojis = null;
|
||||
if (DI::baseUrl()->isLocalUrl($item['uri'])) {
|
||||
$used_smilies = Smilies::extractUsedSmilies($item['body'] ?: $item['raw-body']);
|
||||
$used_smilies = Smilies::extractUsedSmilies($item['raw-body'] ?: $item['body']);
|
||||
// $used_smilies contains normalized texts
|
||||
if ($item['raw-body']) {
|
||||
$item['raw-body'] = $used_smilies[''];
|
||||
} elseif ($item['body']) {
|
||||
$item['body'] = $used_smilies[''];
|
||||
}
|
||||
unset($used_smilies['']);
|
||||
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray($used_smilies)->getArrayCopy(true);
|
||||
} else {
|
||||
if (preg_match_all("(\[emoji=(.*?)](.*?)\[/emoji])ism", $item['body'] ?: $item['raw-body'], $matches)) {
|
||||
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray(array_combine($matches[2], $matches[1]), false)->getArrayCopy(true);
|
||||
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray(array_combine($matches[2], $matches[1]))->getArrayCopy(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -899,7 +899,7 @@ class Transmitter
|
||||
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
|
||||
if (empty($tags)) {
|
||||
Logger::debug('No receivers found', ['uri-id' => $uri_id]);
|
||||
$post = Post::selectFirst([Item::DELIVER_FIELDLIST], ['uri-id' => $uri_id, 'origin' => true]);
|
||||
$post = Post::selectFirst(Item::DELIVER_FIELDLIST, ['uri-id' => $uri_id, 'origin' => true]);
|
||||
if (!empty($post)) {
|
||||
ActivityPub\Transmitter::storeReceiversForItem($post);
|
||||
$tags = Tag::getByURIId($uri_id, [Tag::TO, Tag::CC, Tag::BCC, Tag::AUDIENCE]);
|
||||
@@ -1512,10 +1512,14 @@ class Transmitter
|
||||
*
|
||||
* @param array $tags Tag array
|
||||
* @param string $text Text containing tags like :tag:
|
||||
* @return string normalized text
|
||||
*/
|
||||
private static function addEmojiTags(array &$tags, string $text)
|
||||
{
|
||||
foreach (Smilies::extractUsedSmilies($text, true) as $name => $url) {
|
||||
$emojis = Smilies::extractUsedSmilies($text);
|
||||
$normalized = $emojis[''];
|
||||
unset($emojis['']);
|
||||
foreach ($emojis as $name => $url) {
|
||||
$tags[] = [
|
||||
'type' => 'Emoji',
|
||||
'name' => $name,
|
||||
@@ -1525,6 +1529,7 @@ class Transmitter
|
||||
],
|
||||
];
|
||||
}
|
||||
return $normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1559,8 +1564,6 @@ class Transmitter
|
||||
}
|
||||
}
|
||||
|
||||
self::addEmojiTags($tags, $item['body']);
|
||||
|
||||
$announce = self::getAnnounceArray($item);
|
||||
// Mention the original author upon commented reshares
|
||||
if (!empty($announce['comment'])) {
|
||||
@@ -1808,10 +1811,11 @@ class Transmitter
|
||||
$item = Post\Media::addHTMLAttachmentToItem($item);
|
||||
|
||||
$body = $item['body'];
|
||||
|
||||
$emojis = [];
|
||||
if ($type == 'Note') {
|
||||
$body = $item['raw-body'] ?? self::removePictures($body);
|
||||
}
|
||||
$body = self::addEmojiTags($emojis, $body);
|
||||
|
||||
/**
|
||||
* @todo Improve the automated summary
|
||||
@@ -1893,7 +1897,7 @@ class Transmitter
|
||||
}
|
||||
|
||||
$data['attachment'] = self::createAttachmentList($item);
|
||||
$data['tag'] = self::createTagList($item, $data['quoteUrl'] ?? '');
|
||||
$data['tag'] = array_merge(self::createTagList($item, $data['quoteUrl'] ?? ''), $emojis);
|
||||
|
||||
if (empty($data['location']) && (!empty($item['coord']) || !empty($item['location']))) {
|
||||
$data['location'] = self::createLocation($item);
|
||||
|
||||
Reference in New Issue
Block a user