Fix substituting smilies and smilies containing whitespaces
This commit is contained in:
parent
5c3227ac4c
commit
37188c76b8
|
@ -153,43 +153,6 @@ class Smilies
|
||||||
return $params;
|
return $params;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Returns the smiley list extended with a symbol-free, normalized name per shortcode.
 *
 * The normalized name of a shortcode is the first of these that applies:
 *  1. the shortcode with whitespace and the characters "-", ":", "#", "~" removed,
 *     provided the remainder is purely alphanumeric;
 *  2. the "<name>" part of a "/smiley-<name>.gif" URL found in the src attribute of the icon;
 *  3. the fallback "smiley<index>", where <index> is the shortcode's key in 'texts'.
 *
 * @return array The array from self::getList() with one added key:
 *    'texts' => smilie shortcut
 *    'icons' => icon entry exactly as returned by self::getList() (it is NOT rewritten to a bare URL)
 *    'norms' => normalized shortcut, appended in the iteration order of 'texts'
 */
public static function getNormalizedList(): array
{
    $smilies = self::getList();
    $norms = [];
    foreach ($smilies['texts'] as $i => $shortcode) {
        // Extract the icon URL. It is only consulted by the name fallback below and is
        // intentionally not written back into the returned 'icons' list.
        $url = preg_match('/src="(.+?)"/', $smilies['icons'][$i], $match) ? $match[1] : '';

        // Normalize name
        $norm = preg_replace('/[\s\-:#~]/', '', $shortcode);
        if (ctype_alnum($norm)) {
            $norms[] = $norm;
        } elseif (preg_match('#/smiley-(\w+)\.gif#', $url, $match)) {
            // Shortcode still contains symbols: borrow the name from the icon file.
            $norms[] = $match[1];
        } else {
            // Neither source is usable: fall back to a name derived from the position.
            $norms[] = 'smiley' . $i;
        }
    }
    $smilies['norms'] = $norms;
    return $smilies;
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds all used smilies (denoted by quoting colons like :heart:) in the provided text and normalizes their usages.
|
* Finds all used smilies (denoted by quoting colons like :heart:) in the provided text and normalizes their usages.
|
||||||
*
|
*
|
||||||
|
@ -206,18 +169,36 @@ class Smilies
|
||||||
if (strpos($text, '[nosmile]') !== false || self::noSmilies()) {
|
if (strpos($text, '[nosmile]') !== false || self::noSmilies()) {
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$smilies = self::getNormalizedList();
|
$smilies = self::getList();
|
||||||
$normalized = array_combine($smilies['texts'], $smilies['norms']);
|
$normalized = [];
|
||||||
return self::performForEachWordMatch(
|
return self::performForEachWordMatch(
|
||||||
array_combine($smilies['texts'], $smilies['icons']),
|
array_combine($smilies['texts'], $smilies['icons']),
|
||||||
$text,
|
$text,
|
||||||
function (string $name, string $image) use($normalized, &$emojis) {
|
function (string $name, string $image) use($normalized, &$emojis) {
|
||||||
$name = $normalized[$name];
|
if (array_key_exists($name, $normalized)) {
|
||||||
if (preg_match('/src="(.+?)"/', $image, $match)) {
|
return $normalized[$name];
|
||||||
$image = $match[1];
|
}
|
||||||
$emojis[$name] = $image;
|
if (preg_match('/src="(.+?)"/', $image, $match)) {
|
||||||
|
$url = $match[1];
|
||||||
|
// Image smilies, which should be normalized instead of being embedded for some protocols like ActivityPub.
|
||||||
|
// Normalize name
|
||||||
|
$norm = preg_replace('/[\s\-:#~]/', '', $name);
|
||||||
|
if (!ctype_alnum($norm)) {
|
||||||
|
if (preg_match('#/smiley-(\w+)\.gif#', $url, $match)) {
|
||||||
|
$norm = $match[1];
|
||||||
|
} else {
|
||||||
|
$norm = 'smiley' . count($normalized);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$shortcode = ':' . $norm . ':';
|
||||||
|
$normalized[$name] = $shortcode;
|
||||||
|
$emojis[$norm] = $url;
|
||||||
|
return $shortcode;
|
||||||
|
} else {
|
||||||
|
$normalized[$name] = $image;
|
||||||
|
// Probably text-substitution smilies (e.g., Unicode ones).
|
||||||
|
return $image;
|
||||||
}
|
}
|
||||||
return ':' . $name . ':';
|
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
@ -240,11 +221,15 @@ class Smilies
|
||||||
$ord2_bitset = 0;
|
$ord2_bitset = 0;
|
||||||
$prefixes = [];
|
$prefixes = [];
|
||||||
foreach ($words as $word => $_) {
|
foreach ($words as $word => $_) {
|
||||||
if (strlen($word) < 2 || !ctype_graph($word)) {
|
if (strlen($word) < 2) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$ord1 = ord($word);
|
$ord1 = ord($word);
|
||||||
$ord2 = ord($word[1]);
|
$ord2 = ord($word[1]);
|
||||||
|
// A smiley shortcode must not begin or end with whitespaces.
|
||||||
|
if (ctype_space($ord1) || ctype_space($word[strlen($word) - 1])) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
$ord1_bitset |= 1 << ($ord1 & 31);
|
$ord1_bitset |= 1 << ($ord1 & 31);
|
||||||
$ord2_bitset |= 1 << ($ord2 & 31);
|
$ord2_bitset |= 1 << ($ord2 & 31);
|
||||||
if (!array_key_exists($word[0], $prefixes)) {
|
if (!array_key_exists($word[0], $prefixes)) {
|
||||||
|
@ -253,52 +238,37 @@ class Smilies
|
||||||
$prefixes[$word[0]][] = $word;
|
$prefixes[$word[0]][] = $word;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$slength = strlen($subject);
|
||||||
$result = '';
|
$result = '';
|
||||||
|
// $processed is used to delay string concatenation since appending a char every loop is inefficient.
|
||||||
$processed = 0;
|
$processed = 0;
|
||||||
$s_start = 0; // Segment start
|
// Find possible starting points for smilies.
|
||||||
// No spaces are allowed in smilies, so they can serve as delimiters.
|
// For built-in smilies, the two bitsets should make attempts quite efficient.
|
||||||
// Splitting by some delimiters may not be necessary though?
|
// However, presuming custom smilies follow the format of ":shortcode" or ":shortcode:",
|
||||||
while (true) {
|
// if the user adds more smilies (with addons), the second bitset may eventually become useless.
|
||||||
if ($s_start >= strlen($subject)) {
|
for ($i = 0; $i < $slength - 1; $i++) {
|
||||||
$result .= substr($subject, $processed);
|
$c = $subject[$i];
|
||||||
break;
|
$d = $subject[$i + 1];
|
||||||
}
|
if (($ord1_bitset & (1 << (ord($c) & 31))) && ($ord2_bitset & (1 << (ord($d) & 31))) && array_key_exists($c, $prefixes)) {
|
||||||
if (preg_match('/\s+?(?=\S|$)/', $subject, $match, PREG_OFFSET_CAPTURE, $s_start)) {
|
foreach ($prefixes[$c] as $word) {
|
||||||
[$whitespaces, $s_end] = $match[0];
|
$wlength = strlen($word);
|
||||||
} else {
|
if (substr($subject, $i, $wlength) === $word) {
|
||||||
$s_end = strlen($subject);
|
// Check for boundaries
|
||||||
$whitespaces = '';
|
if (($i === 0 || ctype_space($subject[$i - 1]) || ctype_punct($subject[$i - 1]))
|
||||||
}
|
&& ($i + $wlength >= $slength || ctype_space($subject[$i + $wlength]) || ctype_punct($subject[$i + $wlength]))) {
|
||||||
$s_length = $s_end - $s_start;
|
$result .= substr($subject, $processed, $i - $processed);
|
||||||
if ($s_length > 1) {
|
$result .= call_user_func($callback, $word, $words[$word]);
|
||||||
$segment = substr($subject, $s_start, $s_length);
|
$i += $wlength;
|
||||||
// Find possible starting points for smilies.
|
$processed = $i;
|
||||||
// For built-in smilies, the two bitsets should make attempts quite efficient.
|
$i--;
|
||||||
// However, presuming custom smilies follow the format of ":shortcode" or ":shortcode:",
|
break;
|
||||||
// if the user adds more smilies (with addons), the second bitset may eventually become useless.
|
|
||||||
for ($i = 0; $i < $s_length - 1; $i++) {
|
|
||||||
$c = $segment[$i];
|
|
||||||
$d = $segment[$i + 1];
|
|
||||||
if (($ord1_bitset & (1 << (ord($c) & 31))) && ($ord2_bitset & (1 << (ord($d) & 31))) && array_key_exists($c, $prefixes)) {
|
|
||||||
foreach ($prefixes[$c] as $word) {
|
|
||||||
$wlength = strlen($word);
|
|
||||||
if ($wlength <= $s_length - $i && substr($segment, $i, $wlength) === $word) {
|
|
||||||
// Check for boundaries
|
|
||||||
if (($i === 0 || ctype_space($segment[$i - 1]) || ctype_punct($segment[$i - 1]))
|
|
||||||
&& ($i + $wlength >= $s_length || ctype_space($segment[$i + $wlength]) || ctype_punct($segment[$i + $wlength]))) {
|
|
||||||
$result .= substr($subject, $processed, $s_start - $processed + $i);
|
|
||||||
$result .= call_user_func($callback, $word, $words[$word]);
|
|
||||||
$i += $wlength;
|
|
||||||
$processed = $s_start + $i;
|
|
||||||
$i--;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$s_start = $s_end + strlen($whitespaces);
|
}
|
||||||
|
if ($processed < $slength) {
|
||||||
|
$result .= substr($subject, $processed);
|
||||||
}
|
}
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* @copyright Copyright (C) 2010-2023, the Friendica project
|
||||||
|
*
|
||||||
|
* @license GNU AGPL version 3 or any later version
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
use Friendica\Content\Smilies;
|
||||||
|
|
||||||
|
/**
 * Adds a fixed set of test smilies to the given smiley list via Smilies::add().
 *
 * The set covers text-substitution smilies (with and without whitespace in the
 * shortcode) and one image-based smiley whose shortcode contains whitespace.
 *
 * @param array $b Smiley list, passed by reference and forwarded to Smilies::add()
 */
function add_test_unicode_smilies(array &$b)
{
    // Shortcode => replacement, registered in exactly this order.
    $fixtures = [
        // String-substitution smilies
        // - no whitespaces
        '⽕' => '🔥',
        // - with whitespaces
        ':hugging face:' => '🤗',
        // - with multiple whitespaces
        ':face with hand over mouth:' => '🤭',
        // Image-based smilies
        // - with whitespaces
        ':smiley heart 333:' => '<img class="smiley" src="/images/smiley-heart.gif" alt="smiley-heart" title="smiley-heart" />',
    ];

    foreach ($fixtures as $shortcode => $replacement) {
        Smilies::add($b, $shortcode, $replacement);
    }
}
|
|
@ -371,7 +371,7 @@ return [
|
||||||
[
|
[
|
||||||
'uri-id' => 100,
|
'uri-id' => 100,
|
||||||
'title' => 'item_title',
|
'title' => 'item_title',
|
||||||
'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[ <3',
|
'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[ :hugging face: <3 :smiley heart 333: ⽕',
|
||||||
'plink' => 'https://friendica.local/post/100',
|
'plink' => 'https://friendica.local/post/100',
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
namespace Friendica\Test\src\Content;
|
namespace Friendica\Test\src\Content;
|
||||||
|
|
||||||
use Friendica\Content\Smilies;
|
use Friendica\Content\Smilies;
|
||||||
|
use Friendica\Core\Hook;
|
||||||
use Friendica\DI;
|
use Friendica\DI;
|
||||||
use Friendica\Network\HTTPException\InternalServerErrorException;
|
use Friendica\Network\HTTPException\InternalServerErrorException;
|
||||||
use Friendica\Test\FixtureTest;
|
use Friendica\Test\FixtureTest;
|
||||||
|
@ -37,6 +38,9 @@ class SmiliesTest extends FixtureTest
|
||||||
parent::setUp();
|
parent::setUp();
|
||||||
|
|
||||||
DI::config()->set('system', 'no_smilies', false);
|
DI::config()->set('system', 'no_smilies', false);
|
||||||
|
|
||||||
|
Hook::register('smilie', 'tests/Util/SmileyWhitespaceAddon.php', 'add_test_unicode_smilies');
|
||||||
|
Hook::loadHooks();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function dataLinks()
|
public function dataLinks()
|
||||||
|
@ -184,6 +188,26 @@ class SmiliesTest extends FixtureTest
|
||||||
'expected' => '(3<33)',
|
'expected' => '(3<33)',
|
||||||
'body' => '(3<33)',
|
'body' => '(3<33)',
|
||||||
],
|
],
|
||||||
|
'space' => [
|
||||||
|
'expected' => 'alt="smiley-heart"',
|
||||||
|
'body' => ':smiley heart 333:',
|
||||||
|
],
|
||||||
|
'substitution-1' => [
|
||||||
|
'expected' => '🔥',
|
||||||
|
'body' => '⽕',
|
||||||
|
],
|
||||||
|
'substitution-2' => [
|
||||||
|
'expected' => '🤗',
|
||||||
|
'body' => ':hugging face:',
|
||||||
|
],
|
||||||
|
'substitution-3' => [
|
||||||
|
'expected' => '🤭',
|
||||||
|
'body' => ':face with hand over mouth:',
|
||||||
|
],
|
||||||
|
'mixed' => [
|
||||||
|
'expected' => '🔥 🤭 invalid:hugging face: 🤗',
|
||||||
|
'body' => '⽕ :face with hand over mouth: invalid:hugging face: :hugging face:',
|
||||||
|
],
|
||||||
];
|
];
|
||||||
foreach ([':-[', ':-D', 'o.O'] as $emoji) {
|
foreach ([':-[', ':-D', 'o.O'] as $emoji) {
|
||||||
foreach (['A', '_', ':', '-'] as $prefix) {
|
foreach (['A', '_', ':', '-'] as $prefix) {
|
||||||
|
@ -245,6 +269,31 @@ class SmiliesTest extends FixtureTest
|
||||||
'body' => '~friendica',
|
'body' => '~friendica',
|
||||||
'normalized' => ':friendica:'
|
'normalized' => ':friendica:'
|
||||||
],
|
],
|
||||||
|
'space' => [
|
||||||
|
'expected' => ['smileyheart333'],
|
||||||
|
'body' => ':smiley heart 333:',
|
||||||
|
'normalized' => ':smileyheart333:'
|
||||||
|
],
|
||||||
|
'substitution-1' => [
|
||||||
|
'expected' => [],
|
||||||
|
'body' => '⽕',
|
||||||
|
'normalized' => '🔥',
|
||||||
|
],
|
||||||
|
'substitution-2' => [
|
||||||
|
'expected' => [],
|
||||||
|
'body' => ':hugging face:',
|
||||||
|
'normalized' => '🤗',
|
||||||
|
],
|
||||||
|
'substitution-3' => [
|
||||||
|
'expected' => [],
|
||||||
|
'body' => ':face with hand over mouth:',
|
||||||
|
'normalized' => '🤭',
|
||||||
|
],
|
||||||
|
'mixed' => [
|
||||||
|
'expected' => [],
|
||||||
|
'body' => '⽕ :face with hand over mouth: invalid:hugging face: :hugging face:',
|
||||||
|
'normalized' => '🔥 🤭 invalid:hugging face: 🤗',
|
||||||
|
],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,9 @@
|
||||||
|
|
||||||
namespace Friendica\Test\src\Factory\Api\Mastodon;
|
namespace Friendica\Test\src\Factory\Api\Mastodon;
|
||||||
|
|
||||||
use Friendica\Model\Post;
|
use Friendica\Core\Hook;
|
||||||
use Friendica\DI;
|
use Friendica\DI;
|
||||||
|
use Friendica\Model\Post;
|
||||||
use Friendica\Test\FixtureTest;
|
use Friendica\Test\FixtureTest;
|
||||||
|
|
||||||
class StatusTest extends FixtureTest
|
class StatusTest extends FixtureTest
|
||||||
|
@ -35,6 +36,9 @@ class StatusTest extends FixtureTest
|
||||||
|
|
||||||
DI::config()->set('system', 'no_smilies', false);
|
DI::config()->set('system', 'no_smilies', false);
|
||||||
$this->status = DI::mstdnStatus();
|
$this->status = DI::mstdnStatus();
|
||||||
|
|
||||||
|
Hook::register('smilie', 'tests/Util/SmileyWhitespaceAddon.php', 'add_test_unicode_smilies');
|
||||||
|
Hook::loadHooks();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testSimpleStatus()
|
public function testSimpleStatus()
|
||||||
|
@ -50,8 +54,8 @@ class StatusTest extends FixtureTest
|
||||||
$post = Post::selectFirst([], ['id' => 14]);
|
$post = Post::selectFirst([], ['id' => 14]);
|
||||||
$this->assertNotNull($post);
|
$this->assertNotNull($post);
|
||||||
$result = $this->status->createFromUriId($post['uri-id'])->toArray();
|
$result = $this->status->createFromUriId($post['uri-id'])->toArray();
|
||||||
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed: ❤', $result['content']);
|
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed: 🤗 ❤ :smileyheart333: 🔥', $result['content']);
|
||||||
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
|
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed', 'smileyheart333'], true);
|
||||||
$this->assertEquals(count($emojis), count($result['emojis']));
|
$this->assertEquals(count($emojis), count($result['emojis']));
|
||||||
foreach ($result['emojis'] as $emoji) {
|
foreach ($result['emojis'] as $emoji) {
|
||||||
$this->assertTrue(array_key_exists($emoji['shortcode'], $emojis));
|
$this->assertTrue(array_key_exists($emoji['shortcode'], $emojis));
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
namespace Friendica\Test\src\Protocol\ActivityPub;
|
namespace Friendica\Test\src\Protocol\ActivityPub;
|
||||||
|
|
||||||
|
use Friendica\Core\Hook;
|
||||||
use Friendica\DI;
|
use Friendica\DI;
|
||||||
use Friendica\Model\Post;
|
use Friendica\Model\Post;
|
||||||
use Friendica\Protocol\ActivityPub\Transmitter;
|
use Friendica\Protocol\ActivityPub\Transmitter;
|
||||||
|
@ -33,6 +34,9 @@ class TransmitterTest extends FixtureTest
|
||||||
parent::setUp();
|
parent::setUp();
|
||||||
|
|
||||||
DI::config()->set('system', 'no_smilies', false);
|
DI::config()->set('system', 'no_smilies', false);
|
||||||
|
|
||||||
|
Hook::register('smilie', 'tests/Util/SmileyWhitespaceAddon.php', 'add_test_unicode_smilies');
|
||||||
|
Hook::loadHooks();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testEmojiPost()
|
public function testEmojiPost()
|
||||||
|
@ -42,8 +46,8 @@ class TransmitterTest extends FixtureTest
|
||||||
$note = Transmitter::createNote($post);
|
$note = Transmitter::createNote($post);
|
||||||
$this->assertNotNull($note);
|
$this->assertNotNull($note);
|
||||||
|
|
||||||
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed: ❤', $note['content']);
|
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed: 🤗 ❤ :smileyheart333: 🔥', $note['content']);
|
||||||
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
|
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed', 'smileyheart333'], true);
|
||||||
$this->assertEquals(count($emojis), count($note['tag']));
|
$this->assertEquals(count($emojis), count($note['tag']));
|
||||||
foreach ($note['tag'] as $emoji) {
|
foreach ($note['tag'] as $emoji) {
|
||||||
$this->assertTrue(array_key_exists($emoji['name'], $emojis));
|
$this->assertTrue(array_key_exists($emoji['name'], $emojis));
|
||||||
|
|
Loading…
Reference in New Issue
Block a user