Allow using punctuation chars as smiley delimiters
This commit is contained in:
parent
d45e9d6af2
commit
d493946ba4
|
@ -197,11 +197,11 @@ class Smilies
|
||||||
* @return array with smilie codes (colon included) as the keys, their image urls as values;
|
* @return array with smilie codes (colon included) as the keys, their image urls as values;
|
||||||
* the normalized string is put under the '' (empty string) key
|
* the normalized string is passed back through the $normalized reference parameter
|
||||||
*/
|
*/
|
||||||
public static function extractUsedSmilies(string $text): array
|
public static function extractUsedSmilies(string $text, string &$normalized = null): array
|
||||||
{
|
{
|
||||||
$emojis = [];
|
$emojis = [];
|
||||||
|
|
||||||
$emojis[''] = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) {
|
$normalized = BBCode::performWithEscapedTags($text, ['code'], function ($text) use (&$emojis) {
|
||||||
return BBCode::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use (&$emojis) {
|
return BBCode::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use (&$emojis) {
|
||||||
if (strpos($text, '[nosmile]') !== false || self::noSmilies()) {
|
if (strpos($text, '[nosmile]') !== false || self::noSmilies()) {
|
||||||
return $text;
|
return $text;
|
||||||
|
@ -236,43 +236,69 @@ class Smilies
|
||||||
*/
|
*/
|
||||||
private static function performForEachWordMatch(array $words, string $subject, callable $callback): string
|
private static function performForEachWordMatch(array $words, string $subject, callable $callback): string
|
||||||
{
|
{
|
||||||
$offset = 0;
|
$ord1_bitset = 0;
|
||||||
|
$ord2_bitset = 0;
|
||||||
|
$prefixes = [];
|
||||||
|
foreach ($words as $word => $_) {
|
||||||
|
if (strlen($word) < 2 || !ctype_graph($word)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$ord1 = ord($word);
|
||||||
|
$ord2 = ord($word[1]);
|
||||||
|
$ord1_bitset |= 1 << ($ord1 & 31);
|
||||||
|
$ord2_bitset |= 1 << ($ord2 & 31);
|
||||||
|
if (!array_key_exists($word[0], $prefixes)) {
|
||||||
|
$prefixes[$word[0]] = [];
|
||||||
|
}
|
||||||
|
$prefixes[$word[0]][] = $word;
|
||||||
|
}
|
||||||
|
|
||||||
$result = '';
|
$result = '';
|
||||||
$processed = 0;
|
$processed = 0;
|
||||||
// Learned from PHP's strtr implementation
|
$s_start = 0; // Segment start
|
||||||
// Should probably improve performance once JIT-compiled
|
// No spaces are allowed in smilies, so they can serve as delimiters.
|
||||||
$length_bitset = 0;
|
// Splitting by some delimiters may not be necessary though?
|
||||||
$ord_bitset = 0;
|
while (true) {
|
||||||
foreach ($words as $word => $_) {
|
if ($s_start >= strlen($subject)) {
|
||||||
$length = strlen($word);
|
$result .= substr($subject, $processed);
|
||||||
if ($length <= 31) {
|
break;
|
||||||
$length_bitset |= 1 << $length;
|
|
||||||
}
|
}
|
||||||
$ord = ord($word);
|
if (preg_match('/\s+?(?=\S|$)/', $subject, $match, PREG_OFFSET_CAPTURE, $s_start)) {
|
||||||
$ord_bitset |= 1 << ($ord & 31);
|
[$whitespaces, $s_end] = $match[0];
|
||||||
}
|
} else {
|
||||||
|
$s_end = strlen($subject);
|
||||||
while ($offset < strlen($subject) && preg_match('/\s+?(?=\S|$)/', $subject, $matches, PREG_OFFSET_CAPTURE, $offset)) {
|
$whitespaces = '';
|
||||||
[$whitespaces, $next] = $matches[0];
|
|
||||||
$word = substr($subject, $offset, $next - $offset);
|
|
||||||
|
|
||||||
$shift = strlen($word);
|
|
||||||
$ord = ord($word);
|
|
||||||
if (($shift > 31 || ($length_bitset & (1 << $shift)))
|
|
||||||
&& ($ord_bitset & (1 << ($ord & 31)))
|
|
||||||
&& array_key_exists($word, $words)) {
|
|
||||||
$result .= substr($subject, $processed, $offset - $processed);
|
|
||||||
$result .= call_user_func($callback, $word, $words[$word]);
|
|
||||||
$processed = $offset + strlen($word);
|
|
||||||
}
|
}
|
||||||
$offset = $next + strlen($whitespaces);
|
$s_length = $s_end - $s_start;
|
||||||
}
|
if ($s_length > 1) {
|
||||||
$word = substr($subject, $offset);
|
$segment = substr($subject, $s_start, $s_length);
|
||||||
if (array_key_exists($word, $words)) {
|
// Find possible starting points for smilies.
|
||||||
$result .= substr($subject, $processed, $offset - $processed);
|
// For built-in smilies, the two bitsets should make attempts quite efficient.
|
||||||
$result .= call_user_func($callback, $word, $words[$word]);
|
// However, presuming custom smilies follow the format of ":shortcode" or ":shortcode:",
|
||||||
} else {
|
// if the user adds more smilies (with addons), the second bitset may eventually become useless.
|
||||||
$result .= substr($subject, $processed);
|
for ($i = 0; $i < $s_length - 1; $i++) {
|
||||||
|
$c = $segment[$i];
|
||||||
|
$d = $segment[$i + 1];
|
||||||
|
if (($ord1_bitset & (1 << (ord($c) & 31))) && ($ord2_bitset & (1 << (ord($d) & 31))) && array_key_exists($c, $prefixes)) {
|
||||||
|
foreach ($prefixes[$c] as $word) {
|
||||||
|
$wlength = strlen($word);
|
||||||
|
if ($wlength <= $s_length - $i && substr($segment, $i, $wlength) === $word) {
|
||||||
|
// Check for boundaries
|
||||||
|
if (($i === 0 || ctype_space($segment[$i - 1]) || ctype_punct($segment[$i - 1]))
|
||||||
|
&& ($i + $wlength >= $s_length || ctype_space($segment[$i + $wlength]) || ctype_punct($segment[$i + $wlength]))) {
|
||||||
|
$result .= substr($subject, $processed, $s_start - $processed + $i);
|
||||||
|
$result .= call_user_func($callback, $word, $words[$word]);
|
||||||
|
$i += $wlength;
|
||||||
|
$processed = $s_start + $i;
|
||||||
|
$i--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$s_start = $s_end + strlen($whitespaces);
|
||||||
}
|
}
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -290,14 +290,12 @@ class Status extends BaseFactory
|
||||||
|
|
||||||
$emojis = null;
|
$emojis = null;
|
||||||
if (DI::baseUrl()->isLocalUrl($item['uri'])) {
|
if (DI::baseUrl()->isLocalUrl($item['uri'])) {
|
||||||
$used_smilies = Smilies::extractUsedSmilies($item['raw-body'] ?: $item['body']);
|
$used_smilies = Smilies::extractUsedSmilies($item['raw-body'] ?: $item['body'], $normalized);
|
||||||
// $used_smilies contains normalized texts
|
|
||||||
if ($item['raw-body']) {
|
if ($item['raw-body']) {
|
||||||
$item['raw-body'] = $used_smilies[''];
|
$item['raw-body'] = $normalized;
|
||||||
} elseif ($item['body']) {
|
} elseif ($item['body']) {
|
||||||
$item['body'] = $used_smilies[''];
|
$item['body'] = $normalized;
|
||||||
}
|
}
|
||||||
unset($used_smilies['']);
|
|
||||||
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray($used_smilies)->getArrayCopy(true);
|
$emojis = $this->mstdnEmojiFactory->createCollectionFromArray($used_smilies)->getArrayCopy(true);
|
||||||
} else {
|
} else {
|
||||||
if (preg_match_all("(\[emoji=(.*?)](.*?)\[/emoji])ism", $item['body'] ?: $item['raw-body'], $matches)) {
|
if (preg_match_all("(\[emoji=(.*?)](.*?)\[/emoji])ism", $item['body'] ?: $item['raw-body'], $matches)) {
|
||||||
|
|
|
@ -1514,11 +1514,9 @@ class Transmitter
|
||||||
* @param string $text Text containing tags like :tag:
|
* @param string $text Text containing tags like :tag:
|
||||||
* @return string normalized text
|
* @return string normalized text
|
||||||
*/
|
*/
|
||||||
private static function addEmojiTags(array &$tags, string $text)
|
private static function addEmojiTags(array &$tags, string $text): string
|
||||||
{
|
{
|
||||||
$emojis = Smilies::extractUsedSmilies($text);
|
$emojis = Smilies::extractUsedSmilies($text, $normalized);
|
||||||
$normalized = $emojis[''];
|
|
||||||
unset($emojis['']);
|
|
||||||
foreach ($emojis as $name => $url) {
|
foreach ($emojis as $name => $url) {
|
||||||
$tags[] = [
|
$tags[] = [
|
||||||
'type' => 'Emoji',
|
'type' => 'Emoji',
|
||||||
|
|
|
@ -371,7 +371,7 @@ return [
|
||||||
[
|
[
|
||||||
'uri-id' => 100,
|
'uri-id' => 100,
|
||||||
'title' => 'item_title',
|
'title' => 'item_title',
|
||||||
'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[',
|
'body' => ':like ~friendica no [code]:dislike[/code] :-p :-[ <3',
|
||||||
'plink' => 'https://friendica.local/post/100',
|
'plink' => 'https://friendica.local/post/100',
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
|
|
|
@ -147,7 +147,7 @@ class SmiliesTest extends FixtureTest
|
||||||
|
|
||||||
public function dataReplace(): array
|
public function dataReplace(): array
|
||||||
{
|
{
|
||||||
return [
|
$data = [
|
||||||
'simple-1' => [
|
'simple-1' => [
|
||||||
'expected' => 'alt=":-p"',
|
'expected' => 'alt=":-p"',
|
||||||
'body' => ':-p',
|
'body' => ':-p',
|
||||||
|
@ -165,7 +165,7 @@ class SmiliesTest extends FixtureTest
|
||||||
'body' => '~friendicaca',
|
'body' => '~friendicaca',
|
||||||
],
|
],
|
||||||
'symbol-boundary-1' => [
|
'symbol-boundary-1' => [
|
||||||
'expected' => '(:-p)',
|
'expected' => 'alt=":-p"',
|
||||||
'body' => '(:-p)',
|
'body' => '(:-p)',
|
||||||
],
|
],
|
||||||
'hearts-1' => [
|
'hearts-1' => [
|
||||||
|
@ -185,6 +185,19 @@ class SmiliesTest extends FixtureTest
|
||||||
'body' => '(3<33)',
|
'body' => '(3<33)',
|
||||||
],
|
],
|
||||||
];
|
];
|
||||||
|
foreach ([':-[', ':-D', 'o.O'] as $emoji) {
|
||||||
|
foreach (['A', '_', ':', '-'] as $prefix) {
|
||||||
|
foreach (['', ' ', 'A', ':', '-'] as $suffix) {
|
||||||
|
$no_smile = ($prefix !== '' && ctype_alnum($prefix)) || ($suffix !== '' && ctype_alnum($suffix));
|
||||||
|
$s = $prefix . $emoji . $suffix;
|
||||||
|
$data[] = [
|
||||||
|
'expected' => $no_smile ? $s : 'alt="' . $emoji . '"',
|
||||||
|
'body' => $s,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $data;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -202,6 +215,11 @@ class SmiliesTest extends FixtureTest
|
||||||
public function dataExtractUsedSmilies(): array
|
public function dataExtractUsedSmilies(): array
|
||||||
{
|
{
|
||||||
return [
|
return [
|
||||||
|
'symbols' => [
|
||||||
|
'expected' => ['p', 'heart', 'embarrassed', 'kiss'],
|
||||||
|
'body' => ':-p <3 ":-[:-"',
|
||||||
|
'normalized' => ':p: :heart: ":embarrassed::kiss:',
|
||||||
|
],
|
||||||
'single-smiley' => [
|
'single-smiley' => [
|
||||||
'expected' => ['like'],
|
'expected' => ['like'],
|
||||||
'body' => ':like',
|
'body' => ':like',
|
||||||
|
@ -239,11 +257,12 @@ class SmiliesTest extends FixtureTest
|
||||||
*/
|
*/
|
||||||
public function testExtractUsedSmilies(array $expected, string $body, string $normalized)
|
public function testExtractUsedSmilies(array $expected, string $body, string $normalized)
|
||||||
{
|
{
|
||||||
$extracted = Smilies::extractUsedSmilies($body);
|
$extracted = Smilies::extractUsedSmilies($body, $converted);
|
||||||
$this->assertEquals($normalized, $extracted['']);
|
$expected = array_fill_keys($expected, true);
|
||||||
foreach ($expected as $shortcode) {
|
$this->assertEquals($normalized, $converted);
|
||||||
$this->assertArrayHasKey($shortcode, $extracted);
|
foreach (array_keys($extracted) as $shortcode) {
|
||||||
|
$this->assertArrayHasKey($shortcode, $expected);
|
||||||
}
|
}
|
||||||
$this->assertEquals(count($expected), count($extracted) - 1);
|
$this->assertEquals(count($expected), count($extracted));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,7 @@ class StatusTest extends FixtureTest
|
||||||
$post = Post::selectFirst([], ['id' => 14]);
|
$post = Post::selectFirst([], ['id' => 14]);
|
||||||
$this->assertNotNull($post);
|
$this->assertNotNull($post);
|
||||||
$result = $this->status->createFromUriId($post['uri-id'])->toArray();
|
$result = $this->status->createFromUriId($post['uri-id'])->toArray();
|
||||||
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed:', $result['content']);
|
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed: ❤', $result['content']);
|
||||||
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
|
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
|
||||||
$this->assertEquals(count($emojis), count($result['emojis']));
|
$this->assertEquals(count($emojis), count($result['emojis']));
|
||||||
foreach ($result['emojis'] as $emoji) {
|
foreach ($result['emojis'] as $emoji) {
|
||||||
|
|
|
@ -42,7 +42,7 @@ class TransmitterTest extends FixtureTest
|
||||||
$note = Transmitter::createNote($post);
|
$note = Transmitter::createNote($post);
|
||||||
$this->assertNotNull($note);
|
$this->assertNotNull($note);
|
||||||
|
|
||||||
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed:', $note['content']);
|
$this->assertEquals(':like: :friendica: no <code>:dislike</code> :p: :embarrassed: ❤', $note['content']);
|
||||||
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
|
$emojis = array_fill_keys(['like', 'friendica', 'p', 'embarrassed'], true);
|
||||||
$this->assertEquals(count($emojis), count($note['tag']));
|
$this->assertEquals(count($emojis), count($note['tag']));
|
||||||
foreach ($note['tag'] as $emoji) {
|
foreach ($note['tag'] as $emoji) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user