diff --git a/doc/BBCode.md b/doc/BBCode.md index cab51bd09f..59f7414491 100644 --- a/doc/BBCode.md +++ b/doc/BBCode.md @@ -613,15 +613,26 @@ On Mastodon this field is used for the content warning. Result - If you need to put literal bbcode in a message, [noparse], [nobb] or [pre] are used to escape bbcode: + If you need to put literal BBCode in a message, [noparse], [nobb] or [pre] blocks prevent BBCode conversion: + Note: [code] has priority over [noparse], [nobb] and [pre] which makes them display as BBCode tags in code blocks instead of being removed. + [code] blocks inside [noparse] will still be converted to a code block. [b]bold[/b] + + Additionally, [noparse] and [pre] blocks prevent mention and hashtag conversion to links: + + + @user@domain.tld #hashtag + [nosmile] is used to disable smilies on a post by post basis

diff --git a/include/api.php b/include/api.php index 90d0a511b7..d87a54f91d 100644 --- a/include/api.php +++ b/include/api.php @@ -624,7 +624,7 @@ function api_get_user(App $a, $contact_id = null) 'name' => $contact["name"], 'screen_name' => (($contact['nick']) ? $contact['nick'] : $contact['name']), 'location' => ($contact["location"] != "") ? $contact["location"] : ContactSelector::networkToName($contact['network'], $contact['url'], $contact['protocol']), - 'description' => BBCode::toPlaintext($contact["about"]), + 'description' => BBCode::toPlaintext($contact["about"] ?? ''), 'profile_image_url' => $contact["micro"], 'profile_image_url_https' => $contact["micro"], 'profile_image_url_profile_size' => $contact["thumb"], @@ -698,7 +698,7 @@ function api_get_user(App $a, $contact_id = null) 'name' => (($uinfo[0]['name']) ? $uinfo[0]['name'] : $uinfo[0]['nick']), 'screen_name' => (($uinfo[0]['nick']) ? $uinfo[0]['nick'] : $uinfo[0]['name']), 'location' => $location, - 'description' => BBCode::toPlaintext($description), + 'description' => BBCode::toPlaintext($description ?? ''), 'profile_image_url' => $uinfo[0]['micro'], 'profile_image_url_https' => $uinfo[0]['micro'], 'profile_image_url_profile_size' => $uinfo[0]["thumb"], diff --git a/include/enotify.php b/include/enotify.php index ae2e2e7fef..89a81833d0 100644 --- a/include/enotify.php +++ b/include/enotify.php @@ -465,7 +465,7 @@ function notification($params) if ($show_in_notification_page) { $notification = DI::notify()->insert([ 'name' => $params['source_name'] ?? '', - 'name_cache' => substr(strip_tags(BBCode::convert($params['source_name'] ?? '')), 0, 255), + 'name_cache' => substr(strip_tags(BBCode::convert($params['source_name'])), 0, 255), 'url' => $params['source_link'] ?? '', 'photo' => $params['source_photo'] ?? '', 'link' => $itemlink ?? '', diff --git a/mod/cal.php b/mod/cal.php index edcbaa7e8f..4b90b02cd8 100644 --- a/mod/cal.php +++ b/mod/cal.php @@ -78,7 +78,7 @@ function cal_init(App $a) '$photo' => $profile['photo'], '$addr' => $profile['addr'] ?: '', '$account_type' => $account_type, - '$about' => BBCode::convert($profile['about'] ?: ''), + '$about' => BBCode::convert($profile['about']), ]); $cal_widget = Widget\CalendarExport::getHTML(); diff --git a/mod/item.php b/mod/item.php index b023b97d19..31336c4348 100644 --- a/mod/item.php +++ b/mod/item.php @@ -369,16 +369,16 @@ function item_post(App $a) { // Look for any tags and linkify them $inform = ''; - - $tags = BBCode::getTags($body); - - $tagged = []; - $private_forum = false; + $private_id = null; $only_to_forum = false; $forum_contact = []; - if (count($tags)) { + BBCode::performWithEscapedTags($body, ['noparse', 'pre', 'code'], function ($body) use ($profile_uid, $network, $str_contact_allow, &$inform, &$private_forum, &$private_id, &$only_to_forum, &$forum_contact) { + $tags = BBCode::getTags($body); + + $tagged = []; + foreach ($tags as $tag) { $tag_type = substr($tag, 0, 1); @@ -386,41 +386,36 @@ function item_post(App $a) { continue; } - /* - * If we already tagged 'Robert Johnson', don't try and tag 'Robert'. + /* If we already tagged 'Robert Johnson', don't try and tag 'Robert'. * Robert Johnson should be first in the $tags array */ - $fullnametagged = false; - /// @TODO $tagged is initialized above if () block and is not filled, maybe old-lost code? foreach ($tagged as $nextTag) { if (stristr($nextTag, $tag . ' ')) { - $fullnametagged = true; - break; + continue 2; } } - if ($fullnametagged) { - continue; - } $success = handle_tag($body, $inform, local_user() ? local_user() : $profile_uid, $tag, $network); if ($success['replaced']) { $tagged[] = $tag; } // When the forum is private or the forum is addressed with a "!" make the post private - if (is_array($success['contact']) && (!empty($success['contact']['prv']) || ($tag_type == Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION]))) { + if (!empty($success['contact']['prv']) || ($tag_type == Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION])) { $private_forum = $success['contact']['prv']; $only_to_forum = ($tag_type == Tag::TAG_CHARACTER[Tag::EXCLUSIVE_MENTION]); $private_id = $success['contact']['id']; $forum_contact = $success['contact']; - } elseif (is_array($success['contact']) && !empty($success['contact']['forum']) && - ($str_contact_allow == '<' . $success['contact']['id'] . '>')) { + } elseif (!empty($success['contact']['forum']) && ($str_contact_allow == '<' . $success['contact']['id'] . '>')) { $private_forum = false; $only_to_forum = true; $private_id = $success['contact']['id']; $forum_contact = $success['contact']; } } - } + + return $body; + }); + $original_contact_id = $contact_id; @@ -642,7 +637,7 @@ function item_post(App $a) { // Check for hashtags in the body and repair or add hashtag links if ($preview || $orig_post) { - Item::setHashtags($datarray); + $datarray['body'] = Item::setHashtags($datarray['body']); } // preview mode - prepare the body for display and send it via json diff --git a/mod/photos.php b/mod/photos.php index 7240c0052b..2b72fd3e4e 100644 --- a/mod/photos.php +++ b/mod/photos.php @@ -82,7 +82,7 @@ function photos_init(App $a) { '$photo' => $profile['photo'], '$addr' => $profile['addr'] ?? '', '$account_type' => $account_type, - '$about' => BBCode::convert($profile['about'] ?? ''), + '$about' => BBCode::convert($profile['about']), ]); $albums = Photo::getAlbums($a->data['user']['uid']); diff --git a/mod/poco.php b/mod/poco.php index ef77c9c992..f1fdc55d75 100644 --- a/mod/poco.php +++ b/mod/poco.php @@ -204,142 +204,142 @@ function poco_init(App $a) { } } - if (is_array($contacts)) { - if (DBA::isResult($contacts)) { - foreach ($contacts as $contact) { - if (!isset($contact['updated'])) { - $contact['updated'] = ''; - } + if (!is_array($contacts)) { + throw new \Friendica\Network\HTTPException\InternalServerErrorException(); + } - if (! isset($contact['generation'])) { - if ($global) { - $contact['generation'] = 3; - } elseif ($system_mode) { - $contact['generation'] = 1; - } else { - $contact['generation'] = 2; - } - } - - if (($contact['keywords'] == "") && isset($contact['pub_keywords'])) { - $contact['keywords'] = $contact['pub_keywords']; - } - if (isset($contact['account-type'])) { - $contact['contact-type'] = $contact['account-type']; - } - $about = DI::cache()->get("about:" . $contact['updated'] . ":" . $contact['nurl']); - if (is_null($about)) { - $about = BBCode::convert($contact['about'], false); - DI::cache()->set("about:" . $contact['updated'] . ":" . $contact['nurl'], $about); - } - - // Non connected persons can only see the keywords of a Diaspora account - if ($contact['network'] == Protocol::DIASPORA) { - $contact['location'] = ""; - $about = ""; - } - - $entry = []; - if ($fields_ret['id']) { - $entry['id'] = (int)$contact['id']; - } - if ($fields_ret['displayName']) { - $entry['displayName'] = $contact['name']; - } - if ($fields_ret['aboutMe']) { - $entry['aboutMe'] = $about; - } - if ($fields_ret['currentLocation']) { - $entry['currentLocation'] = $contact['location']; - } - if ($fields_ret['generation']) { - $entry['generation'] = (int)$contact['generation']; - } - if ($fields_ret['urls']) { - $entry['urls'] = [['value' => $contact['url'], 'type' => 'profile']]; - if ($contact['addr'] && ($contact['network'] !== Protocol::MAIL)) { - $entry['urls'][] = ['value' => 'acct:' . $contact['addr'], 'type' => 'webfinger']; - } - } - if ($fields_ret['preferredUsername']) { - $entry['preferredUsername'] = $contact['nick']; - } - if ($fields_ret['updated']) { - if (! $global) { - $entry['updated'] = $contact['success_update']; - - if ($contact['name-date'] > $entry['updated']) { - $entry['updated'] = $contact['name-date']; - } - if ($contact['uri-date'] > $entry['updated']) { - $entry['updated'] = $contact['uri-date']; - } - if ($contact['avatar-date'] > $entry['updated']) { - $entry['updated'] = $contact['avatar-date']; - } - } else { - $entry['updated'] = $contact['updated']; - } - $entry['updated'] = date("c", strtotime($entry['updated'])); - } - if ($fields_ret['photos']) { - $entry['photos'] = [['value' => $contact['photo'], 'type' => 'profile']]; - } - if ($fields_ret['network']) { - $entry['network'] = $contact['network']; - if ($entry['network'] == Protocol::STATUSNET) { - $entry['network'] = Protocol::OSTATUS; - } - if (($entry['network'] == "") && ($contact['self'])) { - $entry['network'] = Protocol::DFRN; - } - } - if ($fields_ret['tags']) { - $tags = str_replace(",", " ", $contact['keywords']); - $tags = explode(" ", $tags); - - $cleaned = []; - foreach ($tags as $tag) { - $tag = trim(strtolower($tag)); - if ($tag != "") { - $cleaned[] = $tag; - } - } - - $entry['tags'] = [$cleaned]; - } - if ($fields_ret['address']) { - $entry['address'] = []; - - // Deactivated. It just reveals too much data. (Although its from the default profile) - //if (isset($rr['address'])) - // $entry['address']['streetAddress'] = $rr['address']; - - if (isset($contact['locality'])) { - $entry['address']['locality'] = $contact['locality']; - } - if (isset($contact['region'])) { - $entry['address']['region'] = $contact['region']; - } - // See above - //if (isset($rr['postal-code'])) - // $entry['address']['postalCode'] = $rr['postal-code']; - - if (isset($contact['country'])) { - $entry['address']['country'] = $contact['country']; - } - } - - if ($fields_ret['contactType']) { - $entry['contactType'] = intval($contact['contact-type']); - } - $ret['entry'][] = $entry; + if (DBA::isResult($contacts)) { + foreach ($contacts as $contact) { + if (!isset($contact['updated'])) { + $contact['updated'] = ''; } - } else { - $ret['entry'][] = []; + + if (! isset($contact['generation'])) { + if ($global) { + $contact['generation'] = 3; + } elseif ($system_mode) { + $contact['generation'] = 1; + } else { + $contact['generation'] = 2; + } + } + + if (($contact['keywords'] == "") && isset($contact['pub_keywords'])) { + $contact['keywords'] = $contact['pub_keywords']; + } + if (isset($contact['account-type'])) { + $contact['contact-type'] = $contact['account-type']; + } + $about = DI::cache()->get("about:" . $contact['updated'] . ":" . $contact['nurl']); + if (is_null($about)) { + $about = BBCode::convert($contact['about'], false); + DI::cache()->set("about:" . $contact['updated'] . ":" . $contact['nurl'], $about); + } + + // Non connected persons can only see the keywords of a Diaspora account + if ($contact['network'] == Protocol::DIASPORA) { + $contact['location'] = ""; + $about = ""; + } + + $entry = []; + if ($fields_ret['id']) { + $entry['id'] = (int)$contact['id']; + } + if ($fields_ret['displayName']) { + $entry['displayName'] = $contact['name']; + } + if ($fields_ret['aboutMe']) { + $entry['aboutMe'] = $about; + } + if ($fields_ret['currentLocation']) { + $entry['currentLocation'] = $contact['location']; + } + if ($fields_ret['generation']) { + $entry['generation'] = (int)$contact['generation']; + } + if ($fields_ret['urls']) { + $entry['urls'] = [['value' => $contact['url'], 'type' => 'profile']]; + if ($contact['addr'] && ($contact['network'] !== Protocol::MAIL)) { + $entry['urls'][] = ['value' => 'acct:' . $contact['addr'], 'type' => 'webfinger']; + } + } + if ($fields_ret['preferredUsername']) { + $entry['preferredUsername'] = $contact['nick']; + } + if ($fields_ret['updated']) { + if (! $global) { + $entry['updated'] = $contact['success_update']; + + if ($contact['name-date'] > $entry['updated']) { + $entry['updated'] = $contact['name-date']; + } + if ($contact['uri-date'] > $entry['updated']) { + $entry['updated'] = $contact['uri-date']; + } + if ($contact['avatar-date'] > $entry['updated']) { + $entry['updated'] = $contact['avatar-date']; + } + } else { + $entry['updated'] = $contact['updated']; + } + $entry['updated'] = date("c", strtotime($entry['updated'])); + } + if ($fields_ret['photos']) { + $entry['photos'] = [['value' => $contact['photo'], 'type' => 'profile']]; + } + if ($fields_ret['network']) { + $entry['network'] = $contact['network']; + if ($entry['network'] == Protocol::STATUSNET) { + $entry['network'] = Protocol::OSTATUS; + } + if (($entry['network'] == "") && ($contact['self'])) { + $entry['network'] = Protocol::DFRN; + } + } + if ($fields_ret['tags']) { + $tags = str_replace(",", " ", $contact['keywords']); + $tags = explode(" ", $tags); + + $cleaned = []; + foreach ($tags as $tag) { + $tag = trim(strtolower($tag)); + if ($tag != "") { + $cleaned[] = $tag; + } + } + + $entry['tags'] = [$cleaned]; + } + if ($fields_ret['address']) { + $entry['address'] = []; + + // Deactivated. It just reveals too much data. (Although its from the default profile) + //if (isset($rr['address'])) + // $entry['address']['streetAddress'] = $rr['address']; + + if (isset($contact['locality'])) { + $entry['address']['locality'] = $contact['locality']; + } + if (isset($contact['region'])) { + $entry['address']['region'] = $contact['region']; + } + // See above + //if (isset($rr['postal-code'])) + // $entry['address']['postalCode'] = $rr['postal-code']; + + if (isset($contact['country'])) { + $entry['address']['country'] = $contact['country']; + } + } + + if ($fields_ret['contactType']) { + $entry['contactType'] = intval($contact['contact-type']); + } + $ret['entry'][] = $entry; } } else { - throw new \Friendica\Network\HTTPException\InternalServerErrorException(); + $ret['entry'][] = []; } Logger::log("End of poco", Logger::DEBUG); diff --git a/mod/videos.php b/mod/videos.php index 49c64ef973..a3344a8b43 100644 --- a/mod/videos.php +++ b/mod/videos.php @@ -67,7 +67,7 @@ function videos_init(App $a) '$photo' => $profile['photo'], '$addr' => $profile['addr'] ?? '', '$account_type' => $account_type, - '$about' => BBCode::convert($profile['about'] ?? ''), + '$about' => BBCode::convert($profile['about']), ]); // If not there, create 'aside' empty diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 50536dc1a5..0f3c4ad529 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -1252,647 +1252,635 @@ class BBCode * @return string * @throws \Friendica\Network\HTTPException\InternalServerErrorException */ - public static function convert($text, $try_oembed = true, $simple_html = self::INTERNAL, $for_plaintext = false) + public static function convert(string $text = null, $try_oembed = true, $simple_html = self::INTERNAL, $for_plaintext = false) { + // Accounting for null default column values + if (is_null($text) || $text === '') { + return ''; + } + $a = DI::app(); - /* - * preg_match_callback function to replace potential Oembed tags with Oembed content - * - * $match[0] = [tag]$url[/tag] or [tag=$url]$title[/tag] - * $match[1] = $url - * $match[2] = $title or absent - */ - $try_oembed_callback = function ($match) - { - $url = $match[1]; - $title = $match[2] ?? null; + $text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) { + $text = self::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a) { + /* + * preg_match_callback function to replace potential Oembed tags with Oembed content + * + * $match[0] = [tag]$url[/tag] or [tag=$url]$title[/tag] + * $match[1] = $url + * $match[2] = $title or absent + */ + $try_oembed_callback = function ($match) + { + $url = $match[1]; + $title = $match[2] ?? null; - try { - $return = OEmbed::getHTML($url, $title); - } catch (Exception $ex) { - $return = $match[0]; - } - - return $return; - }; - - // Extracting code blocks before the whitespace processing and the autolinker - $codeblocks = []; - - $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism", - function ($matches) use (&$codeblocks) { - $return = '#codeblock-' . count($codeblocks) . '#'; - if (strpos($matches[2], "\n") !== false) { - $codeblocks[] = '
' . htmlspecialchars(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
'; - } else { - $codeblocks[] = '' . htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8') . ''; - } - - return $return; - }, - $text - ); - - // Hide all [noparse] contained bbtags by spacefying them - // POSSIBLE BUG --> Will the 'preg' functions crash if there's an embedded image? - - $text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'self::escapeNoparseCallback', $text); - $text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::escapeNoparseCallback', $text); - $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::escapeNoparseCallback', $text); - - // Remove the abstract element. It is a non visible element. - $text = self::stripAbstract($text); - - // Move all spaces out of the tags - $text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $text); - $text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $text); - - // Extract the private images which use data urls since preg has issues with - // large data sizes. Stash them away while we do bbcode conversion, and then put them back - // in after we've done all the regex matching. We cannot use any preg functions to do this. - - $extracted = self::extractImagesFromItemBody($text); - $text = $extracted['body']; - $saved_image = $extracted['images']; - - // If we find any event code, turn it into an event. - // After we're finished processing the bbcode we'll - // replace all of the event code with a reformatted version. - - $ev = Event::fromBBCode($text); - - // Replace any html brackets with HTML Entities to prevent executing HTML or script - // Don't use strip_tags here because it breaks [url] search by replacing & with amp - - $text = str_replace("<", "<", $text); - $text = str_replace(">", ">", $text); - - // remove some newlines before the general conversion - $text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "[share$1]$2[/share]", $text); - $text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "[quote$1]$2[/quote]", $text); - - // when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems - if (!$try_oembed) { - $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); - } - - // Convert new line chars to html
tags - - // nlbr seems to be hopelessly messed up - // $Text = nl2br($Text); - - // We'll emulate it. - - $text = trim($text); - $text = str_replace("\r\n", "\n", $text); - - // Remove linefeeds inside of the table elements. See issue #6799 - $search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", - "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", - "\n[tr]", "[tr]\n", " [tr]", "[tr] ", "\n[/tr]", "[/tr]\n", " [/tr]", "[/tr] ", - "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] "]; - $replace = ["[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]", - "[td]", "[td]", "[td]", "[/td]", "[/td]", "[/td]", - "[tr]", "[tr]", "[tr]", "[tr]", "[/tr]", "[/tr]", "[/tr]", "[/tr]", - "[table]", "[table]", "[table]", "[/table]", "[/table]", "[/table]"]; - do { - $oldtext = $text; - $text = str_replace($search, $replace, $text); - } while ($oldtext != $text); - - // Replace these here only once - $search = ["\n[table]", "[/table]\n"]; - $replace = ["[table]", "[/table]"]; - $text = str_replace($search, $replace, $text); - - // removing multiplicated newlines - if (DI::config()->get('system', 'remove_multiplicated_lines')) { - $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n", - "\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"]; - $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[ul]", "[/ul]", "\n[share ", "[/attachment]", - "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"]; - do { - $oldtext = $text; - $text = str_replace($search, $replace, $text); - } while ($oldtext != $text); - } - - /// @todo Have a closer look at the different html modes - // Handle attached links or videos - if ($simple_html == self::ACTIVITYPUB) { - $text = self::removeAttachment($text); - } elseif (!in_array($simple_html, [self::INTERNAL, self::CONNECTORS])) { - $text = self::removeAttachment($text, true); - } else { - $text = self::convertAttachment($text, $simple_html, $try_oembed); - } - - // leave open the posibility of [map=something] - // this is replaced in Item::prepareBody() which has knowledge of the item location - if (strpos($text, '[/map]') !== false) { - $text = preg_replace_callback( - "/\[map\](.*?)\[\/map\]/ism", - function ($match) use ($simple_html) { - return str_replace($match[0], '

' . Map::byLocation($match[1], $simple_html) . '

', $match[0]); - }, - $text - ); - } - - if (strpos($text, '[map=') !== false) { - $text = preg_replace_callback( - "/\[map=(.*?)\]/ism", - function ($match) use ($simple_html) { - return str_replace($match[0], '

' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '

', $match[0]); - }, - $text - ); - } - - if (strpos($text, '[map]') !== false) { - $text = preg_replace("/\[map\]/", '

', $text); - } - - // Check for headers - $text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '

$1

', $text); - $text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '

$1

', $text); - $text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '

$1

', $text); - $text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '

$1

', $text); - $text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '
$1
', $text); - $text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '
$1
', $text); - - // Check for paragraph - $text = preg_replace("(\[p\](.*?)\[\/p\])ism", '

$1

', $text); - - // Check for bold text - $text = preg_replace("(\[b\](.*?)\[\/b\])ism", '$1', $text); - - // Check for Italics text - $text = preg_replace("(\[i\](.*?)\[\/i\])ism", '$1', $text); - - // Check for Underline text - $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '$1', $text); - - // Check for strike-through text - $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $text); - - // Check for over-line text - $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $text); - - // Check for colored text - $text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "$2", $text); - - // Check for sized text - // [size=50] --> font-size: 50px (with the unit). - if ($simple_html != self::DIASPORA) { - $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "$2", $text); - $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); - } else { - // Issue 2199: Diaspora doesn't interpret the construct above, nor the or element - $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); - } - - - // Check for centered text - $text = preg_replace("(\[center\](.*?)\[\/center\])ism", "
$1
", $text); - - // Check for list text - $text = str_replace("[*]", "
  • ", $text); - - // Check for style sheet commands - $text = preg_replace_callback( - "(\[style=(.*?)\](.*?)\[\/style\])ism", - function ($match) { - return "" . $match[2] . ""; - }, - $text - ); - - // Check for CSS classes - $text = preg_replace_callback( - "(\[class=(.*?)\](.*?)\[\/class\])ism", - function ($match) { - return "" . $match[2] . ""; - }, - $text - ); - - // handle nested lists - $endlessloop = 0; - - while ((((strpos($text, "[/list]") !== false) && (strpos($text, "[list") !== false)) || - ((strpos($text, "[/ol]") !== false) && (strpos($text, "[ol]") !== false)) || - ((strpos($text, "[/ul]") !== false) && (strpos($text, "[ul]") !== false)) || - ((strpos($text, "[/li]") !== false) && (strpos($text, "[li]") !== false))) && (++$endlessloop < 20)) { - $text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); - $text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '
      $1
    ', $text); - $text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '
  • $1
  • ', $text); - } - - $text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1', $text); - $text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1', $text); - $text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1', $text); - $text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '$1
    ', $text); - - $text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '$1
    ', $text); - $text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '$1
    ', $text); - - $text = str_replace('[hr]', '
    ', $text); - - if (!$for_plaintext) { - $escaped = []; - - // Escaping BBCodes susceptible to contain rogue URL we don'' want the autolinker to catch - $text = preg_replace_callback('#\[(url|img|audio|video|youtube|vimeo|share|attachment|iframe|bookmark).+?\[/\1\]#ism', - function ($matches) use (&$escaped) { - $return = '{escaped-' . count($escaped) . '}'; - $escaped[] = $matches[0]; + try { + $return = OEmbed::getHTML($url, $title); + } catch (Exception $ex) { + $return = $match[0]; + } return $return; - }, - $text - ); - - // Autolinker for isolated URLs - $text = preg_replace(Strings::autoLinkRegEx(), '[url]$1[/url]', $text); - - // Restoring escaped blocks - $text = preg_replace_callback('/{escaped-([0-9]+)}/iU', - function ($matches) use ($escaped) { - return $escaped[intval($matches[1])] ?? $matches[0]; - }, - $text - ); - } - - // This is actually executed in Item::prepareBody() - - $nosmile = strpos($text, '[nosmile]') !== false; - $text = str_replace('[nosmile]', '', $text); - - // Check for font change text - $text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "$2", $text); - - // Declare the format for [spoiler] layout - $SpoilerLayout = '
    ' . DI::l10n()->t('Click to open/close') . '$1
    '; - - // Check for [spoiler] text - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler]") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", $SpoilerLayout, $text); - } - - // Check for [spoiler=Title] text - - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/spoiler]")!== false) && (strpos($text, "[spoiler=") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism", - '
    $1$2
    ', - $text); - } - - // Declare the format for [quote] layout - $QuoteLayout = '
    $1
    '; - - // Check for [quote] text - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote]") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $text); - } - - // Check for [quote=Author] text - - $t_wrote = DI::l10n()->t('$1 wrote:'); - - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/quote]")!== false) && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism", - "

    " . $t_wrote . "

    $2
    ", - $text); - } + }; - // [img=widthxheight]image source[/img] - $text = preg_replace_callback( - "/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", - function ($matches) use ($simple_html) { - if (strpos($matches[3], "data:image/") === 0) { - return $matches[0]; + + // Remove the abstract element. It is a non visible element. + $text = self::stripAbstract($text); + + // Move all spaces out of the tags + $text = preg_replace("/\[(\w*)\](\s*)/ism", '$2[$1]', $text); + $text = preg_replace("/(\s*)\[\/(\w*)\]/ism", '[/$2]$1', $text); + + // Extract the private images which use data urls since preg has issues with + // large data sizes. Stash them away while we do bbcode conversion, and then put them back + // in after we've done all the regex matching. We cannot use any preg functions to do this. + + $extracted = self::extractImagesFromItemBody($text); + $text = $extracted['body']; + $saved_image = $extracted['images']; + + // If we find any event code, turn it into an event. + // After we're finished processing the bbcode we'll + // replace all of the event code with a reformatted version. + + $ev = Event::fromBBCode($text); + + // Replace any html brackets with HTML Entities to prevent executing HTML or script + // Don't use strip_tags here because it breaks [url] search by replacing & with amp + + $text = str_replace("<", "<", $text); + $text = str_replace(">", ">", $text); + + // remove some newlines before the general conversion + $text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "[share$1]$2[/share]", $text); + $text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "[quote$1]$2[/quote]", $text); + + // when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems + if (!$try_oembed) { + $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); } - $matches[3] = self::proxyUrl($matches[3], $simple_html); - return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]"; - }, - $text - ); + // Convert new line chars to html
    tags - $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '', $text); - $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '', $text); + // nlbr seems to be hopelessly messed up + // $Text = nl2br($Text); - $text = preg_replace_callback("/\[img\=(.*?)\](.*?)\[\/img\]/ism", - function ($matches) use ($simple_html) { - $matches[1] = self::proxyUrl($matches[1], $simple_html); - $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT); - return '' . $matches[2] . ''; - }, - $text); + // We'll emulate it. - // Images - // [img]pathtoimage[/img] - $text = preg_replace_callback( - "/\[img\](.*?)\[\/img\]/ism", - function ($matches) use ($simple_html) { - if (strpos($matches[1], "data:image/") === 0) { - return $matches[0]; + $text = trim($text); + $text = str_replace("\r\n", "\n", $text); + + // Remove linefeeds inside of the table elements. See issue #6799 + $search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", + "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", + "\n[tr]", "[tr]\n", " [tr]", "[tr] ", "\n[/tr]", "[/tr]\n", " [/tr]", "[/tr] ", + "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] "]; + $replace = ["[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]", + "[td]", "[td]", "[td]", "[/td]", "[/td]", "[/td]", + "[tr]", "[tr]", "[tr]", "[tr]", "[/tr]", "[/tr]", "[/tr]", "[/tr]", + "[table]", "[table]", "[table]", "[/table]", "[/table]", "[/table]"]; + do { + $oldtext = $text; + $text = str_replace($search, $replace, $text); + } while ($oldtext != $text); + + // Replace these here only once + $search = ["\n[table]", "[/table]\n"]; + $replace = ["[table]", "[/table]"]; + $text = str_replace($search, $replace, $text); + + // removing multiplicated newlines + if (DI::config()->get('system', 'remove_multiplicated_lines')) { + $search = ["\n\n\n", "\n ", " \n", "[/quote]\n\n", "\n[/quote]", "[/li]\n", "\n[li]", "\n[ul]", "[/ul]\n", "\n\n[share ", "[/attachment]\n", + "\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n"]; + $replace = ["\n\n", "\n", "\n", "[/quote]\n", "[/quote]", "[/li]", "[li]", "[ul]", "[/ul]", "\n[share ", "[/attachment]", + "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]"]; + do { + $oldtext = $text; + $text = str_replace($search, $replace, $text); + } while ($oldtext != $text); } - $matches[1] = self::proxyUrl($matches[1], $simple_html); - return "[img]" . $matches[1] . "[/img]"; - }, - $text - ); - - $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); - $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); - - $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $Text); - - // Simplify "video" element - $text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text); - - // Try to Oembed - if ($try_oembed) { - $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", '', $text); - $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); - - $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); - } else { - $text = preg_replace("/\[video\](.*?)\[\/video\]/ism", - '$1', $text); - $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", - '$1', $text); - } - - // html5 video and audio - - - if ($try_oembed) { - $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $text); - } else { - $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $text); - } - - // Youtube extensions - if ($try_oembed) { - $text = preg_replace_callback("/\[youtube\](https?:\/\/www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[youtube\](www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); - } - - $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); - $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); - $text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); - - if ($try_oembed) { - $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $text); - } else { - $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", - 'https://www.youtube.com/watch?v=$1', $text); - } - - if ($try_oembed) { - $text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); - } - - $text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); - $text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); - - if ($try_oembed) { - $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $text); - } else { - $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", - 'https://vimeo.com/$1', $text); - } - - // oembed tag - $text = OEmbed::BBCode2HTML($text); - - // Avoid triple linefeeds through oembed - $text = str_replace("


    ", "

    ", $text); - - // If we found an event earlier, strip out all the event code and replace with a reformatted version. - // Replace the event-start section with the entire formatted event. The other bbcode is stripped. - // Summary (e.g. title) is required, earlier revisions only required description (in addition to - // start which is always required). Allow desc with a missing summary for compatibility. - - if ((!empty($ev['desc']) || !empty($ev['summary'])) && !empty($ev['start'])) { - $sub = Event::getHTML($ev, $simple_html); - - $text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text); - $text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text); - $text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $text); - $text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $text); - $text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $text); - $text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism", '', $text); - $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text); - } - - // Replace non graphical smilies for external posts - if (!$nosmile && !$for_plaintext) { - $text = Smilies::replace($text); - } - - if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA)) { - $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text)); - // Emojis are always 4 byte Unicode characters - if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) { - $text = '' . $text . ''; - } - } - - if (!$for_plaintext) { - if (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { - $text = preg_replace_callback("/\[url\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); - $text = preg_replace_callback("/\[url\=(.*?)\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); - } - } else { - $text = preg_replace("(\[url\](.*?)\[\/url\])ism", " $1 ", $text); - $text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text); - } - - $text = str_replace(["\r","\n"], ['
    ', '
    '], $text); - - // Remove all hashtag addresses - if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) { - $text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text); - } elseif ($simple_html == self::DIASPORA) { - // The ! is converted to @ since Diaspora only understands the @ - $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", - '@$3', - $text); - } elseif (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { - $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", - '$1$3', - $text); - } elseif (!$simple_html) { - $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", - '$1$3', - $text); - } - - // Bookmarks in red - will be converted to bookmarks in friendica - $text = preg_replace("/#\^\[url\](.*?)\[\/url\]/ism", '[bookmark=$1]$1[/bookmark]', $text); - $text = preg_replace("/#\^\[url\=(.*?)\](.*?)\[\/url\]/ism", '[bookmark=$1]$2[/bookmark]', $text); - $text = preg_replace("/#\[url\=.*?\]\^\[\/url\]\[url\=(.*?)\](.*?)\[\/url\]/i", - "[bookmark=$1]$2[/bookmark]", $text); - - if (in_array($simple_html, [self::API, self::OSTATUS, self::TWITTER])) { - $text = preg_replace_callback("/([^#@!])\[url\=([^\]]*)\](.*?)\[\/url\]/ism", "self::expandLinksCallback", $text); - //$Text = preg_replace("/[^#@!]\[url\=([^\]]*)\](.*?)\[\/url\]/ism", ' $2 [url]$1[/url]', $Text); - $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", ' $2 [url]$1[/url]',$text); - } - - // Perform URL Search - if ($try_oembed) { - $text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", $try_oembed_callback, $text); - } - - $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url=$1]$2[/url]', $text); - - // Handle Diaspora posts - $text = preg_replace_callback( - "&\[url=/?posts/([^\[\]]*)\](.*)\[\/url\]&Usi", - function ($match) { - return "[url=" . DI::baseUrl() . "/display/" . $match[1] . "]" . $match[2] . "[/url]"; - }, $text - ); - - $text = preg_replace_callback( - "&\[url=/people\?q\=(.*)\](.*)\[\/url\]&Usi", - function ($match) { - return "[url=" . DI::baseUrl() . "/search?search=%40" . $match[1] . "]" . $match[2] . "[/url]"; - }, $text - ); - - // Server independent link to posts and comments - // See issue: https://github.com/diaspora/diaspora_federation/issues/75 - $expression = "=diaspora://.*?/post/([0-9A-Za-z\-_@.:]{15,254}[0-9A-Za-z])=ism"; - $text = preg_replace($expression, DI::baseUrl()."/display/$1", $text); - - /* Tag conversion - * Supports: - * - #[url=][/url] - * - [url=]#[/url] - */ - $text = preg_replace_callback("/(?:#\[url\=[^\[\]]*\]|\[url\=[^\[\]]*\]#)(.*?)\[\/url\]/ism", function($matches) use ($simple_html) { - if ($simple_html == BBCode::ACTIVITYPUB) { - return '#' - . XML::escape($matches[1]) . ''; - } else { - return '#'; - } - }, $text); - - // We need no target="_blank" rel="noopener noreferrer" for local links - // convert links start with DI::baseUrl() as local link without the target="_blank" rel="noopener noreferrer" attribute - $escapedBaseUrl = preg_quote(DI::baseUrl(), '/'); - $text = preg_replace("/\[url\](".$escapedBaseUrl.".*?)\[\/url\]/ism", '$1', $text); - $text = preg_replace("/\[url\=(".$escapedBaseUrl.".*?)\](.*?)\[\/url\]/ism", '$2', $text); - - $text = preg_replace("/\[url\](.*?)\[\/url\]/ism", '$1', $text); - $text = preg_replace("/\[url\=(.*?)\](.*?)\[\/url\]/ism", '$2', $text); - - // Red compatibility, though the link can't be authenticated on Friendica - $text = preg_replace("/\[zrl\=(.*?)\](.*?)\[\/zrl\]/ism", '$2', $text); - - - // we may need to restrict this further if it picks up too many strays - // link acct:user@host to a webfinger profile redirector - - $text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $text); - - // Perform MAIL Search - $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '$1', $text); - $text = preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '$2', $text); - - // Unhide all [noparse] contained bbtags unspacefying them - // and triming the [noparse] tag. - - $text = preg_replace_callback("/\[noparse\](.*?)\[\/noparse\]/ism", 'self::unescapeNoparseCallback', $text); - $text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::unescapeNoparseCallback', $text); - $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::unescapeNoparseCallback', $text); - - /// @todo What is the meaning of these lines? - $text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text); - $text = preg_replace('/\&\#039\;/', '\'', $text); - - // Currently deactivated, it made problems with " inside of alt texts. - //$text = preg_replace('/\"\;/', '"', $text); - - // fix any escaped ampersands that may have been converted into links - $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text); - - // sanitizes src attributes (http and redir URLs for displaying in a web page, cid used for inline images in emails) - $allowed_src_protocols = ['//', 'http://', 'https://', 'redir/', 'cid:']; - - array_walk($allowed_src_protocols, function(&$value) { $value = preg_quote($value, '#');}); - - $text = preg_replace('#<([^>]*?)(src)="(?!' . implode('|', $allowed_src_protocols) . ')(.*?)"(.*?)>#ism', - '<$1$2=""$4 data-original-src="$3" class="invalid-src" title="' . DI::l10n()->t('Invalid source protocol') . '">', $text); - - // sanitize href attributes (only allowlisted protocols URLs) - // default value for backward compatibility - $allowed_link_protocols = DI::config()->get('system', 'allowed_link_protocols', []); - - // Always allowed protocol even if config isn't set or not including it - $allowed_link_protocols[] = '//'; - $allowed_link_protocols[] = 'http://'; - $allowed_link_protocols[] = 'https://'; - $allowed_link_protocols[] = 'redir/'; - - array_walk($allowed_link_protocols, function(&$value) { $value = preg_quote($value, '#');}); - - $regex = '#<([^>]*?)(href)="(?!' . implode('|', $allowed_link_protocols) . ')(.*?)"(.*?)>#ism'; - $text = preg_replace($regex, '<$1$2="javascript:void(0)"$4 data-original-href="$3" class="invalid-href" title="' . DI::l10n()->t('Invalid link protocol') . '">', $text); - - // Shared content - $text = self::convertShare( - $text, - function (array $attributes, array $author_contact, $content, $is_quote_share) use ($simple_html) { - return self::convertShareCallback($attributes, $author_contact, $content, $is_quote_share, $simple_html); - } - ); - - if ($saved_image) { - $text = self::interpolateSavedImagesIntoItemBody($text, $saved_image); - } - - // Restore code blocks - $text = preg_replace_callback('/#codeblock-([0-9]+)#/iU', - function ($matches) use ($codeblocks) { - $return = $matches[0]; - if (isset($codeblocks[intval($matches[1])])) { - $return = $codeblocks[$matches[1]]; + /// @todo Have a closer look at the different html modes + // Handle attached links or videos + if ($simple_html == self::ACTIVITYPUB) { + $text = self::removeAttachment($text); + } elseif (!in_array($simple_html, [self::INTERNAL, self::CONNECTORS])) { + $text = self::removeAttachment($text, true); + } else { + $text = self::convertAttachment($text, $simple_html, $try_oembed); } + + // leave open the posibility of [map=something] + // this is replaced in Item::prepareBody() which has knowledge of the item location + if (strpos($text, '[/map]') !== false) { + $text = preg_replace_callback( + "/\[map\](.*?)\[\/map\]/ism", + function ($match) use ($simple_html) { + return str_replace($match[0], '

    ' . Map::byLocation($match[1], $simple_html) . '

    ', $match[0]); + }, + $text + ); + } + + if (strpos($text, '[map=') !== false) { + $text = preg_replace_callback( + "/\[map=(.*?)\]/ism", + function ($match) use ($simple_html) { + return str_replace($match[0], '

    ' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '

    ', $match[0]); + }, + $text + ); + } + + if (strpos($text, '[map]') !== false) { + $text = preg_replace("/\[map\]/", '

    ', $text); + } + + // Check for headers + $text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '
    $1
    ', $text); + $text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '
    $1
    ', $text); + + // Check for paragraph + $text = preg_replace("(\[p\](.*?)\[\/p\])ism", '

    $1

    ', $text); + + // Check for bold text + $text = preg_replace("(\[b\](.*?)\[\/b\])ism", '$1', $text); + + // Check for Italics text + $text = preg_replace("(\[i\](.*?)\[\/i\])ism", '$1', $text); + + // Check for Underline text + $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '$1', $text); + + // Check for strike-through text + $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $text); + + // Check for over-line text + $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $text); + + // Check for colored text + $text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "$2", $text); + + // Check for sized text + // [size=50] --> font-size: 50px (with the unit). + if ($simple_html != self::DIASPORA) { + $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", "$2", $text); + $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); + } else { + // Issue 2199: Diaspora doesn't interpret the construct above, nor the or element + $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); + } + + + // Check for centered text + $text = preg_replace("(\[center\](.*?)\[\/center\])ism", "
    $1
    ", $text); + + // Check for list text + $text = str_replace("[*]", "
  • ", $text); + + // Check for style sheet commands + $text = preg_replace_callback( + "(\[style=(.*?)\](.*?)\[\/style\])ism", + function ($match) { + return "" . $match[2] . ""; + }, + $text + ); + + // Check for CSS classes + $text = preg_replace_callback( + "(\[class=(.*?)\](.*?)\[\/class\])ism", + function ($match) { + return "" . $match[2] . ""; + }, + $text + ); + + // handle nested lists + $endlessloop = 0; + + while ((((strpos($text, "[/list]") !== false) && (strpos($text, "[list") !== false)) || + ((strpos($text, "[/ol]") !== false) && (strpos($text, "[ol]") !== false)) || + ((strpos($text, "[/ul]") !== false) && (strpos($text, "[ul]") !== false)) || + ((strpos($text, "[/li]") !== false) && (strpos($text, "[li]") !== false))) && (++$endlessloop < 20)) { + $text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '
      $2
    ', $text); + $text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '
      $1
    ', $text); + $text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '
  • $1
  • ', $text); + } + + $text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1', $text); + $text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1', $text); + $text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1', $text); + $text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '$1
    ', $text); + + $text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '$1
    ', $text); + $text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '$1
    ', $text); + + $text = str_replace('[hr]', '
    ', $text); + + if (!$for_plaintext) { + $escaped = []; + + // Escaping BBCodes susceptible to contain rogue URL we don'' want the autolinker to catch + $text = preg_replace_callback('#\[(url|img|audio|video|youtube|vimeo|share|attachment|iframe|bookmark).+?\[/\1\]#ism', + function ($matches) use (&$escaped) { + $return = '{escaped-' . count($escaped) . '}'; + $escaped[] = $matches[0]; + + return $return; + }, + $text + ); + + // Autolinker for isolated URLs + $text = preg_replace(Strings::autoLinkRegEx(), '[url]$1[/url]', $text); + + // Restoring escaped blocks + $text = preg_replace_callback('/{escaped-([0-9]+)}/iU', + function ($matches) use ($escaped) { + return $escaped[intval($matches[1])] ?? $matches[0]; + }, + $text + ); + } + + // This is actually executed in Item::prepareBody() + + $nosmile = strpos($text, '[nosmile]') !== false; + $text = str_replace('[nosmile]', '', $text); + + // Check for font change text + $text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "$2", $text); + + // Declare the format for [spoiler] layout + $SpoilerLayout = '
    ' . DI::l10n()->t('Click to open/close') . '$1
    '; + + // Check for [spoiler] text + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler]") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", $SpoilerLayout, $text); + } + + // Check for [spoiler=Title] text + + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/spoiler]")!== false) && (strpos($text, "[spoiler=") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism", + '
    $1$2
    ', + $text); + } + + // Declare the format for [quote] layout + $QuoteLayout = '
    $1
    '; + + // Check for [quote] text + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote]") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $text); + } + + // Check for [quote=Author] text + + $t_wrote = DI::l10n()->t('$1 wrote:'); + + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/quote]")!== false) && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism", + "

    " . $t_wrote . "

    $2
    ", + $text); + } + + + // [img=widthxheight]image source[/img] + $text = preg_replace_callback( + "/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", + function ($matches) use ($simple_html) { + if (strpos($matches[3], "data:image/") === 0) { + return $matches[0]; + } + + $matches[3] = self::proxyUrl($matches[3], $simple_html); + return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]"; + }, + $text + ); + + $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '', $text); + $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '', $text); + + $text = preg_replace_callback("/\[img\=(.*?)\](.*?)\[\/img\]/ism", + function ($matches) use ($simple_html) { + $matches[1] = self::proxyUrl($matches[1], $simple_html); + $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT); + return '' . $matches[2] . ''; + }, + $text); + + // Images + // [img]pathtoimage[/img] + $text = preg_replace_callback( + "/\[img\](.*?)\[\/img\]/ism", + function ($matches) use ($simple_html) { + if (strpos($matches[1], "data:image/") === 0) { + return $matches[0]; + } + + $matches[1] = self::proxyUrl($matches[1], $simple_html); + return "[img]" . $matches[1] . "[/img]"; + }, + $text + ); + + $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); + $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); + + $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $Text); + + // Simplify "video" element + $text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text); + + // Try to Oembed + if ($try_oembed) { + $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", '', $text); + $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); + + $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); + } else { + $text = preg_replace("/\[video\](.*?)\[\/video\]/ism", + '$1', $text); + $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", + '$1', $text); + } + + // html5 video and audio + + + if ($try_oembed) { + $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '', $text); + } else { + $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $text); + } + + // Youtube extensions + if ($try_oembed) { + $text = preg_replace_callback("/\[youtube\](https?:\/\/www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[youtube\](www.youtube.com\/watch\?v\=.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[youtube\](https?:\/\/youtu.be\/.*?)\[\/youtube\]/ism", $try_oembed_callback, $text); + } + + $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/watch\?v\=(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); + $text = preg_replace("/\[youtube\]https?:\/\/www.youtube.com\/embed\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); + $text = preg_replace("/\[youtube\]https?:\/\/youtu.be\/(.*?)\[\/youtube\]/ism", '[youtube]$1[/youtube]', $text); + + if ($try_oembed) { + $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $text); + } else { + $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", + 'https://www.youtube.com/watch?v=$1', $text); + } + + if ($try_oembed) { + $text = preg_replace_callback("/\[vimeo\](https?:\/\/player.vimeo.com\/video\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[vimeo\](https?:\/\/vimeo.com\/[0-9]+).*?\[\/vimeo\]/ism", $try_oembed_callback, $text); + } + + $text = preg_replace("/\[vimeo\]https?:\/\/player.vimeo.com\/video\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); + $text = preg_replace("/\[vimeo\]https?:\/\/vimeo.com\/([0-9]+)(.*?)\[\/vimeo\]/ism", '[vimeo]$1[/vimeo]', $text); + + if ($try_oembed) { + $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $text); + } else { + $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", + 'https://vimeo.com/$1', $text); + } + + // oembed tag + $text = OEmbed::BBCode2HTML($text); + + // Avoid triple linefeeds through oembed + $text = str_replace("


    ", "

    ", $text); + + // If we found an event earlier, strip out all the event code and replace with a reformatted version. + // Replace the event-start section with the entire formatted event. The other bbcode is stripped. + // Summary (e.g. title) is required, earlier revisions only required description (in addition to + // start which is always required). Allow desc with a missing summary for compatibility. + + if ((!empty($ev['desc']) || !empty($ev['summary'])) && !empty($ev['start'])) { + $sub = Event::getHTML($ev, $simple_html); + + $text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text); + $text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text); + $text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $text); + $text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $text); + $text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $text); + $text = preg_replace("/\[event\-adjust\](.*?)\[\/event\-adjust\]/ism", '', $text); + $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text); + } + + // Replace non graphical smilies for external posts + if (!$nosmile && !$for_plaintext) { + $text = Smilies::replace($text); + } + + if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA)) { + $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text)); + // Emojis are always 4 byte Unicode characters + if (!empty($conv) && (strlen($conv) / mb_strlen($conv) == 4)) { + $text = '' . $text . ''; + } + } + + if (!$for_plaintext) { + if (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { + $text = preg_replace_callback("/\[url\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); + $text = preg_replace_callback("/\[url\=(.*?)\](.*?)\[\/url\]/ism", 'self::convertUrlForActivityPubCallback', $text); + } + } else { + $text = preg_replace("(\[url\](.*?)\[\/url\])ism", " $1 ", $text); + $text = preg_replace_callback("&\[url=([^\[\]]*)\]\[img\](.*)\[\/img\]\[\/url\]&Usi", 'self::removePictureLinksCallback', $text); + } + + $text = str_replace(["\r","\n"], ['
    ', '
    '], $text); + + // Remove all hashtag addresses + if ($simple_html && !in_array($simple_html, [self::DIASPORA, self::OSTATUS, self::ACTIVITYPUB])) { + $text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '$1$3', $text); + } elseif ($simple_html == self::DIASPORA) { + // The ! is converted to @ since Diaspora only understands the @ + $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", + '@$3', + $text); + } elseif (in_array($simple_html, [self::OSTATUS, self::ACTIVITYPUB])) { + $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", + '$1$3', + $text); + } elseif (!$simple_html) { + $text = preg_replace("/([@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", + '$1$3', + $text); + } + + // Bookmarks in red - will be converted to bookmarks in friendica + $text = preg_replace("/#\^\[url\](.*?)\[\/url\]/ism", '[bookmark=$1]$1[/bookmark]', $text); + $text = preg_replace("/#\^\[url\=(.*?)\](.*?)\[\/url\]/ism", '[bookmark=$1]$2[/bookmark]', $text); + $text = preg_replace("/#\[url\=.*?\]\^\[\/url\]\[url\=(.*?)\](.*?)\[\/url\]/i", + "[bookmark=$1]$2[/bookmark]", $text); + + if (in_array($simple_html, [self::API, self::OSTATUS, self::TWITTER])) { + $text = preg_replace_callback("/([^#@!])\[url\=([^\]]*)\](.*?)\[\/url\]/ism", "self::expandLinksCallback", $text); + //$Text = preg_replace("/[^#@!]\[url\=([^\]]*)\](.*?)\[\/url\]/ism", ' $2 [url]$1[/url]', $Text); + $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", ' $2 [url]$1[/url]',$text); + } + + // Perform URL Search + if ($try_oembed) { + $text = preg_replace_callback("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", $try_oembed_callback, $text); + } + + $text = preg_replace("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/ism", '[url=$1]$2[/url]', $text); + + // Handle Diaspora posts + $text = preg_replace_callback( + "&\[url=/?posts/([^\[\]]*)\](.*)\[\/url\]&Usi", + function ($match) { + return "[url=" . DI::baseUrl() . "/display/" . $match[1] . "]" . $match[2] . "[/url]"; + }, $text + ); + + $text = preg_replace_callback( + "&\[url=/people\?q\=(.*)\](.*)\[\/url\]&Usi", + function ($match) { + return "[url=" . DI::baseUrl() . "/search?search=%40" . $match[1] . "]" . $match[2] . "[/url]"; + }, $text + ); + + // Server independent link to posts and comments + // See issue: https://github.com/diaspora/diaspora_federation/issues/75 + $expression = "=diaspora://.*?/post/([0-9A-Za-z\-_@.:]{15,254}[0-9A-Za-z])=ism"; + $text = preg_replace($expression, DI::baseUrl()."/display/$1", $text); + + /* Tag conversion + * Supports: + * - #[url=][/url] + * - [url=]#[/url] + */ + $text = preg_replace_callback("/(?:#\[url\=[^\[\]]*\]|\[url\=[^\[\]]*\]#)(.*?)\[\/url\]/ism", function($matches) use ($simple_html) { + if ($simple_html == BBCode::ACTIVITYPUB) { + return '#' + . XML::escape($matches[1]) . ''; + } else { + return '#'; + } + }, $text); + + // We need no target="_blank" rel="noopener noreferrer" for local links + // convert links start with DI::baseUrl() as local link without the target="_blank" rel="noopener noreferrer" attribute + $escapedBaseUrl = preg_quote(DI::baseUrl(), '/'); + $text = preg_replace("/\[url\](".$escapedBaseUrl.".*?)\[\/url\]/ism", '$1', $text); + $text = preg_replace("/\[url\=(".$escapedBaseUrl.".*?)\](.*?)\[\/url\]/ism", '$2', $text); + + $text = preg_replace("/\[url\](.*?)\[\/url\]/ism", '$1', $text); + $text = preg_replace("/\[url\=(.*?)\](.*?)\[\/url\]/ism", '$2', $text); + + // Red compatibility, though the link can't be authenticated on Friendica + $text = preg_replace("/\[zrl\=(.*?)\](.*?)\[\/zrl\]/ism", '$2', $text); + + + // we may need to restrict this further if it picks up too many strays + // link acct:user@host to a webfinger profile redirector + + $text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $text); + + // Perform MAIL Search + $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '$1', $text); + $text = preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '$2', $text); + + /// @todo What is the meaning of these lines? + $text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text); + $text = preg_replace('/\&\#039\;/', '\'', $text); + + // Currently deactivated, it made problems with " inside of alt texts. + //$text = preg_replace('/\"\;/', '"', $text); + + // fix any escaped ampersands that may have been converted into links + $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text); + + // sanitizes src attributes (http and redir URLs for displaying in a web page, cid used for inline images in emails) + $allowed_src_protocols = ['//', 'http://', 'https://', 'redir/', 'cid:']; + + array_walk($allowed_src_protocols, function(&$value) { $value = preg_quote($value, '#');}); + + $text = preg_replace('#<([^>]*?)(src)="(?!' . implode('|', $allowed_src_protocols) . ')(.*?)"(.*?)>#ism', + '<$1$2=""$4 data-original-src="$3" class="invalid-src" title="' . DI::l10n()->t('Invalid source protocol') . '">', $text); + + // sanitize href attributes (only allowlisted protocols URLs) + // default value for backward compatibility + $allowed_link_protocols = DI::config()->get('system', 'allowed_link_protocols', []); + + // Always allowed protocol even if config isn't set or not including it + $allowed_link_protocols[] = '//'; + $allowed_link_protocols[] = 'http://'; + $allowed_link_protocols[] = 'https://'; + $allowed_link_protocols[] = 'redir/'; + + array_walk($allowed_link_protocols, function(&$value) { $value = preg_quote($value, '#');}); + + $regex = '#<([^>]*?)(href)="(?!' . implode('|', $allowed_link_protocols) . ')(.*?)"(.*?)>#ism'; + $text = preg_replace($regex, '<$1$2="javascript:void(0)"$4 data-original-href="$3" class="invalid-href" title="' . DI::l10n()->t('Invalid link protocol') . '">', $text); + + // Shared content + $text = self::convertShare( + $text, + function (array $attributes, array $author_contact, $content, $is_quote_share) use ($simple_html) { + return self::convertShareCallback($attributes, $author_contact, $content, $is_quote_share, $simple_html); + } + ); + + $text = self::interpolateSavedImagesIntoItemBody($text, $saved_image); + + return $text; + }); // Escaped noparse, nobb, pre + + // Remove escaping tags + $text = preg_replace("/\[noparse\](.*?)\[\/noparse\]/ism", '\1', $text); + $text = preg_replace("/\[nobb\](.*?)\[\/nobb\]/ism", '\1', $text); + $text = preg_replace("/\[pre\](.*?)\[\/pre\]/ism", '\1', $text); + + return $text; + }); // Escaped code + + $text = preg_replace_callback("#\[code(?:=([^\]]*))?\](.*?)\[\/code\]#ism", + function ($matches) { + if (strpos($matches[2], "\n") !== false) { + $return = '
    ' . htmlspecialchars(trim($matches[2], "\n\r"), ENT_NOQUOTES, 'UTF-8') . '
    '; + } else { + $return = '' . htmlspecialchars($matches[2], ENT_NOQUOTES, 'UTF-8') . ''; + } + return $return; }, $text @@ -2104,63 +2092,77 @@ class BBCode { $ret = []; - // Convert hashtag links to hashtags - $string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2 ', $string); + BBCode::performWithEscapedTags($string, ['noparse', 'pre', 'code'], function ($string) use (&$ret) { + // Convert hashtag links to hashtags + $string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2 ', $string); - // ignore anything in a code block - $string = preg_replace('/\[code.*?\].*?\[\/code\]/sm', '', $string); + // Force line feeds at bbtags + $string = str_replace(['[', ']'], ["\n[", "]\n"], $string); - // Force line feeds at bbtags - $string = str_replace(['[', ']'], ["\n[", "]\n"], $string); + // ignore anything in a bbtag + $string = preg_replace('/\[(.*?)\]/sm', '', $string); - // ignore anything in a bbtag - $string = preg_replace('/\[(.*?)\]/sm', '', $string); + // Match full names against @tags including the space between first and last + // We will look these up afterward to see if they are full names or not recognisable. - // Match full names against @tags including the space between first and last - // We will look these up afterward to see if they are full names or not recognisable. + if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) { + foreach ($matches[1] as $match) { + if (strstr($match, ']')) { + // we might be inside a bbcode color tag - leave it alone + continue; + } - if (preg_match_all('/(@[^ \x0D\x0A,:?]+ [^ \x0D\x0A@,:?]+)([ \x0D\x0A@,:?]|$)/', $string, $matches)) { - foreach ($matches[1] as $match) { - if (strstr($match, ']')) { - // we might be inside a bbcode color tag - leave it alone - continue; + if (substr($match, -1, 1) === '.') { + $ret[] = substr($match, 0, -1); + } else { + $ret[] = $match; + } } + } + + // Otherwise pull out single word tags. These can be @nickname, @first_last + // and #hash tags. + + if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?\']*[^\^ \x0D\x0A,;:?!\'.])/', $string, $matches)) { + foreach ($matches[1] as $match) { + if (strstr($match, ']')) { + // we might be inside a bbcode color tag - leave it alone + continue; + } + + // ignore strictly numeric tags like #1 + if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) { + continue; + } + + // try not to catch url fragments + if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) { + continue; + } - if (substr($match, -1, 1) === '.') { - $ret[] = substr($match, 0, -1); - } else { $ret[] = $match; } } - } + }); - // Otherwise pull out single word tags. These can be @nickname, @first_last - // and #hash tags. + return array_unique($ret); + } - if (preg_match_all('/([!#@][^\^ \x0D\x0A,;:?]+)([ \x0D\x0A,;:?]|$)/', $string, $matches)) { - foreach ($matches[1] as $match) { - if (strstr($match, ']')) { - // we might be inside a bbcode color tag - leave it alone - continue; - } + /** + * Perform a custom function on a text after having escaped blocks enclosed in the provided tag list. + * + * @param string $text + * @param array $tagList A list of tag names, e.g ['noparse', 'nobb', 'pre'] + * @param callable $callback + * @return string + * @throws Exception + *@see Strings::performWithEscapedBlocks + * + */ + public static function performWithEscapedTags(string $text, array $tagList, callable $callback) + { + $tagList = array_map('preg_quote', $tagList); - if (substr($match, -1, 1) === '.') { - $match = substr($match,0,-1); - } - - // ignore strictly numeric tags like #1 - if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) { - continue; - } - - // try not to catch url fragments - if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) { - continue; - } - $ret[] = $match; - } - } - - return $ret; + return Strings::performWithEscapedBlocks($text, '#\[(?:' . implode('|', $tagList) . ').*?\[/(?:' . implode('|', $tagList) . ')]#ism', $callback); } } diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index b35924b338..b69f5abc23 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -167,252 +167,238 @@ class HTML { $message = str_replace("\r", "", $message); - // Removing code blocks before the whitespace removal processing below - $codeblocks = []; + $message = Strings::performWithEscapedBlocks($message, '#
    #iUs', function ($message) { + $message = str_replace( + [ + "
  • ", + "

  • ", + ], + [ + "
  • ", + "
  • ", + ], + $message + ); + + // remove namespaces + $message = preg_replace('=<(\w+):(.+?)>=', '', $message); + $message = preg_replace('==', '', $message); + + $doc = new DOMDocument(); + $doc->preserveWhiteSpace = false; + + $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); + + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); + + XML::deleteNode($doc, 'style'); + XML::deleteNode($doc, 'head'); + XML::deleteNode($doc, 'title'); + XML::deleteNode($doc, 'meta'); + XML::deleteNode($doc, 'xml'); + XML::deleteNode($doc, 'removeme'); + + $xpath = new DomXPath($doc); + $list = $xpath->query("//pre"); + foreach ($list as $node) { + // Ensure to escape unescaped & - they will otherwise raise a warning + $safe_value = preg_replace('/&(?!\w+;)/', '&', $node->nodeValue); + $node->nodeValue = str_replace("\n", "\r", $safe_value); + } + + $message = $doc->saveHTML(); + $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
    ", " ", ""], $message); + $message = preg_replace('= [\s]*=i', " ", $message); + + @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); + + self::tagToBBCode($doc, 'html', [], "", ""); + self::tagToBBCode($doc, 'body', [], "", ""); + + // Outlook-Quote - Variant 1 + self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]'); + + // Outlook-Quote - Variant 2 + self::tagToBBCode( + $doc, + 'div', + ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'], + '[quote]', + '[/quote]' + ); + + // MyBB-Stuff + self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]'); + self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]'); + self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]'); + + /* self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[font=$1][size=$2][color=$3]', '[/color][/size][/font]'); + self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[size=$1][color=$2]', '[/color][/size]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(.+)/'), '[font=$1][size=$2]', '[/size][/font]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'color'=>'/(.+)/'), '[font=$1][color=$3]', '[/color][/font]'); + self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/'), '[font=$1]', '[/font]'); + self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]'); + self::node2BBCode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]'); + */ + // Untested + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]'); + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]'); + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]'); + + self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]'); + + //self::node2BBCode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]'); + //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); + // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica + // Test + //self::node2BBCode($doc, 'span', array('class'=>'/([\w ]+)/'), '[class=$1]', '[/class]'); + self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); + self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); + + self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]'); + self::tagToBBCode($doc, 'em', [], '[i]', '[/i]'); + self::tagToBBCode($doc, 'b', [], '[b]', '[/b]'); + self::tagToBBCode($doc, 'i', [], '[i]', '[/i]'); + self::tagToBBCode($doc, 'u', [], '[u]', '[/u]'); + self::tagToBBCode($doc, 's', [], '[s]', '[/s]'); + self::tagToBBCode($doc, 'del', [], '[s]', '[/s]'); + self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]'); + + self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]"); + self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]"); + + self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]'); + + self::tagToBBCode($doc, 'br', [], "\n", ''); + + self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", ""); + self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", ""); + + self::tagToBBCode($doc, 'span', [], "", ""); + + self::tagToBBCode($doc, 'span', [], "", ""); + self::tagToBBCode($doc, 'pre', [], "", ""); + + self::tagToBBCode($doc, 'div', [], "\r", "\r"); + self::tagToBBCode($doc, 'p', [], "\n", "\n"); + + self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]"); + self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]"); + self::tagToBBCode($doc, 'li', [], "[*]", ""); + + self::tagToBBCode($doc, 'hr', [], "[hr]", ""); + + self::tagToBBCode($doc, 'table', [], "[table]", "[/table]"); + self::tagToBBCode($doc, 'th', [], "[th]", "[/th]"); + self::tagToBBCode($doc, 'tr', [], "[tr]", "[/tr]"); + self::tagToBBCode($doc, 'td', [], "[td]", "[/td]"); + + self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]"); + self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]"); + self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]"); + self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]"); + self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]"); + self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]"); + + self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]'); + self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]'); + + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true); + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true); + self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true); + + + self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true); + self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true); + self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]', true); + + self::tagToBBCode($doc, 'key', [], '[code]', '[/code]'); + self::tagToBBCode($doc, 'code', [], '[code]', '[/code]'); + + $message = $doc->saveHTML(); + + // I'm removing something really disturbing + // Don't know exactly what it is + $message = str_replace(chr(194) . chr(160), ' ', $message); + + $message = str_replace(" ", " ", $message); + + // removing multiple DIVs + $message = preg_replace('=\r *\r=i', "\n", $message); + $message = str_replace("\r", "\n", $message); + + Hook::callAll('html2bbcode', $message); + + $message = strip_tags($message); + + $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8'); + + // remove quotes if they don't make sense + $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message); + + $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message); + $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message); + + do { + $oldmessage = $message; + $message = str_replace("\n \n", "\n\n", $message); + } while ($oldmessage != $message); + + do { + $oldmessage = $message; + $message = str_replace("\n\n\n", "\n\n", $message); + } while ($oldmessage != $message); + + do { + $oldmessage = $message; + $message = str_replace( + [ + "[/size]\n\n", + "\n[hr]", + "[hr]\n", + "\n[list", + "[/list]\n", + "\n[/", + "[list]\n", + "[list=1]\n", + "\n[*]"], + [ + "[/size]\n", + "[hr]", + "[hr]", + "[list", + "[/list]", + "[/", + "[list]", + "[list=1]", + "[*]"], + $message + ); + } while ($message != $oldmessage); + + $message = str_replace( + ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'], + ['[b]', '[/b]', '[i]', '[/i]'], + $message + ); + + // Handling Yahoo style of mails + $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message); + + return $message; + }); + $message = preg_replace_callback( '#
    (.*)
    #iUs', - function ($matches) use (&$codeblocks) { - $return = '[codeblock-' . count($codeblocks) . ']'; - + function ($matches) { $prefix = '[code]'; if ($matches[1] != '') { $prefix = '[code=' . $matches[1] . ']'; } - $codeblocks[] = $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; - return $return; - }, - $message - ); - - $message = str_replace( - [ - "
  • ", - "

  • ", - ], - [ - "
  • ", - "
  • ", - ], - $message - ); - - // remove namespaces - $message = preg_replace('=<(\w+):(.+?)>=', '', $message); - $message = preg_replace('==', '', $message); - - $doc = new DOMDocument(); - $doc->preserveWhiteSpace = false; - - $message = mb_convert_encoding($message, 'HTML-ENTITIES', "UTF-8"); - - @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); - - XML::deleteNode($doc, 'style'); - XML::deleteNode($doc, 'head'); - XML::deleteNode($doc, 'title'); - XML::deleteNode($doc, 'meta'); - XML::deleteNode($doc, 'xml'); - XML::deleteNode($doc, 'removeme'); - - $xpath = new DomXPath($doc); - $list = $xpath->query("//pre"); - foreach ($list as $node) { - // Ensure to escape unescaped & - they will otherwise raise a warning - $safe_value = preg_replace('/&(?!\w+;)/', '&', $node->nodeValue); - $node->nodeValue = str_replace("\n", "\r", $safe_value); - } - - $message = $doc->saveHTML(); - $message = str_replace(["\n<", ">\n", "\r", "\n", "\xC3\x82\xC2\xA0"], ["<", ">", "
    ", " ", ""], $message); - $message = preg_replace('= [\s]*=i', " ", $message); - - @$doc->loadHTML($message, LIBXML_HTML_NODEFDTD); - - self::tagToBBCode($doc, 'html', [], "", ""); - self::tagToBBCode($doc, 'body', [], "", ""); - - // Outlook-Quote - Variant 1 - self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal', 'style' => 'margin-left:35.4pt'], '[quote]', '[/quote]'); - - // Outlook-Quote - Variant 2 - self::tagToBBCode( - $doc, - 'div', - ['style' => 'border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'], - '[quote]', - '[/quote]' - ); - - // MyBB-Stuff - self::tagToBBCode($doc, 'span', ['style' => 'text-decoration: underline;'], '[u]', '[/u]'); - self::tagToBBCode($doc, 'span', ['style' => 'font-style: italic;'], '[i]', '[/i]'); - self::tagToBBCode($doc, 'span', ['style' => 'font-weight: bold;'], '[b]', '[/b]'); - - /* self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[font=$1][size=$2][color=$3]', '[/color][/size][/font]'); - self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/', 'color'=>'/(.+)/'), '[size=$1][color=$2]', '[/color][/size]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'size'=>'/(.+)/'), '[font=$1][size=$2]', '[/size][/font]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/', 'color'=>'/(.+)/'), '[font=$1][color=$3]', '[/color][/font]'); - self::node2BBCode($doc, 'font', array('face'=>'/([\w ]+)/'), '[font=$1]', '[/font]'); - self::node2BBCode($doc, 'font', array('size'=>'/(\d+)/'), '[size=$1]', '[/size]'); - self::node2BBCode($doc, 'font', array('color'=>'/(.+)/'), '[color=$1]', '[/color]'); - */ - // Untested - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*font-family:\s*(.+?)[,;].*color:\s*(.+?)[,;].*/'), '[size=$1][font=$2][color=$3]', '[/color][/font][/size]'); - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(\d+)[,;].*/'), '[size=$1]', '[/size]'); - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-size:\s*(.+?)[,;].*/'), '[size=$1]', '[/size]'); - - self::tagToBBCode($doc, 'span', ['style' => '/.*color:\s*(.+?)[,;].*/'], '[color="$1"]', '[/color]'); - - //self::node2BBCode($doc, 'span', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)pt.*/'), '[font=$1][size=$2]', '[/size][/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*font-size:\s*(\d+?)px.*/'), '[font=$1][size=$2]', '[/size][/font]'); - //self::node2BBCode($doc, 'div', array('style'=>'/.*font-family:\s*(.+?)[,;].*/'), '[font=$1]', '[/font]'); - // Importing the classes - interesting for importing of posts from third party networks that were exported from friendica - // Test - //self::node2BBCode($doc, 'span', array('class'=>'/([\w ]+)/'), '[class=$1]', '[/class]'); - self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); - self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); - - self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'em', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'b', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'i', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'u', [], '[u]', '[/u]'); - self::tagToBBCode($doc, 's', [], '[s]', '[/s]'); - self::tagToBBCode($doc, 'del', [], '[s]', '[/s]'); - self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]'); - - self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]"); - self::tagToBBCode($doc, 'small', [], "[size=small]", "[/size]"); - - self::tagToBBCode($doc, 'blockquote', [], '[quote]', '[/quote]'); - - self::tagToBBCode($doc, 'br', [], "\n", ''); - - self::tagToBBCode($doc, 'p', ['class' => 'MsoNormal'], "\n", ""); - self::tagToBBCode($doc, 'div', ['class' => 'MsoNormal'], "\r", ""); - - self::tagToBBCode($doc, 'span', [], "", ""); - - self::tagToBBCode($doc, 'span', [], "", ""); - self::tagToBBCode($doc, 'pre', [], "", ""); - - self::tagToBBCode($doc, 'div', [], "\r", "\r"); - self::tagToBBCode($doc, 'p', [], "\n", "\n"); - - self::tagToBBCode($doc, 'ul', [], "[list]", "[/list]"); - self::tagToBBCode($doc, 'ol', [], "[list=1]", "[/list]"); - self::tagToBBCode($doc, 'li', [], "[*]", ""); - - self::tagToBBCode($doc, 'hr', [], "[hr]", ""); - - self::tagToBBCode($doc, 'table', [], "[table]", "[/table]"); - self::tagToBBCode($doc, 'th', [], "[th]", "[/th]"); - self::tagToBBCode($doc, 'tr', [], "[tr]", "[/tr]"); - self::tagToBBCode($doc, 'td', [], "[td]", "[/td]"); - - self::tagToBBCode($doc, 'h1', [], "[h1]", "[/h1]"); - self::tagToBBCode($doc, 'h2', [], "[h2]", "[/h2]"); - self::tagToBBCode($doc, 'h3', [], "[h3]", "[/h3]"); - self::tagToBBCode($doc, 'h4', [], "[h4]", "[/h4]"); - self::tagToBBCode($doc, 'h5', [], "[h5]", "[/h5]"); - self::tagToBBCode($doc, 'h6', [], "[h6]", "[/h6]"); - - self::tagToBBCode($doc, 'a', ['href' => '/mailto:(.+)/'], '[mail=$1]', '[/mail]'); - self::tagToBBCode($doc, 'a', ['href' => '/(.+)/'], '[url=$1]', '[/url]'); - - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'alt' => '/(.+)/'], '[img=$1]$2', '[/img]', true); - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/', 'width' => '/(\d+)/', 'height' => '/(\d+)/'], '[img=$2x$3]$1', '[/img]', true); - self::tagToBBCode($doc, 'img', ['src' => '/(.+)/'], '[img]$1', '[/img]', true); - - - self::tagToBBCode($doc, 'video', ['src' => '/(.+)/'], '[video]$1', '[/video]', true); - self::tagToBBCode($doc, 'audio', ['src' => '/(.+)/'], '[audio]$1', '[/audio]', true); - self::tagToBBCode($doc, 'iframe', ['src' => '/(.+)/'], '[iframe]$1', '[/iframe]', true); - - self::tagToBBCode($doc, 'key', [], '[code]', '[/code]'); - self::tagToBBCode($doc, 'code', [], '[code]', '[/code]'); - - $message = $doc->saveHTML(); - - // I'm removing something really disturbing - // Don't know exactly what it is - $message = str_replace(chr(194) . chr(160), ' ', $message); - - $message = str_replace(" ", " ", $message); - - // removing multiple DIVs - $message = preg_replace('=\r *\r=i', "\n", $message); - $message = str_replace("\r", "\n", $message); - - Hook::callAll('html2bbcode', $message); - - $message = strip_tags($message); - - $message = html_entity_decode($message, ENT_QUOTES, 'UTF-8'); - - // remove quotes if they don't make sense - $message = preg_replace('=\[/quote\][\s]*\[quote\]=i', "\n", $message); - - $message = preg_replace('=\[quote\]\s*=i', "[quote]", $message); - $message = preg_replace('=\s*\[/quote\]=i', "[/quote]", $message); - - do { - $oldmessage = $message; - $message = str_replace("\n \n", "\n\n", $message); - } while ($oldmessage != $message); - - do { - $oldmessage = $message; - $message = str_replace("\n\n\n", "\n\n", $message); - } while ($oldmessage != $message); - - do { - $oldmessage = $message; - $message = str_replace( - [ - "[/size]\n\n", - "\n[hr]", - "[hr]\n", - "\n[list", - "[/list]\n", - "\n[/", - "[list]\n", - "[list=1]\n", - "\n[*]"], - [ - "[/size]\n", - "[hr]", - "[hr]", - "[list", - "[/list]", - "[/", - "[list]", - "[list=1]", - "[*]"], - $message - ); - } while ($message != $oldmessage); - - $message = str_replace( - ['[b][b]', '[/b][/b]', '[i][i]', '[/i][/i]'], - ['[b]', '[/b]', '[i]', '[/i]'], - $message - ); - - // Handling Yahoo style of mails - $message = str_replace('[hr][b]From:[/b]', '[quote][b]From:[/b]', $message); - - // Restore code blocks - $message = preg_replace_callback( - '#\[codeblock-([0-9]+)\]#iU', - function ($matches) use ($codeblocks) { - $return = ''; - if (isset($codeblocks[intval($matches[1])])) { - $return = $codeblocks[$matches[1]]; - } - return $return; + return $prefix . PHP_EOL . trim($matches[2]) . PHP_EOL . '[/code]'; }, $message ); diff --git a/src/Model/Item.php b/src/Model/Item.php index 7a965f80b5..0a242aa058 100644 --- a/src/Model/Item.php +++ b/src/Model/Item.php @@ -1780,7 +1780,7 @@ class Item // Check for hashtags in the body and repair or add hashtag links - self::setHashtags($item); + $item['body'] = self::setHashtags($item['body']); // Fill the cache field self::putInCache($item); @@ -2424,84 +2424,69 @@ class Item } } - public static function setHashtags(&$item) + public static function setHashtags($body) { - $tags = BBCode::getTags($item["body"]); + $body = BBCode::performWithEscapedTags($body, ['noparse', 'pre', 'code'], function ($body) { + $tags = BBCode::getTags($body); - // No hashtags? - if (!count($tags)) { - return false; - } - - // What happens in [code], stays in [code]! - // escape the # and the [ - // hint: we will also get in trouble with #tags, when we want markdown in posts -> ### Headline 3 - $item["body"] = preg_replace_callback("/\[code(.*?)\](.*?)\[\/code\]/ism", - function ($match) { - // we truly ESCape all # and [ to prevent gettin weird tags in [code] blocks - $find = ['#', '[']; - $replace = [chr(27).'sharp', chr(27).'leftsquarebracket']; - return ("[code" . $match[1] . "]" . str_replace($find, $replace, $match[2]) . "[/code]"); - }, $item["body"]); - - // This sorting is important when there are hashtags that are part of other hashtags - // Otherwise there could be problems with hashtags like #test and #test2 - // Because of this we are sorting from the longest to the shortest tag. - usort($tags, function($a, $b) { - return strlen($b) <=> strlen($a); - }); - - $URLSearchString = "^\[\]"; - - // All hashtags should point to the home server if "local_tags" is activated - if (DI::config()->get('system', 'local_tags')) { - $item["body"] = preg_replace("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", - "#[url=".DI::baseUrl()."/search?tag=$2]$2[/url]", $item["body"]); - } - - // mask hashtags inside of url, bookmarks and attachments to avoid urls in urls - $item["body"] = preg_replace_callback("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", - function ($match) { - return ("[url=" . str_replace("#", "#", $match[1]) . "]" . str_replace("#", "#", $match[2]) . "[/url]"); - }, $item["body"]); - - $item["body"] = preg_replace_callback("/\[bookmark\=([$URLSearchString]*)\](.*?)\[\/bookmark\]/ism", - function ($match) { - return ("[bookmark=" . str_replace("#", "#", $match[1]) . "]" . str_replace("#", "#", $match[2]) . "[/bookmark]"); - }, $item["body"]); - - $item["body"] = preg_replace_callback("/\[attachment (.*)\](.*?)\[\/attachment\]/ism", - function ($match) { - return ("[attachment " . str_replace("#", "#", $match[1]) . "]" . $match[2] . "[/attachment]"); - }, $item["body"]); - - // Repair recursive urls - $item["body"] = preg_replace("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", - "#$2", $item["body"]); - - foreach ($tags as $tag) { - if ((strpos($tag, '#') !== 0) || strpos($tag, '[url=') || strlen($tag) < 2 || $tag[1] == '#') { - continue; + // No hashtags? + if (!count($tags)) { + return $body; } - $basetag = str_replace('_',' ',substr($tag,1)); - $newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]'; + // This sorting is important when there are hashtags that are part of other hashtags + // Otherwise there could be problems with hashtags like #test and #test2 + // Because of this we are sorting from the longest to the shortest tag. + usort($tags, function ($a, $b) { + return strlen($b) <=> strlen($a); + }); - $item["body"] = str_replace($tag, $newtag, $item["body"]); - } + $URLSearchString = "^\[\]"; - // Convert back the masked hashtags - $item["body"] = str_replace("#", "#", $item["body"]); + // All hashtags should point to the home server if "local_tags" is activated + if (DI::config()->get('system', 'local_tags')) { + $body = preg_replace("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", + "#[url=" . DI::baseUrl() . "/search?tag=$2]$2[/url]", $body); + } - // Remember! What happens in [code], stays in [code] - // roleback the # and [ - $item["body"] = preg_replace_callback("/\[code(.*?)\](.*?)\[\/code\]/ism", - function ($match) { - // we truly unESCape all sharp and leftsquarebracket - $find = [chr(27).'sharp', chr(27).'leftsquarebracket']; - $replace = ['#', '[']; - return ("[code" . $match[1] . "]" . str_replace($find, $replace, $match[2]) . "[/code]"); - }, $item["body"]); + // mask hashtags inside of url, bookmarks and attachments to avoid urls in urls + $body = preg_replace_callback("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", + function ($match) { + return ("[url=" . str_replace("#", "#", $match[1]) . "]" . str_replace("#", "#", $match[2]) . "[/url]"); + }, $body); + + $body = preg_replace_callback("/\[bookmark\=([$URLSearchString]*)\](.*?)\[\/bookmark\]/ism", + function ($match) { + return ("[bookmark=" . str_replace("#", "#", $match[1]) . "]" . str_replace("#", "#", $match[2]) . "[/bookmark]"); + }, $body); + + $body = preg_replace_callback("/\[attachment (.*)\](.*?)\[\/attachment\]/ism", + function ($match) { + return ("[attachment " . str_replace("#", "#", $match[1]) . "]" . $match[2] . "[/attachment]"); + }, $body); + + // Repair recursive urls + $body = preg_replace("/#\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", + "#$2", $body); + + foreach ($tags as $tag) { + if ((strpos($tag, '#') !== 0) || strpos($tag, '[url=') || strlen($tag) < 2 || $tag[1] == '#') { + continue; + } + + $basetag = str_replace('_', ' ', substr($tag, 1)); + $newtag = '#[url=' . DI::baseUrl() . '/search?tag=' . $basetag . ']' . $basetag . '[/url]'; + + $body = str_replace($tag, $newtag, $body); + } + + // Convert back the masked hashtags + $body = str_replace("#", "#", $body); + + return $body; + }); + + return $body; } /** diff --git a/src/Model/Notify.php b/src/Model/Notify.php index fe1497316f..9ebf5c23b3 100644 --- a/src/Model/Notify.php +++ b/src/Model/Notify.php @@ -70,7 +70,7 @@ class Notify extends BaseModel private function setNameCache() { try { - $this->name_cache = strip_tags(BBCode::convert($this->source_name ?? '')); + $this->name_cache = strip_tags(BBCode::convert($this->source_name)); } catch (InternalServerErrorException $e) { } } diff --git a/src/Module/Debug/Babel.php b/src/Module/Debug/Babel.php index ecad73f4dc..5d688d6579 100644 --- a/src/Module/Debug/Babel.php +++ b/src/Module/Debug/Babel.php @@ -102,14 +102,12 @@ class Babel extends BaseModule 'content' => visible_whitespace($bbcode4) ]; - $item = ['body' => $bbcode]; - $tags = Text\BBCode::getTags($bbcode); - Item::setHashtags($item); + $body = Item::setHashtags($bbcode); $results[] = [ 'title' => DI::l10n()->t('Item Body'), - 'content' => visible_whitespace($item['body']) + 'content' => visible_whitespace($body) ]; $results[] = [ 'title' => DI::l10n()->t('Item Tags'), @@ -125,9 +123,7 @@ class Babel extends BaseModule $markdown = XML::unescape($diaspora); case 'markdown': - if (!isset($markdown)) { - $markdown = trim($_REQUEST['text']); - } + $markdown = $markdown ?? trim($_REQUEST['text']); $results[] = [ 'title' => DI::l10n()->t('Source input (Markdown)'), diff --git a/src/Util/Strings.php b/src/Util/Strings.php index 04d676ef57..35e7ebe151 100644 --- a/src/Util/Strings.php +++ b/src/Util/Strings.php @@ -472,4 +472,52 @@ class Strings return mb_substr($string, 0, $start) . $replacement . mb_substr($string, $start + $length, $string_length - $start - $length); } + + /** + * Perform a custom function on a text after having escaped blocks matched by the provided regular expressions. + * Only full matches are used, capturing group are ignored. + * + * To change the provided text, the callback function needs to return it and this function will return the modified + * version as well after having restored the escaped blocks. + * + * @param string $text + * @param string $regex + * @param callable $callback + * @return string + * @throws \Exception + */ + public static function performWithEscapedBlocks(string $text, string $regex, callable $callback) + { + // Enables nested use + $executionId = random_int(PHP_INT_MAX / 10, PHP_INT_MAX); + + $blocks = []; + + $text = preg_replace_callback($regex, + function ($matches) use ($executionId, &$blocks) { + $return = '«block-' . $executionId . '-' . count($blocks) . '»'; + + $blocks[] = $matches[0]; + + return $return; + }, + $text + ); + + $text = $callback($text) ?? ''; + + // Restore code blocks + $text = preg_replace_callback('/«block-' . $executionId . '-([0-9]+)»/iU', + function ($matches) use ($blocks) { + $return = $matches[0]; + if (isset($blocks[intval($matches[1])])) { + $return = $blocks[$matches[1]]; + } + return $return; + }, + $text + ); + + return $text; + } } diff --git a/tests/include/ApiTest.php b/tests/include/ApiTest.php index d6ce7576fe..a65283b2f1 100644 --- a/tests/include/ApiTest.php +++ b/tests/include/ApiTest.php @@ -3852,7 +3852,7 @@ class ApiTest extends DatabaseTest $assertXml=<< - + XML; $this->assertXmlStringEqualsXmlString($assertXml, $result); diff --git a/tests/src/Util/StringsTest.php b/tests/src/Util/StringsTest.php index 66fb2f0f53..611d72fe7c 100644 --- a/tests/src/Util/StringsTest.php +++ b/tests/src/Util/StringsTest.php @@ -194,4 +194,30 @@ class StringsTest extends TestCase ) ); } + + public function testPerformWithEscapedBlocks() + { + $originalText = '[noparse][/noparse][nobb]nobb[/nobb][noparse]noparse[/noparse]'; + + $text = Strings::performWithEscapedBlocks($originalText, '#[(?:noparse|nobb)].*?\[/(?:noparse|nobb)]#is', function ($text) { + return $text; + }); + + $this->assertEquals($originalText, $text); + } + + public function testPerformWithEscapedBlocksNested() + { + $originalText = '[noparse][/noparse][nobb]nobb[/nobb][noparse]noparse[/noparse]'; + + $text = Strings::performWithEscapedBlocks($originalText, '#[nobb].*?\[/nobb]#is', function ($text) { + $text = Strings::performWithEscapedBlocks($text, '#[noparse].*?\[/noparse]#is', function ($text) { + return $text; + }); + + return $text; + }); + + $this->assertEquals($originalText, $text); + } }