Merge pull request #7167 from MrPetovan/bug/7150-unescape-xml-entities

Add Unicode support to autolink regular expression
This commit is contained in:
Michael Vogel 2019-05-20 06:36:59 +02:00 committed by GitHub
commit cbca26d185
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 11 deletions

View File

@ -7,6 +7,7 @@ use Friendica\Content\Text;
use Friendica\Core\L10n; use Friendica\Core\L10n;
use Friendica\Core\Renderer; use Friendica\Core\Renderer;
use Friendica\Model\Item; use Friendica\Model\Item;
use Friendica\Util\XML;
/** /**
* Translates input text into different formats (HTML, BBCode, Markdown) * Translates input text into different formats (HTML, BBCode, Markdown)
@ -98,10 +99,10 @@ class Babel extends BaseModule
$markdown = trim($_REQUEST['text']); $markdown = trim($_REQUEST['text']);
$results[] = [ $results[] = [
'title' => L10n::t('Source input (Diaspora format)'), 'title' => L10n::t('Source input (Diaspora format)'),
'content' => '<pre>' . $markdown . '</pre>' 'content' => '<pre>' . htmlspecialchars($markdown) . '</pre>'
]; ];
$html = Text\Markdown::convert($markdown); $html = Text\Markdown::convert(html_entity_decode($markdown,ENT_COMPAT, 'UTF-8'));
$results[] = [ $results[] = [
'title' => L10n::t('Markdown::convert (raw HTML)'), 'title' => L10n::t('Markdown::convert (raw HTML)'),
'content' => visible_whitespace(htmlspecialchars($html)) 'content' => visible_whitespace(htmlspecialchars($html))
@ -112,7 +113,7 @@ class Babel extends BaseModule
'content' => $html 'content' => $html
]; ];
$bbcode = Text\Markdown::toBBCode($markdown); $bbcode = Text\Markdown::toBBCode(XML::unescape($markdown));
$results[] = [ $results[] = [
'title' => L10n::t('Markdown::toBBCode'), 'title' => L10n::t('Markdown::toBBCode'),
'content' => '<pre>' . $bbcode . '</pre>' 'content' => '<pre>' . $bbcode . '</pre>'

View File

@ -355,25 +355,25 @@ class Strings
*/ */
public static function autoLinkRegEx()
{
	// Returns a PCRE pattern (delimiters and modifiers included) whose first
	// capture group is a bare http/https URL found in free text.
	//
	// Modifiers:
	//  - x: whitespace and "# ..." comments inside the pattern are ignored
	//  - i: matching is case-insensitive
	//  - u: pattern and subject are handled as UTF-8, so \xA0 denotes the
	//       non-breaking space code point U+00A0 and not a raw byte
	//
	// Every character class that excludes whitespace also excludes \xA0
	// explicitly, including the innermost nested-parentheses class, so a
	// non-breaking space always terminates the URL.
	//
	// The delimiter character must not appear inside the pattern, not even in
	// its embedded comments, or the pattern would be cut short.
	return '@
(?<![=\'\]"/])          # Not preceded by =, \', ], ", /
\b
(                              # Capture 1: entire matched URL
  https?://                            # http or https protocol
  (?:
    [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’.]    # Domain can\'t start with a .
    [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’]+    # Rest of the domain, dots allowed
    \.
    [^/\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]+/? # Last label without dots, optional slash
  )
  (?:                                  # Zero or more:
    [^\s\xA0()<>]+                         # Run of non-space, non-()<>
    |                                  #   or
    \(([^\s\xA0()<>]+|(\([^\s\xA0()<>]+\)))*\) # balanced parens, up to 2 levels
    |                                  #   or
    [^\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]    # not a space or one of these punct chars
  )*
)@xiu';
}
/** /**

View File

@ -109,6 +109,10 @@ class BBCodeTest extends MockedTest
'data' => "http://example.com<ul>", 'data' => "http://example.com<ul>",
'assertHTML' => false 'assertHTML' => false
], ],
'bug-7150' => [
'data' => html_entity_decode('http://example.com&nbsp;', ENT_QUOTES, 'UTF-8'),
'assertHTML' => false
],
]; ];
} }