Merge pull request #7167 from MrPetovan/bug/7150-unescape-xml-entities

Add Unicode support to autolink regular expression
2019-05-20 06:36:59 +02:00 · 2019-05-20 06:36:59 +02:00 · cbca26d185
parent 6f0c6e8926 c21a227d0a
commit cbca26d185
3 changed files with 16 additions and 11 deletions
--- a/src/Module/Debug/Babel.php
+++ b/src/Module/Debug/Babel.php
@@ -7,6 +7,7 @@ use Friendica\Content\Text;
 use Friendica\Core\L10n;
 use Friendica\Core\Renderer;
 use Friendica\Model\Item;
+use Friendica\Util\XML;
 /**
 * Translates input text into different formats (HTML, BBCode, Markdown)
@@ -98,10 +99,10 @@ class Babel extends BaseModule
 					$markdown = trim($_REQUEST['text']);
 					$results[] = [
 						'title'   => L10n::t('Source input (Diaspora format)'),
-						'content' => '<pre>' . $markdown . '</pre>'
+						'content' => '<pre>' . htmlspecialchars($markdown) . '</pre>'
 					];
-					$html = Text\Markdown::convert($markdown);
+					$html = Text\Markdown::convert(html_entity_decode($markdown,ENT_COMPAT, 'UTF-8'));
 					$results[] = [
 						'title'   => L10n::t('Markdown::convert (raw HTML)'),
 						'content' => visible_whitespace(htmlspecialchars($html))
@@ -112,7 +113,7 @@ class Babel extends BaseModule
 						'content' => $html
 					];
-					$bbcode = Text\Markdown::toBBCode($markdown);
+					$bbcode = Text\Markdown::toBBCode(XML::unescape($markdown));
 					$results[] = [
 						'title'   => L10n::t('Markdown::toBBCode'),
 						'content' => '<pre>' . $bbcode . '</pre>'
--- a/src/Util/Strings.php
+++ b/src/Util/Strings.php
@@ -355,25 +355,25 @@ class Strings
 	 */
 	public static function autoLinkRegEx()
 	{
-		return '@(?xi)
+		return '@
 (?<![=\'\]"/])          # Not preceded by [, =, \', ], ", /
 \b
 (                              # Capture 1: entire matched URL
  https?://                            # http or https protocol
  (?:
-    [^/\s`!()\[\]{};:\'",<>?«»“”‘’.]    # Domain can\'t start with a . 
+    [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’.]    # Domain can\'t start with a . 
-    [^/\s`!()\[\]{};:\'",<>?«»“”‘’]+    # Domain can\'t end with a .
+    [^/\s\xA0`!()\[\]{};:\'",<>?«»“”‘’]+    # Domain can\'t end with a .
    \.
-    [^/\s`!()\[\]{};:\'".,<>?«»“”‘’]+/? # Followed by a slash
+    [^/\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]+/? # Followed by a slash
  )
  (?:                                  # One or more:
-    [^\s()<>]+                         # Run of non-space, non-()<>
+    [^\s\xA0()<>]+                         # Run of non-space, non-()<>
    |                                  #   or
-    \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
+    \(([^\s\xA0()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
    |                                  #   or
-    [^\s`!()\[\]{};:\'".,<>?«»“”‘’]    # not a space or one of these punct chars
+    [^\s\xA0`!()\[\]{};:\'".,<>?«»“”‘’]    # not a space or one of these punct chars
  )*
-)@';
+)@xiu';
 	}
 	/**
--- a/tests/src/Content/Text/BBCodeTest.php
+++ b/tests/src/Content/Text/BBCodeTest.php
@@ -109,6 +109,10 @@ class BBCodeTest extends MockedTest
 				'data' => "http://example.com<ul>",
 				'assertHTML' => false
 			],
+			'bug-7150' => [
+				'data' => html_entity_decode('http://example.com&nbsp;', ENT_QUOTES, 'UTF-8'),
+				'assertHTML' => false
+			],
 		];
 	}