From aca4b2eaaeb0e1485b7145f73ca46cf8e667cc77 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 27 Mar 2023 06:40:22 +0000 Subject: [PATCH 01/12] Added support for "Tumblr Neue Post Format" --- src/Content/Text/NPF.php | 465 +++++++++++++++++++++++++++++++++++++++ src/Model/Post/Media.php | 11 + 2 files changed, 476 insertions(+) create mode 100644 src/Content/Text/NPF.php diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php new file mode 100644 index 0000000000..608cfc2e37 --- /dev/null +++ b/src/Content/Text/NPF.php @@ -0,0 +1,465 @@ +. + * + */ + +namespace Friendica\Content\Text; + +use DOMDocument; +use DOMElement; +use DOMNode; +use Friendica\Model\Photo; +use Friendica\Model\Post; + +/** + * Tumblr Neue Post Format + * @see https://www.tumblr.com/docs/npf + */ +class NPF +{ + static public function fromBBCode(string $bbcode, int $uri_id): array + { + $npf = []; + + $bbcode = self::prepareBody($bbcode); + + $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS); + if (empty($html)) { + return []; + } + + $doc = new DOMDocument(); + if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) { + return []; + } + + $node = $doc->getElementsByTagName('body')->item(0); + foreach ($node->childNodes as $child) { + $npf = self::routeElements($child, $uri_id, $npf); + } + + return self::addLinkBlock($uri_id, $npf); + } + + public static function prepareBody(string $body): string + { + $shared = BBCode::fetchShareAttributes($body); + if (!empty($shared)) { + $body = $shared['shared']; + } + + $body = BBCode::removeAttachment($body); + + $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body); + + if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) { + foreach ($pictures as $picture) { + if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) { + $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body); + } + } + } + + $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body); + + if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) { + foreach ($pictures as $picture) { + if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) { + $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body); + } + } + } + + $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body); + $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body); + $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body); + + do { + $oldbody = $body; + $body = str_replace(["\n\n\n"], ["\n\n"], $body); + } while ($oldbody != $body); + + return trim($body); + } + + static private function routeElements(DOMElement|DOMNode $child, int $uri_id, array $npf): array + { + switch ($child->tagName ?? '') { + case 'blockquote': + $npf = self::addTextBlock($child, $uri_id, $npf, 'indented'); + break; + + case 'h1': + $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1'); + break; + + case 'h2': + $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1'); + break; + + case 'h3': + $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1'); + break; + + case 'h4': + $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2'); + break; + + case 'h5': + $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2'); + break; + + case 'h6': + $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2'); + break; + + case 'ul': + $npf = self::addListBlock($child, $uri_id, $npf, false, 0); + break; + + case 'ol': + $npf = self::addListBlock($child, $uri_id, $npf, true, 0); + break; + + case 'hr': + case 'br': + break; + + case 'pre': + case 'code': + $npf = self::addTextBlock($child, $uri_id, $npf, 'indented'); + break; + + case 'a': + $npf = self::addMediaBlock($child, $uri_id, $npf); + break; + + case 'table': + // $child->ownerDocument->saveHTML($child) + break; + + case 'img': + $npf = self::addImageBlock($child, $uri_id, $npf); + break; + + default: + $npf = self::addTextBlock($child, $uri_id, $npf); + break; + } + return $npf; + } + + static private function addImageBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array + { + $attributes = []; + foreach ($child->attributes as $key => $attribute) { + $attributes[$key] = $attribute->value; + } + if (empty($attributes['src'])) { + return $npf; + } + + $entry = [ + 'type' => 'image', + 'media' => [], + ]; + + if (!empty($attributes['alt'])) { + $entry['alt_text'] = $attributes['alt']; + } + + if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) { + $entry['caption'] = $attributes['title']; + } + + $rid = Photo::ridFromURI($attributes['src']); + if (!empty($rid)) { + $photos = Photo::selectToArray([], ['resource-id' => $rid]); + foreach ($photos as $photo) { + $entry['media'][] = [ + 'type' => $photo['type'], + 'url' => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']), + 'width' => $photo['width'], + 'height' => $photo['height'], + ]; + } + if (empty($attributes['alt']) && !empty($photos[0]['desc'])) { + $entry['alt_text'] = $photos[0]['desc']; + } + } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) { + $entry['media'][] = [ + 'type' => $media['mimetype'], + 'url' => $media['url'], + 'width' => $media['width'], + 'height' => $media['height'], + ]; + if (empty($attributes['alt']) && !empty($media['description'])) { + $entry['alt_text'] = $media['description']; + } + } else { + $entry['media'][] = ['url' => $attributes['src']]; + } + + $npf[] = $entry; + + return $npf; + } + + static private function addMediaBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array + { + $attributes = []; + foreach ($child->attributes as $key => $attribute) { + $attributes[$key] = $attribute->value; + } + if (empty($attributes['href'])) { + return $npf; + } + + $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]); + if (!empty($media)) { + switch ($media['type']) { + case Post\Media::AUDIO: + $entry = [ + 'type' => 'audio', + 'media' => [ + 'type' => $media['mimetype'], + 'url' => $media['url'], + ] + ]; + + if (!empty($media['name'])) { + $entry['title'] = $media['name']; + } elseif (!empty($media['description'])) { + $entry['title'] = $media['description']; + } + + $npf[] = self::addPoster($media, $entry); + break; + + case Post\Media::VIDEO: + $entry = [ + 'type' => 'video', + 'media' => [ + 'type' => $media['mimetype'], + 'url' => $media['url'], + ] + ]; + + $npf[] = self::addPoster($media, $entry); + break; + } + } else { + $npf[] = [ + 'type' => 'text', + 'text' => $child->textContent, + 'formatting' => [ + 'start' => 0, + 'end' => strlen($child->textContent), + 'type' => 'link', + 'url' => $attributes['href'] + ] + ]; + } + return $npf; + } + + static private function addPoster(array $media, array $entry): array + { + $poster = []; + if (!empty($media['preview'])) { + $poster['url'] = $media['preview']; + } + if (!empty($media['preview-width'])) { + $poster['width'] = $media['preview-width']; + } + if (!empty($media['preview-height'])) { + $poster['height'] = $media['preview-height']; + } + if (!empty($poster)) { + $entry['poster'] = $poster; + } + return $entry; + } + + static private function fetchText(DOMElement|DOMNode $child, array $text = ['text' => '', 'formatting' => []]): array + { + foreach ($child->childNodes as $node) { + $start = strlen($text['text']); + + switch ($node->nodeName) { + case 'b': + case 'strong': + $type = 'bold'; + break; + + case 'i': + case 'em': + $type = 'italic'; + break; + + case 's': + $type = 'strikethrough'; + break; + + default: + $type = ''; + break; + } + if ($node->nodeName == 'br') { + $text['text'] .= "\n"; + } else { + $text['text'] .= $node->textContent; + } + if (!empty($type)) { + $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type]; + } + } + return $text; + } + + static private function addTextBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, string $subtype = ''): array + { + if (empty($subtype) && (($child->childElementCount) ?? 0 == 1) && ($child->textContent == $child->firstChild->textContent)) { + return self::routeElements($child->firstChild, $uri_id, $npf); + } + + $element = ['type' => 'text']; + + if (!empty($subtype)) { + $element['subtype'] = $subtype; + } + + $text = self::fetchText($child); + + $element['text'] = $text['text']; + $element['formatting'] = $text['formatting']; + + if (empty($subtype)) { + switch ($child->tagName ?? '') { + case 'b': + case 'strong': + $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'bold']; + break; + + case 'i': + case 'em': + $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'italic']; + break; + + case 's': + $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'strikethrough']; + break; + + case 'span': + case 'p': + case 'div': + case 'details'; + case '': + break; + default: + print_r($element); + die($child->tagName . "\n"); + break; + } + } + + if (empty($element['formatting'])) { + unset($element['formatting']); + } + + $npf[] = $element; + + return $npf; + } + + static private function addListBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, bool $ordered, int $level): array + { + foreach ($child->childNodes as $node) { + switch ($node->nodeName) { + case 'ul': + $npf = self::addListBlock($node, $uri_id, $npf, false, $level++); + case 'ol': + $npf = self::addListBlock($node, $uri_id, $npf, true, $level++); + case 'li': + $text = self::fetchText($node); + + $entry = [ + 'type' => 'text', + 'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item', + 'text' => $text['text'] + ]; + if ($level > 0) { + $entry['indent_level'] = $level; + } + if (!empty($text['formatting'])) { + $entry['formatting'] = $text['formatting']; + } + $npf[] = $entry; + } + } + + return $npf; + } + + static private function addLinkBlock(int $uri_id, array $npf): array + { + foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) { + $host = parse_url($link['url'], PHP_URL_HOST); + if (in_array($host, ['www.youtube.com', 'youtu.be'])) { + $entry = [ + 'type' => 'video', + 'provider' => 'youtube', + 'url' => $link['url'], + ]; + } elseif (in_array($host, ['vimeo.com'])) { + $entry = [ + 'type' => 'video', + 'provider' => 'vimeo', + 'url' => $link['url'], + ]; + } elseif (in_array($host, ['open.spotify.com'])) { + $entry = [ + 'type' => 'audio', + 'provider' => 'spotify', + 'url' => $link['url'], + ]; + } else { + $entry = [ + 'type' => 'link', + 'url' => $link['url'], + ]; + if (!empty($link['name'])) { + $entry['title'] = $link['name']; + } + if (!empty($link['description'])) { + $entry['description'] = $link['description']; + } + if (!empty($link['author-name'])) { + $entry['author'] = $link['author-name']; + } + if (!empty($link['publisher-name'])) { + $entry['site_name'] = $link['publisher-name']; + } + } + + $npf[] = self::addPoster($link, $entry); + } + return $npf; + } +} diff --git a/src/Model/Post/Media.php b/src/Model/Post/Media.php index 0a9557bc64..e845138367 100644 --- a/src/Model/Post/Media.php +++ b/src/Model/Post/Media.php @@ -757,6 +757,17 @@ class Media return DBA::selectToArray('post-media', [], $condition, ['order' => ['id']]); } + public static function getByURL(int $uri_id, string $url, array $types = []) + { + $condition = ["`uri-id` = ? AND `url` = ? AND `type` != ?", $uri_id, $url, self::UNKNOWN]; + + if (!empty($types)) { + $condition = DBA::mergeConditions($condition, ['type' => $types]); + } + + return DBA::selectFirst('post-media', [], $condition); + } + /** * Retrieves the media attachment with the provided media id. * From 459a7099ca16b16c01644d5f8666dd83ae1002a7 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 28 Mar 2023 05:25:33 +0000 Subject: [PATCH 02/12] Improved NPF --- src/Content/Text/NPF.php | 96 ++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 54 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index 608cfc2e37..19e6a0937b 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -23,7 +23,6 @@ namespace Friendica\Content\Text; use DOMDocument; use DOMElement; -use DOMNode; use Friendica\Model\Photo; use Friendica\Model\Post; @@ -51,7 +50,14 @@ class NPF $node = $doc->getElementsByTagName('body')->item(0); foreach ($node->childNodes as $child) { - $npf = self::routeElements($child, $uri_id, $npf); + if ($child->nodeName == '#text') { + $npf[] = [ + 'type' => 'text', + 'text' => $child->textContent, + ]; + } else { + $npf = self::routeElements($child, $uri_id, $npf); + } } return self::addLinkBlock($uri_id, $npf); @@ -98,9 +104,9 @@ class NPF return trim($body); } - static private function routeElements(DOMElement|DOMNode $child, int $uri_id, array $npf): array + static private function routeElements(DOMElement $child, int $uri_id, array $npf): array { - switch ($child->tagName ?? '') { + switch ($child->nodeName) { case 'blockquote': $npf = self::addTextBlock($child, $uri_id, $npf, 'indented'); break; @@ -151,6 +157,7 @@ class NPF break; case 'table': + // Unsupported // $child->ownerDocument->saveHTML($child) break; @@ -165,7 +172,7 @@ class NPF return $npf; } - static private function addImageBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array + static private function addImageBlock(DOMElement $child, int $uri_id, array $npf): array { $attributes = []; foreach ($child->attributes as $key => $attribute) { @@ -221,7 +228,7 @@ class NPF return $npf; } - static private function addMediaBlock(DOMElement|DOMNode $child, int $uri_id, array $npf): array + static private function addMediaBlock(DOMElement $child, int $uri_id, array $npf): array { $attributes = []; foreach ($child->attributes as $key => $attribute) { @@ -297,34 +304,37 @@ class NPF return $entry; } - static private function fetchText(DOMElement|DOMNode $child, array $text = ['text' => '', 'formatting' => []]): array + static private function getTypeForNodeName(string $nodename): string + { + switch ($nodename) { + case 'b': + case 'strong': + return 'bold'; + + case 'i': + case 'em': + return 'italic'; + + case 's': + return 'strikethrough'; + } + return ''; + } + + static private function fetchText(DOMElement $child, array $text = ['text' => '', 'formatting' => []]): array { foreach ($child->childNodes as $node) { $start = strlen($text['text']); - switch ($node->nodeName) { - case 'b': - case 'strong': - $type = 'bold'; - break; + $type = self::getTypeForNodeName($node->nodeName); - case 'i': - case 'em': - $type = 'italic'; - break; - - case 's': - $type = 'strikethrough'; - break; - - default: - $type = ''; - break; - } if ($node->nodeName == 'br') { $text['text'] .= "\n"; - } else { + } elseif (($type != '') || in_array($node->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) { $text['text'] .= $node->textContent; + } else { + echo $child->ownerDocument->saveHTML($child) . "\n"; + die($node->nodeName . "\n"); } if (!empty($type)) { $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type]; @@ -333,9 +343,9 @@ class NPF return $text; } - static private function addTextBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, string $subtype = ''): array + static private function addTextBlock(DOMElement $child, int $uri_id, array $npf, string $subtype = ''): array { - if (empty($subtype) && (($child->childElementCount) ?? 0 == 1) && ($child->textContent == $child->firstChild->textContent)) { + if (empty($subtype) && ($child->textContent == $child->firstChild->textContent) && ($child->firstChild->nodeName != '#text')) { return self::routeElements($child->firstChild, $uri_id, $npf); } @@ -351,31 +361,9 @@ class NPF $element['formatting'] = $text['formatting']; if (empty($subtype)) { - switch ($child->tagName ?? '') { - case 'b': - case 'strong': - $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'bold']; - break; - - case 'i': - case 'em': - $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'italic']; - break; - - case 's': - $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => 'strikethrough']; - break; - - case 'span': - case 'p': - case 'div': - case 'details'; - case '': - break; - default: - print_r($element); - die($child->tagName . "\n"); - break; + $type = self::getTypeForNodeName($child->nodeName); + if (!empty($type)) { + $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => $type]; } } @@ -388,7 +376,7 @@ class NPF return $npf; } - static private function addListBlock(DOMElement|DOMNode $child, int $uri_id, array $npf, bool $ordered, int $level): array + static private function addListBlock(DOMElement $child, int $uri_id, array $npf, bool $ordered, int $level): array { foreach ($child->childNodes as $node) { switch ($node->nodeName) { From 5f683df711eb925fd7f5fca678b3f3e2c3166926 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 29 Mar 2023 14:54:43 +0000 Subject: [PATCH 03/12] Nested blocks --- src/Content/Text/NPF.php | 356 +++++++++++++++++++++++++++------------ 1 file changed, 246 insertions(+), 110 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index 19e6a0937b..71e836c667 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -48,22 +48,13 @@ class NPF return []; } - $node = $doc->getElementsByTagName('body')->item(0); - foreach ($node->childNodes as $child) { - if ($child->nodeName == '#text') { - $npf[] = [ - 'type' => 'text', - 'text' => $child->textContent, - ]; - } else { - $npf = self::routeElements($child, $uri_id, $npf); - } - } + $element = $doc->getElementsByTagName('body')->item(0); + $npf = self::routeChildren($element, $uri_id, 0, $npf); - return self::addLinkBlock($uri_id, $npf); + return self::addLinkBlock($uri_id, 0, $npf); } - public static function prepareBody(string $body): string + static private function prepareBody(string $body): string { $shared = BBCode::fetchShareAttributes($body); if (!empty($shared)) { @@ -104,43 +95,154 @@ class NPF return trim($body); } - static private function routeElements(DOMElement $child, int $uri_id, array $npf): array + static private function routeChildren(DOMElement $element, int $uri_id, int $level, array $npf): array { - switch ($child->nodeName) { + $text = ''; + $formatting = []; + + foreach ($element->childNodes as $child) { + switch ($child->nodeName) { + case 'blockquote': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addQuoteBlock($child, $uri_id, $level, $npf); + break; + + case 'h1': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1'); + break; + + case 'h2': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1'); + break; + + case 'h3': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1'); + break; + + case 'h4': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2'); + break; + + case 'h5': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2'); + break; + + case 'h6': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2'); + break; + + case 'ul': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addListBlock($child, $uri_id, $level, $npf, false, 0); + break; + + case 'ol': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addListBlock($child, $uri_id, $level, $npf, true, 0); + break; + + case 'hr': + case 'br': + $text .= "\n"; + break; + + case 'pre': + case 'code': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'indented'); + break; + + case 'a': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addMediaBlock($child, $uri_id, $level, $npf); + break; + + case 'table': + // Unsupported + // $child->ownerDocument->saveHTML($child) + break; + + case 'img': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addImageBlock($child, $uri_id, $level, $npf); + break; + + case 'p': + case 'div': + $npf = self::addText($text, $formatting, $npf); + $npf = self::addTextBlock($child, $uri_id, $level, $npf); + break; + + default: + $text .= $child->textContent; + break; + } + } + return $npf; + } + + static private function addText(string $text, array $formatting, array $npf): array + { + if (empty($text)) { + return $npf; + } + $block = [ + 'type' => 'text', + 'text' => $text, + ]; + + if (!empty($formatting)) { + $block['formatting'] = $formatting; + } + + $npf[] = $block; + + return $npf; + } + + static private function routeElement(DOMElement $element, int $uri_id, int $level, array $npf): array + { + switch ($element->nodeName) { case 'blockquote': - $npf = self::addTextBlock($child, $uri_id, $npf, 'indented'); + $npf = self::addQuoteBlock($element, $uri_id, $level, $npf); break; case 'h1': - $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1'); + $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1'); break; case 'h2': - $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1'); + $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1'); break; case 'h3': - $npf = self::addTextBlock($child, $uri_id, $npf, 'heading1'); + $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1'); break; case 'h4': - $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2'); + $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2'); break; case 'h5': - $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2'); + $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2'); break; case 'h6': - $npf = self::addTextBlock($child, $uri_id, $npf, 'heading2'); + $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2'); break; case 'ul': - $npf = self::addListBlock($child, $uri_id, $npf, false, 0); + $npf = self::addListBlock($element, $uri_id, $level, $npf, false, 0); break; case 'ol': - $npf = self::addListBlock($child, $uri_id, $npf, true, 0); + $npf = self::addListBlock($element, $uri_id, $level, $npf, true, 0); break; case 'hr': @@ -149,57 +251,57 @@ class NPF case 'pre': case 'code': - $npf = self::addTextBlock($child, $uri_id, $npf, 'indented'); + $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'indented'); break; case 'a': - $npf = self::addMediaBlock($child, $uri_id, $npf); + $npf = self::addMediaBlock($element, $uri_id, $level, $npf); break; case 'table': // Unsupported - // $child->ownerDocument->saveHTML($child) + // $element->ownerDocument->saveHTML($element) break; case 'img': - $npf = self::addImageBlock($child, $uri_id, $npf); + $npf = self::addImageBlock($element, $uri_id, $level, $npf); break; default: - $npf = self::addTextBlock($child, $uri_id, $npf); + $npf = self::addTextBlock($element, $uri_id, $level, $npf); break; } return $npf; } - static private function addImageBlock(DOMElement $child, int $uri_id, array $npf): array + static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { $attributes = []; - foreach ($child->attributes as $key => $attribute) { + foreach ($element->attributes as $key => $attribute) { $attributes[$key] = $attribute->value; } if (empty($attributes['src'])) { return $npf; } - $entry = [ + $block = [ 'type' => 'image', 'media' => [], ]; if (!empty($attributes['alt'])) { - $entry['alt_text'] = $attributes['alt']; + $block['alt_text'] = $attributes['alt']; } if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) { - $entry['caption'] = $attributes['title']; + $block['caption'] = $attributes['title']; } $rid = Photo::ridFromURI($attributes['src']); if (!empty($rid)) { $photos = Photo::selectToArray([], ['resource-id' => $rid]); foreach ($photos as $photo) { - $entry['media'][] = [ + $block['media'][] = [ 'type' => $photo['type'], 'url' => str_replace('-0.', '-' . $photo['scale'] . '.', $attributes['src']), 'width' => $photo['width'], @@ -207,31 +309,35 @@ class NPF ]; } if (empty($attributes['alt']) && !empty($photos[0]['desc'])) { - $entry['alt_text'] = $photos[0]['desc']; + $block['alt_text'] = $photos[0]['desc']; } } elseif ($media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::IMAGE])) { - $entry['media'][] = [ + $block['media'][] = [ 'type' => $media['mimetype'], 'url' => $media['url'], 'width' => $media['width'], 'height' => $media['height'], ]; if (empty($attributes['alt']) && !empty($media['description'])) { - $entry['alt_text'] = $media['description']; + $block['alt_text'] = $media['description']; } } else { - $entry['media'][] = ['url' => $attributes['src']]; + $block['media'][] = ['url' => $attributes['src']]; } - $npf[] = $entry; + if ($level > 0) { + $block['indent_level'] = $level; + } + + $npf[] = $block; return $npf; } - static private function addMediaBlock(DOMElement $child, int $uri_id, array $npf): array + static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { $attributes = []; - foreach ($child->attributes as $key => $attribute) { + foreach ($element->attributes as $key => $attribute) { $attributes[$key] = $attribute->value; } if (empty($attributes['href'])) { @@ -242,7 +348,7 @@ class NPF if (!empty($media)) { switch ($media['type']) { case Post\Media::AUDIO: - $entry = [ + $block = [ 'type' => 'audio', 'media' => [ 'type' => $media['mimetype'], @@ -251,16 +357,16 @@ class NPF ]; if (!empty($media['name'])) { - $entry['title'] = $media['name']; + $block['title'] = $media['name']; } elseif (!empty($media['description'])) { - $entry['title'] = $media['description']; + $block['title'] = $media['description']; } - $npf[] = self::addPoster($media, $entry); + $block = self::addPoster($media, $block); break; case Post\Media::VIDEO: - $entry = [ + $block = [ 'type' => 'video', 'media' => [ 'type' => $media['mimetype'], @@ -268,25 +374,32 @@ class NPF ] ]; - $npf[] = self::addPoster($media, $entry); + $block = self::addPoster($media, $block); break; } } else { - $npf[] = [ + $block = [ 'type' => 'text', - 'text' => $child->textContent, + 'text' => $element->textContent, 'formatting' => [ 'start' => 0, - 'end' => strlen($child->textContent), + 'end' => strlen($element->textContent), 'type' => 'link', 'url' => $attributes['href'] ] ]; } + + if ($level > 0) { + $block['indent_level'] = $level; + } + + $npf[] = $block; + return $npf; } - static private function addPoster(array $media, array $entry): array + static private function addPoster(array $media, array $block): array { $poster = []; if (!empty($media['preview'])) { @@ -299,9 +412,9 @@ class NPF $poster['height'] = $media['preview-height']; } if (!empty($poster)) { - $entry['poster'] = $poster; + $block['poster'] = $poster; } - return $entry; + return $block; } static private function getTypeForNodeName(string $nodename): string @@ -321,20 +434,20 @@ class NPF return ''; } - static private function fetchText(DOMElement $child, array $text = ['text' => '', 'formatting' => []]): array + static private function fetchText(DOMElement $element, array $text = ['text' => '', 'formatting' => []]): array { - foreach ($child->childNodes as $node) { + foreach ($element->childNodes as $child) { $start = strlen($text['text']); - $type = self::getTypeForNodeName($node->nodeName); + $type = self::getTypeForNodeName($child->nodeName); - if ($node->nodeName == 'br') { + if ($child->nodeName == 'br') { $text['text'] .= "\n"; - } elseif (($type != '') || in_array($node->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) { - $text['text'] .= $node->textContent; + } elseif (($type != '') || in_array($child->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) { + $text['text'] .= $child->textContent; } else { - echo $child->ownerDocument->saveHTML($child) . "\n"; - die($node->nodeName . "\n"); + echo $element->ownerDocument->saveHTML($element) . "\n"; + die($child->nodeName . "\n"); } if (!empty($type)) { $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type]; @@ -343,110 +456,133 @@ class NPF return $text; } - static private function addTextBlock(DOMElement $child, int $uri_id, array $npf, string $subtype = ''): array + static private function addQuoteBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { - if (empty($subtype) && ($child->textContent == $child->firstChild->textContent) && ($child->firstChild->nodeName != '#text')) { - return self::routeElements($child->firstChild, $uri_id, $npf); + $block = ['type' => 'text', 'subtype' => 'indented']; + + if ($level > 0) { + $block['indent_level'] = $level; } - $element = ['type' => 'text']; + $npf[] = $block; - if (!empty($subtype)) { - $element['subtype'] = $subtype; - } - - $text = self::fetchText($child); - - $element['text'] = $text['text']; - $element['formatting'] = $text['formatting']; - - if (empty($subtype)) { - $type = self::getTypeForNodeName($child->nodeName); - if (!empty($type)) { - $element['formatting'][] = ['start' => 0, 'end' => strlen($element['text']), 'type' => $type]; - } - } - - if (empty($element['formatting'])) { - unset($element['formatting']); - } - - $npf[] = $element; + $npf = self::routeChildren($element, $uri_id, 0, $npf); return $npf; } - static private function addListBlock(DOMElement $child, int $uri_id, array $npf, bool $ordered, int $level): array + static private function addTextBlock(DOMElement $element, int $uri_id, int $level, array $npf, string $subtype = ''): array { - foreach ($child->childNodes as $node) { - switch ($node->nodeName) { - case 'ul': - $npf = self::addListBlock($node, $uri_id, $npf, false, $level++); - case 'ol': - $npf = self::addListBlock($node, $uri_id, $npf, true, $level++); - case 'li': - $text = self::fetchText($node); + if (empty($subtype) && ($element->textContent == $element->firstChild->textContent) && ($element->firstChild->nodeName != '#text')) { + return self::routeElement($element->firstChild, $uri_id, $level, $npf); + } - $entry = [ + $block = ['type' => 'text']; + + if (!empty($subtype)) { + $block['subtype'] = $subtype; + } + + $text = self::fetchText($element); + + $block['text'] = $text['text']; + $block['formatting'] = $text['formatting']; + + if (empty($subtype)) { + $type = self::getTypeForNodeName($element->nodeName); + if (!empty($type)) { + $block['formatting'][] = ['start' => 0, 'end' => strlen($block['text']), 'type' => $type]; + } + } + + if (empty($block['formatting'])) { + unset($block['formatting']); + } + + if ($level > 0) { + $block['indent_level'] = $level; + } + + $npf[] = $block; + + return $npf; + } + + static private function addListBlock(DOMElement $element, int $uri_id, int $level, array $npf, bool $ordered): array + { + foreach ($element->childNodes as $child) { + switch ($child->nodeName) { + case 'ul': + $npf = self::addListBlock($child, $uri_id, $level++, $npf, false); + case 'ol': + $npf = self::addListBlock($child, $uri_id, $level++, $npf, true); + case 'li': + $text = self::fetchText($child); + + $block = [ 'type' => 'text', 'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item', 'text' => $text['text'] ]; if ($level > 0) { - $entry['indent_level'] = $level; + $block['indent_level'] = $level; } if (!empty($text['formatting'])) { - $entry['formatting'] = $text['formatting']; + $block['formatting'] = $text['formatting']; } - $npf[] = $entry; + $npf[] = $block; } } return $npf; } - static private function addLinkBlock(int $uri_id, array $npf): array + static private function addLinkBlock(int $uri_id, int $level, array $npf): array { foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) { $host = parse_url($link['url'], PHP_URL_HOST); if (in_array($host, ['www.youtube.com', 'youtu.be'])) { - $entry = [ + $block = [ 'type' => 'video', 'provider' => 'youtube', 'url' => $link['url'], ]; } elseif (in_array($host, ['vimeo.com'])) { - $entry = [ + $block = [ 'type' => 'video', 'provider' => 'vimeo', 'url' => $link['url'], ]; } elseif (in_array($host, ['open.spotify.com'])) { - $entry = [ + $block = [ 'type' => 'audio', 'provider' => 'spotify', 'url' => $link['url'], ]; } else { - $entry = [ + $block = [ 'type' => 'link', 'url' => $link['url'], ]; if (!empty($link['name'])) { - $entry['title'] = $link['name']; + $block['title'] = $link['name']; } if (!empty($link['description'])) { - $entry['description'] = $link['description']; + $block['description'] = $link['description']; } if (!empty($link['author-name'])) { - $entry['author'] = $link['author-name']; + $block['author'] = $link['author-name']; } if (!empty($link['publisher-name'])) { - $entry['site_name'] = $link['publisher-name']; + $block['site_name'] = $link['publisher-name']; } } - $npf[] = self::addPoster($link, $entry); + if ($level > 0) { + $block['indent_level'] = $level; + } + + $npf[] = self::addPoster($link, $block); } return $npf; } From 70092a1aff9e820d8504d6f39e5f54ea88730ca9 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 1 Apr 2023 21:51:30 +0000 Subject: [PATCH 04/12] Reworked parser --- src/Content/Text/NPF.php | 564 +++++++++++++++++---------------------- 1 file changed, 238 insertions(+), 326 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index 71e836c667..8441ce646c 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -34,8 +34,6 @@ class NPF { static public function fromBBCode(string $bbcode, int $uri_id): array { - $npf = []; - $bbcode = self::prepareBody($bbcode); $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS); @@ -44,14 +42,21 @@ class NPF } $doc = new DOMDocument(); + $doc->formatOutput = true; if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) { return []; } $element = $doc->getElementsByTagName('body')->item(0); - $npf = self::routeChildren($element, $uri_id, 0, $npf); + echo $element->ownerDocument->saveHTML($element) . "\n"; - return self::addLinkBlock($uri_id, 0, $npf); + $npf = []; + $text = ''; + $formatting = []; + + self::routeChildren($element, $uri_id, true, [], $npf, $text, $formatting); + + return self::addLinkBlockForUriId($uri_id, 0, $npf); } static private function prepareBody(string $body): string @@ -95,181 +100,276 @@ class NPF return trim($body); } - static private function routeChildren(DOMElement $element, int $uri_id, int $level, array $npf): array + static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array &$npf, string &$text, array &$formatting) { - $text = ''; - $formatting = []; + if ($parse_structure && $text) { + self::addBlock($text, $formatting, $npf, $callstack); + } + + $callstack[] = $element->nodeName; + $level = self::getLevelByCallstack($callstack); foreach ($element->childNodes as $child) { switch ($child->nodeName) { - case 'blockquote': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addQuoteBlock($child, $uri_id, $level, $npf); + case 'b': + case 'strong': + self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting); break; - case 'h1': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1'); + case 'i': + case 'em': + self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting); break; - case 'h2': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1'); + case 's': + self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting); break; - - case 'h3': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1'); + + case 'u': + case 'span': + self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting); break; - - case 'h4': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2'); - break; - - case 'h5': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2'); - break; - - case 'h6': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2'); - break; - - case 'ul': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addListBlock($child, $uri_id, $level, $npf, false, 0); - break; - - case 'ol': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addListBlock($child, $uri_id, $level, $npf, true, 0); - break; - + case 'hr': case 'br': - $text .= "\n"; + if (!empty($text)) { + $text .= "\n"; + } break; - - case 'pre': - case 'code': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'indented'); + + case '#text': + $text .= $child->textContent; break; - - case 'a': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addMediaBlock($child, $uri_id, $level, $npf); - break; - + case 'table': - // Unsupported - // $child->ownerDocument->saveHTML($child) + case 'summary': + // Ignore tables and spoilers break; - + + case 'a': + if ($text) { + self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting); + } else { + $npf = self::addLinkBlock($child, $uri_id, $level, $npf); + } + break; + case 'img': - $npf = self::addText($text, $formatting, $npf); $npf = self::addImageBlock($child, $uri_id, $level, $npf); break; - case 'p': + case 'ol': case 'div': - $npf = self::addText($text, $formatting, $npf); - $npf = self::addTextBlock($child, $uri_id, $level, $npf); + case 'h1': + case 'h2': + case 'h3': + case 'h4': + case 'h5': + case 'h6': + case 'blockquote': + case 'p': + case 'pre': + case 'code': + case 'ul': + case 'li': + case 'details': + self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting); break; default: - $text .= $child->textContent; + print_r($npf); + print_r($callstack); + die($child . "\n"); + } + } + + if ($parse_structure && $text) { + self::addBlock($text, $formatting, $npf, $callstack); + } + } + + static private function getLevelByCallstack($callstack): int + { + $level = 0; + foreach ($callstack as $entry) { + if (in_array($entry, ['ol', 'ul', 'blockquote'])) { + ++$level; + } + } + return max(0, $level - 1); + } + + static private function getSubTypeByCallstack($callstack): string + { + $subtype = ''; + foreach ($callstack as $entry) { + switch ($entry) { + case 'ol': + $subtype = 'ordered-list-item'; + break; + + case 'ul': + $subtype = 'unordered-list-item'; + break; + + case 'h1': + $subtype = 'heading1'; + break; + + case 'h2': + $subtype = 'heading1'; + break; + + case 'h3': + $subtype = 'heading1'; + break; + + case 'h4': + $subtype = 'heading2'; + break; + + case 'h5': + $subtype = 'heading2'; + break; + + case 'h6': + $subtype = 'heading2'; + break; + + case 'blockquote': + case 'pre': + case 'code': + $subtype = 'indented'; break; } } - return $npf; + return $subtype; } - static private function addText(string $text, array $formatting, array $npf): array + static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array &$npf, string &$text, array &$formatting) { - if (empty($text)) { - return $npf; + $start = mb_strlen($text); + self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); + + if (!empty($type)) { + $formatting[] = [ + 'start' => $start, + 'end' => mb_strlen($text), + 'type' => $type + ]; } + } + + static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array &$npf, string &$text, array &$formatting) + { + $start = mb_strlen($text); + self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); + + $attributes = []; + foreach ($element->attributes as $key => $attribute) { + $attributes[$key] = trim($attribute->value); + } + if (!empty($attributes['href'])) { + $formatting[] = [ + 'start' => $start, + 'end' => mb_strlen($text), + 'type' => 'link', + 'url' => $attributes['href'] + ]; + } + } + + static private function addBlock(string &$text, array &$formatting, array &$npf, array $callstack) + { $block = [ - 'type' => 'text', - 'text' => $text, + 'callstack' => $callstack, + 'type' => 'text', + 'text' => $text, ]; if (!empty($formatting)) { $block['formatting'] = $formatting; } - $npf[] = $block; + $level = self::getLevelByCallstack($callstack); + if ($level > 0) { + $block['indent_level'] = $level; + } - return $npf; + $subtype = self::getSubTypeByCallstack($callstack); + if ($subtype) { + $block['subtype'] = $subtype; + } + + $npf[] = $block; + $text = ''; + $formatting = []; } - static private function routeElement(DOMElement $element, int $uri_id, int $level, array $npf): array + static private function addPoster(array $media, array $block): array { - switch ($element->nodeName) { - case 'blockquote': - $npf = self::addQuoteBlock($element, $uri_id, $level, $npf); - break; + $poster = []; + if (!empty($media['preview'])) { + $poster['url'] = $media['preview']; + } + if (!empty($media['preview-width'])) { + $poster['width'] = $media['preview-width']; + } + if (!empty($media['preview-height'])) { + $poster['height'] = $media['preview-height']; + } + if (!empty($poster)) { + $block['poster'] = $poster; + } + return $block; + } - case 'h1': - $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1'); - break; + static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array + { + foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) { + $host = parse_url($link['url'], PHP_URL_HOST); + if (in_array($host, ['www.youtube.com', 'youtu.be'])) { + $block = [ + 'type' => 'video', + 'provider' => 'youtube', + 'url' => $link['url'], + ]; + } elseif (in_array($host, ['vimeo.com'])) { + $block = [ + 'type' => 'video', + 'provider' => 'vimeo', + 'url' => $link['url'], + ]; + } elseif (in_array($host, ['open.spotify.com'])) { + $block = [ + 'type' => 'audio', + 'provider' => 'spotify', + 'url' => $link['url'], + ]; + } else { + $block = [ + 'type' => 'link', + 'url' => $link['url'], + ]; + if (!empty($link['name'])) { + $block['title'] = $link['name']; + } + if (!empty($link['description'])) { + $block['description'] = $link['description']; + } + if (!empty($link['author-name'])) { + $block['author'] = $link['author-name']; + } + if (!empty($link['publisher-name'])) { + $block['site_name'] = $link['publisher-name']; + } + } - case 'h2': - $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1'); - break; + if ($level > 0) { + $block['indent_level'] = $level; + } - case 'h3': - $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1'); - break; - - case 'h4': - $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2'); - break; - - case 'h5': - $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2'); - break; - - case 'h6': - $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2'); - break; - - case 'ul': - $npf = self::addListBlock($element, $uri_id, $level, $npf, false, 0); - break; - - case 'ol': - $npf = self::addListBlock($element, $uri_id, $level, $npf, true, 0); - break; - - case 'hr': - case 'br': - break; - - case 'pre': - case 'code': - $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'indented'); - break; - - case 'a': - $npf = self::addMediaBlock($element, $uri_id, $level, $npf); - break; - - case 'table': - // Unsupported - // $element->ownerDocument->saveHTML($element) - break; - - case 'img': - $npf = self::addImageBlock($element, $uri_id, $level, $npf); - break; - - default: - $npf = self::addTextBlock($element, $uri_id, $level, $npf); - break; + $npf[] = self::addPoster($link, $block); } return $npf; } @@ -278,7 +378,7 @@ class NPF { $attributes = []; foreach ($element->attributes as $key => $attribute) { - $attributes[$key] = $attribute->value; + $attributes[$key] = trim($attribute->value); } if (empty($attributes['src'])) { return $npf; @@ -293,7 +393,7 @@ class NPF $block['alt_text'] = $attributes['alt']; } - if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) { + if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) { $block['caption'] = $attributes['title']; } @@ -334,11 +434,11 @@ class NPF return $npf; } - static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array + static private function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { $attributes = []; foreach ($element->attributes as $key => $attribute) { - $attributes[$key] = $attribute->value; + $attributes[$key] = trim($attribute->value); } if (empty($attributes['href'])) { return $npf; @@ -398,192 +498,4 @@ class NPF return $npf; } - - static private function addPoster(array $media, array $block): array - { - $poster = []; - if (!empty($media['preview'])) { - $poster['url'] = $media['preview']; - } - if (!empty($media['preview-width'])) { - $poster['width'] = $media['preview-width']; - } - if (!empty($media['preview-height'])) { - $poster['height'] = $media['preview-height']; - } - if (!empty($poster)) { - $block['poster'] = $poster; - } - return $block; - } - - static private function getTypeForNodeName(string $nodename): string - { - switch ($nodename) { - case 'b': - case 'strong': - return 'bold'; - - case 'i': - case 'em': - return 'italic'; - - case 's': - return 'strikethrough'; - } - return ''; - } - - static private function fetchText(DOMElement $element, array $text = ['text' => '', 'formatting' => []]): array - { - foreach ($element->childNodes as $child) { - $start = strlen($text['text']); - - $type = self::getTypeForNodeName($child->nodeName); - - if ($child->nodeName == 'br') { - $text['text'] .= "\n"; - } elseif (($type != '') || in_array($child->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) { - $text['text'] .= $child->textContent; - } else { - echo $element->ownerDocument->saveHTML($element) . "\n"; - die($child->nodeName . "\n"); - } - if (!empty($type)) { - $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type]; - } - } - return $text; - } - - static private function addQuoteBlock(DOMElement $element, int $uri_id, int $level, array $npf): array - { - $block = ['type' => 'text', 'subtype' => 'indented']; - - if ($level > 0) { - $block['indent_level'] = $level; - } - - $npf[] = $block; - - $npf = self::routeChildren($element, $uri_id, 0, $npf); - - return $npf; - } - - static private function addTextBlock(DOMElement $element, int $uri_id, int $level, array $npf, string $subtype = ''): array - { - if (empty($subtype) && ($element->textContent == $element->firstChild->textContent) && ($element->firstChild->nodeName != '#text')) { - return self::routeElement($element->firstChild, $uri_id, $level, $npf); - } - - $block = ['type' => 'text']; - - if (!empty($subtype)) { - $block['subtype'] = $subtype; - } - - $text = self::fetchText($element); - - $block['text'] = $text['text']; - $block['formatting'] = $text['formatting']; - - if (empty($subtype)) { - $type = self::getTypeForNodeName($element->nodeName); - if (!empty($type)) { - $block['formatting'][] = ['start' => 0, 'end' => strlen($block['text']), 'type' => $type]; - } - } - - if (empty($block['formatting'])) { - unset($block['formatting']); - } - - if ($level > 0) { - $block['indent_level'] = $level; - } - - $npf[] = $block; - - return $npf; - } - - static private function addListBlock(DOMElement $element, int $uri_id, int $level, array $npf, bool $ordered): array - { - foreach ($element->childNodes as $child) { - switch ($child->nodeName) { - case 'ul': - $npf = self::addListBlock($child, $uri_id, $level++, $npf, false); - case 'ol': - $npf = self::addListBlock($child, $uri_id, $level++, $npf, true); - case 'li': - $text = self::fetchText($child); - - $block = [ - 'type' => 'text', - 'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item', - 'text' => $text['text'] - ]; - if ($level > 0) { - $block['indent_level'] = $level; - } - if (!empty($text['formatting'])) { - $block['formatting'] = $text['formatting']; - } - $npf[] = $block; - } - } - - return $npf; - } - - static private function addLinkBlock(int $uri_id, int $level, array $npf): array - { - foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) { - $host = parse_url($link['url'], PHP_URL_HOST); - if (in_array($host, ['www.youtube.com', 'youtu.be'])) { - $block = [ - 'type' => 'video', - 'provider' => 'youtube', - 'url' => $link['url'], - ]; - } elseif (in_array($host, ['vimeo.com'])) { - $block = [ - 'type' => 'video', - 'provider' => 'vimeo', - 'url' => $link['url'], - ]; - } elseif (in_array($host, ['open.spotify.com'])) { - $block = [ - 'type' => 'audio', - 'provider' => 'spotify', - 'url' => $link['url'], - ]; - } else { - $block = [ - 'type' => 'link', - 'url' => $link['url'], - ]; - if (!empty($link['name'])) { - $block['title'] = $link['name']; - } - if (!empty($link['description'])) { - $block['description'] = $link['description']; - } - if (!empty($link['author-name'])) { - $block['author'] = $link['author-name']; - } - if (!empty($link['publisher-name'])) { - $block['site_name'] = $link['publisher-name']; - } - } - - if ($level > 0) { - $block['indent_level'] = $level; - } - - $npf[] = self::addPoster($link, $block); - } - return $npf; - } } From 0c7be66d79598247000cf4fdf6ee353fd8bfb08e Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 1 Apr 2023 23:17:39 +0000 Subject: [PATCH 05/12] No call by reference anymore --- src/Content/Text/NPF.php | 88 ++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 31 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index 8441ce646c..a6297ba975 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -32,6 +32,8 @@ use Friendica\Model\Post; */ class NPF { + static $heading_subtype = []; + static public function fromBBCode(string $bbcode, int $uri_id): array { $bbcode = self::prepareBody($bbcode); @@ -47,18 +49,30 @@ class NPF return []; } + self::setHeadingSubStyles($doc); + $element = $doc->getElementsByTagName('body')->item(0); - echo $element->ownerDocument->saveHTML($element) . "\n"; +// echo $element->ownerDocument->saveHTML($element) . "\n"; - $npf = []; - $text = ''; - $formatting = []; - - self::routeChildren($element, $uri_id, true, [], $npf, $text, $formatting); + list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, true, []); return self::addLinkBlockForUriId($uri_id, 0, $npf); } + static function setHeadingSubStyles($doc) + { + self::$heading_subtype = []; + foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) { + if ($doc->getElementsByTagName($element)->count() > 0) { + if (empty(self::$heading_subtype)) { + self::$heading_subtype[$element] = 'heading1'; + } else { + self::$heading_subtype[$element] = 'heading2'; + } + } + } + } + static private function prepareBody(string $body): string { $shared = BBCode::fetchShareAttributes($body); @@ -100,10 +114,10 @@ class NPF return trim($body); } - static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array &$npf, string &$text, array &$formatting) + static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array { if ($parse_structure && $text) { - self::addBlock($text, $formatting, $npf, $callstack); + list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack); } $callstack[] = $element->nodeName; @@ -113,21 +127,21 @@ class NPF switch ($child->nodeName) { case 'b': case 'strong': - self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting); break; case 'i': case 'em': - self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting); break; case 's': - self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting); break; case 'u': case 'span': - self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting); break; case 'hr': @@ -148,7 +162,7 @@ class NPF case 'a': if ($text) { - self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting); } else { $npf = self::addLinkBlock($child, $uri_id, $level, $npf); } @@ -173,7 +187,7 @@ class NPF case 'ul': case 'li': case 'details': - self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting); break; default: @@ -184,8 +198,9 @@ class NPF } if ($parse_structure && $text) { - self::addBlock($text, $formatting, $npf, $callstack); + list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack); } + return [$npf, $text, $formatting]; } static private function getLevelByCallstack($callstack): int @@ -199,7 +214,7 @@ class NPF return max(0, $level - 1); } - static private function getSubTypeByCallstack($callstack): string + static private function getSubTypeByCallstack($callstack, string $text): string { $subtype = ''; foreach ($callstack as $entry) { @@ -213,43 +228,49 @@ class NPF break; case 'h1': - $subtype = 'heading1'; + $subtype = self::$heading_subtype[$entry]; break; case 'h2': - $subtype = 'heading1'; + $subtype = self::$heading_subtype[$entry]; break; case 'h3': - $subtype = 'heading1'; + $subtype = self::$heading_subtype[$entry]; break; case 'h4': - $subtype = 'heading2'; + $subtype = self::$heading_subtype[$entry]; break; case 'h5': - $subtype = 'heading2'; + $subtype = self::$heading_subtype[$entry]; break; case 'h6': - $subtype = 'heading2'; + $subtype = self::$heading_subtype[$entry]; break; - case 'blockquote': + case 'blockquote': + $subtype = strlen($text) < 100 ? 'quote' : 'indented'; + break; + case 'pre': - case 'code': $subtype = 'indented'; break; + + case 'code': + $subtype = 'chat'; + break; } } return $subtype; } - static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array &$npf, string &$text, array &$formatting) + static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array { $start = mb_strlen($text); - self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); if (!empty($type)) { $formatting[] = [ @@ -258,12 +279,13 @@ class NPF 'type' => $type ]; } + return [$npf, $text, $formatting]; } - static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array &$npf, string &$text, array &$formatting) + static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array { $start = mb_strlen($text); - self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); + list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); $attributes = []; foreach ($element->attributes as $key => $attribute) { @@ -277,13 +299,14 @@ class NPF 'url' => $attributes['href'] ]; } + return [$npf, $text, $formatting]; } - static private function addBlock(string &$text, array &$formatting, array &$npf, array $callstack) + static private function addBlock(string $text, array $formatting, array $npf, array $callstack): array { $block = [ - 'callstack' => $callstack, 'type' => 'text', + 'subtype' => '', 'text' => $text, ]; @@ -296,14 +319,17 @@ class NPF $block['indent_level'] = $level; } - $subtype = self::getSubTypeByCallstack($callstack); + $subtype = self::getSubTypeByCallstack($callstack, $text); if ($subtype) { $block['subtype'] = $subtype; + } else { + unset($block['subtype']); } $npf[] = $block; $text = ''; $formatting = []; + return [$npf, $text, $formatting]; } static private function addPoster(array $media, array $block): array From 63d0c88c5a54f0e1c1c41e8a1ea729f4f8af0425 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 2 Apr 2023 09:09:55 +0000 Subject: [PATCH 06/12] Deactivate the attributes --- src/Content/Text/NPF.php | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index a6297ba975..f57ea15a87 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -172,28 +172,9 @@ class NPF $npf = self::addImageBlock($child, $uri_id, $level, $npf); break; - case 'ol': - case 'div': - case 'h1': - case 'h2': - case 'h3': - case 'h4': - case 'h5': - case 'h6': - case 'blockquote': - case 'p': - case 'pre': - case 'code': - case 'ul': - case 'li': - case 'details': + default: list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting); break; - - default: - print_r($npf); - print_r($callstack); - die($child . "\n"); } } @@ -310,9 +291,10 @@ class NPF 'text' => $text, ]; - if (!empty($formatting)) { - $block['formatting'] = $formatting; - } + // Deactivated since Tumblr has got issues with it + //if (!empty($formatting)) { + // $block['formatting'] = $formatting; + //} $level = self::getLevelByCallstack($callstack); if ($level > 0) { @@ -345,7 +327,7 @@ class NPF $poster['height'] = $media['preview-height']; } if (!empty($poster)) { - $block['poster'] = $poster; + $block['poster'] = [$poster]; } return $block; } From 6957c3e5c855a80195653a7983382cbebff5e320 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 2 Apr 2023 19:53:58 +0000 Subject: [PATCH 07/12] Deactivated not working stuff --- src/Content/Text/NPF.php | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index f57ea15a87..d60b3a8e79 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -52,7 +52,6 @@ class NPF self::setHeadingSubStyles($doc); $element = $doc->getElementsByTagName('body')->item(0); -// echo $element->ownerDocument->saveHTML($element) . "\n"; list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, true, []); @@ -186,6 +185,9 @@ class NPF static private function getLevelByCallstack($callstack): int { + // Deactivated, since Tumblr seems to have issues with the indent level + return 0; + $level = 0; foreach ($callstack as $entry) { if (in_array($entry, ['ol', 'ul', 'blockquote'])) { @@ -489,12 +491,13 @@ class NPF $block = [ 'type' => 'text', 'text' => $element->textContent, - 'formatting' => [ - 'start' => 0, - 'end' => strlen($element->textContent), - 'type' => 'link', - 'url' => $attributes['href'] - ] + // Deactivated, since Tumblr has got issues with the formatting + //'formatting' => [ + // 'start' => 0, + // 'end' => strlen($element->textContent), + // 'type' => 'link', + // 'url' => $attributes['href'] + //] ]; } From 88cc788c9b8acc2143c28c5ca3bc4727709a4c05 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 4 Apr 2023 22:21:27 +0000 Subject: [PATCH 08/12] Fixed NPF, new OAuth library added --- composer.json | 1 + composer.lock | 57 +++++++++++++++++++++++++++++++++++++++- src/Content/Text/NPF.php | 27 +++++++++---------- 3 files changed, 69 insertions(+), 16 deletions(-) diff --git a/composer.json b/composer.json index bb675c651e..0a31151fdd 100644 --- a/composer.json +++ b/composer.json @@ -35,6 +35,7 @@ "friendica/json-ld": "^1.0", "geekwright/po": "^2.0", "guzzlehttp/guzzle": "^6.5", + "guzzlehttp/oauth-subscriber": "^0.6", "kornrunner/blurhash": "^1.2", "league/html-to-markdown": "^4.8", "level-2/dice": "^4", diff --git a/composer.lock b/composer.lock index 280e979e9e..10fa756386 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "456d14e3ad9be265c5c9e6172a0d18d8", + "content-hash": "c208d7f5176358ea157f109c1c7d68dd", "packages": [ { "name": "asika/simple-console", @@ -967,6 +967,61 @@ ], "time": "2022-06-20T22:16:07+00:00" }, + { + "name": "guzzlehttp/oauth-subscriber", + "version": "0.6.0", + "source": { + "type": "git", + "url": "https://github.com/guzzle/oauth-subscriber.git", + "reference": "8d6cab29f8397e5712d00a383eeead36108a3c1f" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/guzzle/oauth-subscriber/zipball/8d6cab29f8397e5712d00a383eeead36108a3c1f", + "reference": "8d6cab29f8397e5712d00a383eeead36108a3c1f", + "shasum": "" + }, + "require": { + "guzzlehttp/guzzle": "^6.5|^7.2", + "guzzlehttp/psr7": "^1.7|^2.0", + "php": ">=5.5.0" + }, + "require-dev": { + "phpunit/phpunit": "~4.0|^9.3.3" + }, + "suggest": { + "ext-openssl": "Required to sign using RSA-SHA1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "0.6-dev" + } + }, + "autoload": { + "psr-4": { + "GuzzleHttp\\Subscriber\\Oauth\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Michael Dowling", + "email": "mtdowling@gmail.com", + "homepage": "https://github.com/mtdowling" + } + ], + "description": "Guzzle OAuth 1.0 subscriber", + "homepage": "http://guzzlephp.org/", + "keywords": [ + "Guzzle", + "oauth" + ], + "time": "2021-07-13T12:01:32+00:00" + }, { "name": "guzzlehttp/promises", "version": "1.5.2", diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index d60b3a8e79..a919562c74 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -185,9 +185,6 @@ class NPF static private function getLevelByCallstack($callstack): int { - // Deactivated, since Tumblr seems to have issues with the indent level - return 0; - $level = 0; foreach ($callstack as $entry) { if (in_array($entry, ['ol', 'ul', 'blockquote'])) { @@ -235,7 +232,7 @@ class NPF break; case 'blockquote': - $subtype = strlen($text) < 100 ? 'quote' : 'indented'; + $subtype = mb_strlen($text) < 100 ? 'quote' : 'indented'; break; case 'pre': @@ -293,10 +290,9 @@ class NPF 'text' => $text, ]; - // Deactivated since Tumblr has got issues with it - //if (!empty($formatting)) { - // $block['formatting'] = $formatting; - //} + if (!empty($formatting)) { + $block['formatting'] = $formatting; + } $level = self::getLevelByCallstack($callstack); if ($level > 0) { @@ -491,13 +487,14 @@ class NPF $block = [ 'type' => 'text', 'text' => $element->textContent, - // Deactivated, since Tumblr has got issues with the formatting - //'formatting' => [ - // 'start' => 0, - // 'end' => strlen($element->textContent), - // 'type' => 'link', - // 'url' => $attributes['href'] - //] + 'formatting' => [ + [ + 'start' => 0, + 'end' => mb_strlen($element->textContent), + 'type' => 'link', + 'url' => $attributes['href'] + ] + ] ]; } From e807da9d66da90b55494719e5a23f31530abbc50 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 5 Apr 2023 20:08:53 +0000 Subject: [PATCH 09/12] Media elements added --- src/Content/Text/BBCode.php | 18 +++- src/Content/Text/NPF.php | 170 +++++++++++++++++++++++++++++------- 2 files changed, 154 insertions(+), 34 deletions(-) diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index 93dbb11760..715a2ba8f1 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -57,6 +57,7 @@ class BBCode const DIASPORA = 3; const CONNECTORS = 4; const TWITTER_API = 5; + const NPF = 6; const OSTATUS = 7; const TWITTER = 8; const BACKLINK = 8; @@ -1355,7 +1356,9 @@ class BBCode /// @todo Have a closer look at the different html modes // Handle attached links or videos - if (in_array($simple_html, [self::MASTODON_API, self::TWITTER_API, self::ACTIVITYPUB])) { + if ($simple_html == self::NPF) { + $text = self::removeAttachment($text); + } elseif (in_array($simple_html, [self::MASTODON_API, self::TWITTER_API, self::ACTIVITYPUB])) { $text = self::replaceAttachment($text); } elseif (!in_array($simple_html, [self::INTERNAL, self::EXTERNAL, self::CONNECTORS])) { $text = self::replaceAttachment($text, true); @@ -1605,7 +1608,18 @@ class BBCode // Simplify "video" element $text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text); - if ($try_oembed) { + if ($simple_html == self::NPF) { + $text = preg_replace( + "/\[video\](.*?)\[\/video\]/ism", + '

', + $text + ); + $text = preg_replace( + "/\[audio\](.*?)\[\/audio\]/ism", + '

', + $text + ); + } elseif ($try_oembed) { // html5 video and audio $text = preg_replace( "/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index a919562c74..9ae5f0d03c 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -34,11 +34,18 @@ class NPF { static $heading_subtype = []; + /** + * Convert BBCode into NPF (Tumblr Neue Post Format) + * + * @param string $bbcode + * @param integer $uri_id + * @return array NPF + */ static public function fromBBCode(string $bbcode, int $uri_id): array { $bbcode = self::prepareBody($bbcode); - $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS); + $html = BBCode::convert($bbcode, false, BBCode::NPF); if (empty($html)) { return []; } @@ -58,7 +65,13 @@ class NPF return self::addLinkBlockForUriId($uri_id, 0, $npf); } - static function setHeadingSubStyles($doc) + /** + * Fetch the heading types + * + * @param DOMDocument $doc + * @return void + */ + static function setHeadingSubStyles(DOMDocument $doc) { self::$heading_subtype = []; foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) { @@ -72,47 +85,61 @@ class NPF } } - static private function prepareBody(string $body): string + /** + * Prepare the BBCode for the NPF conversion + * + * @param string $bbcode + * @return string + */ + static private function prepareBody(string $bbcode): string { - $shared = BBCode::fetchShareAttributes($body); + $shared = BBCode::fetchShareAttributes($bbcode); if (!empty($shared)) { - $body = $shared['shared']; + $bbcode = $shared['shared']; } - $body = BBCode::removeAttachment($body); + $bbcode = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $bbcode); - $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body); - - if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) { + if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) { foreach ($pictures as $picture) { if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) { - $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body); + $bbcode = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $bbcode); } } } - $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body); + $bbcode = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $bbcode); - if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) { + if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) { foreach ($pictures as $picture) { if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) { - $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body); + $bbcode = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $bbcode); } } } - $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body); - $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body); - $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body); + $bbcode = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $bbcode); do { - $oldbody = $body; - $body = str_replace(["\n\n\n"], ["\n\n"], $body); - } while ($oldbody != $body); + $oldbbcode = $bbcode; + $bbcode = str_replace(["\n\n\n"], ["\n\n"], $bbcode); + } while ($oldbbcode != $bbcode); - return trim($body); + return trim($bbcode); } + /** + * Walk recursively through the HTML + * + * @param DOMElement $element + * @param integer $uri_id + * @param boolean $parse_structure + * @param array $callstack + * @param array $npf + * @param string $text + * @param array $formatting + * @return array + */ static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array { if ($parse_structure && $text) { @@ -160,18 +187,19 @@ class NPF break; case 'a': - if ($text) { - list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting); - } else { - $npf = self::addLinkBlock($child, $uri_id, $level, $npf); - } + list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting); break; case 'img': $npf = self::addImageBlock($child, $uri_id, $level, $npf); break; - default: + case 'audio': + case 'video': + $npf = self::addMediaBlock($child, $uri_id, $level, $npf); + break; + + default: list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting); break; } @@ -183,7 +211,13 @@ class NPF return [$npf, $text, $formatting]; } - static private function getLevelByCallstack($callstack): int + /** + * Return the correct indent level + * + * @param array $callstack + * @return integer + */ + static private function getLevelByCallstack(array $callstack): int { $level = 0; foreach ($callstack as $entry) { @@ -194,7 +228,14 @@ class NPF return max(0, $level - 1); } - static private function getSubTypeByCallstack($callstack, string $text): string + /** + * Detect the subtype via the HTML element callstack + * + * @param array $callstack + * @param string $text + * @return string + */ + static private function getSubTypeByCallstack(array $callstack, string $text): string { $subtype = ''; foreach ($callstack as $entry) { @@ -247,6 +288,18 @@ class NPF return $subtype; } + /** + * Add formatting for a text block + * + * @param DOMElement $element + * @param integer $uri_id + * @param string $type + * @param array $callstack + * @param array $npf + * @param string $text + * @param array $formatting + * @return array + */ static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array { $start = mb_strlen($text); @@ -262,6 +315,17 @@ class NPF return [$npf, $text, $formatting]; } + /** + * Add an inline link for a text block + * + * @param DOMElement $element + * @param integer $uri_id + * @param array $callstack + * @param array $npf + * @param string $text + * @param array $formatting + * @return array + */ static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array { $start = mb_strlen($text); @@ -282,6 +346,15 @@ class NPF return [$npf, $text, $formatting]; } + /** + * Add a text block + * + * @param string $text + * @param array $formatting + * @param array $npf + * @param array $callstack + * @return array + */ static private function addBlock(string $text, array $formatting, array $npf, array $callstack): array { $block = [ @@ -312,6 +385,13 @@ class NPF return [$npf, $text, $formatting]; } + /** + * Add a block for a preview picture + * + * @param array $media + * @param array $block + * @return array + */ static private function addPoster(array $media, array $block): array { $poster = []; @@ -330,6 +410,14 @@ class NPF return $block; } + /** + * Add a link block from the HTML attachment of a given post uri-id + * + * @param integer $uri_id + * @param integer $level + * @param array $npf + * @return array + */ static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array { foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) { @@ -380,6 +468,15 @@ class NPF return $npf; } + /** + * Add an image block + * + * @param DOMElement $element + * @param integer $uri_id + * @param integer $level + * @param array $npf + * @return array + */ static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { $attributes = []; @@ -440,17 +537,26 @@ class NPF return $npf; } - static private function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array + /** + * Add an audio or video block + * + * @param DOMElement $element + * @param integer $uri_id + * @param integer $level + * @param array $npf + * @return array + */ + static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { $attributes = []; foreach ($element->attributes as $key => $attribute) { $attributes[$key] = trim($attribute->value); } - if (empty($attributes['href'])) { + if (empty($attributes['src'])) { return $npf; } - $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]); + $media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::AUDIO, Post\Media::VIDEO]); if (!empty($media)) { switch ($media['type']) { case Post\Media::AUDIO: @@ -492,7 +598,7 @@ class NPF 'start' => 0, 'end' => mb_strlen($element->textContent), 'type' => 'link', - 'url' => $attributes['href'] + 'url' => $attributes['src'] ] ] ]; From 2f7e22c4f754cd43a9de1ec2799544da0d7c9411 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 6 Apr 2023 19:53:52 +0000 Subject: [PATCH 10/12] Fix code standards --- src/Content/Text/NPF.php | 75 ++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index 9ae5f0d03c..ea9304306e 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -32,7 +32,7 @@ use Friendica\Model\Post; */ class NPF { - static $heading_subtype = []; + private static $heading_subtype = []; /** * Convert BBCode into NPF (Tumblr Neue Post Format) @@ -41,7 +41,7 @@ class NPF * @param integer $uri_id * @return array NPF */ - static public function fromBBCode(string $bbcode, int $uri_id): array + public static function fromBBCode(string $bbcode, int $uri_id): array { $bbcode = self::prepareBody($bbcode); @@ -51,6 +51,7 @@ class NPF } $doc = new DOMDocument(); + $doc->formatOutput = true; if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) { return []; @@ -71,7 +72,7 @@ class NPF * @param DOMDocument $doc * @return void */ - static function setHeadingSubStyles(DOMDocument $doc) + private static function setHeadingSubStyles(DOMDocument $doc) { self::$heading_subtype = []; foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) { @@ -91,7 +92,7 @@ class NPF * @param string $bbcode * @return string */ - static private function prepareBody(string $bbcode): string + private static function prepareBody(string $bbcode): string { $shared = BBCode::fetchShareAttributes($bbcode); if (!empty($shared)) { @@ -122,7 +123,7 @@ class NPF do { $oldbbcode = $bbcode; - $bbcode = str_replace(["\n\n\n"], ["\n\n"], $bbcode); + $bbcode = str_replace(["\n\n\n"], ["\n\n"], $bbcode); } while ($oldbbcode != $bbcode); return trim($bbcode); @@ -140,14 +141,14 @@ class NPF * @param array $formatting * @return array */ - static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array + private static function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array { if ($parse_structure && $text) { list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack); } $callstack[] = $element->nodeName; - $level = self::getLevelByCallstack($callstack); + $level = self::getLevelByCallstack($callstack); foreach ($element->childNodes as $child) { switch ($child->nodeName) { @@ -155,12 +156,12 @@ class NPF case 'strong': list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting); break; - + case 'i': case 'em': list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting); break; - + case 's': list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting); break; @@ -176,7 +177,7 @@ class NPF $text .= "\n"; } break; - + case '#text': $text .= $child->textContent; break; @@ -198,7 +199,7 @@ class NPF case 'video': $npf = self::addMediaBlock($child, $uri_id, $level, $npf); break; - + default: list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting); break; @@ -217,7 +218,7 @@ class NPF * @param array $callstack * @return integer */ - static private function getLevelByCallstack(array $callstack): int + private static function getLevelByCallstack(array $callstack): int { $level = 0; foreach ($callstack as $entry) { @@ -235,7 +236,7 @@ class NPF * @param string $text * @return string */ - static private function getSubTypeByCallstack(array $callstack, string $text): string + private static function getSubTypeByCallstack(array $callstack, string $text): string { $subtype = ''; foreach ($callstack as $entry) { @@ -251,28 +252,28 @@ class NPF case 'h1': $subtype = self::$heading_subtype[$entry]; break; - + case 'h2': $subtype = self::$heading_subtype[$entry]; break; - + case 'h3': $subtype = self::$heading_subtype[$entry]; break; - + case 'h4': $subtype = self::$heading_subtype[$entry]; break; - + case 'h5': $subtype = self::$heading_subtype[$entry]; break; - + case 'h6': $subtype = self::$heading_subtype[$entry]; break; - - case 'blockquote': + + case 'blockquote': $subtype = mb_strlen($text) < 100 ? 'quote' : 'indented'; break; @@ -300,9 +301,10 @@ class NPF * @param array $formatting * @return array */ - static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array + private static function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array { $start = mb_strlen($text); + list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); if (!empty($type)) { @@ -326,9 +328,10 @@ class NPF * @param array $formatting * @return array */ - static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array + private static function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array { $start = mb_strlen($text); + list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting); $attributes = []; @@ -355,12 +358,12 @@ class NPF * @param array $callstack * @return array */ - static private function addBlock(string $text, array $formatting, array $npf, array $callstack): array + private static function addBlock(string $text, array $formatting, array $npf, array $callstack): array { $block = [ - 'type' => 'text', - 'subtype' => '', - 'text' => $text, + 'type' => 'text', + 'subtype' => '', + 'text' => $text, ]; if (!empty($formatting)) { @@ -380,9 +383,7 @@ class NPF } $npf[] = $block; - $text = ''; - $formatting = []; - return [$npf, $text, $formatting]; + return [$npf, '', []]; } /** @@ -392,7 +393,7 @@ class NPF * @param array $block * @return array */ - static private function addPoster(array $media, array $block): array + private static function addPoster(array $media, array $block): array { $poster = []; if (!empty($media['preview'])) { @@ -418,7 +419,7 @@ class NPF * @param array $npf * @return array */ - static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array + private static function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array { foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) { $host = parse_url($link['url'], PHP_URL_HOST); @@ -477,7 +478,7 @@ class NPF * @param array $npf * @return array */ - static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array + private static function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { $attributes = []; foreach ($element->attributes as $key => $attribute) { @@ -546,7 +547,7 @@ class NPF * @param array $npf * @return array */ - static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array + private static function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array { $attributes = []; foreach ($element->attributes as $key => $attribute) { @@ -561,7 +562,7 @@ class NPF switch ($media['type']) { case Post\Media::AUDIO: $block = [ - 'type' => 'audio', + 'type' => 'audio', 'media' => [ 'type' => $media['mimetype'], 'url' => $media['url'], @@ -579,7 +580,7 @@ class NPF case Post\Media::VIDEO: $block = [ - 'type' => 'video', + 'type' => 'video', 'media' => [ 'type' => $media['mimetype'], 'url' => $media['url'], @@ -591,8 +592,8 @@ class NPF } } else { $block = [ - 'type' => 'text', - 'text' => $element->textContent, + 'type' => 'text', + 'text' => $element->textContent, 'formatting' => [ [ 'start' => 0, From db85180d1e023c12682ea9b898a54459a3696c05 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 6 Apr 2023 19:55:41 +0000 Subject: [PATCH 11/12] Fix more standards --- src/Content/Text/NPF.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Content/Text/NPF.php b/src/Content/Text/NPF.php index ea9304306e..2f0b360837 100644 --- a/src/Content/Text/NPF.php +++ b/src/Content/Text/NPF.php @@ -361,9 +361,9 @@ class NPF private static function addBlock(string $text, array $formatting, array $npf, array $callstack): array { $block = [ - 'type' => 'text', - 'subtype' => '', - 'text' => $text, + 'type' => 'text', + 'subtype' => '', + 'text' => $text, ]; if (!empty($formatting)) { From 9d3ca4717175b6c04f95d2a54e679b9ee6207880 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 6 Apr 2023 20:00:56 +0000 Subject: [PATCH 12/12] Fix spaces --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 0a31151fdd..6e0d9f984e 100644 --- a/composer.json +++ b/composer.json @@ -35,7 +35,7 @@ "friendica/json-ld": "^1.0", "geekwright/po": "^2.0", "guzzlehttp/guzzle": "^6.5", - "guzzlehttp/oauth-subscriber": "^0.6", + "guzzlehttp/oauth-subscriber": "^0.6", "kornrunner/blurhash": "^1.2", "league/html-to-markdown": "^4.8", "level-2/dice": "^4",