No link guessing for DFRN / Don't show redundant data

This commit is contained in:
Michael 2021-05-04 05:18:03 +00:00
parent 02ec019ddc
commit b3e511eb0c
3 changed files with 74 additions and 20 deletions

View File

@ -172,7 +172,7 @@ class Item
Logger::info('Updating per single row method', ['fields' => $fields, 'condition' => $condition]); Logger::info('Updating per single row method', ['fields' => $fields, 'condition' => $condition]);
$items = Post::select(['id', 'origin', 'uri-id', 'uid'], $condition); $items = Post::select(['id', 'origin', 'uri-id', 'uid', 'author-network'], $condition);
$notify_items = []; $notify_items = [];
@ -180,6 +180,10 @@ class Item
if (!empty($fields['body'])) { if (!empty($fields['body'])) {
Post\Media::insertFromAttachmentData($item['uri-id'], $fields['body']); Post\Media::insertFromAttachmentData($item['uri-id'], $fields['body']);
if ($item['author-network'] != Protocol::DFRN) {
Post\Media::insertFromRelevantUrl($item['uri-id'], $fields['body']);
}
$content_fields = ['raw-body' => trim($fields['raw-body'] ?? $fields['body'])]; $content_fields = ['raw-body' => trim($fields['raw-body'] ?? $fields['body'])];
// Remove all media attachments from the body and store them in the post-media table // Remove all media attachments from the body and store them in the post-media table
@ -967,12 +971,16 @@ class Item
unset($item['attachments']); unset($item['attachments']);
} }
Post\Media::insertFromAttachmentData($item['uri-id'], $item['body']);
if (!DBA::exists('contact', ['id' => $item['author-id'], 'network' => Protocol::DFRN])) {
Post\Media::insertFromRelevantUrl($item['uri-id'], $item['body']);
}
// Remove all media attachments from the body and store them in the post-media table // Remove all media attachments from the body and store them in the post-media table
$item['raw-body'] = Post\Media::insertFromBody($item['uri-id'], $item['raw-body']); $item['raw-body'] = Post\Media::insertFromBody($item['uri-id'], $item['raw-body']);
$item['raw-body'] = self::setHashtags($item['raw-body']); $item['raw-body'] = self::setHashtags($item['raw-body']);
Post\Media::insertFromAttachmentData($item['uri-id'], $item['body']);
// Check for hashtags in the body and repair or add hashtag links // Check for hashtags in the body and repair or add hashtag links
$item['body'] = self::setHashtags($item['body']); $item['body'] = self::setHashtags($item['body']);
@ -2891,28 +2899,48 @@ class Item
$data['preview'] = $attachment['preview'] ?? ''; $data['preview'] = $attachment['preview'] ?? '';
} }
} }
if (!empty($data['description']) && !empty($content)) {
similar_text($data['description'], $content, $percent);
} else {
$percent = 0;
}
if (!empty($data['description']) && (($data['title'] == $data['description']) || ($percent > 95) || (strpos($content, $data['description']) !== false))) {
$data['description'] = '';
}
} elseif (preg_match("/.*(\[attachment.*?\].*?\[\/attachment\]).*/ism", $body, $match)) { } elseif (preg_match("/.*(\[attachment.*?\].*?\[\/attachment\]).*/ism", $body, $match)) {
$data = BBCode::getAttachmentData($match[1]); $data = BBCode::getAttachmentData($match[1]);
} }
DI::profiler()->saveTimestamp($stamp1, 'rendering'); DI::profiler()->saveTimestamp($stamp1, 'rendering');
if (isset($data['url']) && !in_array($data['url'], $ignore_links)) { if (isset($data['url']) && !in_array($data['url'], $ignore_links)) {
$parts = parse_url($data['url']); if (!empty($data['description']) || !empty($data['image'] || !empty($data['preview']))) {
if (!empty($parts['scheme']) && !empty($parts['host'])) { $parts = parse_url($data['url']);
if (empty($data['provider_name'])) { if (!empty($parts['scheme']) && !empty($parts['host'])) {
$data['provider_name'] = $parts['host']; if (empty($data['provider_name'])) {
} $data['provider_name'] = $parts['host'];
if (empty($data['provider_url']) || empty(parse_url($data['provider_url'], PHP_URL_SCHEME))) { }
$data['provider_url'] = $parts['scheme'] . '://' . $parts['host']; if (empty($data['provider_url']) || empty(parse_url($data['provider_url'], PHP_URL_SCHEME))) {
$data['provider_url'] = $parts['scheme'] . '://' . $parts['host'];
if (!empty($parts['port'])) { if (!empty($parts['port'])) {
$data['provider_url'] .= ':' . $parts['port']; $data['provider_url'] .= ':' . $parts['port'];
}
} }
} }
// @todo Use a template
$rendered = BBCode::convertAttachment('', BBCode::INTERNAL, false, $data);
} elseif (!self::containsLink($content, $data['url'])) {
$rendered = Renderer::replaceMacros(Renderer::getMarkupTemplate('content/link.tpl'), [
'$url' => $data['url'],
'$title' => $data['title'],
]);
} else {
return $content;
} }
// @todo Use a template
$rendered = BBCode::convertAttachment('', BBCode::INTERNAL, false, $data);
if ($shared) { if ($shared) {
return str_replace(BBCode::BOTTOM_ANCHOR, BBCode::BOTTOM_ANCHOR . $rendered, $content); return str_replace(BBCode::BOTTOM_ANCHOR, BBCode::BOTTOM_ANCHOR . $rendered, $content);
} else { } else {

View File

@ -341,12 +341,6 @@ class Media
} }
} }
$url = PageInfo::getRelevantUrlFromBody($body);
if (!empty($url)) {
Logger::debug('Got page url', ['url' => $url]);
$attachments[$url] = ['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $url];
}
foreach ($attachments as $attachment) { foreach ($attachments as $attachment) {
// Only store attachments that are part of the unshared body // Only store attachments that are part of the unshared body
if (strpos($unshared_body, $attachment['url']) !== false) { if (strpos($unshared_body, $attachment['url']) !== false) {
@ -357,6 +351,37 @@ class Media
return trim($body); return trim($body);
} }
/**
 * Store the links found in the unshared part of a body as media entries of unknown type
 *
 * Shared content and links prefixed by "#", "@" or "!" are stripped before the search.
 * Links without a description ([url]...[/url]) are processed first,
 * links with a description ([url=...]...[/url]) afterwards.
 *
 * @param integer $uriid
 * @param string  $body
 */
public static function insertFromRelevantUrl(int $uriid, string $body)
{
	// Shared content is not part of this post, so it is dropped before searching
	$unshared = preg_replace("/\s*\[share .*?\].*?\[\/share\]\s*/ism", '', $body);

	// Links that directly follow "#", "@" or "!" are tags or mentions, not page links
	$text = preg_replace("/([#@!])\[url\=(.*?)\](.*?)\[\/url\]/ism", '', $unshared);

	// Log message => pattern; the order (pure links first) matches the order of insertion
	$searches = [
		'Got page url (link without description)' => "/\[url\](https?:.*?)\[\/url\]/ism",
		'Got page url (link with description)'    => "/\[url\=(https?:.*?)\].*?\[\/url\]/ism",
	];

	foreach ($searches as $message => $pattern) {
		if (!preg_match_all($pattern, $text, $found)) {
			continue;
		}

		foreach ($found[1] as $link) {
			Logger::info($message, ['uri-id' => $uriid, 'url' => $link]);
			self::insert(['uri-id' => $uriid, 'type' => self::UNKNOWN, 'url' => $link]);
		}
	}
}
/** /**
* Add media links from the attachment field * Add media links from the attachment field
* *

View File

@ -0,0 +1 @@
{{* Renders a single link inside a paragraph. Reads $url (link target) and $title (link text); when $title is empty, the URL itself is used as the link text. *}}
<p><a href="{{$url}}">{{if $title}}{{$title}}{{else}}{{$url}}{{/if}}</a></p>