Improved mimeType detection and setting of the "type" field

This commit is contained in:
Michael 2021-04-14 19:12:01 +00:00
parent 0b239f368b
commit b65f73e348
3 changed files with 104 additions and 38 deletions

View File

@ -2684,7 +2684,12 @@ class Item
{ {
$leading = ''; $leading = '';
$trailing = ''; $trailing = '';
foreach (Post\Media::getByURIId($item['uri-id'], [Post\Media::DOCUMENT, Post\Media::TORRENT, Post\Media::UNKNOWN]) as $attachment) { foreach (Post\Media::getByURIId($item['uri-id'], [Post\Media::AUDIO, Post\Media::VIDEO,
Post\Media::DOCUMENT, Post\Media::TORRENT, Post\Media::UNKNOWN]) as $attachment) {
if (in_array($attachment['type'], [Post\Media::AUDIO, Post\Media::VIDEO]) && strpos($item['body'], $attachment['url'])) {
continue;
}
$mime = $attachment['mimetype']; $mime = $attachment['mimetype'];
$author = ['uid' => 0, 'id' => $item['author-id'], $author = ['uid' => 0, 'id' => $item['author-id'],

View File

@ -40,7 +40,12 @@ class Media
const IMAGE = 1; const IMAGE = 1;
const VIDEO = 2; const VIDEO = 2;
const AUDIO = 3; const AUDIO = 3;
const TEXT = 4;
const APPLICATION = 5;
const TORRENT = 16; const TORRENT = 16;
const HTML = 17;
const XML = 18;
const PLAIN = 19;
const DOCUMENT = 128; const DOCUMENT = 128;
/** /**
@ -51,7 +56,7 @@ class Media
*/ */
public static function insert(array $media, bool $force = false) public static function insert(array $media, bool $force = false)
{ {
if (empty($media['url']) || empty($media['uri-id']) || empty($media['type'])) { if (empty($media['url']) || empty($media['uri-id']) || !isset($media['type'])) {
Logger::warning('Incomplete media data', ['media' => $media]); Logger::warning('Incomplete media data', ['media' => $media]);
return; return;
} }
@ -64,12 +69,7 @@ class Media
return; return;
} }
$fields = ['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description']; $media = self::unsetEmptyFields($media);
foreach ($fields as $field) {
if (empty($media[$field])) {
unset($media[$field]);
}
}
// We are storing as fast as possible to avoid duplicated network requests // We are storing as fast as possible to avoid duplicated network requests
// when fetching additional information for pictures and other content. // when fetching additional information for pictures and other content.
@ -78,6 +78,7 @@ class Media
$stored = $media; $stored = $media;
$media = self::fetchAdditionalData($media); $media = self::fetchAdditionalData($media);
$media = self::unsetEmptyFields($media);
if (array_diff_assoc($media, $stored)) { if (array_diff_assoc($media, $stored)) {
$result = DBA::insert('post-media', $media, Database::INSERT_UPDATE); $result = DBA::insert('post-media', $media, Database::INSERT_UPDATE);
@ -87,6 +88,23 @@ class Media
} }
} }
/**
 * Strip optional media attributes that carry no value
 *
 * Only the keys listed below are examined. Any of them whose value is
 * considered empty by PHP (null, '', 0, '0', [] or false) is dropped; all other
 * keys are passed through untouched and the original key order is kept.
 *
 * @param array $media Media record
 * @return array The record without its empty optional attributes
 */
private static function unsetEmptyFields(array $media)
{
    // Flipped so that membership can be tested with a plain key lookup
    $optional = array_flip(['mimetype', 'height', 'width', 'size', 'preview', 'preview-height', 'preview-width', 'description']);

    return array_filter($media, static function ($value, $key) use ($optional) {
        return !isset($optional[$key]) || !empty($value);
    }, ARRAY_FILTER_USE_BOTH);
}
/** /**
* Copy attachments from one uri-id to another * Copy attachments from one uri-id to another
* *
@ -130,23 +148,22 @@ class Media
public static function fetchAdditionalData(array $media) public static function fetchAdditionalData(array $media)
{ {
// Fetch the mimetype or size if missing. // Fetch the mimetype or size if missing.
// We don't do it for torrent links since they need special treatment. if (empty($media['mimetype']) || empty($media['size'])) {
// We don't do this for images, since we are fetching their details some lines later anyway.
if (!in_array($media['type'], [self::TORRENT, self::IMAGE]) && (empty($media['mimetype']) || empty($media['size']))) {
$timeout = DI::config()->get('system', 'xrd_timeout'); $timeout = DI::config()->get('system', 'xrd_timeout');
$curlResult = DI::httpRequest()->head($media['url'], ['timeout' => $timeout]); $curlResult = DI::httpRequest()->head($media['url'], ['timeout' => $timeout]);
if ($curlResult->isSuccess()) { if ($curlResult->isSuccess()) {
$header = $curlResult->getHeaderArray(); if (empty($media['mimetype'])) {
if (empty($media['mimetype']) && !empty($header['content-type'])) { $media['mimetype'] = $curlResult->getHeader('Content-Type');
$media['mimetype'] = $header['content-type'];
} }
if (empty($media['size']) && !empty($header['content-length'])) { if (empty($media['size'])) {
$media['size'] = $header['content-length']; $media['size'] = (int)$curlResult->getHeader('Content-Length');
} }
} else {
Logger::notice('Could not fetch head', ['media' => $media]);
} }
} }
$filetype = !empty($media['mimetype']) ? strtolower(substr($media['mimetype'], 0, strpos($media['mimetype'], '/'))) : ''; $filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : '';
if (($media['type'] == self::IMAGE) || ($filetype == 'image')) { if (($media['type'] == self::IMAGE) || ($filetype == 'image')) {
$imagedata = Images::getInfoFromURLCached($media['url']); $imagedata = Images::getInfoFromURLCached($media['url']);
@ -155,6 +172,8 @@ class Media
$media['size'] = $imagedata['size']; $media['size'] = $imagedata['size'];
$media['width'] = $imagedata[0]; $media['width'] = $imagedata[0];
$media['height'] = $imagedata[1]; $media['height'] = $imagedata[1];
} else {
Logger::notice('No image data', ['media' => $media]);
} }
if (!empty($media['preview'])) { if (!empty($media['preview'])) {
$imagedata = Images::getInfoFromURLCached($media['preview']); $imagedata = Images::getInfoFromURLCached($media['preview']);
@ -164,9 +183,65 @@ class Media
} }
} }
} }
if ($media['type'] != self::DOCUMENT) {
$media = self::addType($media);
}
return $media; return $media;
} }
/**
 * Add the detected type to the media array
 *
 * The type is derived from the "mimetype" entry of the array. Everything after
 * the first ";" (parameters such as a charset) is ignored, and whitespace
 * around the main type and the subtype is stripped, so that a value like
 * "text/html ; charset=utf-8" is still recognised as HTML.
 *
 * When no mimetype is present the array is returned unchanged. When the
 * mimetype has no "/" or its main type is not known, the type is set to
 * self::UNKNOWN.
 *
 * @param array $data Media array; only its "mimetype" entry is evaluated
 * @return array data array with the detected type
 */
public static function addType(array $data)
{
    if (empty($data['mimetype'])) {
        Logger::info('No MimeType provided', ['media' => $data]);
        return $data;
    }

    // Cut off parameters ("; charset=...") before splitting into "type/subtype".
    // Whitespace around the separator is legal in HTTP headers, so it is trimmed.
    $type = explode('/', trim(current(explode(';', $data['mimetype']))));
    if (count($type) < 2) {
        Logger::info('Unknown MimeType', ['type' => $type, 'media' => $data]);
        $data['type'] = self::UNKNOWN;
        return $data;
    }

    $filetype = strtolower(trim($type[0]));
    $subtype  = strtolower(trim($type[1]));

    if ($filetype == 'image') {
        $data['type'] = self::IMAGE;
    } elseif ($filetype == 'video') {
        $data['type'] = self::VIDEO;
    } elseif ($filetype == 'audio') {
        $data['type'] = self::AUDIO;
    } elseif (($filetype == 'text') && ($subtype == 'html')) {
        $data['type'] = self::HTML;
    } elseif (($filetype == 'text') && ($subtype == 'xml')) {
        $data['type'] = self::XML;
    } elseif (($filetype == 'text') && ($subtype == 'plain')) {
        $data['type'] = self::PLAIN;
    } elseif ($filetype == 'text') {
        // Any other text subtype
        $data['type'] = self::TEXT;
    } elseif (($filetype == 'application') && ($subtype == 'x-bittorrent')) {
        $data['type'] = self::TORRENT;
    } elseif ($filetype == 'application') {
        // Any other application subtype
        $data['type'] = self::APPLICATION;
    } else {
        $data['type'] = self::UNKNOWN;
        Logger::info('Unknown type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
        return $data;
    }

    Logger::debug('Detected type', ['filetype' => $filetype, 'subtype' => $subtype, 'media' => $data]);
    return $data;
}
/** /**
* Tests for path patterns that are used for picture links in Friendica * Tests for path patterns that are used for picture links in Friendica
* *

View File

@ -123,21 +123,7 @@ class Processor
} }
$data = ['uri-id' => $uriid]; $data = ['uri-id' => $uriid];
$data['type'] = Post\Media::UNKNOWN;
$filetype = strtolower(substr($attachment['mediaType'], 0, strpos($attachment['mediaType'], '/')));
if ($filetype == 'image') {
$data['type'] = Post\Media::IMAGE;
} elseif ($filetype == 'video') {
$data['type'] = Post\Media::VIDEO;
} elseif ($filetype == 'audio') {
$data['type'] = Post\Media::AUDIO;
} elseif (in_array($attachment['mediaType'], ['application/x-bittorrent', 'application/x-bittorrent;x-scheme-handler/magnet'])) {
$data['type'] = Post\Media::TORRENT;
} else {
Logger::info('Unknown type', ['attachment' => $attachment]);
return;
}
$data['url'] = $attachment['url']; $data['url'] = $attachment['url'];
$data['mimetype'] = $attachment['mediaType']; $data['mimetype'] = $attachment['mediaType'];
$data['height'] = $attachment['height'] ?? null; $data['height'] = $attachment['height'] ?? null;