Ensure ParseUrl::getSiteinfo always returns the url and type keys

This commit is contained in:
Hypolite Petovan 2020-07-17 19:18:27 -04:00
parent 25b3fa83fc
commit 886cf40036

View File

@ -55,14 +55,13 @@ class ParseUrl
* to avoid endless loops
*
* @return array which contains needed data for embedding
* string 'url' => The url of the parsed page
* string 'type' => Content type
* string 'title' => The title of the content
* string 'text' => The description for the content
* string 'image' => A preview image of the content (only available
* if $no_geuessing = false
* array'images' = Array of preview pictures
* string 'keywords' => The tags which belong to the content
* string 'url' => The url of the parsed page
* string 'type' => Content type
* string 'title' => (optional) The title of the content
* string 'text' => (optional) The description for the content
* string 'image' => (optional) A preview image of the content (only available if $no_guessing = false)
* array 'images' => (optional) Array of preview pictures
* string 'keywords' => (optional) The tags which belong to the content
*
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
* @see ParseUrl::getSiteinfo() for more information about scraping
@ -115,14 +114,13 @@ class ParseUrl
* @param int $count Internal counter to avoid endless loops
*
* @return array which contains needed data for embedding
* string 'url' => The url of the parsed page
* string 'type' => Content type
* string 'title' => The title of the content
* string 'text' => The description for the content
* string 'image' => A preview image of the content (only available
* if $no_geuessing = false
* array'images' = Array of preview pictures
* string 'keywords' => The tags which belong to the content
* string 'url' => The url of the parsed page
* string 'type' => Content type
* string 'title' => (optional) The title of the content
* string 'text' => (optional) The description for the content
* string 'image' => (optional) A preview image of the content (only available if $no_guessing = false)
* array 'images' => (optional) Array of preview pictures
* string 'keywords' => (optional) The tags which belong to the content
*
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
* @todo https://developers.google.com/+/plugins/snippet/
@ -140,28 +138,27 @@ class ParseUrl
*/
public static function getSiteinfo($url, $no_guessing = false, $do_oembed = true, $count = 1)
{
$siteinfo = [];
// Check if the URL does contain a scheme
$scheme = parse_url($url, PHP_URL_SCHEME);
if ($scheme == '') {
$url = 'http://' . trim($url, '/');
$url = 'http://' . ltrim($url, '/');
}
$url = trim($url, "'\"");
$url = Network::stripTrackingQueryParams($url);
$siteinfo = [
'url' => $url,
'type' => 'link',
];
if ($count > 10) {
Logger::log('Endless loop detected for ' . $url, Logger::DEBUG);
return $siteinfo;
}
$url = trim($url, "'");
$url = trim($url, '"');
$url = Network::stripTrackingQueryParams($url);
$siteinfo['url'] = $url;
$siteinfo['type'] = 'link';
$curlResult = Network::curl($url);
if (!$curlResult->isSuccess()) {
return $siteinfo;