Merge pull request #13827 from annando/ocr
Use OCR to fetch text in images
This commit is contained in:
commit
faa1cbeaf6
|
@ -208,13 +208,17 @@ class Media
|
||||||
$filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : '';
|
$filetype = !empty($media['mimetype']) ? strtolower(current(explode('/', $media['mimetype']))) : '';
|
||||||
|
|
||||||
if (($media['type'] == self::IMAGE) || ($filetype == 'image')) {
|
if (($media['type'] == self::IMAGE) || ($filetype == 'image')) {
|
||||||
$imagedata = Images::getInfoFromURLCached($media['url']);
|
$imagedata = Images::getInfoFromURLCached($media['url'], empty($media['description']));
|
||||||
if ($imagedata) {
|
if ($imagedata) {
|
||||||
$media['mimetype'] = $imagedata['mime'];
|
$media['mimetype'] = $imagedata['mime'];
|
||||||
$media['size'] = $imagedata['size'];
|
$media['size'] = $imagedata['size'];
|
||||||
$media['width'] = $imagedata[0];
|
$media['width'] = $imagedata[0];
|
||||||
$media['height'] = $imagedata[1];
|
$media['height'] = $imagedata[1];
|
||||||
$media['blurhash'] = $imagedata['blurhash'] ?? null;
|
$media['blurhash'] = $imagedata['blurhash'] ?? null;
|
||||||
|
if (!empty($imagedata['description']) && empty($media['description'])) {
|
||||||
|
$media['description'] = $imagedata['description'];
|
||||||
|
Logger::debug('Detected text for image', $media);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
Logger::notice('No image data', ['media' => $media]);
|
Logger::notice('No image data', ['media' => $media]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
namespace Friendica\Util;
|
namespace Friendica\Util;
|
||||||
|
|
||||||
|
use Friendica\Core\Hook;
|
||||||
use Friendica\Core\Logger;
|
use Friendica\Core\Logger;
|
||||||
use Friendica\DI;
|
use Friendica\DI;
|
||||||
use Friendica\Model\Photo;
|
use Friendica\Model\Photo;
|
||||||
|
@ -181,10 +182,11 @@ class Images
|
||||||
* Gets info array from given URL, cached data has priority
|
* Gets info array from given URL, cached data has priority
|
||||||
*
|
*
|
||||||
* @param string $url
|
* @param string $url
|
||||||
|
* @param bool $ocr Passed through to getInfoFromURL() to request OCR text detection; also part of the cache key
|
||||||
* @return array Info
|
* @return array Info
|
||||||
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
||||||
*/
|
*/
|
||||||
public static function getInfoFromURLCached(string $url): array
|
public static function getInfoFromURLCached(string $url, bool $ocr = false): array
|
||||||
{
|
{
|
||||||
$data = [];
|
$data = [];
|
||||||
|
|
||||||
|
@ -192,12 +194,12 @@ class Images
|
||||||
return $data;
|
return $data;
|
||||||
}
|
}
|
||||||
|
|
||||||
$cacheKey = 'getInfoFromURL:' . sha1($url);
|
$cacheKey = 'getInfoFromURL:' . sha1($url . $ocr);
|
||||||
|
|
||||||
$data = DI::cache()->get($cacheKey);
|
$data = DI::cache()->get($cacheKey);
|
||||||
|
|
||||||
if (empty($data) || !is_array($data)) {
|
if (empty($data) || !is_array($data)) {
|
||||||
$data = self::getInfoFromURL($url);
|
$data = self::getInfoFromURL($url, $ocr);
|
||||||
|
|
||||||
DI::cache()->set($cacheKey, $data);
|
DI::cache()->set($cacheKey, $data);
|
||||||
}
|
}
|
||||||
|
@ -209,10 +211,11 @@ class Images
|
||||||
* Gets info from URL uncached
|
* Gets info from URL uncached
|
||||||
*
|
*
|
||||||
* @param string $url
|
* @param string $url
|
||||||
|
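* @param bool $ocr When true and the image is valid, call the 'ocr-detection' hook and return any detected text as 'description'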
|
||||||
* @return array Info array
|
* @return array Info array
|
||||||
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
|
||||||
*/
|
*/
|
||||||
public static function getInfoFromURL(string $url): array
|
public static function getInfoFromURL(string $url, bool $ocr = false): array
|
||||||
{
|
{
|
||||||
$data = [];
|
$data = [];
|
||||||
|
|
||||||
|
@ -257,6 +260,14 @@ class Images
|
||||||
|
|
||||||
if ($image->isValid()) {
|
if ($image->isValid()) {
|
||||||
$data['blurhash'] = $image->getBlurHash();
|
$data['blurhash'] = $image->getBlurHash();
|
||||||
|
|
||||||
|
if ($ocr) {
|
||||||
|
$media = ['img_str' => $img_str];
|
||||||
|
Hook::callAll('ocr-detection', $media);
|
||||||
|
if (!empty($media['description'])) {
|
||||||
|
$data['description'] = $media['description'];
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$data['size'] = $filesize;
|
$data['size'] = $filesize;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user