[langfilter] Improve language detection by removing contiguous whitespace from the message

- HTML-heavy posts had several superfluous whitespace characters, putting them over the minimum message length
This commit is contained in:
Hypolite Petovan 2022-01-19 09:49:56 -05:00
parent 5c2071a72b
commit 34437e368f

View File

@ -119,11 +119,13 @@ function langfilter_prepare_body_content_filter(App $a, &$hook_data)
return; return;
} }
if (!empty($hook_data['item']['rendered-html'])) { $naked_body = strip_tags(
$naked_body = strip_tags($hook_data['item']['rendered-html']); $hook_data['item']['rendered-html']
} else { ??''?: // Equivalent of !empty()
$naked_body = BBCode::toPlaintext($hook_data['item']['body'], false); BBCode::convert($hook_data['item']['body'], false, BBCode::INTERNAL, true)
} );
$naked_body = preg_replace('#\s+#', ' ', trim($naked_body));
// Don't filter if body length is below minimum // Don't filter if body length is below minimum
$minlen = DI::pConfig()->get(local_user(), 'langfilter', 'minlength', 32); $minlen = DI::pConfig()->get(local_user(), 'langfilter', 'minlength', 32);