Statusnet: New intelligent shortening enabled.

This commit is contained in:
Michael 2012-11-06 01:32:48 +01:00
parent 5ff7a555da
commit 40b3b74452
2 changed files with 178 additions and 73 deletions

View File

@ -404,6 +404,107 @@ function short_link($url) {
return $slinky->short();
} };
/**
 * Build a short plain-text notice from an item, optionally followed by one link.
 *
 * The item body (prefixed by the title, when there is one) is converted from
 * BBCode to HTML and then to plain text. A single link is appended according
 * to the priority documented inline below, and the text is cut so that
 * "text link" fits into $max_char.
 *
 * NOTE(review): all lengths are measured with strlen()/substr(), i.e. in
 * bytes. A multi-byte UTF-8 character can therefore be split at the cut
 * position - consider the mb_* functions if mbstring is guaranteed to be
 * available.
 *
 * @param array $b        Item data; the keys 'body', 'title' and 'plink' are read.
 * @param int   $max_char Maximum length of the returned message.
 * @return string The shortened message, with the chosen link appended (if any).
 */
function statusnet_shortenmsg($b, $max_char) {
	require_once("include/bbcode.php");
	require_once("include/html2plain.php");

	// Looking for the first image; the sized form [img=WxH] is tried first
	$image = '';
	if(preg_match("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/is",$b['body'],$matches))
		$image = $matches[3];

	if ($image == '')
		if(preg_match("/\[img\](.*?)\[\/img\]/is",$b['body'],$matches))
			$image = $matches[1];

	// First and last occurrence differ => there is more than one image tag
	$multipleimages = (strpos($b['body'], "[img") != strrpos($b['body'], "[img"));

	// When saved into the database the content is sent through htmlspecialchars
	// That means that we have to decode all image-urls
	$image = htmlspecialchars_decode($image);

	$body = $b["body"];
	if ($b["title"] != "")
		$body = $b["title"]."\n\n".$body;

	// remove the recycle signs and the names since they aren't helpful on StatusNet
	// recycle 1
	$recycle = html_entity_decode("♲ ", ENT_QUOTES, 'UTF-8');
	$body = preg_replace( '/'.$recycle.'\[url\=(\w+.*?)\](\w+.*?)\[\/url\]/i', "\n", $body);
	// recycle 2 (Test)
	$recycle = html_entity_decode("◌ ", ENT_QUOTES, 'UTF-8');
	$body = preg_replace( '/'.$recycle.'\[url\=(\w+.*?)\](\w+.*?)\[\/url\]/i', "\n", $body);

	// At first convert the text to html
	$html = bbcode($body, false, false);

	// Then convert it to plain text
	$msg = trim(html2plain($html, 0, true));
	$msg = html_entity_decode($msg,ENT_QUOTES,'UTF-8');

	// Removing multiple newlines: any run of three or more becomes exactly two
	$msg = preg_replace('/\n{3,}/', "\n\n", $msg);

	// Removing multiple spaces: any run of spaces becomes a single space.
	// (A loop that replaces a single space by a single space never terminates.)
	$msg = preg_replace('/ {2,}/', ' ', $msg);

	$msg = trim($msg);

	$link = '';
	// look for bookmark-bbcode and handle it with priority
	if(preg_match("/\[bookmark\=([^\]]*)\](.*?)\[\/bookmark\]/is",$b['body'],$matches))
		$link = $matches[1];

	$multiplelinks = (strpos($b['body'], "[bookmark") != strrpos($b['body'], "[bookmark"));

	// If there is no bookmark element then take the first link
	if ($link == '') {
		$links = collecturls($html);
		if (sizeof($links) > 0) {
			reset($links);
			$link = current($links);
		}
		$multiplelinks = (sizeof($links) > 1);
	}

	// Choose the single link to append:
	//   several links  -> permalink of the item
	//   exactly one    -> that link
	//   several images -> permalink of the item
	//   exactly one    -> that image
	$msglink = "";
	if ($multiplelinks)
		$msglink = $b["plink"];
	else if ($link != "")
		$msglink = $link;
	else if ($multipleimages)
		$msglink = $b["plink"];
	else if ($image != "")
		$msglink = $image;

	// Nothing to link to, but the text will be cut: point to the full item
	if (($msglink == "") and strlen($msg) > $max_char)
		$msglink = $b["plink"];

	if (strlen($msglink) > 20)
		$msglink = short_link($msglink);

	if (strlen(trim($msg." ".$msglink)) > $max_char) {
		// Reserve room for the link; the byte dropped below pays for the
		// separating space between text and link.
		$msg = substr($msg, 0, $max_char - (strlen($msglink)));
		$lastchar = substr($msg, -1);
		$msg = substr($msg, 0, -1);
		$pos = strrpos($msg, "\n");
		if ($pos > 0)
			// Break at the last line boundary. Cut AT the newline and strip
			// trailing whitespace, so that neither a single "\n" nor a "\n\n"
			// paragraph break costs a character of the preceding text.
			$msg = rtrim(substr($msg, 0, $pos));
		else if ($lastchar != "\n")
			// Cut in the middle of a line: mark the truncation
			$msg = substr($msg, 0, -3)."...";
	}

	$msg = str_replace("\n", " ", $msg);

	// Removing multiple spaces - again (the newline replacement can create new runs)
	$msg = preg_replace('/ {2,}/', ' ', $msg);

	return(trim($msg." ".$msglink));
}
function statusnet_post_hook(&$a,&$b) {
/**
@ -433,9 +534,12 @@ function statusnet_post_hook(&$a,&$b) {
// we can later send to StatusNet. This way we can "gain" some
// information during shortening of potential links but do not
// shorten all the links in a 200000 character long essay.
$intelligent_shortening = get_config('statusnet','intelligent_shortening');
if (!$intelligent_shortening) {
if (! $b['title']=='') {
$tmp = $b['title'].": \n".$b['body'];
// $tmp = substr($tmp, 0, 4*$max_char);
// $tmp = substr($tmp, 0, 4*$max_char);
} else {
$tmp = $b['body']; // substr($b['body'], 0, 3*$max_char);
}
@ -505,6 +609,8 @@ function statusnet_post_hook(&$a,&$b) {
}
$msg = trim($msg);
} else
$msg = statusnet_shortenmsg($b, $max_char);
// and now dent it :-)
if(strlen($msg)) {

View File

@ -282,10 +282,9 @@ function twitter_shortenmsg($b) {
// That means that we have to decode all image-urls
$image = htmlspecialchars_decode($image);
if ($b["title"] == "")
$body = $b["body"];
else
$body = $b["title"];
if ($b["title"] != "")
$body = $b["title"]."\n\n".$body;
// remove the recycle signs and the names since they aren't helpful on twitter
// recycle 1