friendica/library/langdet/docs/example_web.php

<?php

/**
 * example usage (web)
 *
 * @package Text_LanguageDetect
 * @version CVS: $Id$
 */

// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded
header('Content-type: text/html; charset=utf-8', true);

require_once 'Text/LanguageDetect.php';

$l = new Text_LanguageDetect;
if (isset($_REQUEST['q'])) {
    $q = stripslashes($_REQUEST['q']);
}

?>
<html>
<head>
<title>Text_LanguageDetect demonstration</title>
</head>
<body>
<h2>Text_LanguageDetect</h2>
<?
echo "<small>Supported languages:\n";
$langs = $l->getLanguages();
sort($langs);
foreach ($langs as $lang) {
    echo ucfirst($lang), ', ';
    $i++;
}

echo "<br />total $i</small><br /><br />";

?>
<form method="post">
Enter text to identify language (at least a couple of sentences):<br />
<textarea name="q" wrap="virtual" cols="80" rows="8"><?= $q ?></textarea>
<br />
<input type="submit" value="Submit" />
</form>
<?
if (isset($q) && strlen($q)) {
    $len = $l->utf8strlen($q);
    if ($len < 20) { // this value picked somewhat arbitrarily
        echo "Warning: string not very long ($len chars)<br />\n";
    }

    $result = $l->detectConfidence($q);

    if ($result == null) {
        echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n";
    } else {
        echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n";
    }

    $result = $l->detectUnicodeBlocks($q, false);
    if (!empty($result)) {
        arsort($result);
        echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n<br /><br />";
    }
}

unset($l);

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

?>
</body></html>
language detection library 2012-05-12 04:55:18 -04:00			`<?php`

			`/**`
			`* example usage (web)`
			`*`
			`* @package Text_LanguageDetect`
updated pear LangDetect to version 1.0.0 2017-09-04 03:52:14 -04:00			`* @version CVS: $Id$`
language detection library 2012-05-12 04:55:18 -04:00			`*/`

			`// browsers will encode multi-byte characters wrong unless they think the page is utf8-encoded`
			`header('Content-type: text/html; charset=utf-8', true);`

			`require_once 'Text/LanguageDetect.php';`

			`$l = new Text_LanguageDetect;`
			`if (isset($_REQUEST['q'])) {`
			`$q = stripslashes($_REQUEST['q']);`
			`}`

			`?>`
			`<html>`
			`<head>`
			`<title>Text_LanguageDetect demonstration</title>`
			`</head>`
			`<body>`
			`<h2>Text_LanguageDetect</h2>`
			`<?`
			`echo "<small>Supported languages:\n";`
			`$langs = $l->getLanguages();`
			`sort($langs);`
			`foreach ($langs as $lang) {`
			`echo ucfirst($lang), ', ';`
			`$i++;`
			`}`

			`echo "<br />total $i</small><br /><br />";`

			`?>`
			`<form method="post">`
			`Enter text to identify language (at least a couple of sentences):<br />`
			`<textarea name="q" wrap="virtual" cols="80" rows="8"><?= $q ?></textarea>`
			`<br />`
			`<input type="submit" value="Submit" />`
			`</form>`
			`<?`
			`if (isset($q) && strlen($q)) {`
			`$len = $l->utf8strlen($q);`
			`if ($len < 20) { // this value picked somewhat arbitrarily`
			`echo "Warning: string not very long ($len chars)<br />\n";`
			`}`

			`$result = $l->detectConfidence($q);`

			`if ($result == null) {`
			`echo "Text_LanguageDetect cannot identify this piece of text. <br /><br />\n";`
			`} else {`
			`echo "Text_LanguageDetect thinks this text is written in <b>{$result['language']}</b> ({$result['similarity']}, {$result['confidence']})<br /><br />\n";`
			`}`

			`$result = $l->detectUnicodeBlocks($q, false);`
			`if (!empty($result)) {`
			`arsort($result);`
			`echo "Unicode blocks present: ", join(', ', array_keys($result)), "\n<br /><br />";`
			`}`
			`}`

			`unset($l);`

			`/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */`

			`?>`
			`</body></html>`