Moving the cleanup code to a central place.

This commit is contained in:
Michael Vogel 2014-12-08 22:37:49 +01:00
parent 620ee6be0b
commit 04106ff6f5
2 changed files with 6 additions and 5 deletions

View File

@@ -263,11 +263,6 @@ function scrape_feed($url) {
} }
try { try {
// Cleanup invalid HTML
$doc = new DOMDocument();
@$doc->loadHTML($s);
$s = $doc->saveHTML();
$dom = HTML5_Parser::parse($s); $dom = HTML5_Parser::parse($s);
} catch (DOMException $e) { } catch (DOMException $e) {
logger('scrape_feed: parse error: ' . $e); logger('scrape_feed: parse error: ' . $e);

View File

@@ -17,6 +17,12 @@ class HTML5_Parser
* @return Parsed HTML as DOMDocument * @return Parsed HTML as DOMDocument
*/ */
static public function parse($text, $builder = null) { static public function parse($text, $builder = null) {
// Cleanup invalid HTML
$doc = new DOMDocument();
@$doc->loadHTML($text);
$text = $doc->saveHTML();
$tokenizer = new HTML5_Tokenizer($text, $builder); $tokenizer = new HTML5_Tokenizer($text, $builder);
$tokenizer->parse(); $tokenizer->parse();
return $tokenizer->save(); return $tokenizer->save();