forked from wallabag/wallabag
Use graby ContentExtractor to clean html
It is probably better to reuse Graby's existing functionality to clean the HTML than to build a new system for it.
This commit is contained in:
@@ -47,6 +47,16 @@ class ContentProxy
|
||||
{
|
||||
// ensure content is a bit cleaned up
|
||||
if (!empty($content['html'])) {
|
||||
$extractor = $this->graby->getExtractor();
|
||||
$contentExtracted = $extractor->process($content['html'], $url);
|
||||
|
||||
if ($contentExtracted) {
|
||||
$contentBlock = $extractor->getContent();
|
||||
$contentBlock->normalize();
|
||||
|
||||
$content['html'] = trim($contentBlock->innerHTML);
|
||||
}
|
||||
|
||||
$content['html'] = htmLawed($content['html'], [
|
||||
'safe' => 1,
|
||||
// which means: do not remove iframe elements
|
||||
|
||||
Reference in New Issue
Block a user