forked from wallabag/wallabag
More robust srcset image attribute handling
Linked to HTMLawed PR https://github.com/kesar/HTMLawed/pull/17
This commit is contained in:
@ -185,7 +185,7 @@ class DownloadImages
|
||||
*
|
||||
* @return array An array of urls
|
||||
*/
|
||||
protected function getSrcsetUrls(Crawler $imagesCrawler)
|
||||
private function getSrcsetUrls(Crawler $imagesCrawler)
|
||||
{
|
||||
$urls = [];
|
||||
$iterator = $imagesCrawler
|
||||
@ -193,9 +193,14 @@ class DownloadImages
|
||||
while ($iterator->valid()) {
|
||||
$srcsetAttribute = $iterator->current()->getAttribute('srcset');
|
||||
if ('' !== $srcsetAttribute) {
|
||||
$srcset = array_map('trim', explode(',', $srcsetAttribute));
|
||||
// URL: one or more characters other than ", ' or whitespace
|
||||
// Separator: zero or more whitespace characters
|
||||
// Must be one or more digits followed by w OR x
|
||||
$pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
|
||||
preg_match_all($pattern, $srcsetAttribute, $matches);
|
||||
$srcset = call_user_func_array('array_merge', $matches);
|
||||
$srcsetUrls = array_map(function ($src) {
|
||||
return explode(' ', $src)[0];
|
||||
return trim(explode(' ', $src, 2)[0]);
|
||||
}, $srcset);
|
||||
$urls = array_merge($srcsetUrls, $urls);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user