Fix images downloading with numeric HTML entity

This commit is contained in:
Simounet 2023-05-29 15:12:04 +02:00
parent d049e3787c
commit 548b610a17
No known key found for this signature in database
GPG key ID: 77D3B7DC794EB770
2 changed files with 24 additions and 5 deletions

View file

@ -86,12 +86,14 @@ class DownloadImages
continue;
}
// if image contains "&" and we can't find it in the html it might be because it's encoded as &amp;
if (false !== stripos($image, '&') && false === stripos($html, $image)) {
$image = str_replace('&', '&', $image);
}
$html = str_replace($image, $newImage, $html);
// if image contains "&" and we can't find it in the html it might be because it's encoded as &amp; or as a numeric entity (&#038;)
if (false !== stripos($image, '&') && false === stripos($html, $image)) {
$imageAmp = str_replace('&', '&', $image);
$html = str_replace($imageAmp, $newImage, $html);
$imageUnicode = str_replace('&', '&', $image);
$html = str_replace($imageUnicode, $newImage, $html);
}
}
return $html;

View file

@ -184,6 +184,23 @@ class DownloadImagesTest extends TestCase
$this->assertStringNotContainsString('f_auto,q_auto', $res, 'Image srcset attribute were not replaced');
}
/**
 * Checks that image URLs whose query-string separator is written as the
 * numeric HTML entity "&#038;" are still rewritten by processHtml().
 *
 * The assertion passes only when no "https://example.com" URL remains in
 * the returned markup, covering both the srcset candidates and the src.
 */
public function testProcessImageWithNumericHtmlEntitySeparator()
{
    // Read the fixture once; every mocked download returns the same JPEG bytes.
    $jpegBytes = file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg');

    // Queue three responses, presumably one per image URL in the markup
    // below (two srcset candidates plus the src attribute).
    $client = new HttpMockClient();
    for ($queued = 0; $queued < 3; ++$queued) {
        $client->addResponse(new Response(200, ['content-type' => 'image/jpeg'], $jpegBytes));
    }

    $logger = new Logger('test', [new TestHandler()]);
    $downloader = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);

    // wordpress.com sites write the "&" separator as "&#038;" rather than "&amp;"
    $processedHtml = $downloader->processHtml(
        123,
        '<img srcset="https://example.com/20191204_133626-scaled.jpg?strip=info&#038;w=600&#038;ssl=1 600w,https://example.com/20191204_133626-scaled.jpg?strip=info&#038;w=900&#038;ssl=1 900w" src="https://example.com/20191204_133626-scaled.jpg?ssl=1"/>',
        'https://example.com/about/'
    );

    $this->assertStringNotContainsString('https://example.com', $processedHtml, 'Image srcset attribute were not replaced');
}
public function testProcessImageWithNullPath()
{
$httpMockClient = new HttpMockClient();