Merge pull request #4844 from wallabag/feature/add-clean-downloaded-images

Add a command to clean downloaded images
This commit is contained in:
Jérémy Benoist 2020-12-16 20:21:03 +01:00 committed by GitHub
commit 1bf5419e8c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 131 additions and 25 deletions

View file

@ -7,8 +7,7 @@ parameters:
symfony:
container_xml_path: %rootDir%/../../../var/cache/test/appTestDebugProjectContainer.xml
# https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288
autoload_files:
bootstrapFiles:
- vendor/bin/.phpunit/phpunit-8.3-0/vendor/autoload.php
inferPrivatePropertyTypeFromConstructor: true

View file

@ -0,0 +1,101 @@
<?php
namespace Wallabag\CoreBundle\Command;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Finder\Finder;
/**
 * Console command that removes downloaded-image folders which are no longer
 * associated to an entry.
 *
 * It compares the folders found on disk (third level below the base folder
 * of the download-images service) with the folders computed from the ids of
 * the existing entries, and removes the images and folder of every
 * unmatched one.
 */
class CleanDownloadedImagesCommand extends ContainerAwareCommand
{
    /**
     * Declare the command name, its description and the `--dry-run` option.
     */
    protected function configure()
    {
        $this
            ->setName('wallabag:clean-downloaded-images')
            ->setDescription('Cleans downloaded images which are no more associated to an entry')
            ->addOption(
                'dry-run',
                null,
                InputOption::VALUE_NONE,
                'Do not remove images, just dump counters'
            );
    }

    /**
     * Scan the image folder, work out which folders still belong to an entry
     * and remove the others (unless `--dry-run` is given, in which case only
     * the counters are printed).
     *
     * @param InputInterface  $input  Console input, read for the `dry-run` option
     * @param OutputInterface $output Console output used for the progress messages
     *
     * @return int Always 0
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $io = new SymfonyStyle($input, $output);

        $dryRun = (bool) $input->getOption('dry-run');

        if ($dryRun) {
            $io->text('Dry run mode <info>enabled</info> (no images will be removed)');
        }

        $downloadImages = $this->getContainer()->get('wallabag_core.entry.download_images');
        $baseFolder = $downloadImages->getBaseFolder();

        $io->text('Retrieve existing images');

        // retrieve _existing_ folders in the image folder
        $finder = new Finder();
        $finder
            ->directories()
            ->ignoreDotFiles(true)
            ->depth(2)
            ->in($baseFolder);

        $existingPaths = [];
        foreach ($finder as $file) {
            $existingPaths[] = $file->getFilename();
        }

        $io->text(sprintf(' -> <info>%d</info> images found', \count($existingPaths)));

        $io->text('Retrieve valid folders attached to a user');

        $entries = $this->getContainer()->get('wallabag_core.entry_repository')->findAllEntriesIdByUserId();

        // retrieve _valid_ folders from existing entries
        $validPaths = [];
        foreach ($entries as $entry) {
            // Pass false so the lookup never creates the folder: with the
            // default (true) a folder would be created for every entry and
            // the existence check below could never filter anything out.
            $path = $downloadImages->getRelativePath($entry['id'], false);

            if (!file_exists($baseFolder . '/' . $path)) {
                continue;
            }

            // only store the hash (last path segment), not the full path
            $validPaths[] = explode('/', $path)[2];
        }

        $io->text(sprintf(' -> <info>%d</info> folders found', \count($validPaths)));

        // Flip once so each membership test below is a key lookup instead of
        // a linear scan of all valid folders.
        $validLookup = array_flip($validPaths);

        $deletedCount = 0;

        $io->text('Remove images');

        // check whether each existing folder is valid; if not, remove all its images and the folder itself
        foreach ($existingPaths as $existingPath) {
            if (isset($validLookup[$existingPath])) {
                continue;
            }

            // folders are laid out as <1st char>/<2nd char>/<hash>
            $fullPath = $baseFolder . '/' . $existingPath[0] . '/' . $existingPath[1] . '/' . $existingPath;

            // glob() returns false on error; treat that as "no files"
            $files = glob($fullPath . '/*.*') ?: [];

            if (!$dryRun) {
                array_map('unlink', $files);
                rmdir($fullPath);
            }

            $deletedCount += \count($files);

            $io->text(sprintf('Deleted images in <info>%s</info>: <info>%d</info>', $existingPath, \count($files)));
        }

        $io->success(sprintf('Finished cleaning. %d deleted images', $deletedCount));

        return 0;
    }
}

View file

@ -37,6 +37,11 @@ class DownloadImages
$this->setFolder();
}
/**
 * Get the base folder under which the per-entry image folders are built
 * (getRelativePath() appends its relative path to this value).
 *
 * @return string presumably an absolute filesystem path; its initialisation is not visible here
 */
public function getBaseFolder()
{
return $this->baseFolder;
}
/**
* Process the html and extract images URLs from it.
*
@ -99,7 +104,7 @@ class DownloadImages
* @param string $url Url from where the image were found
* @param string $relativePath Relative local path to saved the image
*
* @return string Relative url to access the image from the web
* @return string|false Relative url to access the image from the web
*/
public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
{
@ -210,6 +215,29 @@ class DownloadImages
@rmdir($folderPath);
}
/**
 * Generate the relative folder where the images of an entry are saved,
 * based on the entry id.
 *
 * The path has the form "<c1>/<c2>/<hash>", where <hash> is the crc32 hash
 * of the id and <c1>/<c2> are the first two characters of that hash.
 *
 * @param int  $entryId      ID of the entry
 * @param bool $createFolder Should we create the folder for the given id?
 *
 * @return string Path relative to the base folder
 */
public function getRelativePath($entryId, $createFolder = true)
{
    // hash() expects a string; cast explicitly instead of relying on
    // implicit int-to-string coercion.
    $hashId = hash('crc32', (string) $entryId);
    $relativePath = $hashId[0] . '/' . $hashId[1] . '/' . $hashId;
    $folderPath = $this->baseFolder . '/' . $relativePath;

    // check the flag first so read-only callers never touch the filesystem
    if ($createFolder && !file_exists($folderPath)) {
        mkdir($folderPath, 0777, true);
    }

    $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]);

    return $relativePath;
}
/**
* Get images urls from the srcset image attribute.
*
@ -254,28 +282,6 @@ class DownloadImages
}
}
/**
 * Compute the relative folder used to store the images of an entry and
 * make sure that folder exists below the base folder.
 *
 * The result has the form "<c1>/<c2>/<hash>", <hash> being the crc32 hash
 * of the entry id and <c1>/<c2> its first two characters.
 *
 * @param int $entryId ID of the entry
 *
 * @return string
 */
private function getRelativePath($entryId)
{
    $crc = hash('crc32', $entryId);
    $subPath = implode('/', [$crc[0], $crc[1], $crc]);
    $absoluteFolder = $this->baseFolder . '/' . $subPath;

    // create the whole folder chain when it is not there yet
    if (false === file_exists($absoluteFolder)) {
        mkdir($absoluteFolder, 0777, true);
    }

    $this->logger->debug('DownloadImages: Folder used for that Entry id', [
        'folder' => $absoluteFolder,
        'entryId' => $entryId,
    ]);

    return $subPath;
}
/**
* Make an $url absolute based on the $base.
*