From f43355855cb838c138f046fff3f95f60fbed117f Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 21 Aug 2014 11:47:59 -0700 Subject: [PATCH] Add `bin/files compact` for sharing file data storage Summary: Fixes T5912. When we write files, we attempt to share storage if two files have the same content. In some cases, we may not share storage. Examples include: - Files migrated with `bin/files migrate` (it's simpler not to try to dedupe them). - Old files, from before storage was sharable (the mechanism did not exist). - Files broken by the bug fixed in T5912. Add a script to compact files by pointing files with the same content hash at the same file contnet. In the particular case of files broken by the bug in T5912, we know the hash of the file's content and will only point them at a file that we can load the data for, so this fixes them. Compaction is not hugely useful in general, but this script isn't too complex and the ability to fix damage from the bug in T5912 is desirable. We could remove this capability eventually. Test Plan: - Ran `files compact --all --dry-run` and sanity checked a bunch of the duplicates for actually being duplicates. - Migrated individual files with `files compact Fnnn --trace` and verified the storage compacted and all files survived the process. - Verified unused storage was correctly destroyed after removing the last reference to it. Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T5912 Differential Revision: https://secure.phabricator.com/D10327 --- src/__phutil_library_map__.php | 2 + ...bricatorFilesManagementCompactWorkflow.php | 133 ++++++++++++++++++ .../files/storage/PhabricatorFile.php | 4 +- 3 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 src/applications/files/management/PhabricatorFilesManagementCompactWorkflow.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index ef3dd661a7..2cf06ba8a8 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -1578,6 +1578,7 @@ phutil_register_library_map(array( 'PhabricatorFileUploadException' => 'applications/files/exception/PhabricatorFileUploadException.php', 'PhabricatorFilesApplication' => 'applications/files/application/PhabricatorFilesApplication.php', 'PhabricatorFilesConfigOptions' => 'applications/files/config/PhabricatorFilesConfigOptions.php', + 'PhabricatorFilesManagementCompactWorkflow' => 'applications/files/management/PhabricatorFilesManagementCompactWorkflow.php', 'PhabricatorFilesManagementEnginesWorkflow' => 'applications/files/management/PhabricatorFilesManagementEnginesWorkflow.php', 'PhabricatorFilesManagementMigrateWorkflow' => 'applications/files/management/PhabricatorFilesManagementMigrateWorkflow.php', 'PhabricatorFilesManagementPurgeWorkflow' => 'applications/files/management/PhabricatorFilesManagementPurgeWorkflow.php', @@ -4430,6 +4431,7 @@ phutil_register_library_map(array( 'PhabricatorFileUploadException' => 'Exception', 'PhabricatorFilesApplication' => 'PhabricatorApplication', 'PhabricatorFilesConfigOptions' => 'PhabricatorApplicationConfigOptions', + 'PhabricatorFilesManagementCompactWorkflow' => 'PhabricatorFilesManagementWorkflow', 'PhabricatorFilesManagementEnginesWorkflow' => 'PhabricatorFilesManagementWorkflow', 'PhabricatorFilesManagementMigrateWorkflow' => 'PhabricatorFilesManagementWorkflow', 'PhabricatorFilesManagementPurgeWorkflow' => 'PhabricatorFilesManagementWorkflow', diff --git a/src/applications/files/management/PhabricatorFilesManagementCompactWorkflow.php b/src/applications/files/management/PhabricatorFilesManagementCompactWorkflow.php new file mode 100644 index 0000000000..63f44228fe --- /dev/null +++ b/src/applications/files/management/PhabricatorFilesManagementCompactWorkflow.php @@ -0,0 +1,133 @@ +setName('compact') + ->setSynopsis( + pht( + 'Merge identical files to share the same storage. In some cases, '. + 'this can repair files with missing data.')) + ->setArguments( + array( + array( + 'name' => 'dry-run', + 'help' => pht('Show what would be compacted.'), + ), + array( + 'name' => 'all', + 'help' => pht('Compact all files.'), + ), + array( + 'name' => 'names', + 'wildcard' => true, + ), + )); + } + + public function execute(PhutilArgumentParser $args) { + $console = PhutilConsole::getConsole(); + + $iterator = $this->buildIterator($args); + if (!$iterator) { + throw new PhutilArgumentUsageException( + pht( + 'Either specify a list of files to compact, or use `--all` '. + 'to compact all files.')); + } + + $is_dry_run = $args->getArg('dry-run'); + + foreach ($iterator as $file) { + $monogram = $file->getMonogram(); + + $hash = $file->getContentHash(); + if (!$hash) { + $console->writeOut( + "%s\n", + pht('%s: No content hash.', $monogram)); + continue; + } + + // Find other files with the same content hash. We're going to point + // them at the data for this file. + $similar_files = id(new PhabricatorFile())->loadAllWhere( + 'contentHash = %s AND id != %d AND + (storageEngine != %s OR storageHandle != %s)', + $hash, + $file->getID(), + $file->getStorageEngine(), + $file->getStorageHandle()); + if (!$similar_files) { + $console->writeOut( + "%s\n", + pht('%s: No other files with the same content hash.', $monogram)); + continue; + } + + // Only compact files into this one if we can load the data. This + // prevents us from breaking working files if we're missing some data. + try { + $data = $file->loadFileData(); + } catch (Exception $ex) { + $data = null; + } + + if ($data === null) { + $console->writeOut( + "%s\n", + pht( + '%s: Unable to load file data; declining to compact.', + $monogram)); + continue; + } + + foreach ($similar_files as $similar_file) { + if ($is_dry_run) { + $console->writeOut( + "%s\n", + pht( + '%s: Would compact storage with %s.', + $monogram, + $similar_file->getMonogram())); + continue; + } + + $console->writeOut( + "%s\n", + pht( + '%s: Compacting storage with %s.', + $monogram, + $similar_file->getMonogram())); + + $old_instance = null; + try { + $old_instance = $similar_file->instantiateStorageEngine(); + $old_engine = $similar_file->getStorageEngine(); + $old_handle = $similar_file->getStorageHandle(); + } catch (Exception $ex) { + // If the old stuff is busted, we just won't try to delete the + // old data. + phlog($ex); + } + + $similar_file + ->setStorageEngine($file->getStorageEngine()) + ->setStorageHandle($file->getStorageHandle()) + ->save(); + + if ($old_instance) { + $similar_file->deleteFileDataIfUnused( + $old_instance, + $old_engine, + $old_handle); + } + } + } + + return 0; + } + +} diff --git a/src/applications/files/storage/PhabricatorFile.php b/src/applications/files/storage/PhabricatorFile.php index 133da30415..44ba9ca21f 100644 --- a/src/applications/files/storage/PhabricatorFile.php +++ b/src/applications/files/storage/PhabricatorFile.php @@ -455,7 +455,7 @@ final class PhabricatorFile extends PhabricatorFileDAO * Destroy stored file data if there are no remaining files which reference * it. */ - private function deleteFileDataIfUnused( + public function deleteFileDataIfUnused( PhabricatorFileStorageEngine $engine, $engine_identifier, $handle) { @@ -644,7 +644,7 @@ final class PhabricatorFile extends PhabricatorFileDAO return $supported; } - protected function instantiateStorageEngine() { + public function instantiateStorageEngine() { return self::buildEngine($this->getStorageEngine()); }