Add a chunking storage engine for files
Summary:
Ref T7149. This isn't complete and isn't active yet, but does basically work. I'll shore it up in the next few diffs.
The new workflow goes like this:
> Client, file.allocate(): I'd like to upload a file with length L, metadata M, and hash H.
Then the server returns `upload` (a boolean) and `filePHID` (a PHID). These mean:
| upload | filePHID | means |
|---|---|---|
| false | false | Server can't accept file. |
| false | true | File data already known, file created from hash. |
| true | false | Just upload normally. |
| true | true | Query chunks to start or resume a chunked upload. |
All but the last case are uninteresting and work like existing uploads with `file.uploadhash` (which we can eventually deprecate).
In the last case:
> Client, file.querychunks(): Give me a list of chunks that I should upload.
This returns all the chunks for the file. Chunks have a start byte, an end byte, and a "complete" flag to indicate that the server already has the data.
Then, the client fills in chunks by sending them:
> Client, file.uploadchunk(): Here is the data for one chunk.
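Concretely, the client loop might look something like this rough `ConduitClient` sketch (authentication setup is omitted, and the parameter and response field names — `contentLength`, `contentHash`, `byteStart`, `byteEnd`, `complete`, `dataEncoding` — are assumptions for illustration, not necessarily the final API):

```php
<?php

// Rough sketch of a chunk-aware client. Parameter and field names
// are assumed for illustration.
$conduit = new ConduitClient('https://phabricator.example.com/api/');

$data = Filesystem::readFile('README');

$result = $conduit->callMethodSynchronous(
  'file.allocate',
  array(
    'name' => 'README',
    'contentLength' => strlen($data),
    'contentHash' => sha1($data), // Claimed hash; the server rehashes it.
  ));

if ($result['upload'] && $result['filePHID']) {
  // Chunked upload: ask the server which chunks it still needs.
  $chunks = $conduit->callMethodSynchronous(
    'file.querychunks',
    array('filePHID' => $result['filePHID']));

  foreach ($chunks as $chunk) {
    if ($chunk['complete']) {
      // The server already has this data; a resumed upload skips it.
      continue;
    }

    $conduit->callMethodSynchronous(
      'file.uploadchunk',
      array(
        'filePHID' => $result['filePHID'],
        'byteStart' => $chunk['byteStart'],
        'dataEncoding' => 'base64',
        'data' => base64_encode(
          substr(
            $data,
            $chunk['byteStart'],
            $chunk['byteEnd'] - $chunk['byteStart'])),
      ));
  }
} else if (!$result['upload'] && $result['filePHID']) {
  // File data already known; a file was created from the hash.
} else if ($result['upload']) {
  // Small file: just upload normally.
}
```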
These things don't work yet or have caveats:
- I haven't tested resume much.
- Files need an "isPartial()" flag for partial uploads, and the UI needs to respect it.
- The JS client needs to become chunk-aware.
- Chunk size is set crazy low to make testing easier.
- There are some debugging flags that I'll remove soon-ish.
- Downloading works, but still buffers the whole file into memory.
- This storage engine is disabled by default (hardcoded as a unit test engine) because it's still sketchy.
- Need some code to remove the "isPartial" flag when the last chunk is uploaded.
- Maybe do checksumming on chunks.
Test Plan:
- Hacked up `arc upload` (see next diff) to be chunk-aware and uploaded a readme in 18 32-byte chunks. Then downloaded it. Got the same file back that I uploaded.
- File UI now shows some basic chunk info for chunked files:
{F336434}
Reviewers: btrahan
Reviewed By: btrahan
Subscribers: joshuaspence, epriestley
Maniphest Tasks: T7149
Differential Revision: https://secure.phabricator.com/D12060
<?php

final class PhabricatorChunkedFileStorageEngine
  extends PhabricatorFileStorageEngine {

  public function getEngineIdentifier() {
    return 'chunks';
  }

  public function getEnginePriority() {
    return 60000;
  }

  /**
   * We can write chunks if we have at least one valid storage engine
   * underneath us.
   *
   * This engine must not also be a chunk engine.
   */
  public function canWriteFiles() {
    return (bool)$this->getWritableEngine();
  }

  public function hasFilesizeLimit() {
    return false;
  }

  public function isChunkEngine() {
    return true;
  }

  public function isTestEngine() {
    // TODO: For now, prevent this from actually being selected.
    return true;
  }

  public function writeFile($data, array $params) {
    // The chunk engine does not support direct writes.
    throw new PhutilMethodNotImplementedException();
  }

  public function readFile($handle) {
    // This is inefficient, but makes the API work as expected.
    $chunks = $this->loadAllChunks($handle, true);

    $buffer = '';
    foreach ($chunks as $chunk) {
      $data_file = $chunk->getDataFile();
      if (!$data_file) {
        throw new Exception(pht('This file data is incomplete!'));
      }

      $buffer .= $data_file->loadFileData();
    }

    return $buffer;
  }

  public function deleteFile($handle) {
    $engine = new PhabricatorDestructionEngine();

    // Load the data files too, so their storage can be destroyed along
    // with the chunk records.
    $chunks = $this->loadAllChunks($handle, true);
    foreach ($chunks as $chunk) {
      $engine->destroyObject($chunk);
    }
  }

  private function loadAllChunks($handle, $need_files) {
    $chunks = id(new PhabricatorFileChunkQuery())
      ->setViewer(PhabricatorUser::getOmnipotentUser())
      ->withChunkHandles(array($handle))
      ->needDataFiles($need_files)
      ->execute();

    $chunks = msort($chunks, 'getByteStart');

    return $chunks;
  }

  /**
   * Compute a chunked file hash for the viewer.
   *
   * We can not currently compute a real hash for chunked file uploads
   * (because no process sees all of the file data).
   *
   * We also can not trust the hash that the user claims to have computed.
   * If we trust the user, they can upload some `evil.exe` and claim it has
   * the same file hash as `good.exe`. When another user later uploads the
   * real `good.exe`, we'll just create a reference to the existing
   * `evil.exe`. Users who download `good.exe` will then receive `evil.exe`.
   *
   * Instead, we rehash the user's claimed hash with account secrets. This
   * allows users to resume file uploads, but not collide with other users.
   *
   * Ideally, we'd like to be able to verify hashes, but this is complicated
   * and time consuming and gives us a fairly small benefit.
   *
   * @param PhabricatorUser Viewing user.
   * @param string Claimed file hash.
   * @return string Rehashed file hash.
   */
  public static function getChunkedHash(PhabricatorUser $viewer, $hash) {
    if (!$viewer->getPHID()) {
      throw new Exception(
        pht('Unable to compute chunked hash without real viewer!'));
    }

    $input = $viewer->getAccountSecret().':'.$hash.':'.$viewer->getPHID();
    return PhabricatorHash::digest($input);
  }
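
  // For example, with hypothetical users $alice and $bob claiming the
  // same $hash, the digest inputs differ (different account secrets and
  // PHIDs), so the results differ and one user's claim can never collide
  // with another user's upload:
  //
  //   getChunkedHash($alice, $hash) !== getChunkedHash($bob, $hash)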

  public function allocateChunks($length, array $properties) {
    $file = PhabricatorFile::newChunkedFile($this, $length, $properties);

    $chunk_size = $this->getChunkSize();

    $handle = $file->getStorageHandle();

    $chunks = array();
    for ($ii = 0; $ii < $length; $ii += $chunk_size) {
      $chunks[] = PhabricatorFileChunk::initializeNewChunk(
        $handle,
        $ii,
        min($ii + $chunk_size, $length));
    }

    $file->openTransaction();
      foreach ($chunks as $chunk) {
        $chunk->save();
      }
      $file->save();
    $file->saveTransaction();

    return $file;
  }

  private function getWritableEngine() {
    // NOTE: We can't just load writable engines or we'll loop forever.
    $engines = PhabricatorFileStorageEngine::loadAllEngines();

    foreach ($engines as $engine) {
      if ($engine->isChunkEngine()) {
        continue;
      }

      if ($engine->isTestEngine()) {
        continue;
      }

      if (!$engine->canWriteFiles()) {
        continue;
      }

      if ($engine->hasFilesizeLimit()) {
        if ($engine->getFilesizeLimit() < $this->getChunkSize()) {
          continue;
        }
      }

      return true;
    }

    return false;
  }

  private function getChunkSize() {
    // TODO: This is an artificially small size to make it easier to
    // test chunking.
    return 32;
  }

}
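
For reference, a standalone sketch of the chunk-boundary math from `allocateChunks()` above: it produces half-open byte ranges, with a short final chunk when the file length isn't a multiple of the chunk size. Only the arithmetic is reproduced here, none of the storage classes:

```php
<?php

// Boundary math from allocateChunks(), using the artificially small
// 32-byte test chunk size on a hypothetical 100-byte file.
$length = 100;
$chunk_size = 32;

for ($ii = 0; $ii < $length; $ii += $chunk_size) {
  printf("chunk: [%d, %d)\n", $ii, min($ii + $chunk_size, $length));
}

// Output:
// chunk: [0, 32)
// chunk: [32, 64)
// chunk: [64, 96)
// chunk: [96, 100)
```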