From c4a36019392c728e942c813ca8aa3142d690a2a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?=
Date: Fri, 26 Aug 2016 11:25:05 +0200
Subject: [PATCH] Broke file_storage.py up into file_storage/{__init__,moving}.py

---
 .../__init__.py}                  | 68 ---------------
 pillar/api/file_storage/moving.py | 82 +++++++++++++++++++
 pillar/cli.py                     |  2 +-
 3 files changed, 83 insertions(+), 69 deletions(-)
 rename pillar/api/{file_storage.py => file_storage/__init__.py} (92%)
 create mode 100644 pillar/api/file_storage/moving.py

diff --git a/pillar/api/file_storage.py b/pillar/api/file_storage/__init__.py
similarity index 92%
rename from pillar/api/file_storage.py
rename to pillar/api/file_storage/__init__.py
index 80a34ad3..3d335178 100644
--- a/pillar/api/file_storage.py
+++ b/pillar/api/file_storage/__init__.py
@@ -838,74 +838,6 @@ def compute_aggregate_length_items(file_docs):
         compute_aggregate_length(file_doc)
 
 
-def change_file_storage_backend(file_id, dest_backend):
-    """Given a file document, move it to the specified backend (if not already
-    there) and update the document to reflect that.
-    Files on the original backend are not deleted automatically.
-    """
-
-    # Fetch file
-    files_collection = current_app.data.driver.db['files']
-    f = files_collection.find_one(ObjectId(file_id))
-    if f is None:
-        log.warning('File with _id: {} not found'.format(file_id))
-        return
-    # Check that new backend differs from current one
-    if dest_backend == f['backend']:
-        log.warning('Destination backend ({}) matches the current backend, we '
-                    'are not moving the file'.format(dest_backend))
-        return
-    # TODO Check that new backend is allowed (make conf var)
-    # Check that the file has a project
-    if 'project' not in f:
-        log.warning('File document does not have a project')
-        return
-    # Upload file and variations to the new backend
-    move_file_to_backend(f, dest_backend)
-    # Update document to reflect the changes
-
-
-def move_file_to_backend(file_doc, dest_backend):
-    # If the file is not local already, fetch it
-    if file_doc['backend'] != 'local':
-        # TODO ensure that file['link'] is up to date
-        local_file = fetch_file_from_link(file_doc['link'])
-
-    # Upload to GCS
-    if dest_backend == 'gcs':
-        # Filenames on GCS do not contain paths, by our convention
-        internal_fname = os.path.basename(file_doc['file_path'])
-        # TODO check for name collisions
-        stream_to_gcs(file_doc['_id'], local_file['file_size'],
-                      internal_fname=internal_fname,
-                      project_id=str(file_doc['project']),
-                      stream_for_gcs=local_file['local_file'],
-                      content_type=local_file['content_type'])
-
-
-def fetch_file_from_link(link):
-    """Utility to download a file from a remote location and return it with
-    additional info (for upload to a different storage backend).
-    """
-    r = requests.get(link, stream=True)
-    # If the file is not found we will use one from the variations. Original
-    # files might not exists because they were too large to keep.
-    if r.status_code == 404:
-        pass
-    local_file = tempfile.NamedTemporaryFile(
-        dir=current_app.config['STORAGE_DIR'])
-    with open(local_file, 'wb') as f:
-        for chunk in r.iter_content(chunk_size=1024):
-            if chunk:
-                f.write(chunk)
-    file_dict = {
-        'file_size': os.fstat(local_file.fileno()).st_size,
-        'content_type': r.headers['content-type'],
-        'local_file': local_file
-    }
-    return file_dict
-
-
 def setup_app(app, url_prefix):
     app.on_pre_GET_files += on_pre_get_files
 
diff --git a/pillar/api/file_storage/moving.py b/pillar/api/file_storage/moving.py
new file mode 100644
index 00000000..ee5b2ba9
--- /dev/null
+++ b/pillar/api/file_storage/moving.py
@@ -0,0 +1,82 @@
+"""Code for moving files between backends."""
+
+__all__ = ['PrerequisiteNotMetError', 'change_file_storage_backend']
+
+
+class PrerequisiteNotMetError(RuntimeError):
+    """Raised when a file cannot be moved due to unmet prerequisites."""
+
+
+def change_file_storage_backend(file_id, dest_backend):
+    """Given a file document, move it to the specified backend (if not already
+    there) and update the document to reflect that.
+    Files on the original backend are not deleted automatically.
+    """
+
+    # Fetch file document
+    files_collection = current_app.data.driver.db['files']
+    f = files_collection.find_one(ObjectId(file_id))
+    if f is None:
+        raise ValueError('File with _id: {} not found'.format(file_id))
+
+    # Check that new backend differs from current one
+    if dest_backend == f['backend']:
+        log.warning('Destination backend ({}) matches the current backend, we '
+                    'are not moving the file'.format(dest_backend))
+        return
+
+    # TODO Check that new backend is allowed (make conf var)
+
+    # Check that the file has a project; without project, we don't know
+    # which bucket to store the file into.
+    if 'project' not in f:
+        raise PrerequisiteNotMetError('File document {} does not have a project'.format(file_id))
+
+    # Upload file (TODO: and variations) to the new backend
+    move_file_to_backend(f, dest_backend)
+
+    # Update document to reflect the changes
+
+
+def move_file_to_backend(file_doc, dest_backend):
+    # If the file is not local already, fetch it
+    if file_doc['backend'] != 'local':
+        # TODO ensure that file['link'] is up to date
+        local_file = fetch_file_from_link(file_doc['link'])
+
+    # Upload to GCS
+    if dest_backend == 'gcs':
+        # Filenames on GCS do not contain paths, by our convention
+        internal_fname = os.path.basename(file_doc['file_path'])
+        # TODO check for name collisions
+        stream_to_gcs(file_doc['_id'], local_file['file_size'],
+                      internal_fname=internal_fname,
+                      project_id=str(file_doc['project']),
+                      stream_for_gcs=local_file['local_file'],
+                      content_type=local_file['content_type'])
+
+
+def fetch_file_from_link(link):
+    """Utility to download a file from a remote location and return it with
+    additional info (for upload to a different storage backend).
+    """
+
+    r = requests.get(link, stream=True)
+    r.raise_for_status()
+
+    # If the file is not found we will use one from the variations. Original
+    # files might not exist because they were too large to keep.
+    # TODO: implement that fallback; raise_for_status() above currently
+    # aborts the download on any non-OK status, including 404.
+    local_file = tempfile.NamedTemporaryFile(
+        dir=current_app.config['STORAGE_DIR'])
+    for chunk in r.iter_content(chunk_size=1024):
+        if chunk:
+            local_file.write(chunk)
+    local_file.seek(0)  # Rewind so the file can be streamed to the new backend.
+    file_dict = {
+        'file_size': os.fstat(local_file.fileno()).st_size,
+        'content_type': r.headers['content-type'],
+        'local_file': local_file
+    }
+    return file_dict
diff --git a/pillar/cli.py b/pillar/cli.py
index c36e6849..523ad54a 100644
--- a/pillar/cli.py
+++ b/pillar/cli.py
@@ -398,5 +398,5 @@ def file_change_backend(file_id, dest_backend='gcs'):
     Files on the original backend are not deleted automatically.
     """
 
-    from pillar.api.file_storage import change_file_storage_backend
+    from pillar.api.file_storage.moving import change_file_storage_backend
     change_file_storage_backend(file_id, dest_backend)
\ No newline at end of file
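
As added above, moving.py references current_app, ObjectId, log, os, requests, tempfile and stream_to_gcs without importing any of them, so the module cannot be imported as-is. Below is a minimal sketch of the header it would need; the exact import locations (in particular that stream_to_gcs stays in the file_storage package's __init__.py after the rename) and the logger name are assumptions, not something this patch establishes.

    # Sketch only: assumed module header for pillar/api/file_storage/moving.py.
    import logging
    import os
    import tempfile

    import requests
    from bson import ObjectId
    from flask import current_app

    # Assumption: stream_to_gcs remains in pillar/api/file_storage/__init__.py.
    from . import stream_to_gcs

    log = logging.getLogger(__name__)

Raising ValueError and PrerequisiteNotMetError here, instead of logging a warning and returning as the removed version did, lets the pillar/cli.py caller see why a move was refused rather than having it fail silently.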