Added CLI command for moving top-level nodes between projects.

Also introduces a slightly nicer way to get the database interface, and
an object-oriented way to allow dependency injection.
This commit is contained in:
2016-09-27 12:16:27 +02:00
parent c64fbf61ba
commit 407aefb9ad
8 changed files with 401 additions and 1 deletions

View File

@@ -12,6 +12,7 @@ import requests
import requests.exceptions
from . import stream_to_gcs, generate_all_links, ensure_valid_link
import pillar.api.utils.gcs
__all__ = ['PrerequisiteNotMetError', 'change_file_storage_backend']
@@ -138,7 +139,8 @@ def fetch_file_from_local(file_doc):
:param file_doc: dict with 'link' key pointing to a path in STORAGE_DIR, and
'content_type' key.
:type file_doc: dict
:rtype: dict
"""
local_file = open(os.path.join(current_app.config['STORAGE_DIR'], file_doc['file_path']), 'rb')
@@ -148,3 +150,42 @@ def fetch_file_from_local(file_doc):
'local_file': local_file
}
return local_finfo
def gcs_move_to_bucket(file_id, dest_project_id, skip_gcs=False):
    """Moves a file from its own bucket to the new project_id bucket.

    :param file_id: ObjectId of the file document to move.
    :param dest_project_id: ObjectId of the project whose bucket receives the file.
    :param skip_gcs: when True only MongoDB is updated and the actual GCS
        copy is skipped (useful for dry runs).
    :raises ValueError: when the file does not exist or is not GCS-backed.
    :raises RuntimeError: when the MongoDB update matched no document.
    """

    files_coll = current_app.db()['files']

    file_doc = files_coll.find_one(file_id)
    if file_doc is None:
        raise ValueError('File with _id: {} not found'.format(file_id))

    # Moving between project buckets is only implemented for the GCS backend.
    if file_doc['backend'] != 'gcs':
        raise ValueError('Only Google Cloud Storage is supported for now.')

    # Copy the file and all of its variations to the destination bucket first.
    if skip_gcs:
        log.warning('NOT ACTUALLY MOVING file %s on GCS, just updating MongoDB', file_id)
    else:
        src_project = file_doc['project']
        pillar.api.utils.gcs.copy_to_bucket(file_doc['file_path'],
                                            src_project, dest_project_id)
        for variation in file_doc.get('variations', []):
            pillar.api.utils.gcs.copy_to_bucket(variation['file_path'],
                                                src_project, dest_project_id)

    # Only touch the document after the move succeeded.
    log.info('Switching file %s to project %s', file_id, dest_project_id)
    update_result = files_coll.update_one({'_id': file_id},
                                          {'$set': {'project': dest_project_id}})
    if update_result.matched_count != 1:
        raise RuntimeError(
            'Unable to update file %s in MongoDB: matched_count=%i; modified_count=%i' % (
                file_id, update_result.matched_count, update_result.modified_count))
    log.info('Switching file %s: matched_count=%i; modified_count=%i',
             file_id, update_result.matched_count, update_result.modified_count)

    # Regenerate the links so they point at the new bucket.
    file_doc['project'] = dest_project_id
    generate_all_links(file_doc, now=datetime.datetime.now(tz=bson.tz_util.utc))

110
pillar/api/nodes/moving.py Normal file
View File

@@ -0,0 +1,110 @@
"""Code for moving around nodes."""
import attr
import flask_pymongo.wrappers
from bson import ObjectId
from pillar import attrs_extra
import pillar.api.file_storage.moving
@attr.s
class NodeMover(object):
    """Moves a node tree (and the files it references) to another project."""

    # Injected database handle, allowing dependency injection in tests.
    db = attr.ib(validator=attr.validators.instance_of(flask_pymongo.wrappers.Database))
    # When True, only MongoDB is updated; the GCS-side move is skipped.
    skip_gcs = attr.ib(default=False, validator=attr.validators.instance_of(bool))
    _log = attrs_extra.log('%s.NodeMover' % __name__)

    def change_project(self, node, dest_proj):
        """Moves a node and children to a new project."""

        assert isinstance(node, dict)
        assert isinstance(dest_proj, dict)

        for to_move in self._children(node):
            self._change_project(to_move, dest_proj)

    def _change_project(self, node, dest_proj):
        """Changes the project of a single node, non-recursively."""

        node_id = node['_id']
        proj_id = dest_proj['_id']
        self._log.info('Moving node %s to project %s', node_id, proj_id)

        # Move every file the node references; remember which file IDs were
        # already handled so a file shared between properties moves only once.
        seen = set()
        props = node['properties']
        self._move_files(seen, dest_proj, self._files(node.get('picture', None)))
        self._move_files(seen, dest_proj, self._files(props, 'file'))
        self._move_files(seen, dest_proj, self._files(props, 'files', 'file'))
        self._move_files(seen, dest_proj,
                         self._files(props, 'attachments', 'files', 'file'))

        # Only switch the node itself after its files have been moved.
        self._log.info('Switching node %s to project %s', node_id, proj_id)
        result = self.db['nodes'].update_one({'_id': node_id},
                                             {'$set': {'project': proj_id}})
        if result.matched_count != 1:
            raise RuntimeError(
                'Unable to update node %s in MongoDB: matched_count=%i; modified_count=%i' % (
                    node_id, result.matched_count, result.modified_count))

    def _move_files(self, moved_files, dest_proj, file_generator):
        """Tries to find all files from the given properties."""

        for file_id in file_generator:
            if file_id in moved_files:
                continue
            moved_files.add(file_id)
            self.move_file(dest_proj, file_id)

    def move_file(self, dest_proj, file_id):
        """Moves a single file to another project"""

        self._log.info('Moving file %s to project %s', file_id, dest_proj['_id'])
        pillar.api.file_storage.moving.gcs_move_to_bucket(file_id, dest_proj['_id'],
                                                          skip_gcs=self.skip_gcs)

    def _files(self, file_ref, *properties):
        """Yields file ObjectIDs."""

        if not file_ref:
            # Degenerate case: None, empty list, empty dict, etc.
            return

        if isinstance(file_ref, ObjectId):
            # A single file reference; there must be no keys left to descend.
            assert not properties
            yield file_ref
            return

        if isinstance(file_ref, list):
            # Recurse into every list element with the same remaining keys.
            for element in file_ref:
                for found in self._files(element, *properties):
                    yield found
            return

        if isinstance(file_ref, dict):
            # Descend one level using the first remaining property key.
            if properties[0] not in file_ref:
                # Silently skip non-existing keys.
                return
            for found in self._files(file_ref[properties[0]], *properties[1:]):
                yield found
            return

        raise TypeError('File ref is of type %s, not implemented' % type(file_ref))

    def _children(self, node):
        """Generator, recursively yields the node and its children."""

        yield node

        for child in self.db['nodes'].find({'parent': node['_id']}):
            # "yield from" only exists in Python 3.3+, so recurse by hand.
            for descendant in self._children(child):
                yield descendant

View File

@@ -169,6 +169,15 @@ class GoogleCloudStorageBucket(object):
blob.content_disposition = u'attachment; filename="{0}"'.format(name)
blob.patch()
def copy_blob(self, blob, to_bucket):
    """Copies the given blob from this bucket to the other bucket.

    Returns the new blob.
    """

    assert isinstance(to_bucket, GoogleCloudStorageBucket)

    # Delegate to the underlying google.cloud bucket objects.
    target = to_bucket.bucket
    return self.bucket.copy_blob(blob, target)
def update_file_name(node):
"""Assign to the CGS blob the same name of the asset node. This way when
@@ -222,3 +231,16 @@ def update_file_name(node):
if 'files' in node['properties']:
for file_props in node['properties']['files']:
_update_name(file_props['file'], file_props)
def copy_to_bucket(file_path, src_project_id, dest_project_id):
    """Copies a file from one bucket to the other."""

    log.info('Copying %s from project bucket %s to %s',
             file_path, src_project_id, dest_project_id)

    # Buckets are addressed by the stringified project ID they belong to.
    source = GoogleCloudStorageBucket(str(src_project_id))
    destination = GoogleCloudStorageBucket(str(dest_project_id))

    blob = source.Get(file_path, to_dict=False)
    source.copy_blob(blob, destination)