From de801e41e37cb2d506c74318d902c735dea6c16d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Tue, 13 Feb 2018 15:52:21 +0100 Subject: [PATCH] CLI command for moving all nodes+files to another project `manage.py operations merge_project src_url dst_url` moves all nodes and files from the project with `src_url` to the project with `dst_url`. This also moves soft-deleted files/nodes, as it ignores the _deleted field. The actual files on the storage backend are copied rather than moved. Note that this may invalidate the nodes, as their node type definition may differ between projects. Since we use direct MongoDB queries the nodes are moved to the new project anyway. This allows for a move-first-then-fix approach. --- pillar/api/projects/merging.py | 44 +++++++++++++++ pillar/cli/operations.py | 40 +++++++++++++- tests/test_api/test_project_merging.py | 75 ++++++++++++++++++++++++++ 3 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 pillar/api/projects/merging.py create mode 100644 tests/test_api/test_project_merging.py diff --git a/pillar/api/projects/merging.py b/pillar/api/projects/merging.py new file mode 100644 index 00000000..46e37b37 --- /dev/null +++ b/pillar/api/projects/merging.py @@ -0,0 +1,44 @@ +"""Code for merging projects.""" +import logging + +from bson import ObjectId + +from pillar import current_app +from pillar.api.file_storage.moving import move_to_bucket +from pillar.api.utils import random_etag, utcnow + +log = logging.getLogger(__name__) + + +def merge_project(pid_from: ObjectId, pid_to: ObjectId): + """Move nodes and files from one project to another. + + Note that this may invalidate the nodes, as their node type definition + may differ between projects. 
+ """ + log.info('Moving project contents from %s to %s', pid_from, pid_to) + assert isinstance(pid_from, ObjectId) + assert isinstance(pid_to, ObjectId) + + files_coll = current_app.db('files') + nodes_coll = current_app.db('nodes') + + # Move the files first. Since this requires API calls to an external + # service, this is more likely to go wrong than moving the nodes. + to_move = files_coll.find({'project': pid_from}, projection={'_id': 1}) + log.info('Moving %d files to project %s', to_move.count(), pid_to) + for file_doc in to_move: + fid = file_doc['_id'] + log.debug('moving file %s to project %s', fid, pid_to) + move_to_bucket(fid, pid_to) + + # Mass-move the nodes. + etag = random_etag() + result = nodes_coll.update_many( + {'project': pid_from}, + {'$set': {'project': pid_to, + '_etag': etag, + '_updated': utcnow(), + }} + ) + log.info('Moved %d nodes to project %s', result.modified_count, pid_to) diff --git a/pillar/cli/operations.py b/pillar/cli/operations.py index 3112b696..a8f457cf 100644 --- a/pillar/cli/operations.py +++ b/pillar/cli/operations.py @@ -122,6 +122,44 @@ def move_group_node_project(node_uuid, dest_proj_url, force=False, skip_gcs=Fals log.info('Done moving.') +@manager_operations.command +def merge_project(src_proj_url, dest_proj_url): + """Move all nodes and files from one project to the other.""" + + from pillar.api.projects import merging + + logging.getLogger('pillar').setLevel(logging.INFO) + + # Parse CLI args and get source and destination projects. 
+ projs_coll = current_app.db('projects') + src_proj = projs_coll.find_one({'url': src_proj_url}, projection={'_id': 1}) + dest_proj = projs_coll.find_one({'url': dest_proj_url}, projection={'_id': 1}) + + if src_proj is None: + log.fatal("Source project url='%s' doesn't exist.", src_proj_url) + return 1 + if dest_proj is None: + log.fatal("Destination project url='%s' doesn't exist.", dest_proj_url) + return 2 + dpid = dest_proj['_id'] + spid = src_proj['_id'] + if spid == dpid: + log.fatal("Source and destination projects are the same!") + return 3 + + print() + try: + input(f'Press ENTER to start moving ALL NODES AND FILES from {spid} to {dpid}') + except KeyboardInterrupt: + print() + print('Aborted') + return 4 + print() + + merging.merge_project(spid, dpid) + log.info('Done moving.') + + @manager_operations.command def index_users_rebuild(): """Clear users index, update settings and reindex all users.""" @@ -160,7 +198,7 @@ def index_users_rebuild(): try: future.result() except Exception: - log.exception('Error updating user %i/%i %s', idx+1, user_count, user_ident) + log.exception('Error updating user %i/%i %s', idx + 1, user_count, user_ident) else: log.info('Updated user %i/%i %s', idx + 1, user_count, user_ident) diff --git a/tests/test_api/test_project_merging.py b/tests/test_api/test_project_merging.py new file mode 100644 index 00000000..ff3ad058 --- /dev/null +++ b/tests/test_api/test_project_merging.py @@ -0,0 +1,75 @@ +import pathlib +from unittest import mock + +from pillar.tests import AbstractPillarTest + +from bson import ObjectId + + +class ProjectMergerTest(AbstractPillarTest): + def setUp(self, **kwargs): + super().setUp(**kwargs) + + self.pid_from, self.uid_from = self.create_project_with_admin( + 24 * 'a', project_overrides={'url': 'from-url'}) + self.pid_to, self.uid_to = self.create_project_with_admin( + 24 * 'b', project_overrides={'url': 'to-url'}) + self.create_valid_auth_token(24 * 'a', 'from-token') + self.create_valid_auth_token(24 
* 'b', 'to-token') + + def test_move_happy(self): + import pillar.tests.common_test_data as ctd + from pillar.api.file_storage_backends.local import LocalBucket + + fid = self._create_file_with_files() + nid = self.create_node({ + **ctd.EXAMPLE_NODE, + 'picture': fid, + 'properties': {'file': fid}, + 'project': self.pid_from, + }) + + from pillar.api.projects import merging + + with self.app.app_context(): + merging.merge_project(self.pid_from, self.pid_to) + + db_file = self.get(f'/api/files/{fid}').json() + db_node = self.get(f'/api/nodes/{nid}').json() + + self.assertEqual(db_file['project'], str(self.pid_to)) + self.assertEqual(db_node['project'], str(self.pid_to)) + + # Check the old and new locations of the files + with self.app.app_context(): + self._assert_files_exist(LocalBucket(self.pid_to), db_file) + self._assert_files_exist(LocalBucket(self.pid_from), db_file) + + def _assert_files_exist(self, bucket, db_file): + for var in [db_file] + db_file['variations']: + fname = var['file_path'] + blob = bucket.blob(fname) + self.assertTrue(blob.exists(), + f'blob for file {fname} does not exist in bucket {bucket}') + + def _create_file_with_files(self): + import io + from pillar.api.file_storage_backends.local import LocalBucket + + fid, db_file = self.ensure_file_exists({ + '_id': ObjectId(f'ffff{20 * "a"}'), + 'project': self.pid_from, + 'backend': 'local', + }) + + # Make sure the files on the filesystem exist. + with self.app.app_context(): + bucket = LocalBucket(db_file['project']) + for var in [db_file] + db_file['variations']: + fname = var['file_path'] + + contents = io.BytesIO(fname.encode()) + blob = bucket.blob(fname) + blob.create_from_file(contents, content_type='text/plain') + + return fid