"""Code for merging projects."""
import logging

from bson import ObjectId

from pillar import current_app
from pillar.api.file_storage.moving import move_to_bucket
from pillar.api.utils import random_etag, utcnow

log = logging.getLogger(__name__)


def merge_project(pid_from: ObjectId, pid_to: ObjectId):
    """Move nodes and files from one project to another.

    Note that this may invalidate the nodes, as their node type definition
    may differ between projects.

    :param pid_from: ID of the project to move all contents away from.
    :param pid_to: ID of the project to move the contents into.
    """
    log.info('Moving project contents from %s to %s', pid_from, pid_to)
    assert isinstance(pid_from, ObjectId)
    assert isinstance(pid_to, ObjectId)

    files_coll = current_app.db('files')
    nodes_coll = current_app.db('nodes')

    # Move the files first. Since this requires API calls to an external
    # service, this is more likely to go wrong than moving the nodes.
    query = {'project': pid_from}
    to_move = files_coll.find(query, projection={'_id': 1})
    # Count via the collection: Cursor.count() is deprecated since
    # PyMongo 3.7 and removed in PyMongo 4.
    log.info('Moving %d files to project %s',
             files_coll.count_documents(query), pid_to)
    for file_doc in to_move:
        fid = file_doc['_id']
        log.debug('moving file %s to project %s', fid, pid_to)
        move_to_bucket(fid, pid_to)

    # Mass-move the nodes. Refresh _etag/_updated so Eve and clients
    # notice the change.
    etag = random_etag()
    result = nodes_coll.update_many(
        query,
        {'$set': {'project': pid_to,
                  '_etag': etag,
                  '_updated': utcnow(),
                  }}
    )
    log.info('Moved %d nodes to project %s', result.modified_count, pid_to)
@manager_operations.command
def merge_project(src_proj_url, dest_proj_url):
    """Move all nodes and files from one project to the other.

    Asks for interactive confirmation before doing anything.

    :param src_proj_url: URL (slug) of the project to empty out.
    :param dest_proj_url: URL (slug) of the project to move everything into.
    :return: nonzero exit code on error, None on success.
    """

    from pillar.api.projects import merging

    logging.getLogger('pillar').setLevel(logging.INFO)

    # Parse CLI args and get source and destination projects.
    projs_coll = current_app.db('projects')
    src_proj = projs_coll.find_one({'url': src_proj_url}, projection={'_id': 1})
    dest_proj = projs_coll.find_one({'url': dest_proj_url}, projection={'_id': 1})

    # log.fatal() is the documented don't-use alias; critical() is the real call.
    if src_proj is None:
        log.critical("Source project url='%s' doesn't exist.", src_proj_url)
        return 1
    if dest_proj is None:
        log.critical("Destination project url='%s' doesn't exist.", dest_proj_url)
        return 2
    dpid = dest_proj['_id']
    spid = src_proj['_id']
    if spid == dpid:
        log.critical("Source and destination projects are the same!")
        return 3

    print()
    try:
        input(f'Press ENTER to start moving ALL NODES AND FILES from {spid} to {dpid}')
    except KeyboardInterrupt:
        print()
        print('Aborted')
        return 4
    print()

    merging.merge_project(spid, dpid)
    log.info('Done moving.')
import pathlib
from unittest import mock

from pillar.tests import AbstractPillarTest

from bson import ObjectId


class ProjectMergerTest(AbstractPillarTest):
    """Tests for pillar.api.projects.merging.merge_project()."""

    def setUp(self, **kwargs):
        super().setUp(**kwargs)

        # Two projects, each owned by its own admin user, with auth tokens.
        self.pid_from, self.uid_from = self.create_project_with_admin(
            24 * 'a', project_overrides={'url': 'from-url'})
        self.pid_to, self.uid_to = self.create_project_with_admin(
            24 * 'b', project_overrides={'url': 'to-url'})
        self.create_valid_auth_token(24 * 'a', 'from-token')
        self.create_valid_auth_token(24 * 'b', 'to-token')

    def test_move_happy(self):
        import pillar.tests.common_test_data as ctd
        from pillar.api.file_storage_backends.local import LocalBucket
        from pillar.api.projects import merging

        fid = self._create_file_with_files()
        node_doc = {
            **ctd.EXAMPLE_NODE,
            'picture': fid,
            'properties': {'file': fid},
            'project': self.pid_from,
        }
        nid = self.create_node(node_doc)

        with self.app.app_context():
            merging.merge_project(self.pid_from, self.pid_to)

        db_file = self.get(f'/api/files/{fid}').json()
        db_node = self.get(f'/api/nodes/{nid}').json()

        expected_project = str(self.pid_to)
        self.assertEqual(db_file['project'], expected_project)
        self.assertEqual(db_node['project'], expected_project)

        # The blobs should exist in both the new and the old bucket.
        with self.app.app_context():
            self._assert_files_exist(LocalBucket(self.pid_to), db_file)
            self._assert_files_exist(LocalBucket(self.pid_from), db_file)

    def _assert_files_exist(self, bucket, db_file):
        """Assert that the file and every variation exist in the bucket."""
        variations = [db_file] + db_file['variations']
        for var in variations:
            fname = var['file_path']
            blob = bucket.blob(fname)
            self.assertTrue(blob.exists(),
                            f'blob for file {fname} does not exist in bucket {bucket}')

    def _create_file_with_files(self):
        """Create a file document and the matching blobs on the local backend."""
        import io
        from pillar.api.file_storage_backends.local import LocalBucket

        fid, db_file = self.ensure_file_exists({
            '_id': ObjectId(f'ffff{20 * "a"}'),
            'project': self.pid_from,
            'backend': 'local',
        })

        # Make sure the files on the filesystem exist.
        with self.app.app_context():
            bucket = LocalBucket(db_file['project'])
            for var in [db_file] + db_file['variations']:
                fname = var['file_path']
                blob = bucket.blob(fname)
                blob.create_from_file(io.BytesIO(fname.encode()),
                                      content_type='text/plain')

        return fid