CLI command for moving all nodes+files to another project

`manage.py operations merge_project src_url dst_url` moves all nodes and
files from the project with `src_url` to the project with `dst_url`.
This also moves soft-deleted files/nodes, as it ignores the _deleted
field. The actual files on the storage backend are copied rather than
moved.

Note that this may invalidate the nodes, as their node type definition
may differ between projects. Since we use direct MongoDB queries the
nodes are moved to the new project anyway. This allows for a
move-first-then-fix approach.
This commit is contained in:
Sybren A. Stüvel 2018-02-13 15:52:21 +01:00
parent cd42ce6cba
commit de801e41e3
3 changed files with 158 additions and 1 deletions

View File

@ -0,0 +1,44 @@
"""Code for merging projects."""
import logging
from bson import ObjectId
from pillar import current_app
from pillar.api.file_storage.moving import move_to_bucket
from pillar.api.utils import random_etag, utcnow
log = logging.getLogger(__name__)
def merge_project(pid_from: ObjectId, pid_to: ObjectId):
    """Move all nodes and files from one project to another.

    Soft-deleted documents are moved too, since the queries below do not
    filter on the ``_deleted`` field.

    Note that this may invalidate the nodes, as their node type definition
    may differ between projects.

    :param pid_from: ID of the project to move contents out of.
    :param pid_to: ID of the project to move contents into.
    :raises TypeError: when either argument is not an ObjectId.
    """
    log.info('Moving project contents from %s to %s', pid_from, pid_to)

    # Validate with real exceptions; `assert` statements are stripped when
    # Python runs with -O, silently disabling the check.
    if not isinstance(pid_from, ObjectId):
        raise TypeError(f'pid_from must be ObjectId, not {type(pid_from)!r}')
    if not isinstance(pid_to, ObjectId):
        raise TypeError(f'pid_to must be ObjectId, not {type(pid_to)!r}')

    files_coll = current_app.db('files')
    nodes_coll = current_app.db('nodes')

    # Move the files first. Since this requires API calls to an external
    # service, this is more likely to go wrong than moving the nodes.
    query = {'project': pid_from}
    to_move = files_coll.find(query, projection={'_id': 1})

    # Cursor.count() is deprecated (and removed in PyMongo 4.x);
    # count on the collection instead.
    log.info('Moving %d files to project %s',
             files_coll.count_documents(query), pid_to)
    for file_doc in to_move:
        fid = file_doc['_id']
        log.debug('moving file %s to project %s', fid, pid_to)
        move_to_bucket(fid, pid_to)

    # Mass-move the nodes in a single update; a fresh etag + _updated
    # timestamp keeps Eve's concurrency control consistent.
    etag = random_etag()
    result = nodes_coll.update_many(
        {'project': pid_from},
        {'$set': {'project': pid_to,
                  '_etag': etag,
                  '_updated': utcnow(),
                  }}
    )
    log.info('Moved %d nodes to project %s', result.modified_count, pid_to)

View File

@ -122,6 +122,44 @@ def move_group_node_project(node_uuid, dest_proj_url, force=False, skip_gcs=Fals
log.info('Done moving.')
@manager_operations.command
def merge_project(src_proj_url, dest_proj_url):
    """Move all nodes and files from one project to the other.

    :param src_proj_url: URL of the project to empty out.
    :param dest_proj_url: URL of the project to receive the contents.
    :return: nonzero exit code on failure, falsy on success.
    """
    from pillar.api.projects import merging

    logging.getLogger('pillar').setLevel(logging.INFO)

    # Resolve both project URLs to their ObjectIDs; nothing else is needed.
    projs_coll = current_app.db('projects')
    src_proj = projs_coll.find_one({'url': src_proj_url}, projection={'_id': 1})
    dest_proj = projs_coll.find_one({'url': dest_proj_url}, projection={'_id': 1})

    # log.critical instead of the deprecated log.fatal alias.
    if src_proj is None:
        log.critical("Source project url='%s' doesn't exist.", src_proj_url)
        return 1
    if dest_proj is None:
        log.critical("Destination project url='%s' doesn't exist.", dest_proj_url)
        return 2

    dpid = dest_proj['_id']
    spid = src_proj['_id']
    if spid == dpid:
        log.critical("Source and destination projects are the same!")
        return 3

    # This is destructive and irreversible; require explicit confirmation.
    print()
    try:
        input(f'Press ENTER to start moving ALL NODES AND FILES from {spid} to {dpid}')
    except KeyboardInterrupt:
        print()
        print('Aborted')
        return 4
    print()

    merging.merge_project(spid, dpid)
    log.info('Done moving.')
@manager_operations.command
def index_users_rebuild():
"""Clear users index, update settings and reindex all users."""
@ -160,7 +198,7 @@ def index_users_rebuild():
try:
future.result()
except Exception:
log.exception('Error updating user %i/%i %s', idx+1, user_count, user_ident)
log.exception('Error updating user %i/%i %s', idx + 1, user_count, user_ident)
else:
log.info('Updated user %i/%i %s', idx + 1, user_count, user_ident)

View File

@ -0,0 +1,75 @@
import pathlib
from unittest import mock
from pillar.tests import AbstractPillarTest
from bson import ObjectId
class ProjectMergerTest(AbstractPillarTest):
    """Tests for moving all content from one project into another."""

    def setUp(self, **kwargs):
        super().setUp(**kwargs)

        # Two projects, each with its own admin user and auth token.
        self.pid_from, self.uid_from = self.create_project_with_admin(
            24 * 'a', project_overrides={'url': 'from-url'})
        self.pid_to, self.uid_to = self.create_project_with_admin(
            24 * 'b', project_overrides={'url': 'to-url'})
        self.create_valid_auth_token(24 * 'a', 'from-token')
        self.create_valid_auth_token(24 * 'b', 'to-token')

    def test_move_happy(self):
        import pillar.tests.common_test_data as ctd
        from pillar.api.file_storage_backends.local import LocalBucket
        from pillar.api.projects import merging

        fid = self._create_file_with_files()

        # A node in the source project that references the file twice.
        node_doc = dict(ctd.EXAMPLE_NODE)
        node_doc['picture'] = fid
        node_doc['properties'] = {'file': fid}
        node_doc['project'] = self.pid_from
        nid = self.create_node(node_doc)

        with self.app.app_context():
            merging.merge_project(self.pid_from, self.pid_to)

        db_file = self.get(f'/api/files/{fid}').json()
        db_node = self.get(f'/api/nodes/{nid}').json()

        # Both documents must now belong to the destination project.
        self.assertEqual(str(self.pid_to), db_file['project'])
        self.assertEqual(str(self.pid_to), db_node['project'])

        # Files are copied rather than moved, so both buckets have them.
        with self.app.app_context():
            self._assert_files_exist(LocalBucket(self.pid_to), db_file)
            self._assert_files_exist(LocalBucket(self.pid_from), db_file)

    def _assert_files_exist(self, bucket, db_file):
        # The main file and each variation must exist in the bucket.
        for var in [db_file] + db_file['variations']:
            fname = var['file_path']
            self.assertTrue(
                bucket.blob(fname).exists(),
                f'blob for file {fname} does not exist in bucket {bucket}')

    def _create_file_with_files(self):
        """Create a file document whose blobs actually exist on disk."""
        import io
        from pillar.api.file_storage_backends.local import LocalBucket

        fid, db_file = self.ensure_file_exists({
            '_id': ObjectId(f'ffff{20 * "a"}'),
            'project': self.pid_from,
            'backend': 'local',
        })

        # Write real bytes for the main file and every variation; each
        # blob's content is simply its own path, encoded.
        with self.app.app_context():
            bucket = LocalBucket(db_file['project'])
            for var in [db_file] + db_file['variations']:
                fname = var['file_path']
                bucket.blob(fname).create_from_file(
                    io.BytesIO(fname.encode()), content_type='text/plain')

        return fid