pillar/tests/test_orphan_files.py
Sybren A. Stüvel b1d69b2304 Added orphan file finder. Works per project or pass 'all' for all projects.
This is quite a heavy thing to run, since it goes over all files of a
project, and then goes over every document in (almost) every collection
which has a property 'project' that's set to the project ID. It then goes
over every document to find all ObjectIDs and removes those from the set
of file ObjectIDs for that project. The remaining ObjectIDs are considered
orphans.

This is a very thorough search, but it doesn't require any knowledge of
the document and collection structure, so it should be future-proof.
2017-09-13 14:05:28 +02:00

97 lines
4.0 KiB
Python

import collections
import datetime
from bson import ObjectId, tz_util
from pymongo.results import UpdateResult
from pillar.tests import AbstractPillarTest
class OrphanFilesTest(AbstractPillarTest):
def test_find_orphan_files(self):
self.enter_app_context()
public1, _ = self.create_project_with_admin(
24 * 'a', project_overrides={'_id': ObjectId(), 'is_private': False})
public2, _ = self.create_project_with_admin(
24 * 'b', project_overrides={'_id': ObjectId(), 'is_private': False})
private1, _ = self.create_project_with_admin(
24 * 'c', project_overrides={'_id': ObjectId(), 'is_private': True})
private2, _ = self.create_project_with_admin(
24 * 'd', project_overrides={'_id': ObjectId(), 'is_private': None})
self.assertEqual(4, self.app.db('projects').count())
# Create files, some orphan and some used.
project_ids = (public1, public2, private1, private2)
file_ids = collections.defaultdict(list)
for pidx, pid in enumerate(project_ids):
for filenum in range(5):
generated_file_id = ObjectId(f'{pidx}{filenum}' + 22 * 'a')
file_id, _ = self.ensure_file_exists({
'_id': generated_file_id,
'project': pid,
'name': f'Test file p{pid} num {filenum}'
})
file_ids[pid].append(file_id)
proj_coll = self.app.db('projects')
for pid in project_ids:
fids = file_ids[pid]
# Use fids[4] as project image
res: UpdateResult = proj_coll.update_one({'_id': pid},
{'$set': {'picture': fids[4]}})
self.assertEqual(1, res.matched_count)
self.assertEqual(1, res.modified_count)
# Asset linking directly to fids[0]
self.create_node({
'_id': ObjectId(),
'project': pid,
'picture': ObjectId('572761f39837730efe8e1210'),
'description': '',
'node_type': 'asset',
'user': ObjectId(24 * 'a'),
'properties': {
'status': 'published',
'content_type': 'image',
'file': fids[0],
},
'name': 'Image direct link',
'_updated': datetime.datetime(2016, 5, 2, 14, 19, 58, 0, tzinfo=tz_util.utc),
'_created': datetime.datetime(2016, 5, 2, 14, 19, 37, 0, tzinfo=tz_util.utc),
'_etag': '6b8589b42c880e3626f43f3e82a5c5b946742687'
})
# Some other node type that has some random field pointing to fids[1].
self.create_node({
'_id': ObjectId(),
'project': pid,
'picture': ObjectId('572761f39837730efe8e1210'),
'description': '',
'node_type': 'totally-unknown',
'user': ObjectId(24 * 'a'),
'properties': {
'status': 'published',
'content_type': 'image',
'file': fids[0],
'random': {'field': [fids[1]]}
},
'name': 'Image random field',
'_updated': datetime.datetime(2016, 5, 2, 14, 19, 58, 0, tzinfo=tz_util.utc),
'_created': datetime.datetime(2016, 5, 2, 14, 19, 37, 0, tzinfo=tz_util.utc),
'_etag': '6b8589b42c880e3626f43f3e82a5c5b946742687'
})
# Completely unknown collection with document that points to fids[2]
unknown_coll = self.app.db('unknown')
unknown_coll.insert_one({
'project': pid,
'random': {'field': [fids[2]]}
})
# fids[3] is an orphan.
from pillar.cli.maintenance import _find_orphan_files
for pid in project_ids:
orphans = _find_orphan_files(pid)
self.assertEqual({file_ids[pid][3]}, orphans)