Added manage.py command to refresh (almost) expired links.

manage.py refresh_project_links 5672beecc0261b2005ed1a33 -c 4

This refreshes the 4 soonest-expiring links of that project; only links that
expire within the next two hours are considered.
commit ce242e1de3
parent c13fca4f30
Date: 2016-04-01 13:03:27 +02:00

6 changed files with 156 additions and 9 deletions
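For reference, the command is a thin wrapper around the refresh_links_for_project() helper added below. A rough Python equivalent is sketched here (a sketch only; it assumes an application/request context is active, as in the tests further down):

    from application.modules import file_storage

    # Refresh the 4 soonest-expiring links of the project, looking only at
    # links that expire within the next 2 hours (the command's hard-coded
    # 2 * 3600 second window).
    file_storage.refresh_links_for_project(
        project_uuid='5672beecc0261b2005ed1a33',
        chunk_size=4,
        expiry_seconds=2 * 3600)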


@@ -2,9 +2,11 @@ import datetime
 import logging
 import os
 from multiprocessing import Process
+from hashlib import md5
 
 import bson.tz_util
 import eve.utils
+import pymongo
 from bson import ObjectId
 from eve.methods.patch import patch_internal
 from eve.methods.put import put_internal
@@ -320,6 +322,8 @@ def generate_link(backend, file_path, project_id=None, is_public=False):
                                _scheme=app.config['SCHEME'])
     elif backend == 'cdnsun':
         link = hash_file_path(file_path, None)
+    elif backend == 'unittest':
+        link = md5(file_path).hexdigest()
     else:
         link = None
     return link
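The new 'unittest' backend keeps link generation free of external storage services: the "link" is just the MD5 hex digest of the file path, so it is deterministic and cheap to compute in tests. A small illustration (the path is made up, mirroring the test fixtures added later in this commit; Python 2 str, as in the rest of the codebase):

    from hashlib import md5

    # Same computation as the 'unittest' branch above; any string path works.
    link = md5('cafef00ddeadbeef00000000.png').hexdigest()
    # -> a 32-character hex string that only changes when the path changes.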
@@ -423,6 +427,33 @@ def on_pre_get_files(_, lookup):
             _generate_all_links(file_doc, now)
 
 
+def refresh_links_for_project(project_uuid, chunk_size, expiry_seconds):
+    from application import app
+    log.info('Refreshing the first %i links for project %s', chunk_size, project_uuid)
+
+    # Retrieve expired links.
+    files_collection = app.data.driver.db['files']
+
+    now = datetime.datetime.now(tz=bson.tz_util.utc)
+    expire_before = now + datetime.timedelta(seconds=expiry_seconds)
+    log.info('Limiting to links that expire before %s', expire_before)
+
+    to_refresh = files_collection.find(
+        {'project': ObjectId(project_uuid),
+         'link_expires': {'$lt': expire_before},
+         }).sort([('link_expires', pymongo.ASCENDING)]).limit(chunk_size)
+
+    if to_refresh.count() == 0:
+        log.info('No links to refresh.')
+        return
+
+    for file_doc in to_refresh:
+        log.debug('Refreshing links for file %s', file_doc['_id'])
+        _generate_all_links(file_doc, now)
+
+    log.info('Refreshed %i links', min(chunk_size, to_refresh.count()))
+
+
 def setup_app(app, url_prefix):
     app.on_pre_GET_files += on_pre_get_files
     app.on_post_POST_files += post_POST_files
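One detail worth calling out in the hunk above: in the pymongo versions of that era, Cursor.count() ignores limit() unless told otherwise, which is why the final log line clamps the number with min(chunk_size, ...). A sketch of the alternative (not part of this commit):

    # count(with_limit_and_skip=True) respects .limit(chunk_size), so the
    # min() clamp would not be needed.
    refreshed = to_refresh.count(with_limit_and_skip=True)
    log.info('Refreshed %i links', refreshed)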


@@ -643,5 +643,15 @@ def subscribe_node_owners():
         after_inserting_nodes([n])
 
 
+@manager.command
+def refresh_project_links(project, chunk_size=50):
+    """Regenerates almost-expired file links for a certain project."""
+
+    chunk_size = int(chunk_size)  # CLI parameters are passed as strings
+
+    from application.modules import file_storage
+    file_storage.refresh_links_for_project(project, chunk_size, 2 * 3600)
+
+
 if __name__ == '__main__':
     manager.run()
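Usage note: chunk_size defaults to 50, so the -c option is optional; running the command without it refreshes up to 50 of the soonest-expiring links, e.g.:

    manage.py refresh_project_links 5672beecc0261b2005ed1a33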


@@ -406,7 +406,7 @@ files_schema = {
     'backend': {
         'type': 'string',
         'required': True,
-        'allowed': ["attract-web", "pillar", "cdnsun", "gcs"]
+        'allowed': ["attract-web", "pillar", "cdnsun", "gcs", "unittest"]
     },
     'file_path': {
         'type': 'string',


@@ -4,6 +4,7 @@ import sys
 import logging
 import os
 
+from bson import ObjectId
 from eve.tests import TestMinimal
 import pymongo.collection
 from flask.testing import FlaskClient
@@ -48,19 +49,22 @@ class AbstractPillarTest(TestMinimal):
             del sys.modules['application']
 
     def ensure_file_exists(self, file_overrides=None):
+        self.ensure_project_exists()
+
         with self.app.test_request_context():
             files_collection = self.app.data.driver.db['files']
-            projects_collection = self.app.data.driver.db['projects']
             assert isinstance(files_collection, pymongo.collection.Collection)
 
             file = copy.deepcopy(EXAMPLE_FILE)
             if file_overrides is not None:
                 file.update(file_overrides)
 
-            projects_collection.insert_one(EXAMPLE_PROJECT)
             result = files_collection.insert_one(file)
             file_id = result.inserted_id
-            return file_id, file
+            # Re-fetch from the database, so that we're sure we return the same as is stored.
+            # This is necessary as datetimes are rounded by MongoDB.
+            from_db = files_collection.find_one(file_id)
+            return file_id, from_db
 
     def ensure_project_exists(self, project_overrides=None):
         with self.app.test_request_context():
@@ -71,9 +75,12 @@ class AbstractPillarTest(TestMinimal):
             if project_overrides is not None:
                 project.update(project_overrides)
 
-            result = projects_collection.insert_one(project)
-            project_id = result.inserted_id
-            return project_id, project
+            found = projects_collection.find_one(project['_id'])
+            if found is None:
+                result = projects_collection.insert_one(project)
+                return result.inserted_id, project
+
+            return found['_id'], found
 
     def htp_blenderid_validate_unhappy(self):
         """Sets up HTTPretty to mock unhappy validation flow."""


@@ -2,6 +2,7 @@ import datetime
 
 from bson import tz_util, ObjectId
 
+EXAMPLE_PROJECT_ID = ObjectId('5672beecc0261b2005ed1a33')
 
 EXAMPLE_FILE = {u'_id': ObjectId('5672e2c1c379cf0007b31995'),
                 u'_updated': datetime.datetime(2016, 3, 25, 10, 28, 24, tzinfo=tz_util.utc),
@@ -17,7 +18,7 @@ EXAMPLE_FILE = {u'_id': ObjectId('5672e2c1c379cf0007b31995'),
                              u'md5': '--', u'file_path': 'c2a5c897769ce1ef0eb10f8fa1c472bcb8e2d5a4-h.jpg',
                              u'size': 'h'}, ],
                 u'filename': 'brick_dutch_soft_bump.png',
-                u'project': ObjectId('5672beecc0261b2005ed1a33'),
+                u'project': EXAMPLE_PROJECT_ID,
                 u'width': 2048, u'length': 6227670, u'user': ObjectId('56264fc4fa3a250344bd10c5'),
                 u'content_type': 'image/png', u'_etag': '044ce3aede2e123e261c0d8bd77212f264d4f7b0',
                 u'_created': datetime.datetime(2015, 12, 17, 16, 28, 49, tzinfo=tz_util.utc),
@@ -30,7 +31,7 @@ EXAMPLE_FILE = {u'_id': ObjectId('5672e2c1c379cf0007b31995'),
 EXAMPLE_PROJECT = {
     u'_created': datetime.datetime(2015, 12, 17, 13, 22, 56, tzinfo=tz_util.utc),
     u'_etag': u'cc4643e98d3606f87bbfaaa200bfbae941b642f3',
-    u'_id': ObjectId('5672beecc0261b2005ed1a33'),
+    u'_id': EXAMPLE_PROJECT_ID,
     u'_updated': datetime.datetime(2016, 1, 7, 18, 59, 4, tzinfo=tz_util.utc),
     u'category': u'assets',
     u'description': u'Welcome to this curated collection of Blender Institute textures and image resources. This collection is an on-going project, as with each project we create a number of textures based on our own resources (photographs, scans, etc.) or made completely from scratch. At the moment you can find all the textures from the past Open Projects that were deemed re-usable. \r\n\r\nPeople who have contributed to these textures:\r\n\r\nAndrea Weikert, Andy Goralczyk, Basse Salmela, Ben Dansie, Campbell Barton, Enrico Valenza, Ian Hubert, Kjartan Tysdal, Manu J\xe4rvinen, Massimiliana Pulieso, Matt Ebb, Pablo Vazquez, Rob Tuytel, Roland Hess, Sarah Feldlaufer, S\xf6nke M\xe4ter',


@@ -0,0 +1,98 @@
"""Tests chunked refreshing of links."""

import copy
from datetime import datetime, timedelta

from bson import ObjectId, tz_util

from common_test_class import AbstractPillarTest
from common_test_data import EXAMPLE_FILE


class LinkRefreshTest(AbstractPillarTest):
    # noinspection PyMethodOverriding
    def ensure_file_exists(self, file_overrides):
        file_id = file_overrides[u'_id']
        file_overrides.update({
            u'_id': ObjectId(file_id),
            u'name': '%s.png' % file_id,
            u'file_path': '%s.png' % file_id,
            u'backend': 'unittest',
        })
        return super(LinkRefreshTest, self).ensure_file_exists(file_overrides)

    def setUp(self, **kwargs):
        super(LinkRefreshTest, self).setUp(**kwargs)

        self.project_id, self.project = self.ensure_project_exists()
        self.now = datetime.now(tz=tz_util.utc)

        # The first three expiries are already in the past; the last two are
        # 30 and 90 minutes in the future.
        expiry = [datetime(2016, 3, 22, 9, 28, 1, tzinfo=tz_util.utc),
                  datetime(2016, 3, 22, 9, 28, 2, tzinfo=tz_util.utc),
                  datetime(2016, 3, 22, 9, 28, 3, tzinfo=tz_util.utc),
                  self.now + timedelta(minutes=30),
                  self.now + timedelta(minutes=90),
                  ]

        ids_and_files = [self.ensure_file_exists(file_overrides={
            u'_id': 'cafef00ddeadbeef0000000%i' % file_idx,
            u'link_expires': expiry})
            for file_idx, expiry in enumerate(expiry)]

        self.file_id, self.file = zip(*ids_and_files)
        self.file = list(self.file)  # otherwise it's a tuple, which is immutable.

        # Get initial expiries from the database (it has a different precision than datetime).
        self.expiry = [file_doc['link_expires'] for file_doc in self.file]

        # Should be ordered by link expiry
        assert self.file[0]['link_expires'] < self.file[1]['link_expires']
        assert self.file[1]['link_expires'] < self.file[2]['link_expires']
        assert self.file[2]['link_expires'] < self.file[3]['link_expires']
        assert self.file[3]['link_expires'] < self.file[4]['link_expires']

        # Files 0-2 should be expired already
        assert self.file[2]['link_expires'] < self.now

        # Files 3-4 should not be expired yet
        assert self.now < self.file[3]['link_expires']

    def _reload_from_db(self):
        files_collection = self.app.data.driver.db['files']

        for idx, file_id in enumerate(self.file_id):
            self.file[idx] = files_collection.find_one(file_id)

    def test_link_refresh(self):
        hour_from_now = 3600
        validity_seconds = self.app.config['FILE_LINK_VALIDITY']['unittest']
        refreshed_lower_limit = self.now + timedelta(seconds=0.9 * validity_seconds)

        with self.app.test_request_context():
            from application.modules import file_storage

            # First run: refresh files 0 and 1, don't touch 2-4 (due to chunking).
            file_storage.refresh_links_for_project(self.project_id, 2, hour_from_now)
            self._reload_from_db()

            self.assertLess(refreshed_lower_limit, self.file[0]['link_expires'])
            self.assertLess(refreshed_lower_limit, self.file[1]['link_expires'])
            self.assertEqual(self.expiry[2], self.file[2]['link_expires'])
            self.assertEqual(self.expiry[3], self.file[3]['link_expires'])
            self.assertEqual(self.expiry[4], self.file[4]['link_expires'])

            # Second run: refresh files 2 (expired) and 3 (within timedelta).
            file_storage.refresh_links_for_project(self.project_id, 2, hour_from_now)
            self._reload_from_db()

            self.assertLess(refreshed_lower_limit, self.file[0]['link_expires'])
            self.assertLess(refreshed_lower_limit, self.file[1]['link_expires'])
            self.assertLess(refreshed_lower_limit, self.file[2]['link_expires'])
            self.assertLess(refreshed_lower_limit, self.file[3]['link_expires'])
            self.assertEqual(self.expiry[4], self.file[4]['link_expires'])

            # Third run: refresh nothing, file 4 is out of timedelta.
            file_storage.refresh_links_for_project(self.project_id, 2, hour_from_now)
            self._reload_from_db()

            self.assertLess(refreshed_lower_limit, self.file[0]['link_expires'])
            self.assertLess(refreshed_lower_limit, self.file[1]['link_expires'])
            self.assertLess(refreshed_lower_limit, self.file[2]['link_expires'])
            self.assertLess(refreshed_lower_limit, self.file[3]['link_expires'])
            self.assertEqual(self.expiry[4], self.file[4]['link_expires'])