Introducing file variations

We are ditching the excessively normalised data structure for files.
Now file variations are embedded in the original file document for
improved performance.
This commit is contained in:
Francesco Siddi 2015-11-25 16:16:09 +01:00
parent ff37eeb32a
commit 559589f927
4 changed files with 130 additions and 29 deletions

View File

@ -364,10 +364,14 @@ def parse_attachments(response):
slug_tag = "[{0}]".format(slug) slug_tag = "[{0}]".format(slug)
f = files_collection.find_one({'_id': f['file']}) f = files_collection.find_one({'_id': f['file']})
size = f['size'] if 'size' in f else 'l' size = f['size'] if 'size' in f else 'l'
p = files_collection.find_one({'parent': f['_id'], 'size': size}) # Get the correct variation from the file
l = generate_link(p['backend'], p['file_path'], str(p['project'])) thumbnail = next((item for item in f['variations'] if
item['size'] == size), None)
l = generate_link(f['backend'], thumbnail['file_path'], str(f['project']))
# Build Markdown img string # Build Markdown img string
l = '![{0}]({1} "{2}")'.format(slug, l, f['name']) l = '![{0}]({1} "{2}")'.format(slug, l, f['name'])
# Parse the content of the file and replace the attachment
# tag with the actual image link
field_content = field_content.replace(slug_tag, l) field_content = field_content.replace(slug_tag, l)
response[field_name] = field_content response[field_name] = field_content
@ -424,7 +428,12 @@ def generate_link(backend, file_path, project_id=None):
def before_returning_file(response): def before_returning_file(response):
# TODO: add project id to all files # TODO: add project id to all files
project_id = None if 'project' not in response else str(response['project']) project_id = None if 'project' not in response else str(response['project'])
response['link'] = generate_link(response['backend'], response['file_path'], project_id) response['link'] = generate_link(
response['backend'], response['file_path'], project_id)
if 'variations' in response:
for variation in response['variations']:
variation['link'] = generate_link(
response['backend'], variation['file_path'], project_id)
def before_returning_files(response): def before_returning_files(response):
for item in response['_items']: for item in response['_items']:

View File

@ -7,6 +7,7 @@ from flask import Blueprint
from flask import abort from flask import abort
from flask import jsonify from flask import jsonify
from flask import send_from_directory from flask import send_from_directory
from eve.methods.put import put_internal
from application import app from application import app
from application import post_item from application import post_item
from application.utils.imaging import generate_local_thumbnails from application.utils.imaging import generate_local_thumbnails
@ -48,6 +49,11 @@ def browse_gcs(bucket_name, subdir, file_path=None):
#@file_storage.route('/build_thumbnails/<path:file_path>') #@file_storage.route('/build_thumbnails/<path:file_path>')
def build_thumbnails(file_path=None, file_id=None): def build_thumbnails(file_path=None, file_id=None):
"""Given a file path or file ObjectId pointing to an image file, fetch it
and generate a set of predefined variations (using generate_local_thumbnails).
Return a list of dictionaries containing the various image properties and
variation properties.
"""
files_collection = app.data.driver.db['files'] files_collection = app.data.driver.db['files']
if file_path: if file_path:
# Search file with backend "pillar" and path=file_path # Search file with backend "pillar" and path=file_path
@ -58,8 +64,6 @@ def build_thumbnails(file_path=None, file_id=None):
file_ = files_collection.find_one({"_id": ObjectId(file_id)}) file_ = files_collection.find_one({"_id": ObjectId(file_id)})
file_path = file_['name'] file_path = file_['name']
user = file_['user']
file_full_path = os.path.join(app.config['SHARED_DIR'], file_path[:2], file_path) file_full_path = os.path.join(app.config['SHARED_DIR'], file_path[:2], file_path)
# Does the original file exist? # Does the original file exist?
if not os.path.isfile(file_full_path): if not os.path.isfile(file_full_path):
@ -68,17 +72,14 @@ def build_thumbnails(file_path=None, file_id=None):
thumbnails = generate_local_thumbnails(file_full_path, thumbnails = generate_local_thumbnails(file_full_path,
return_image_stats=True) return_image_stats=True)
file_variations = []
for size, thumbnail in thumbnails.iteritems(): for size, thumbnail in thumbnails.iteritems():
if thumbnail.get('exists'): if thumbnail.get('exists'):
# If a thumbnail was already made, we just continue # If a thumbnail was already made, we just continue
continue continue
basename = os.path.basename(thumbnail['file_path']) basename = os.path.basename(thumbnail['file_path'])
root, ext = os.path.splitext(basename) root, ext = os.path.splitext(basename)
file_object = dict( file_variation = dict(
name=root,
#description="Preview of file {0}".format(file_['name']),
user=user,
parent=file_['_id'],
size=size, size=size,
format=ext[1:], format=ext[1:],
width=thumbnail['width'], width=thumbnail['width'],
@ -86,16 +87,12 @@ def build_thumbnails(file_path=None, file_id=None):
content_type=thumbnail['content_type'], content_type=thumbnail['content_type'],
length=thumbnail['length'], length=thumbnail['length'],
md5=thumbnail['md5'], md5=thumbnail['md5'],
filename=basename,
backend=file_['backend'],
file_path=basename, file_path=basename,
project=file_['project']) )
# Commit to database
r = post_item('files', file_object)
if r[0]['_status'] == 'ERR':
return "", r[3] # The error code from the request
return "", 200 file_variations.append(file_variation)
return file_variations
@file_storage.route('/file', methods=['POST']) @file_storage.route('/file', methods=['POST'])
@ -120,14 +117,16 @@ def index(file_name=None):
def process_file(src_file): def process_file(src_file):
"""Process the file """Process the file
""" """
file_id = src_file['_id']
# Remove properties that do not belong in the collection
internal_fields = ['_id', '_etag', '_updated', '_created', '_status']
for field in internal_fields:
src_file.pop(field, None)
files_collection = app.data.driver.db['files'] files_collection = app.data.driver.db['files']
file_abs_path = os.path.join(app.config['SHARED_DIR'], src_file['name'][:2], src_file['name']) file_abs_path = os.path.join(app.config['SHARED_DIR'], src_file['name'][:2], src_file['name'])
src_file['length'] = os.stat(file_abs_path).st_size src_file['length'] = os.stat(file_abs_path).st_size
# Remove properties that do not belong in the collection
src_file.pop('_status', None)
src_file.pop('_links', None)
content_type = src_file['content_type'].split('/') content_type = src_file['content_type'].split('/')
src_file['format'] = content_type[1] src_file['format'] = content_type[1]
mime_type = content_type[0] mime_type = content_type[0]
@ -140,8 +139,7 @@ def process_file(src_file):
src_file['width'] = res[0] src_file['width'] = res[0]
src_file['height'] = res[1] src_file['height'] = res[1]
# Generate previews # Generate previews
src_file['variations'] = build_thumbnails(file_id=file_id)
build_thumbnails(file_id=src_file['_id'])
elif mime_type == 'video': elif mime_type == 'video':
pass pass
# Generate variations # Generate variations
@ -172,7 +170,7 @@ def process_file(src_file):
#description="Preview of file {0}".format(file_['name']), #description="Preview of file {0}".format(file_['name']),
project=src_file['project'], project=src_file['project'],
user=src_file['user'], user=src_file['user'],
parent=src_file['_id'], parent=file_id,
size="{0}p".format(res_y), size="{0}p".format(res_y),
duration=video_duration, duration=video_duration,
format=v, format=v,
@ -220,12 +218,9 @@ def process_file(src_file):
push_to_storage(str(src_file['project']), sync_path) push_to_storage(str(src_file['project']), sync_path)
else: else:
sync_path = file_abs_path sync_path = file_abs_path
#remote_storage_sync(sync_path)
# Update the original file with additional info, e.g. image resolution # Update the original file with additional info, e.g. image resolution
file_asset = files_collection.find_and_modify( r = put_internal('files', src_file, **{'_id': ObjectId(file_id)})
{'_id': src_file['_id']},
src_file)
def delete_file(file_item): def delete_file(file_item):

View File

@ -241,5 +241,64 @@ def add_parent_to_nodes():
print "Edited {0} nodes".format(nodes_index) print "Edited {0} nodes".format(nodes_index)
print "Orphan {0} nodes".format(nodes_orphan) print "Orphan {0} nodes".format(nodes_orphan)
@manager.command
def embed_children_in_files():
    """Embed children file objects into their parent.

    Walks every document in the ``files`` collection. For each document
    that has a ``parent`` field, the variation-related properties are
    copied into a dict that is appended to the parent's ``variations``
    list, and the parent is saved through ``put_internal``.

    A child whose parent document cannot be found is removed from the
    collection. Children that were embedded are NOT deleted here; see
    ``remove_children_files`` for that.

    If ``put_internal`` reports an error status, the validation issues,
    the parent and the variation are printed and the command stops.
    """
    # Properties copied from a child file document into a variation
    variation_properties = ['content_type', 'duration', 'size', 'format',
                            'width', 'height', 'length', 'md5', 'file_path']
    # Internal fields that would cause a validation error on put_internal
    internal_fields = ['_id', '_etag', '_updated', '_created']

    files_collection = app.data.driver.db['files']
    for f in files_collection.find():
        # Give some feedback
        print("processing {0}".format(f['_id']))
        file_id = f['_id']
        # Proceed only if the file is a child
        if 'parent' not in f:
            continue

        # Get the parent file
        parent = files_collection.find_one({'_id': f['parent']})
        if parent is None:
            # Orphaned child: nothing to embed it into, so drop it
            print("No parent found for {0}".format(file_id))
            files_collection.remove({'_id': file_id})
            continue
        # Only read the id once we know the parent exists (find_one
        # returns None when there is no match)
        parent_id = parent['_id']

        # Build dict with the variation properties present on the child
        variation = dict(
            (name, f[name]) for name in variation_properties if name in f)
        if not variation:
            # The child carries no variation data, nothing to embed
            continue

        # Create the variations list on the parent if needed, then append
        parent.setdefault('variations', []).append(variation)
        # Remove internal fields before handing the document to Eve
        for field in internal_fields:
            parent.pop(field, None)

        result = put_internal('files', parent, **{'_id': parent_id})
        if result[0]['_status'] == 'ERR':
            print(result[0]['_issues'])
            print("PARENT: {0}".format(parent))
            print("VARIATION: {0}".format(variation))
            return
@manager.command
def remove_children_files():
    """Delete every document in the files collection that has a parent field.

    Each deleted document id is printed as feedback.
    """
    files_collection = app.data.driver.db['files']
    for doc in files_collection.find():
        # Documents without a parent are left untouched
        if 'parent' not in doc:
            continue
        child_id = doc['_id']
        # Delete child object
        files_collection.remove({'_id': child_id})
        print("deleted {0}".format(child_id))
if __name__ == '__main__': if __name__ == '__main__':
manager.run() manager.run()

View File

@ -417,6 +417,44 @@ files_schema = {
'embeddable': True 'embeddable': True
}, },
}, },
'variations': { # File variations (used to be children, see above)
'type': 'list',
'schema': {
'type': 'dict',
'schema': {
'content_type': { # MIME type image/png video/mp4
'type': 'string',
'required': True,
},
'duration': {
'type': 'integer',
},
'size': { # xs, s, b, 720p, 2K
'type': 'string'
},
'format': { # human readable format, like mp4, HLS, webm, mov
'type': 'string'
},
'width': { # valid for images and video content_type
'type': 'integer'
},
'height': {
'type': 'integer'
},
'length': { # Size in bytes
'type': 'integer',
'required': True,
},
'md5': {
'type': 'string',
'required': True,
},
'file_path': {
'type': 'string',
},
}
}
},
'previews': { # Deprecated (see comments above) 'previews': { # Deprecated (see comments above)
'type': 'list', 'type': 'list',
'schema': { 'schema': {