Introducing file variations

We are ditching the excessively normalised data structure for files.
Now file variations are embedded in the original file document for
improved performance.
This commit is contained in:
Francesco Siddi 2015-11-25 16:16:09 +01:00
parent ff37eeb32a
commit 559589f927
4 changed files with 130 additions and 29 deletions

View File

@ -364,10 +364,14 @@ def parse_attachments(response):
slug_tag = "[{0}]".format(slug)
f = files_collection.find_one({'_id': f['file']})
size = f['size'] if 'size' in f else 'l'
p = files_collection.find_one({'parent': f['_id'], 'size': size})
l = generate_link(p['backend'], p['file_path'], str(p['project']))
# Get the correct variation from the file
thumbnail = next((item for item in f['variations'] if
item['size'] == size), None)
l = generate_link(f['backend'], thumbnail['file_path'], str(f['project']))
# Build Markdown img string
l = '![{0}]({1} "{2}")'.format(slug, l, f['name'])
# Parse the content of the file and replace the attachment
# tag with the actual image link
field_content = field_content.replace(slug_tag, l)
response[field_name] = field_content
@ -424,7 +428,12 @@ def generate_link(backend, file_path, project_id=None):
def before_returning_file(response):
    """Add download links to a file document before it is returned.

    Sets ``response['link']`` for the file itself and, when the document
    has a ``variations`` list, a ``link`` entry on every variation. All
    links are built with ``generate_link`` using the file's own backend
    and project id.

    :param response: the file document (dict-like); it is modified in place.
    """
    # TODO: add project id to all files
    project_id = str(response['project']) if 'project' in response else None
    backend = response['backend']
    # Build the link for the file once (it used to be computed twice in a row)
    response['link'] = generate_link(
        backend, response['file_path'], project_id)
    # Variations are embedded in the file document and reuse its backend
    if 'variations' in response:
        for variation in response['variations']:
            variation['link'] = generate_link(
                backend, variation['file_path'], project_id)
def before_returning_files(response):
for item in response['_items']:

View File

@ -7,6 +7,7 @@ from flask import Blueprint
from flask import abort
from flask import jsonify
from flask import send_from_directory
from eve.methods.put import put_internal
from application import app
from application import post_item
from application.utils.imaging import generate_local_thumbnails
@ -48,6 +49,11 @@ def browse_gcs(bucket_name, subdir, file_path=None):
#@file_storage.route('/build_thumbnails/<path:file_path>')
def build_thumbnails(file_path=None, file_id=None):
"""Given a file path or file ObjectId pointing to an image file, fetch it
and generate a set of predefined variations (using generate_local_thumbnails).
Return a list of dictionaries containing the various image properties and
variation properties.
"""
files_collection = app.data.driver.db['files']
if file_path:
# Search file with backend "pillar" and path=file_path
@ -58,8 +64,6 @@ def build_thumbnails(file_path=None, file_id=None):
file_ = files_collection.find_one({"_id": ObjectId(file_id)})
file_path = file_['name']
user = file_['user']
file_full_path = os.path.join(app.config['SHARED_DIR'], file_path[:2], file_path)
# Does the original file exist?
if not os.path.isfile(file_full_path):
@ -68,17 +72,14 @@ def build_thumbnails(file_path=None, file_id=None):
thumbnails = generate_local_thumbnails(file_full_path,
return_image_stats=True)
file_variations = []
for size, thumbnail in thumbnails.iteritems():
if thumbnail.get('exists'):
# If a thumbnail was already made, we just continue
continue
basename = os.path.basename(thumbnail['file_path'])
root, ext = os.path.splitext(basename)
file_object = dict(
name=root,
#description="Preview of file {0}".format(file_['name']),
user=user,
parent=file_['_id'],
file_variation = dict(
size=size,
format=ext[1:],
width=thumbnail['width'],
@ -86,16 +87,12 @@ def build_thumbnails(file_path=None, file_id=None):
content_type=thumbnail['content_type'],
length=thumbnail['length'],
md5=thumbnail['md5'],
filename=basename,
backend=file_['backend'],
file_path=basename,
project=file_['project'])
# Commit to database
r = post_item('files', file_object)
if r[0]['_status'] == 'ERR':
return "", r[3] # The error code from the request
)
return "", 200
file_variations.append(file_variation)
return file_variations
@file_storage.route('/file', methods=['POST'])
@ -120,14 +117,16 @@ def index(file_name=None):
def process_file(src_file):
"""Process the file
"""
file_id = src_file['_id']
# Remove properties that do not belong in the collection
internal_fields = ['_id', '_etag', '_updated', '_created', '_status']
for field in internal_fields:
src_file.pop(field, None)
files_collection = app.data.driver.db['files']
file_abs_path = os.path.join(app.config['SHARED_DIR'], src_file['name'][:2], src_file['name'])
src_file['length'] = os.stat(file_abs_path).st_size
# Remove properties that do not belong in the collection
src_file.pop('_status', None)
src_file.pop('_links', None)
content_type = src_file['content_type'].split('/')
src_file['format'] = content_type[1]
mime_type = content_type[0]
@ -140,8 +139,7 @@ def process_file(src_file):
src_file['width'] = res[0]
src_file['height'] = res[1]
# Generate previews
build_thumbnails(file_id=src_file['_id'])
src_file['variations'] = build_thumbnails(file_id=file_id)
elif mime_type == 'video':
pass
# Generate variations
@ -172,7 +170,7 @@ def process_file(src_file):
#description="Preview of file {0}".format(file_['name']),
project=src_file['project'],
user=src_file['user'],
parent=src_file['_id'],
parent=file_id,
size="{0}p".format(res_y),
duration=video_duration,
format=v,
@ -220,12 +218,9 @@ def process_file(src_file):
push_to_storage(str(src_file['project']), sync_path)
else:
sync_path = file_abs_path
#remote_storage_sync(sync_path)
# Update the original file with additional info, e.g. image resolution
file_asset = files_collection.find_and_modify(
{'_id': src_file['_id']},
src_file)
r = put_internal('files', src_file, **{'_id': ObjectId(file_id)})
def delete_file(file_item):

View File

@ -241,5 +241,64 @@ def add_parent_to_nodes():
print "Edited {0} nodes".format(nodes_index)
print "Orphan {0} nodes".format(nodes_orphan)
@manager.command
def embed_children_in_files():
    """Embed children file objects into their parent.

    Loops over every document of the ``files`` collection. For each document
    that has a ``parent`` field, the variation properties it carries are
    copied into a dict, appended to the parent's ``variations`` list, and the
    parent is saved with ``put_internal``. A child whose parent document can
    not be found is removed from the collection. If saving a parent reports
    an ``ERR`` status, the issues are printed and the command stops.
    """
    # Properties copied from a child file into its variation entry
    properties = ['content_type', 'duration', 'size', 'format', 'width',
                  'height', 'length', 'md5', 'file_path']
    # Internal fields that would cause a validation error when saving
    internal_fields = ['_id', '_etag', '_updated', '_created']
    files_collection = app.data.driver.db['files']

    for f in files_collection.find():
        file_id = f['_id']
        # Give some feedback
        print("processing {0}".format(file_id))
        # Proceed only if the file is a child
        if 'parent' not in f:
            continue

        # Get the parent file. The lookup yields nothing for an orphan child,
        # so the check must come before parent['_id'] is read.
        parent = files_collection.find_one({'_id': f['parent']})
        if not parent:
            print("No parent found for {0}".format(file_id))
            files_collection.remove({'_id': file_id})
            continue
        parent_id = parent['_id']

        # Build dict with the variation properties present on the child
        variation = {}
        for name in properties:
            if name in f:
                variation[name] = f[name]
        # NOTE(review): the parent is assumed to be saved only when a
        # variation was actually built -- confirm against the original file.
        if not variation:
            continue

        # Create the variations list on the parent if needed, then append
        if 'variations' not in parent:
            parent['variations'] = []
        parent['variations'].append(variation)
        # Remove internal fields that would cause a validation error
        for field in internal_fields:
            parent.pop(field, None)

        result = put_internal('files', parent, **{'_id': parent_id})
        if result[0]['_status'] == 'ERR':
            print(result[0]['_issues'])
            print("PARENT: {0}".format(parent))
            print("VARIATION: {0}".format(variation))
            return
@manager.command
def remove_children_files():
    """Delete every document of the files collection that has a parent field."""
    collection = app.data.driver.db['files']
    for doc in collection.find():
        # Only child objects carry a 'parent' key; leave everything else alone
        if 'parent' not in doc:
            continue
        child_id = doc['_id']
        # Delete child object
        collection.remove({'_id': child_id})
        print("deleted {0}".format(child_id))
# Run the manager when this file is executed as a script
if __name__ == '__main__':
manager.run()

View File

@ -417,6 +417,44 @@ files_schema = {
'embeddable': True
},
},
'variations': { # File variations (used to be children, see above)
'type': 'list',
'schema': {
'type': 'dict',
'schema': {
'content_type': { # MIME type image/png video/mp4
'type': 'string',
'required': True,
},
'duration': {
'type': 'integer',
},
'size': { # xs, s, b, 720p, 2K
'type': 'string'
},
'format': { # human readable format, like mp4, HLS, webm, mov
'type': 'string'
},
'width': { # valid for images and video content_type
'type': 'integer'
},
'height': {
'type': 'integer'
},
'length': { # Size in bytes
'type': 'integer',
'required': True,
},
'md5': {
'type': 'string',
'required': True,
},
'file_path': {
'type': 'string',
},
}
}
},
'previews': { # Deprecated (see comments above)
'type': 'list',
'schema': {