File processing

Introduce the creation of assets of type file. This involves making a
node collection entry of type asset, as well as a file collection
entry, plus all the needed variations if the file is an image or a
video. Further, depending on the storage backend (pillar or other), we
synchronise the files there using rsync. Currently this functionality
is available only via pillar-web, since a web interface is needed to
upload the file into a storage folder, which is shared between the two
applications.
This commit is contained in:
Francesco Siddi 2015-09-24 15:45:57 +02:00
parent 71dd30fa75
commit c2e0ae4002
11 changed files with 462 additions and 18 deletions

View File

@ -1,4 +1,4 @@
FROM debian FROM ubuntu
MAINTAINER Francesco Siddi <francesco@blender.org> MAINTAINER Francesco Siddi <francesco@blender.org>
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
@ -9,21 +9,34 @@ git \
nano \ nano \
zlib1g-dev \ zlib1g-dev \
libjpeg-dev \ libjpeg-dev \
software-properties-common
RUN add-apt-repository ppa:mc3man/trusty-media \
&& apt-get update && apt-get install -y \
ffmpeg
RUN mkdir /data RUN mkdir /data \
RUN mkdir /data/www && mkdir /data/www \
RUN mkdir /data/www/pillar && mkdir /data/www/pillar \
RUN mkdir /data/dev && mkdir /data/dev \
RUN mkdir /data/dev/pillar && mkdir /data/dev/pillar \
&& mkdir /storage \
&& mkdir /storage/shared \
&& mkdir /storage/pillar \
&& mkdir /data/config \
&& mkdir /storage/logs
RUN git clone https://github.com/armadillica/pillar.git /data/www/pillar RUN git clone https://github.com/armadillica/pillar.git /data/www/pillar
RUN pip install virtualenv RUN pip install virtualenv \
RUN virtualenv /data/venv && virtualenv /data/venv
RUN . /data/venv/bin/activate && pip install -r /data/www/pillar/requirements.txt RUN . /data/venv/bin/activate && pip install -r /data/www/pillar/requirements.txt
VOLUME /data/dev/pillar VOLUME /data/dev/pillar
VOLUME /data/config
VOLUME /storage/shared
VOLUME /storage/pillar
ENV MONGO_HOST mongo_pillar ENV MONGO_HOST mongo_pillar

View File

@ -295,15 +295,26 @@ def post_GET_user(request, payload):
compute_permissions(json_data['_id'], app.data.driver) compute_permissions(json_data['_id'], app.data.driver)
payload.data = json.dumps(json_data) payload.data = json.dumps(json_data)
app.on_post_GET_users += post_GET_user app.on_post_GET_users += post_GET_user
from modules.file_storage import process_file
def post_POST_files(request, payload):
    """Process a file document right after it has been POSTed.

    The JSON body of the incoming request is handed to ``process_file``,
    which does the necessary processing and further updates the document.

    :param request: the request that created the file document.
    :param payload: the response payload; not used here.
    """
    file_document = request.get_json()
    process_file(file_document)
#app.on_pre_POST_files += pre_POST_files
app.on_post_POST_files += post_POST_files
from utils.cdn import hash_file_path from utils.cdn import hash_file_path
# Hook to check the backend of a file resource, to build an appropriate link # Hook to check the backend of a file resource, to build an appropriate link
# that can be used by the client to retrieve the actual file. # that can be used by the client to retrieve the actual file.
def generate_link(backend, path): def generate_link(backend, path):
if backend == 'pillar': if backend == 'pillar':
link = url_for('file_server.index', file_name=path, _external=True) link = url_for('file_storage.index', file_name=path, _external=True)
elif backend == 'cdnsun': elif backend == 'cdnsun':
link = hash_file_path(path, None) link = hash_file_path(path, None)
else: else:
@ -321,6 +332,7 @@ def before_returning_files(response):
app.on_fetched_item_files += before_returning_file app.on_fetched_item_files += before_returning_file
app.on_fetched_resource_files += before_returning_files app.on_fetched_resource_files += before_returning_files
# The file_server module needs app to be defined # The file_storage module needs app to be defined
from file_server import file_server from modules.file_storage import file_storage
app.register_blueprint(file_server, url_prefix='/file_server') #from modules.file_storage.serve import *
app.register_blueprint(file_storage, url_prefix='/storage')

View File

View File

@ -0,0 +1,192 @@
import os
from multiprocessing import Process
from bson import ObjectId
from flask import request
from flask import Blueprint
from application import app
from application import db
from application import post_item
from application.utils.imaging import generate_local_thumbnails
from application.utils.imaging import get_video_data
from application.utils.imaging import ffmpeg_encode
from application.utils.storage import remote_storage_sync
# Blueprint that groups the file storage routes declared in this module.
file_storage = Blueprint('file_storage', __name__,
template_folder='templates',
static_folder='../../static/storage',)
@file_storage.route('/build_thumbnails/<path:file_path>')
def build_thumbnails(file_path=None, file_id=None):
    """Generate the missing thumbnails of a stored file and register them.

    The source document is looked up in the ``files`` collection by ``path``
    (``file_path``) and/or by ``_id`` (``file_id``); when both are given the
    ``file_id`` lookup wins and the document's ``name`` is used as the
    location on disk. Every thumbnail that is not flagged as already
    existing is posted as a new ``files`` item whose ``parent`` is the source
    document.

    :param file_path: value of the ``path`` field of the source document.
    :param file_id: ``_id`` of the source document.
    :returns: a ``(body, status)`` tuple: 200 on success, 404 when the
        document or the file on disk cannot be found, or the status code
        reported by ``post_item`` when storing a thumbnail fails.
    """
    file_ = None
    if file_path:
        # Search file with backend "pillar" and path=file_path
        file_ = db.files.find_one({"path": "{0}".format(file_path)})
    if file_id:
        file_ = db.files.find_one({"_id": ObjectId(file_id)})
        if file_ is not None:
            file_path = file_['name']

    # No lookup key given, or nothing matched: answer like a missing file
    # instead of failing on an unbound or empty result.
    if file_ is None:
        return "", 404

    user = file_['user']
    file_full_path = os.path.join(app.config['SHARED_DIR'], file_path)
    # Does the original file exist?
    if not os.path.isfile(file_full_path):
        return "", 404

    thumbnails = generate_local_thumbnails(file_full_path,
                                           return_image_stats=True)

    for size, thumbnail in thumbnails.items():
        if thumbnail.get('exists'):
            # If a thumbnail was already made, we just continue
            continue
        basename = os.path.basename(thumbnail['path'])
        root, ext = os.path.splitext(basename)
        # Thumbnails are filed under a folder named after the first two
        # characters of their file name.
        path = os.path.join(basename[:2], basename)
        file_object = dict(
            name=root,
            user=user,
            parent=file_['_id'],
            size=size,
            format=ext[1:],
            width=thumbnail['width'],
            height=thumbnail['height'],
            content_type=thumbnail['content_type'],
            length=thumbnail['length'],
            md5=thumbnail['md5'],
            filename=basename,
            backend='pillar',
            path=path)
        # Commit to database
        r = post_item('files', file_object)
        if r[0]['_status'] == 'ERR':
            return "", r[3]  # The error code from the request

    return "", 200
@file_storage.route('/file', methods=['POST'])
@file_storage.route('/file/<path:file_name>')
def index(file_name=None):
    """Serve a stored file (GET) or store an uploaded one (POST).

    GET ``/file/<file_name>`` returns the file through the blueprint's static
    file handler. POST ``/file`` reads the form field ``name`` and the
    uploaded file ``data`` and saves the upload under
    ``FILE_STORAGE/<first two characters of name>/<name>``.

    :returns: the static file response for GET; ``("{}", 200)`` after a
        successful upload; ``("{}", 400)`` when ``name`` is not a plain file
        name.
    """
    # GET file
    if file_name:
        return file_storage.send_static_file(file_name)

    # POST file
    file_name = request.form['name']
    folder_name = file_name[:2]
    # The name comes straight from the client: accept plain file names only,
    # so that neither the name nor its two-character folder prefix can point
    # outside of FILE_STORAGE (e.g. "../x", "/etc/x" or "..x").
    if not file_name \
            or os.path.basename(file_name) != file_name \
            or folder_name in ('.', '..'):
        return "{}", 400

    file_folder_path = os.path.join(app.config['FILE_STORAGE'],
                                    folder_name)
    try:
        os.mkdir(file_folder_path)
    except OSError:
        # Fine if the folder is already there (possibly created by a
        # concurrent upload); anything else is a real error.
        if not os.path.isdir(file_folder_path):
            raise
    file_path = os.path.join(file_folder_path, file_name)
    request.files['data'].save(file_path)

    return "{}", 200
def _encode_variations(src, variations, res_y):
    """Encode ``src`` into every requested format and sync the results.

    Intended to run in a separate process.

    :param src: absolute path of the source video.
    :param variations: dict mapping a format name to the ``_id`` of the
        ``files`` document created for that variation.
    :param res_y: target vertical resolution passed to ``ffmpeg_encode``.
    """
    for v in variations:
        path = ffmpeg_encode(src, v, res_y)
        if path is None:
            # A failed encode yields no output path: there is nothing to
            # measure or to sync for this variation.
            continue
        # Update size data after encoding
        # (TODO) update status (non existing now)
        variation = db.files.find_one(variations[v])
        variation['length'] = os.stat(path).st_size
        db.files.find_and_modify(
            {'_id': variations[v]},
            variation)
        # rsync the file (this is async)
        remote_storage_sync(path)
    # (TODO) When all encodes are done, delete source file


def process_file(src_file):
    """Complete a freshly created file document and generate its variations.

    The document is enriched with ``length``, ``format`` and ``path``. Images
    additionally get ``width``/``height`` and their thumbnails are built.
    Videos get a ``duration`` (when known), one ``files`` document per
    encoded variation (mp4 and webm), and a background process that performs
    the encoding. Finally the file (or, for non-video files, its containing
    folder) is handed to ``remote_storage_sync`` and the document is written
    back to the ``files`` collection.

    :param src_file: the file document as a dict; it is modified in place
        and must contain ``_id``, ``name``, ``user``, ``content_type``,
        ``_status`` and ``_links``.
    """
    file_abs_path = os.path.join(app.config['SHARED_DIR'], src_file['name'])
    src_file['length'] = os.stat(file_abs_path).st_size
    # Remove properties that do not belong in the collection
    del src_file['_status']
    del src_file['_links']

    content_type = src_file['content_type'].split('/')
    src_file['format'] = content_type[1]
    mime_type = content_type[0]
    src_file['path'] = src_file['name']

    if mime_type == 'image':
        from PIL import Image
        im = Image.open(file_abs_path)
        res = im.size
        src_file['width'] = res[0]
        src_file['height'] = res[1]
        # Generate previews
        build_thumbnails(file_id=src_file['_id'])
    elif mime_type == 'video':
        # Generate variations
        src_video_data = get_video_data(file_abs_path)
        # A falsy result means no video information could be read; in that
        # case no variation is created or encoded.
        if src_video_data:
            video_duration = src_video_data['duration'] or None
            if video_duration:
                src_file['duration'] = video_duration

            # Properly resize the video according to 720p and 1080p
            # resolutions
            res_y = 720 if src_video_data['res_y'] < 1080 else 1080

            # Create variations in database
            variations = {}
            root, ext = os.path.splitext(src_file['name'])
            for v in ('mp4', 'webm'):
                filename = "{0}-{1}p.{2}".format(root, res_y, v)
                file_object = dict(
                    name=os.path.split(filename)[1],
                    user=src_file['user'],
                    parent=src_file['_id'],
                    size="{0}p".format(res_y),
                    duration=video_duration,
                    format=v,
                    width=src_video_data['res_x'],
                    height=src_video_data['res_y'],
                    content_type="video/{0}".format(v),
                    length=0,  # Available after encode
                    md5="",  # Available after encode
                    filename=os.path.split(filename)[1],
                    backend='pillar',
                    path=filename)
                # Remember the ObjectId of the variation document
                variations[v] = db.files.save(file_object)

            # Encoding is slow: run it outside of the request.
            p = Process(target=_encode_variations,
                        args=(file_abs_path, variations, res_y))
            p.start()

    if mime_type != 'video':
        # Sync the whole subfolder
        sync_path = os.path.split(file_abs_path)[0]
    else:
        sync_path = file_abs_path
    remote_storage_sync(sync_path)

    files = app.data.driver.db['files']
    files.find_and_modify(
        {'_id': src_file['_id']},
        src_file)

View File

@ -0,0 +1,78 @@
import os
from bson import ObjectId
from flask import request
from application import app
from application import db
from application import post_item
from application.utils.imaging import generate_local_thumbnails
from application.modules.file_storage import file_storage
@file_storage.route('/build_thumbnails/<path:file_path>')
def build_thumbnails(file_path=None, file_id=None):
    """Generate the missing thumbnails of a stored file and register them.

    The source document is looked up in the ``files`` collection by ``path``
    (``file_path``) and/or by ``_id`` (``file_id``); when both are given the
    ``file_id`` lookup wins and the document's ``name`` is used as the
    location on disk. Every thumbnail that is not flagged as already
    existing is posted as a new ``files`` item whose ``parent`` is the source
    document.

    :param file_path: value of the ``path`` field of the source document.
    :param file_id: ``_id`` of the source document.
    :returns: a ``(body, status)`` tuple: 200 on success, 404 when the
        document or the file on disk cannot be found, or the status code
        reported by ``post_item`` when storing a thumbnail fails.
    """
    file_ = None
    if file_path:
        # Search file with backend "pillar" and path=file_path
        file_ = db.files.find_one({"path": "{0}".format(file_path)})
    if file_id:
        file_ = db.files.find_one({"_id": ObjectId(file_id)})
        if file_ is not None:
            file_path = file_['name']

    # No lookup key given, or nothing matched: answer like a missing file
    # instead of failing on an unbound or empty result.
    if file_ is None:
        return "", 404

    user = file_['user']
    file_full_path = os.path.join(app.config['SHARED_DIR'], file_path)
    # Does the original file exist?
    if not os.path.isfile(file_full_path):
        return "", 404

    thumbnails = generate_local_thumbnails(file_full_path,
                                           return_image_stats=True)

    for size, thumbnail in thumbnails.items():
        if thumbnail.get('exists'):
            # If a thumbnail was already made, we just continue
            continue
        basename = os.path.basename(thumbnail['path'])
        root, ext = os.path.splitext(basename)
        # Thumbnails are filed under a folder named after the first two
        # characters of their file name.
        path = os.path.join(basename[:2], basename)
        file_object = dict(
            name=root,
            user=user,
            parent=file_['_id'],
            size=size,
            format=ext[1:],
            width=thumbnail['width'],
            height=thumbnail['height'],
            content_type=thumbnail['content_type'],
            length=thumbnail['length'],
            md5=thumbnail['md5'],
            filename=basename,
            backend='pillar',
            path=path)
        # Commit to database
        r = post_item('files', file_object)
        if r[0]['_status'] == 'ERR':
            return "", r[3]  # The error code from the request

    return "", 200
@file_storage.route('/file', methods=['POST'])
@file_storage.route('/file/<path:file_name>')
def index(file_name=None):
    """Serve a stored file (GET) or store an uploaded one (POST).

    GET ``/file/<file_name>`` returns the file through the blueprint's static
    file handler. POST ``/file`` reads the form field ``name`` and the
    uploaded file ``data`` and saves the upload under
    ``FILE_STORAGE/<first two characters of name>/<name>``.

    :returns: the static file response for GET; ``("{}", 200)`` after a
        successful upload; ``("{}", 400)`` when ``name`` is not a plain file
        name.
    """
    # GET file
    if file_name:
        return file_storage.send_static_file(file_name)

    # POST file
    file_name = request.form['name']
    folder_name = file_name[:2]
    # The name comes straight from the client: accept plain file names only,
    # so that neither the name nor its two-character folder prefix can point
    # outside of FILE_STORAGE (e.g. "../x", "/etc/x" or "..x").
    if not file_name \
            or os.path.basename(file_name) != file_name \
            or folder_name in ('.', '..'):
        return "{}", 400

    file_folder_path = os.path.join(app.config['FILE_STORAGE'],
                                    folder_name)
    try:
        os.mkdir(file_folder_path)
    except OSError:
        # Fine if the folder is already there (possibly created by a
        # concurrent upload); anything else is a real error.
        if not os.path.isdir(file_folder_path):
            raise
    file_path = os.path.join(file_folder_path, file_name)
    request.files['data'].save(file_path)

    return "{}", 200

View File

@ -72,7 +72,9 @@ def pre_PATCH(request, lookup, data_driver):
def pre_POST(request, data_driver): def pre_POST(request, data_driver):
# Only Post allowed documents # Only Post allowed documents
action = 'POST' action = 'POST'
print (g.get('type_groups_permissions')) #print(g.get('type_groups_permissions'))
print(g.get('groups_permissions'))
print(g.get('world_permissions'))
# Is quering for one specific node # Is quering for one specific node
if action not in g.get('world_permissions') and \ if action not in g.get('world_permissions') and \
action not in g.get('groups_permissions'): action not in g.get('groups_permissions'):

View File

@ -1,4 +1,6 @@
import os import os
import json
import subprocess
from PIL import Image from PIL import Image
from application import app from application import app
@ -110,3 +112,92 @@ def resize_and_crop(img_path, modified_path, size, crop_type='middle'):
Image.ANTIALIAS) Image.ANTIALIAS)
# If the scale is the same, we do not need to crop # If the scale is the same, we do not need to crop
img.save(modified_path, "JPEG") img.save(modified_path, "JPEG")
def get_video_data(filepath):
    """Read duration and resolution of a video file with ffprobe.

    Runs the configured ``BIN_FFPROBE`` with ``-show_streams`` and JSON
    output, and describes the first stream whose ``codec_type`` is
    ``video``.

    :param filepath: path of the file to inspect.
    :returns: a dict with ``duration`` (whole seconds as int, or ``None``
        when it is not available), ``res_x`` and ``res_y``; ``False`` when
        the file has no video stream.
    :raises subprocess.CalledProcessError: when ffprobe exits with an error.
    """
    ffprobe_output = json.loads(
        subprocess.check_output(
            [app.config['BIN_FFPROBE'],
             '-loglevel',
             'error',
             '-show_streams',
             filepath,
             '-print_format',
             'json']))

    # The video stream is not necessarily the first one (audio may come
    # first), so look for it instead of assuming index 0.
    video = None
    for stream in ffprobe_output.get('streams', []):
        if stream.get('codec_type') == 'video':
            video = stream
            break
    if video is None:
        return False

    raw_duration = video.get('duration')
    # If video is webm we can't get the duration (seems to be an ffprobe
    # issue); the field can also simply be absent.
    if video.get('codec_name') == 'vp8' or raw_duration is None:
        duration = None
    else:
        duration = int(float(raw_duration))

    # A stream without aspect ratio information is treated as square.
    if video.get('sample_aspect_ratio', '1:1') != '1:1':
        print('[warning] Pixel aspect ratio is not square!')

    return dict(
        duration=duration,
        res_x=video['width'],
        res_y=video['height'],
    )
def ffmpeg_encode(src, format, res_y=720):
    """Encode a video with ffmpeg into ``format`` at ``res_y`` lines.

    The output is written next to the source as
    ``<source without extension>-<res_y>p.<format>``. Codec settings exist
    for ``mp4`` and ``webm``; for any other value no codec options are
    added to the command. ffmpeg output is silenced unless the ``VERBOSE``
    environment variable is set.

    :param src: path of the source video.
    :param format: target format, ``'mp4'`` or ``'webm'``.
    :param res_y: target height in pixels; the width is scaled to match.
    :returns: the path of the encoded video, or ``None`` when ffmpeg exits
        with a non-zero return code.
    """
    # The specific FFMpeg command, called multiple times
    args = ["-i", src]

    if format == 'mp4':
        # Example mp4 encoding
        # ffmpeg -i INPUT -vcodec libx264 -pix_fmt yuv420p -preset fast -crf 20
        # -acodec libfdk_aac -ab 112k -ar 44100 -movflags +faststart OUTPUT
        args.extend([
            '-threads', '1',
            '-vf', 'scale=-2:{0}'.format(res_y),
            '-vcodec', 'libx264',
            '-pix_fmt', 'yuv420p',
            '-preset', 'fast',
            '-crf', '20',
            '-acodec', 'libfdk_aac', '-ab', '112k', '-ar', '44100',
            '-movflags', '+faststart'])
    elif format == 'webm':
        # Example webm encoding
        # ffmpeg -i INPUT -vcodec libvpx -g 120 -lag-in-frames 16 -deadline good
        # -cpu-used 0 -vprofile 0 -qmax 51 -qmin 11 -slices 4 -b:v 2M -f webm
        args.extend([
            '-vf', 'scale=-2:{0}'.format(res_y),
            '-vcodec', 'libvpx',
            '-g', '120',
            '-lag-in-frames', '16',
            '-deadline', 'good',
            '-cpu-used', '0',
            '-vprofile', '0',
            '-qmax', '51', '-qmin', '11', '-slices', '4', '-b:v', '2M',
            '-f', 'webm'])

    if not os.environ.get('VERBOSE'):
        args.extend(['-loglevel', 'quiet'])

    src_root = os.path.splitext(src)[0]
    dst = "{0}-{1}p.{2}".format(src_root, res_y, format)
    args.append(dst)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Encoding {0} to {1}".format(src, format))
    returncode = subprocess.call([app.config['BIN_FFMPEG']] + args)
    if returncode == 0:
        print("Successfully encoded {0}".format(dst))
    else:
        print("Error during encode")
        print("Code: {0}".format(returncode))
        print("Command: {0}".format(
            app.config['BIN_FFMPEG'] + " " + " ".join(args)))
        dst = None
    # return path of the encoded video
    return dst

View File

@ -0,0 +1,54 @@
import os
import subprocess
#import logging
from application import app
# External executables, read once from the application config at import time.
BIN_FFPROBE = app.config['BIN_FFPROBE']
BIN_FFMPEG = app.config['BIN_FFMPEG']
BIN_SSH = app.config['BIN_SSH']
BIN_RSYNC = app.config['BIN_RSYNC']
def get_sizedata(filepath):
    """Return ``{'size': <size of filepath in bytes>}``."""
    byte_count = int(os.path.getsize(filepath))
    return {'size': byte_count}
def rsync(path, remote_dir=''):
    """Start a background rsync of ``path`` to the configured CDN storage.

    The transfer is launched through ``nohup`` and is not waited for, so the
    function returns as soon as the process has been started. The
    destination is ``<CDN_STORAGE_USER>@<CDN_STORAGE_ADDRESS>:/public/``
    followed by ``remote_dir``; rsync writes its log to
    ``<CDN_SYNC_LOGS>/rsync.log``.

    :param path: local file or folder to transfer.
    :param remote_dir: sub-path appended to ``/public/`` on the remote side.
    """
    DRY_RUN = False
    arguments = ['--verbose', '--ignore-existing', '--recursive',
                 '--human-readable']
    logs_path = app.config['CDN_SYNC_LOGS']
    storage_address = app.config['CDN_STORAGE_ADDRESS']
    user = app.config['CDN_STORAGE_USER']
    rsa_key_path = app.config['CDN_RSA_KEY']

    if DRY_RUN:
        arguments.append('--dry-run')
    folder_arguments = list(arguments)
    if rsa_key_path:
        # NOTE(review): StrictHostKeyChecking=no turns off SSH host key
        # verification; consider a known-hosts file instead.
        folder_arguments.append(
            '-e ' + BIN_SSH + ' -i ' + rsa_key_path + ' -o "StrictHostKeyChecking=no"')
    folder_arguments.append("--log-file=" + logs_path + "/rsync.log")
    folder_arguments.append(path)
    folder_arguments.append(user + "@" + storage_address + ":/public/" + remote_dir)

    # The child process gets its own copies of the descriptors, so our handle
    # on os.devnull can be closed as soon as the process has been spawned.
    with open(os.devnull, 'wb') as devnull:
        subprocess.Popen(['nohup', BIN_RSYNC] + folder_arguments,
                         stdout=devnull, stderr=devnull)
def remote_storage_sync(path):
    """Sync ``path`` to the remote storage; it can be a folder or a file.

    A file is sent to the remote folder named after the first two characters
    of its file name; an existing folder is sent as it is. A path that does
    not exist is ignored.
    """
    if os.path.isfile(path):
        file_name = os.path.basename(path)
        rsync(path, file_name[:2] + '/')
    elif os.path.exists(path):
        rsync(path)

View File

@ -7,6 +7,8 @@ class Config(object):
MAIL_USERNAME = '' MAIL_USERNAME = ''
MAIL_PASSWORD = '' MAIL_PASSWORD = ''
DEFAULT_MAIL_SENDER = '' DEFAULT_MAIL_SENDER = ''
RFC1123_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
class Development(Config): class Development(Config):
FILE_STORAGE = '{0}/application/static/storage'.format( FILE_STORAGE = '{0}/application/static/storage'.format(

View File

@ -914,7 +914,7 @@ def import_data(path):
@manager.command @manager.command
def make_thumbnails(): def make_thumbnails():
from application.file_server import build_thumbnails from application.modules.file_storage import build_thumbnails
files = db.files.find() files = db.files.find()
for f in files: for f in files:
if f['content_type'].split('/')[0] == 'image': if f['content_type'].split('/')[0] == 'image':

View File

@ -9,7 +9,7 @@ RESOURCE_METHODS = ['GET', 'POST', 'DELETE']
# individual items (defaults to read-only item access). # individual items (defaults to read-only item access).
ITEM_METHODS = ['GET', 'PUT', 'DELETE', 'PATCH'] ITEM_METHODS = ['GET', 'PUT', 'DELETE', 'PATCH']
PAGINATION_LIMIT = 999 PAGINATION_LIMIT = 25
# To be implemented on Eve 0.6 # To be implemented on Eve 0.6
# RETURN_MEDIA_AS_URL = True # RETURN_MEDIA_AS_URL = True
@ -281,7 +281,7 @@ files_schema = {
'format': { # human readable format, like mp4, HLS, webm, mov 'format': { # human readable format, like mp4, HLS, webm, mov
'type': 'string' 'type': 'string'
}, },
'width': { # valid for images and video contentType 'width': { # valid for images and video content_type
'type': 'integer' 'type': 'integer'
}, },
'height': { 'height': {
@ -310,7 +310,7 @@ files_schema = {
}, },
'path': { 'path': {
'type': 'string', 'type': 'string',
'required': True, #'required': True,
'unique': True, 'unique': True,
}, },
'previews': { # Deprecated (see comments above) 'previews': { # Deprecated (see comments above)