From c2e0ae40029d18a8ab818c0871b9bab81c315b52 Mon Sep 17 00:00:00 2001 From: Francesco Siddi Date: Thu, 24 Sep 2015 15:45:57 +0200 Subject: [PATCH] File processing Introduce the creation of assets of type file. This involves making a node collection entry of type asset, as well as a file collection entry, plus all the needed variations if the file is an image or a video. Further, depending on the storage backend (pillar or other), we synchronise the files there using rsync. Currently this functionality is available only via pillar-web, since a web interface is needed to upload the file to a storage folder that is shared between the two applications. --- docker/Dockerfile | 29 ++- pillar/application/__init__.py | 22 +- pillar/application/modules/__init__.py | 0 .../modules/file_storage/__init__.py | 192 ++++++++++++++++++ .../application/modules/file_storage/serve.py | 78 +++++++ pillar/application/pre_hooks.py | 4 +- pillar/application/utils/imaging.py | 91 +++++++++ pillar/application/utils/storage.py | 54 +++++ pillar/config.py.example | 2 + pillar/manage.py | 2 +- pillar/settings.py | 6 +- 11 files changed, 462 insertions(+), 18 deletions(-) create mode 100644 pillar/application/modules/__init__.py create mode 100644 pillar/application/modules/file_storage/__init__.py create mode 100644 pillar/application/modules/file_storage/serve.py create mode 100644 pillar/application/utils/storage.py diff --git a/docker/Dockerfile b/docker/Dockerfile index 546e2596..2c70e6fe 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM debian +FROM ubuntu MAINTAINER Francesco Siddi RUN apt-get update && apt-get install -y \ @@ -9,21 +9,34 @@ git \ nano \ zlib1g-dev \ libjpeg-dev \ +software-properties-common + +RUN add-apt-repository ppa:mc3man/trusty-media \ +&& apt-get update && apt-get install -y \ +ffmpeg -RUN mkdir /data -RUN mkdir /data/www -RUN mkdir /data/www/pillar -RUN mkdir /data/dev -RUN mkdir /data/dev/pillar +RUN mkdir /data \ +&& mkdir
/data/www \ +&& mkdir /data/www/pillar \ +&& mkdir /data/dev \ +&& mkdir /data/dev/pillar \ +&& mkdir /storage \ +&& mkdir /storage/shared \ +&& mkdir /storage/pillar \ +&& mkdir /data/config \ +&& mkdir /storage/logs RUN git clone https://github.com/armadillica/pillar.git /data/www/pillar -RUN pip install virtualenv -RUN virtualenv /data/venv +RUN pip install virtualenv \ +&& virtualenv /data/venv RUN . /data/venv/bin/activate && pip install -r /data/www/pillar/requirements.txt VOLUME /data/dev/pillar +VOLUME /data/config +VOLUME /storage/shared +VOLUME /storage/pillar ENV MONGO_HOST mongo_pillar diff --git a/pillar/application/__init__.py b/pillar/application/__init__.py index 36aff667..4da99093 100644 --- a/pillar/application/__init__.py +++ b/pillar/application/__init__.py @@ -295,15 +295,26 @@ def post_GET_user(request, payload): compute_permissions(json_data['_id'], app.data.driver) payload.data = json.dumps(json_data) - app.on_post_GET_users += post_GET_user +from modules.file_storage import process_file + +def post_POST_files(request, payload): + """After an file object has been created, we do the necessary processing + and further update it. + """ + process_file(request.get_json()) + + +#app.on_pre_POST_files += pre_POST_files +app.on_post_POST_files += post_POST_files + from utils.cdn import hash_file_path # Hook to check the backend of a file resource, to build an appropriate link # that can be used by the client to retrieve the actual file. 
def generate_link(backend, path): if backend == 'pillar': - link = url_for('file_server.index', file_name=path, _external=True) + link = url_for('file_storage.index', file_name=path, _external=True) elif backend == 'cdnsun': link = hash_file_path(path, None) else: @@ -321,6 +332,7 @@ def before_returning_files(response): app.on_fetched_item_files += before_returning_file app.on_fetched_resource_files += before_returning_files -# The file_server module needs app to be defined -from file_server import file_server -app.register_blueprint(file_server, url_prefix='/file_server') +# The file_storage module needs app to be defined +from modules.file_storage import file_storage +#from modules.file_storage.serve import * +app.register_blueprint(file_storage, url_prefix='/storage') diff --git a/pillar/application/modules/__init__.py b/pillar/application/modules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pillar/application/modules/file_storage/__init__.py b/pillar/application/modules/file_storage/__init__.py new file mode 100644 index 00000000..5fd9dc64 --- /dev/null +++ b/pillar/application/modules/file_storage/__init__.py @@ -0,0 +1,192 @@ +import os +from multiprocessing import Process +from bson import ObjectId +from flask import request +from flask import Blueprint +from application import app +from application import db +from application import post_item +from application.utils.imaging import generate_local_thumbnails +from application.utils.imaging import get_video_data +from application.utils.imaging import ffmpeg_encode + +from application.utils.storage import remote_storage_sync + +file_storage = Blueprint('file_storage', __name__, + template_folder='templates', + static_folder='../../static/storage',) + + +@file_storage.route('/build_thumbnails/') +def build_thumbnails(file_path=None, file_id=None): + if file_path: + # Search file with backend "pillar" and path=file_path + file_ = db.files.find({"path": "{0}".format(file_path)}) + file_ 
= file_[0] + + if file_id: + file_ = db.files.find_one({"_id": ObjectId(file_id)}) + file_path = file_['name'] + + user = file_['user'] + + file_full_path = os.path.join(app.config['SHARED_DIR'], file_path) + # Does the original file exist? + if not os.path.isfile(file_full_path): + return "", 404 + else: + thumbnails = generate_local_thumbnails(file_full_path, + return_image_stats=True) + + for size, thumbnail in thumbnails.iteritems(): + if thumbnail.get('exists'): + # If a thumbnail was already made, we just continue + continue + basename = os.path.basename(thumbnail['path']) + root, ext = os.path.splitext(basename) + path = os.path.join(basename[:2], basename) + file_object = dict( + name=root, + #description="Preview of file {0}".format(file_['name']), + user=user, + parent=file_['_id'], + size=size, + format=ext[1:], + width=thumbnail['width'], + height=thumbnail['height'], + content_type=thumbnail['content_type'], + length=thumbnail['length'], + md5=thumbnail['md5'], + filename=basename, + backend='pillar', + path=path) + # Commit to database + r = post_item('files', file_object) + if r[0]['_status'] == 'ERR': + return "", r[3] # The error code from the request + + return "", 200 + + +@file_storage.route('/file', methods=['POST']) +@file_storage.route('/file/') +def index(file_name=None): + #GET file + if file_name: + return file_storage.send_static_file(file_name) + #POST file + file_name = request.form['name'] + folder_name = file_name[:2] + file_folder_path = os.path.join(app.config['FILE_STORAGE'], + folder_name) + if not os.path.exists(file_folder_path): + os.mkdir(file_folder_path) + file_path = os.path.join(file_folder_path, file_name) + request.files['data'].save(file_path) + + return "{}", 200 + + +def process_file(src_file): + """Process the file + """ + + file_abs_path = os.path.join(app.config['SHARED_DIR'], src_file['name']) + src_file['length'] = os.stat(file_abs_path).st_size + # Remove properties that do not belong in the collection + del 
src_file['_status'] + del src_file['_links'] + content_type = src_file['content_type'].split('/') + src_file['format'] = content_type[1] + mime_type = content_type[0] + src_file['path'] = src_file['name'] + + if mime_type == 'image': + from PIL import Image + im = Image.open(file_abs_path) + res = im.size + src_file['width'] = res[0] + src_file['height'] = res[1] + # Generate previews + + build_thumbnails(file_id=src_file['_id']) + elif mime_type == 'video': + pass + # Generate variations + src_video_data = get_video_data(file_abs_path) + variations = { + 'mp4': None, + 'webm': None + } + if src_video_data['duration']: + src_file['duration'] = src_video_data['duration'] + + # Properly resize the video according to 720p and 1080p resolutions + if src_video_data['res_y'] < 1080: + res_y = 720 + elif src_video_data['res_y'] >= 1080: + res_y = 1080 + + # Create variations in database + for v in variations: + root, ext = os.path.splitext(src_file['name']) + filename = "{0}-{1}p.{2}".format(root, res_y, v) + video_duration = None + if src_video_data['duration']: + video_duration = src_video_data['duration'] + + file_object = dict( + name=os.path.split(filename)[1], + #description="Preview of file {0}".format(file_['name']), + user=src_file['user'], + parent=src_file['_id'], + size="{0}p".format(res_y), + duration=video_duration, + format=v, + width=src_video_data['res_x'], + height=src_video_data['res_y'], + content_type="video/{0}".format(v), + length=0, # Available after encode + md5="", # Available after encode + filename=os.path.split(filename)[1], + backend='pillar', + path=filename) + + file_object_id = db.files.save(file_object) + # Append the ObjectId to the new list + variations[v] = file_object_id + + + def encode(src, variations, res_y): + # For every variation in the list call video_encode + # print "encoding {0}".format(variations) + for v in variations: + path = ffmpeg_encode(file_abs_path, v, res_y) + # Update size data after encoding + # (TODO) update 
status (non existing now) + file_size = os.stat(path).st_size + variation = db.files.find_one(variations[v]) + variation['length'] = file_size + # print variation + file_asset = db.files.find_and_modify( + {'_id': variations[v]}, + variation) + + # rsync the file file (this is async) + remote_storage_sync(path) + # When all encodes are done, delete source file + + + p = Process(target=encode, args=(file_abs_path, variations, res_y)) + p.start() + if mime_type != 'video': + # Sync the whole subfolder + sync_path = os.path.split(file_abs_path)[0] + else: + sync_path = file_abs_path + remote_storage_sync(sync_path) + + files = app.data.driver.db['files'] + file_asset = files.find_and_modify( + {'_id': src_file['_id']}, + src_file) diff --git a/pillar/application/modules/file_storage/serve.py b/pillar/application/modules/file_storage/serve.py new file mode 100644 index 00000000..7a00e24f --- /dev/null +++ b/pillar/application/modules/file_storage/serve.py @@ -0,0 +1,78 @@ +import os +from bson import ObjectId +from flask import request +from application import app +from application import db +from application import post_item +from application.utils.imaging import generate_local_thumbnails +from application.modules.file_storage import file_storage + + +@file_storage.route('/build_thumbnails/') +def build_thumbnails(file_path=None, file_id=None): + if file_path: + # Search file with backend "pillar" and path=file_path + file_ = db.files.find({"path": "{0}".format(file_path)}) + file_ = file_[0] + + if file_id: + file_ = db.files.find_one({"_id": ObjectId(file_id)}) + file_path = file_['name'] + + user = file_['user'] + + file_full_path = os.path.join(app.config['SHARED_DIR'], file_path) + # Does the original file exist? 
+ if not os.path.isfile(file_full_path): + return "", 404 + else: + thumbnails = generate_local_thumbnails(file_full_path, + return_image_stats=True) + + for size, thumbnail in thumbnails.iteritems(): + if thumbnail.get('exists'): + # If a thumbnail was already made, we just continue + continue + basename = os.path.basename(thumbnail['path']) + root, ext = os.path.splitext(basename) + path = os.path.join(basename[:2], basename) + file_object = dict( + name=root, + #description="Preview of file {0}".format(file_['name']), + user=user, + parent=file_['_id'], + size=size, + format=ext[1:], + width=thumbnail['width'], + height=thumbnail['height'], + content_type=thumbnail['content_type'], + length=thumbnail['length'], + md5=thumbnail['md5'], + filename=basename, + backend='pillar', + path=path) + # Commit to database + r = post_item('files', file_object) + if r[0]['_status'] == 'ERR': + return "", r[3] # The error code from the request + + return "", 200 + + +@file_storage.route('/file', methods=['POST']) +@file_storage.route('/file/') +def index(file_name=None): + #GET file + if file_name: + return file_storage.send_static_file(file_name) + #POST file + file_name = request.form['name'] + folder_name = file_name[:2] + file_folder_path = os.path.join(app.config['FILE_STORAGE'], + folder_name) + if not os.path.exists(file_folder_path): + os.mkdir(file_folder_path) + file_path = os.path.join(file_folder_path, file_name) + request.files['data'].save(file_path) + + return "{}", 200 diff --git a/pillar/application/pre_hooks.py b/pillar/application/pre_hooks.py index 391d2465..0cf76251 100644 --- a/pillar/application/pre_hooks.py +++ b/pillar/application/pre_hooks.py @@ -72,7 +72,9 @@ def pre_PATCH(request, lookup, data_driver): def pre_POST(request, data_driver): # Only Post allowed documents action = 'POST' - print (g.get('type_groups_permissions')) + #print(g.get('type_groups_permissions')) + print(g.get('groups_permissions')) + print(g.get('world_permissions')) # Is 
quering for one specific node if action not in g.get('world_permissions') and \ action not in g.get('groups_permissions'): diff --git a/pillar/application/utils/imaging.py b/pillar/application/utils/imaging.py index 495fbea7..fc1a2968 100644 --- a/pillar/application/utils/imaging.py +++ b/pillar/application/utils/imaging.py @@ -1,4 +1,6 @@ import os +import json +import subprocess from PIL import Image from application import app @@ -110,3 +112,92 @@ def resize_and_crop(img_path, modified_path, size, crop_type='middle'): Image.ANTIALIAS) # If the scale is the same, we do not need to crop img.save(modified_path, "JPEG") + + +def get_video_data(filepath): + + outdata = False + + ffprobe_ouput = json.loads( + subprocess.check_output( + [app.config['BIN_FFPROBE'], + '-loglevel', + 'error', + '-show_streams', + filepath, + '-print_format', + 'json'])) + video = ffprobe_ouput['streams'][0] + + if video['codec_type'] == 'video': + # If video is webm we can't get the duration (seems to be an ffprobe issue) + if video['codec_name'] == 'vp8': + duration = None + else: + duration = int(float(video['duration'])) + outdata = dict( + duration = duration, + res_x = video['width'], + res_y = video['height'], + ) + if video['sample_aspect_ratio'] != '1:1': + print '[warning] Pixel aspect ratio is not square!' 
+ + return outdata + + +def ffmpeg_encode(src, format, res_y=720): + # The specific FFMpeg command, called multiple times + args = [] + args.append("-i") + args.append(src) + + if format == 'mp4': + # Example mp4 encoding + # ffmpeg -i INPUT -vcodec libx264 -pix_fmt yuv420p -preset fast -crf 20 + # -acodec libfdk_aac -ab 112k -ar 44100 -movflags +faststart OUTPUT + args.extend([ + '-threads', '1', + '-vf', 'scale=-2:{0}'.format(res_y), + '-vcodec', 'libx264', + '-pix_fmt', 'yuv420p', + '-preset', 'fast', + '-crf', '20', + '-acodec', 'libfdk_aac', '-ab', '112k', '-ar', '44100', + '-movflags', '+faststart']) + elif format == 'webm': + # Example webm encoding + # ffmpeg -i INPUT -vcodec libvpx -g 120 -lag-in-frames 16 -deadline good + # -cpu-used 0 -vprofile 0 -qmax 51 -qmin 11 -slices 4 -b:v 2M -f webm + + args.extend([ + '-vf', 'scale=-2:{0}'.format(res_y), + '-vcodec', 'libvpx', + '-g', '120', + '-lag-in-frames', '16', + '-deadline', 'good', + '-cpu-used', '0', + '-vprofile', '0', + '-qmax', '51', '-qmin', '11', '-slices', '4','-b:v', '2M', + #'-acodec', 'libmp3lame', '-ab', '112k', '-ar', '44100', + '-f', 'webm']) + + if not os.environ.get('VERBOSE'): + args.extend(['-loglevel', 'quiet']) + + dst = os.path.splitext(src) + dst = "{0}-{1}p.{2}".format(dst[0], res_y, format) + args.append(dst) + print "Encoding {0} to {1}".format(src, format) + returncode = subprocess.call([app.config['BIN_FFMPEG']] + args) + if returncode == 0: + print "Successfully encoded {0}".format(dst) + else: + print "Error during encode" + print "Code: {0}".format(returncode) + print "Command: {0}".format(app.config['BIN_FFMPEG'] + " " + " ".join(args)) + dst = None + # return path of the encoded video + return dst + + diff --git a/pillar/application/utils/storage.py b/pillar/application/utils/storage.py new file mode 100644 index 00000000..cf649e81 --- /dev/null +++ b/pillar/application/utils/storage.py @@ -0,0 +1,54 @@ +import os +import subprocess +#import logging +from application import 
app + +BIN_FFPROBE = app.config['BIN_FFPROBE'] +BIN_FFMPEG = app.config['BIN_FFMPEG'] +BIN_SSH = app.config['BIN_SSH'] +BIN_RSYNC = app.config['BIN_RSYNC'] + + +def get_sizedata(filepath): + outdata = dict( + size = int(os.stat(filepath).st_size) + ) + return outdata + + +def rsync(path, remote_dir=''): + DRY_RUN = False + arguments=['--verbose', '--ignore-existing', '--recursive', '--human-readable'] + logs_path = app.config['CDN_SYNC_LOGS'] + storage_address = app.config['CDN_STORAGE_ADDRESS'] + user = app.config['CDN_STORAGE_USER'] + rsa_key_path = app.config['CDN_RSA_KEY'] + known_hosts_path = app.config['CDN_KNOWN_HOSTS'] + + if DRY_RUN: + arguments.append('--dry-run') + folder_arguments = list(arguments) + if rsa_key_path: + folder_arguments.append( + '-e ' + BIN_SSH + ' -i ' + rsa_key_path + ' -o "StrictHostKeyChecking=no"') + # if known_hosts_path: + # folder_arguments.append("-o UserKnownHostsFile " + known_hosts_path) + folder_arguments.append("--log-file=" + logs_path + "/rsync.log") + folder_arguments.append(path) + folder_arguments.append(user + "@" + storage_address + ":/public/" + remote_dir) + # print (folder_arguments) + devnull = open(os.devnull, 'wb') + # DEBUG CONFIG + # print folder_arguments + # proc = subprocess.Popen(['rsync'] + folder_arguments) + # stdout, stderr = proc.communicate() + subprocess.Popen(['nohup', BIN_RSYNC] + folder_arguments, stdout=devnull, stderr=devnull) + + +def remote_storage_sync(path): #can be both folder and file + if os.path.isfile(path): + filename = os.path.split(path)[1] + rsync(path, filename[:2] + '/') + else: + if os.path.exists(path): + rsync(path) diff --git a/pillar/config.py.example b/pillar/config.py.example index 322ecd1f..21d86206 100644 --- a/pillar/config.py.example +++ b/pillar/config.py.example @@ -7,6 +7,8 @@ class Config(object): MAIL_USERNAME = '' MAIL_PASSWORD = '' DEFAULT_MAIL_SENDER = '' + RFC1123_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT' + class Development(Config): FILE_STORAGE = 
'{0}/application/static/storage'.format( diff --git a/pillar/manage.py b/pillar/manage.py index 1fddae6a..740dd389 100644 --- a/pillar/manage.py +++ b/pillar/manage.py @@ -914,7 +914,7 @@ def import_data(path): @manager.command def make_thumbnails(): - from application.file_server import build_thumbnails + from application.modules.file_storage import build_thumbnails files = db.files.find() for f in files: if f['content_type'].split('/')[0] == 'image': diff --git a/pillar/settings.py b/pillar/settings.py index be5fef99..e476dacc 100644 --- a/pillar/settings.py +++ b/pillar/settings.py @@ -9,7 +9,7 @@ RESOURCE_METHODS = ['GET', 'POST', 'DELETE'] # individual items (defaults to read-only item access). ITEM_METHODS = ['GET', 'PUT', 'DELETE', 'PATCH'] -PAGINATION_LIMIT = 999 +PAGINATION_LIMIT = 25 # To be implemented on Eve 0.6 # RETURN_MEDIA_AS_URL = True @@ -281,7 +281,7 @@ files_schema = { 'format': { # human readable format, like mp4, HLS, webm, mov 'type': 'string' }, - 'width': { # valid for images and video contentType + 'width': { # valid for images and video content_type 'type': 'integer' }, 'height': { @@ -310,7 +310,7 @@ files_schema = { }, 'path': { 'type': 'string', - 'required': True, + #'required': True, 'unique': True, }, 'previews': { # Deprecated (see comments above)