Introducing Pillar Framework

Refactor of pillar-server and pillar-web into a single Python package. This
simplifies the overall architecture of Pillar applications.

Special thanks to @sybren and @venomgfx
2016-08-19 09:19:06 +02:00
parent a5e92e1d87
commit 2c5dc34ea2
232 changed files with 79508 additions and 2232 deletions


@@ -0,0 +1,116 @@
import copy
import hashlib
import json
import urllib
import datetime
import functools
import logging
import bson.objectid
from eve import RFC1123_DATE_FORMAT
from flask import current_app
from werkzeug import exceptions as wz_exceptions
import pymongo.results
__all__ = ('remove_private_keys', 'PillarJSONEncoder')
log = logging.getLogger(__name__)
def remove_private_keys(document):
"""Removes any key that starts with an underscore, returns result as new
dictionary.
"""
doc_copy = copy.deepcopy(document)
for key in list(doc_copy.keys()):
if key.startswith('_'):
del doc_copy[key]
try:
del doc_copy['allowed_methods']
except KeyError:
pass
return doc_copy
class PillarJSONEncoder(json.JSONEncoder):
"""JSON encoder with support for Pillar resources."""
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.strftime(RFC1123_DATE_FORMAT)
if isinstance(obj, bson.ObjectId):
return str(obj)
if isinstance(obj, pymongo.results.UpdateResult):
return obj.raw_result
# Let the base class default method raise the TypeError
return json.JSONEncoder.default(self, obj)
def dumps(mongo_doc, **kwargs):
"""json.dumps() for MongoDB documents."""
return json.dumps(mongo_doc, cls=PillarJSONEncoder, **kwargs)
def jsonify(mongo_doc, status=200, headers=None):
"""JSonifies a Mongo document into a Flask response object."""
return current_app.response_class(dumps(mongo_doc),
mimetype='application/json',
status=status,
headers=headers)
def skip_when_testing(func):
"""Decorator, skips the decorated function when app.config['TESTING']"""
@functools.wraps(func)
def wrapper(*args, **kwargs):
if current_app.config['TESTING']:
log.debug('Skipping call to %s(...) due to TESTING', func.func_name)
return None
return func(*args, **kwargs)
return wrapper
def project_get_node_type(project_document, node_type_node_name):
"""Return a node_type subdocument for a project. If none is found, return
None.
"""
if project_document is None:
return None
return next((node_type for node_type in project_document['node_types']
if node_type['name'] == node_type_node_name), None)
def str2id(document_id):
"""Returns the document ID as ObjectID, or raises a BadRequest exception.
:type document_id: str
:rtype: bson.ObjectId
:raises: wz_exceptions.BadRequest
"""
if not document_id:
log.debug('str2id(%r): Invalid Object ID', document_id)
raise wz_exceptions.BadRequest('Invalid object ID %r' % document_id)
try:
return bson.ObjectId(document_id)
except bson.objectid.InvalidId:
log.debug('str2id(%r): Invalid Object ID', document_id)
raise wz_exceptions.BadRequest('Invalid object ID %r' % document_id)
def gravatar(email, size=64):
parameters = {'s': str(size), 'd': 'mm'}
return "https://www.gravatar.com/avatar/" + \
hashlib.md5(str(email)).hexdigest() + \
"?" + urllib.urlencode(parameters)

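Taken together, these helpers cover the JSON round-trip for MongoDB documents; note that this jsonify() is Pillar's own, not Flask's. A minimal usage sketch follows; the blueprint, route and collection name are illustrative only, and the import path assumes this listing is pillar/api/utils/__init__.py (which the relative import in the Algolia listing below suggests):

from flask import Blueprint, current_app
from pillar.api import utils

example = Blueprint('example', __name__)

@example.route('/nodes/<node_id>')
def view_node(node_id):
    node_oid = utils.str2id(node_id)  # raises BadRequest on a malformed ID
    node = current_app.data.driver.db['nodes'].find_one(node_oid)
    if node is None:
        return utils.jsonify({'_error': 'not found'}, status=404)
    # Strip MongoDB/Eve-internal keys (starting with '_') before echoing back.
    return utils.jsonify(utils.remove_private_keys(node))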

@@ -0,0 +1,98 @@
import logging
from bson import ObjectId
from flask import current_app
from pillar.api.file_storage import generate_link
from . import skip_when_testing
log = logging.getLogger(__name__)
INDEX_ALLOWED_USER_ROLES = {'admin', 'subscriber', 'demo'}
INDEX_ALLOWED_NODE_TYPES = {'asset', 'texture', 'group', 'hdri'}
@skip_when_testing
def algolia_index_user_save(user):
if current_app.algolia_index_users is None:
return
# Strip unneeded roles
if 'roles' in user:
roles = set(user['roles']).intersection(INDEX_ALLOWED_USER_ROLES)
else:
roles = set()
if current_app.algolia_index_users:
# Create or update Algolia index for the user
current_app.algolia_index_users.save_object({
'objectID': user['_id'],
'full_name': user['full_name'],
'username': user['username'],
'roles': list(roles),
'groups': user['groups'],
'email': user['email']
})
@skip_when_testing
def algolia_index_node_save(node):
if not current_app.algolia_index_nodes:
return
if node['node_type'] not in INDEX_ALLOWED_NODE_TYPES:
return
    # If a node does not have status 'published', do not index it
if node['properties'].get('status') != 'published':
return
projects_collection = current_app.data.driver.db['projects']
project = projects_collection.find_one({'_id': ObjectId(node['project'])})
users_collection = current_app.data.driver.db['users']
user = users_collection.find_one({'_id': ObjectId(node['user'])})
node_ob = {
'objectID': node['_id'],
'name': node['name'],
'project': {
'_id': project['_id'],
'name': project['name']
},
'created': node['_created'],
'updated': node['_updated'],
'node_type': node['node_type'],
'user': {
'_id': user['_id'],
'full_name': user['full_name']
},
}
if 'description' in node and node['description']:
node_ob['description'] = node['description']
if 'picture' in node and node['picture']:
files_collection = current_app.data.driver.db['files']
lookup = {'_id': ObjectId(node['picture'])}
picture = files_collection.find_one(lookup)
if picture['backend'] == 'gcs':
variation_t = next((item for item in picture['variations'] \
if item['size'] == 't'), None)
if variation_t:
node_ob['picture'] = generate_link(picture['backend'],
variation_t['file_path'], project_id=str(picture['project']),
is_public=True)
# If the node has world permissions, compute the Free permission
if 'permissions' in node and 'world' in node['permissions']:
if 'GET' in node['permissions']['world']:
node_ob['is_free'] = True
# Append the media key if the node is of node_type 'asset'
if node['node_type'] == 'asset':
node_ob['media'] = node['properties']['content_type']
# Add tags
if 'tags' in node['properties']:
node_ob['tags'] = node['properties']['tags']
current_app.algolia_index_nodes.save_object(node_ob)
@skip_when_testing
def algolia_index_node_delete(node):
if current_app.algolia_index_nodes is None:
return
current_app.algolia_index_nodes.delete_object(node['_id'])
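These indexing functions are presumably registered as Eve database event hooks elsewhere in the package; that wiring is not part of this excerpt, so the sketch below is an assumption (including the pillar.api.utils.algolia module path). It uses Eve's standard on_inserted_<resource> and on_deleted_item_<resource> events:

from pillar.api.utils import algolia

def setup_app(app):
    def on_inserted_nodes(items):
        for node in items:
            algolia.algolia_index_node_save(node)

    def on_deleted_item_nodes(node):
        algolia.algolia_index_node_delete(node)

    # Eve event hooks are registered with the += syntax.
    app.on_inserted_nodes += on_inserted_nodes
    app.on_deleted_item_nodes += on_deleted_item_nodes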


@@ -0,0 +1,224 @@
"""Generic authentication.
Contains functionality to validate tokens, create users and tokens, and make
unique usernames from emails. Calls out to the pillar.api.blender_id
module for Blender ID communication.
"""
import logging
import datetime
from bson import tz_util
from flask import g
from flask import request
from flask import current_app
log = logging.getLogger(__name__)
def validate_token():
"""Validate the token provided in the request and populate the current_user
flask.g object, so that permissions and access to a resource can be defined
from it.
When the token is successfully validated, sets `g.current_user` to contain
the user information, otherwise it is set to None.
@returns True iff the user is logged in with a valid Blender ID token.
"""
if request.authorization:
token = request.authorization.username
oauth_subclient = request.authorization.password
else:
# Check the session, the user might be logged in through Flask-Login.
from pillar import auth
token = auth.get_blender_id_oauth_token()
if token and isinstance(token, (tuple, list)):
token = token[0]
oauth_subclient = None
if not token:
# If no authorization headers are provided, we are getting a request
        # from a user who is not logged in. Proceed accordingly.
log.debug('No authentication headers, so not logged in.')
g.current_user = None
return False
return validate_this_token(token, oauth_subclient) is not None
def validate_this_token(token, oauth_subclient=None):
"""Validates a given token, and sets g.current_user.
:returns: the user in MongoDB, or None if not a valid token.
:rtype: dict
"""
g.current_user = None
_delete_expired_tokens()
# Check the users to see if there is one with this Blender ID token.
db_token = find_token(token, oauth_subclient)
if not db_token:
log.debug('Token %s not found in our local database.', token)
# If no valid token is found in our local database, we issue a new
# request to the Blender ID server to verify the validity of the token
# passed via the HTTP header. We will get basic user info if the user
# is authorized, and we will store the token in our local database.
from pillar.api import blender_id
db_user, status = blender_id.validate_create_user('', token, oauth_subclient)
else:
# log.debug("User is already in our database and token hasn't expired yet.")
users = current_app.data.driver.db['users']
db_user = users.find_one(db_token['user'])
if db_user is None:
log.debug('Validation failed, user not logged in')
return None
g.current_user = {'user_id': db_user['_id'],
'groups': db_user['groups'],
'roles': set(db_user.get('roles', []))}
return db_user
def find_token(token, is_subclient_token=False, **extra_filters):
"""Returns the token document, or None if it doesn't exist (or is expired)."""
tokens_collection = current_app.data.driver.db['tokens']
# TODO: remove expired tokens from collection.
lookup = {'token': token,
'is_subclient_token': True if is_subclient_token else {'$in': [False, None]},
'expire_time': {"$gt": datetime.datetime.now(tz=tz_util.utc)}}
lookup.update(extra_filters)
db_token = tokens_collection.find_one(lookup)
return db_token
def store_token(user_id, token, token_expiry, oauth_subclient_id=False):
"""Stores an authentication token.
:returns: the token document from MongoDB
"""
assert isinstance(token, (str, unicode)), 'token must be string type, not %r' % type(token)
token_data = {
'user': user_id,
'token': token,
'expire_time': token_expiry,
}
if oauth_subclient_id:
token_data['is_subclient_token'] = True
r, _, _, status = current_app.post_internal('tokens', token_data)
if status not in {200, 201}:
log.error('Unable to store authentication token: %s', r)
raise RuntimeError('Unable to store authentication token.')
token_data.update(r)
return token_data
def create_new_user(email, username, user_id):
"""Creates a new user in our local database.
@param email: the user's email
@param username: the username, which is also used as full name.
@param user_id: the user ID from the Blender ID server.
@returns: the user ID from our local database.
"""
user_data = create_new_user_document(email, user_id, username)
r = current_app.post_internal('users', user_data)
user_id = r[0]['_id']
return user_id
def create_new_user_document(email, user_id, username, provider='blender-id',
token=''):
"""Creates a new user document, without storing it in MongoDB. The token
parameter is a password in case provider is "local".
"""
user_data = {
'full_name': username,
'username': username,
'email': email,
'auth': [{
'provider': provider,
'user_id': str(user_id),
'token': token}],
'settings': {
'email_communications': 1
},
'groups': [],
}
return user_data
def make_unique_username(email):
"""Creates a unique username from the email address.
@param email: the email address
@returns: the new username
@rtype: str
"""
username = email.split('@')[0]
# Check for min length of username (otherwise validation fails)
username = "___{0}".format(username) if len(username) < 3 else username
users = current_app.data.driver.db['users']
user_from_username = users.find_one({'username': username})
if not user_from_username:
return username
# Username exists, make it unique by adding some number after it.
suffix = 1
while True:
unique_name = '%s%i' % (username, suffix)
user_from_username = users.find_one({'username': unique_name})
if user_from_username is None:
return unique_name
suffix += 1
def _delete_expired_tokens():
"""Deletes tokens that have expired.
For debugging, we keep expired tokens around for a few days, so that we
can determine that a token was expired rather than not created in the
first place. It also grants some leeway in clock synchronisation.
"""
token_coll = current_app.data.driver.db['tokens']
now = datetime.datetime.now(tz_util.utc)
expiry_date = now - datetime.timedelta(days=7)
result = token_coll.delete_many({'expire_time': {"$lt": expiry_date}})
# log.debug('Deleted %i expired authentication tokens', result.deleted_count)
def current_user_id():
"""None-safe fetching of user ID. Can return None itself, though."""
current_user = g.get('current_user') or {}
return current_user.get('user_id')
def setup_app(app):
@app.before_request
def validate_token_at_each_request():
validate_token()
return None
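Since setup_app() runs validate_token() before every request, views can simply inspect g.current_user (a dict with user_id, groups and roles, or None) or call current_user_id(). A small illustrative view; the blueprint and route are not part of this commit, and the pillar.api.utils.authentication module path is an assumption:

from flask import Blueprint, g
from pillar.api.utils.authentication import current_user_id

example = Blueprint('example_auth', __name__)

@example.route('/me')
def my_user_id():
    if g.current_user is None:
        return 'Not logged in', 403
    # current_user_id() is the None-safe shortcut for g.current_user['user_id'].
    return str(current_user_id())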


@@ -0,0 +1,372 @@
import logging
import functools
from bson import ObjectId
from flask import g
from flask import abort
from flask import current_app
from werkzeug.exceptions import Forbidden
CHECK_PERMISSIONS_IMPLEMENTED_FOR = {'projects', 'nodes'}
log = logging.getLogger(__name__)
def check_permissions(collection_name, resource, method, append_allowed_methods=False,
check_node_type=None):
"""Check user permissions to access a node. We look up node permissions from
world to groups to users and match them with the computed user permissions.
    If there is no match, we raise a 403.
:param collection_name: name of the collection the resource comes from.
:param resource: resource from MongoDB
:type resource: dict
:param method: name of the requested HTTP method
:param append_allowed_methods: whether to return the list of allowed methods
in the resource. Only valid when method='GET'.
:param check_node_type: node type to check. Only valid when collection_name='projects'.
:type check_node_type: str
"""
if not has_permissions(collection_name, resource, method, append_allowed_methods,
check_node_type):
abort(403)
def compute_allowed_methods(collection_name, resource, check_node_type=None):
"""Computes the HTTP methods that are allowed on a given resource.
:param collection_name: name of the collection the resource comes from.
:param resource: resource from MongoDB
:type resource: dict
:param check_node_type: node type to check. Only valid when collection_name='projects'.
:type check_node_type: str
:returns: Set of allowed methods
:rtype: set
"""
# Check some input values.
if collection_name not in CHECK_PERMISSIONS_IMPLEMENTED_FOR:
raise ValueError('compute_allowed_methods only implemented for %s, not for %s',
CHECK_PERMISSIONS_IMPLEMENTED_FOR, collection_name)
if check_node_type is not None and collection_name != 'projects':
raise ValueError('check_node_type parameter is only valid for checking projects.')
computed_permissions = compute_aggr_permissions(collection_name, resource, check_node_type)
if not computed_permissions:
log.info('No permissions available to compute for resource %r',
resource.get('node_type', resource))
return set()
# Accumulate allowed methods from the user, group and world level.
allowed_methods = set()
current_user = g.current_user
if current_user:
# If the user is authenticated, proceed to compare the group permissions
for permission in computed_permissions.get('groups', ()):
if permission['group'] in current_user['groups']:
allowed_methods.update(permission['methods'])
for permission in computed_permissions.get('users', ()):
if current_user['user_id'] == permission['user']:
allowed_methods.update(permission['methods'])
    # Check if the node is public or private. This must be set for
    # non-logged-in users to see the content. For most BI projects this is on
    # by default, while for private projects it will not be set at all.
if 'world' in computed_permissions:
allowed_methods.update(computed_permissions['world'])
return allowed_methods
def has_permissions(collection_name, resource, method, append_allowed_methods=False,
check_node_type=None):
"""Check user permissions to access a node. We look up node permissions from
world to groups to users and match them with the computed user permissions.
:param collection_name: name of the collection the resource comes from.
:param resource: resource from MongoDB
:type resource: dict
:param method: name of the requested HTTP method
:param append_allowed_methods: whether to return the list of allowed methods
in the resource. Only valid when method='GET'.
:param check_node_type: node type to check. Only valid when collection_name='projects'.
:type check_node_type: str
:returns: True if the user has access, False otherwise.
:rtype: bool
"""
# Check some input values.
if append_allowed_methods and method != 'GET':
raise ValueError("append_allowed_methods only allowed with 'GET' method")
allowed_methods = compute_allowed_methods(collection_name, resource, check_node_type)
permission_granted = method in allowed_methods
if permission_granted:
if append_allowed_methods:
# TODO: rename this field _allowed_methods
if check_node_type:
node_type = next((node_type for node_type in resource['node_types']
if node_type['name'] == check_node_type))
assign_to = node_type
else:
assign_to = resource
assign_to['allowed_methods'] = list(set(allowed_methods))
return True
else:
log.debug('Permission denied, method %s not in allowed methods %s',
method, allowed_methods)
return False
def compute_aggr_permissions(collection_name, resource, check_node_type=None):
"""Returns a permissions dict."""
    # We always need to know the project.
if collection_name == 'projects':
project = resource
if check_node_type is None:
return project['permissions']
node_type_name = check_node_type
else:
# Not a project, so it's a node.
assert 'project' in resource
assert 'node_type' in resource
node_type_name = resource['node_type']
if isinstance(resource['project'], dict):
# embedded project
project = resource['project']
else:
project_id = resource['project']
project = _find_project_node_type(project_id, node_type_name)
# Every node should have a project.
if project is None:
log.warning('Resource %s from "%s" refers to a project that does not exist.',
resource['_id'], collection_name)
raise Forbidden()
project_permissions = project['permissions']
# Find the node type from the project.
node_type = next((node_type for node_type in project['node_types']
if node_type['name'] == node_type_name), None)
if node_type is None: # This node type is not known, so doesn't give permissions.
node_type_permissions = {}
else:
node_type_permissions = node_type.get('permissions', {})
# For projects or specific node types in projects, we're done now.
if collection_name == 'projects':
return merge_permissions(project_permissions, node_type_permissions)
node_permissions = resource.get('permissions', {})
return merge_permissions(project_permissions, node_type_permissions, node_permissions)
def _find_project_node_type(project_id, node_type_name):
"""Returns the project with just the one named node type."""
# Cache result per request, as many nodes of the same project can be checked.
cache = g.get('_find_project_node_type_cache')
if cache is None:
cache = g._find_project_node_type_cache = {}
try:
return cache[(project_id, node_type_name)]
except KeyError:
pass
projects_collection = current_app.data.driver.db['projects']
project = projects_collection.find_one(
ObjectId(project_id),
{'permissions': 1,
'node_types': {'$elemMatch': {'name': node_type_name}},
'node_types.name': 1,
'node_types.permissions': 1})
cache[(project_id, node_type_name)] = project
return project
def merge_permissions(*args):
"""Merges all given permissions.
    :param args: list of {'users': ..., 'groups': ..., 'world': ...} dicts.
:returns: combined list of permissions.
"""
if not args:
return {}
if len(args) == 1:
return args[0]
effective = {}
    # When testing we want stable results and not depend on PYTHONHASHSEED etc.
if current_app.config['TESTING']:
maybe_sorted = sorted
else:
def maybe_sorted(arg):
return arg
def merge(field_name):
plural_name = field_name + 's'
from0 = args[0].get(plural_name, [])
from1 = args[1].get(plural_name, [])
asdict0 = {permission[field_name]: permission['methods'] for permission in from0}
asdict1 = {permission[field_name]: permission['methods'] for permission in from1}
keys = set(asdict0.keys() + asdict1.keys())
for key in maybe_sorted(keys):
methods0 = asdict0.get(key, [])
methods1 = asdict1.get(key, [])
methods = maybe_sorted(set(methods0).union(set(methods1)))
effective.setdefault(plural_name, []).append({field_name: key, u'methods': methods})
merge(u'user')
merge(u'group')
# Gather permissions for world
world0 = args[0].get('world', [])
world1 = args[1].get('world', [])
world_methods = set(world0).union(set(world1))
if world_methods:
effective[u'world'] = maybe_sorted(world_methods)
# Recurse for longer merges
if len(args) > 2:
return merge_permissions(effective, *args[2:])
return effective
def require_login(require_roles=set(),
require_all=False):
"""Decorator that enforces users to authenticate.
Optionally only allows access to users with a certain role.
:param require_roles: set of roles.
:param require_all:
When False (the default): if the user's roles have a
non-empty intersection with the given roles, access is granted.
When True: require the user to have all given roles before access is
granted.
"""
if not isinstance(require_roles, set):
raise TypeError('require_roles param should be a set, but is a %r' % type(require_roles))
if require_all and not require_roles:
raise ValueError('require_login(require_all=True) cannot be used with empty require_roles.')
def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if not user_matches_roles(require_roles, require_all):
if g.current_user is None:
# We don't need to log at a higher level, as this is very common.
# Many browsers first try to see whether authentication is needed
# at all, before sending the password.
                    log.debug('Unauthenticated access to %s attempted.', func)
else:
log.warning('User %s is authenticated, but does not have required roles %s to '
'access %s', g.current_user['user_id'], require_roles, func)
abort(403)
return func(*args, **kwargs)
return wrapper
return decorator
def ab_testing(require_roles=set(),
require_all=False):
"""Decorator that raises a 404 when the user doesn't match the roles..
:param require_roles: set of roles.
:param require_all:
When False (the default): if the user's roles have a
non-empty intersection with the given roles, access is granted.
When True: require the user to have all given roles before access is
granted.
"""
if not isinstance(require_roles, set):
raise TypeError('require_roles param should be a set, but is a %r' % type(require_roles))
if require_all and not require_roles:
raise ValueError('require_login(require_all=True) cannot be used with empty require_roles.')
def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if not user_matches_roles(require_roles, require_all):
abort(404)
return func(*args, **kwargs)
return wrapper
return decorator
def user_has_role(role, user=None):
"""Returns True iff the user is logged in and has the given role."""
if user is None:
user = g.get('current_user')
if user is None:
return False
roles = user.get('roles') or ()
return role in roles
def user_matches_roles(require_roles=set(),
require_all=False):
"""Returns True iff the user's roles matches the query.
:param require_roles: set of roles.
:param require_all:
When False (the default): if the user's roles have a
non-empty intersection with the given roles, returns True.
When True: require the user to have all given roles before
returning True.
"""
if not isinstance(require_roles, set):
raise TypeError('require_roles param should be a set, but is a %r' % type(require_roles))
if require_all and not require_roles:
raise ValueError('require_login(require_all=True) cannot be used with empty require_roles.')
current_user = g.get('current_user')
if current_user is None:
return False
intersection = require_roles.intersection(current_user['roles'])
if require_all:
return len(intersection) == len(require_roles)
return not bool(require_roles) or bool(intersection)
def is_admin(user):
"""Returns True iff the given user has the admin role."""
return user_has_role(u'admin', user)
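To make the merge semantics concrete, here is a worked example of merge_permissions(), run inside an app context with TESTING=True so that maybe_sorted produces deterministic, sorted lists, followed by typical decorator usage. The ObjectIds are placeholders and the import path is an assumption:

from bson import ObjectId
from pillar.api.utils.authorization import merge_permissions, require_login

group_id, user_id = ObjectId(), ObjectId()

project_perms = {'groups': [{'group': group_id, 'methods': ['GET']}],
                 'world': ['GET']}
node_type_perms = {'groups': [{'group': group_id, 'methods': ['PUT']}],
                   'users': [{'user': user_id, 'methods': ['DELETE', 'GET']}]}

merge_permissions(project_perms, node_type_perms)
# -> {'groups': [{'group': group_id, 'methods': ['GET', 'PUT']}],
#     'users': [{'user': user_id, 'methods': ['DELETE', 'GET']}],
#     'world': ['GET']}

# Restrict a view to users that have at least one of the given roles:
@require_login(require_roles={u'admin', u'subscriber'})
def restricted_view():
    pass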

pillar/api/utils/cdn.py

@@ -0,0 +1,39 @@
import datetime
from hashlib import md5
from flask import current_app
def hash_file_path(file_path, expiry_timestamp=None):
if not file_path.startswith('/'):
file_path = '/' + file_path
service_domain = current_app.config['CDN_SERVICE_DOMAIN']
domain_subfolder = current_app.config['CDN_CONTENT_SUBFOLDER']
asset_url = current_app.config['CDN_SERVICE_DOMAIN_PROTOCOL'] + \
'://' + \
service_domain + \
domain_subfolder + \
file_path
if current_app.config['CDN_USE_URL_SIGNING']:
url_signing_key = current_app.config['CDN_URL_SIGNING_KEY']
hash_string = domain_subfolder + file_path + url_signing_key
if not expiry_timestamp:
expiry_timestamp = datetime.datetime.now() + datetime.timedelta(hours=24)
expiry_timestamp = expiry_timestamp.strftime('%s')
hash_string = expiry_timestamp + hash_string
expiry_timestamp = "," + str(expiry_timestamp)
hashed_file_path = md5(hash_string).digest().encode('base64')[:-1]
hashed_file_path = hashed_file_path.replace('+', '-')
hashed_file_path = hashed_file_path.replace('/', '_')
asset_url = asset_url + \
'?secure=' + \
hashed_file_path + \
expiry_timestamp
return asset_url
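The resulting URL has the shape sketched below; the config values are invented for illustration, and only the structure of the secure parameter follows from the code above:

# With, for example, CDN_SERVICE_DOMAIN_PROTOCOL='https',
# CDN_SERVICE_DOMAIN='cdn.example.com', CDN_CONTENT_SUBFOLDER='/content'
# and CDN_USE_URL_SIGNING=True:
#
#   hash_file_path('ab/abcdef1234567890.mp4')
#   -> 'https://cdn.example.com/content/ab/abcdef1234567890.mp4?secure=<token>,<expiry>'
#
# <expiry> is a Unix timestamp (by default 24 hours from now) and <token> is the
# base64 of md5(<expiry> + CDN_CONTENT_SUBFOLDER + file_path + CDN_URL_SIGNING_KEY),
# with '+' replaced by '-' and '/' by '_' to keep it URL-safe.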


@@ -0,0 +1,54 @@
import logging
import os
from flask import current_app
log = logging.getLogger(__name__)
class Encoder:
    """Generic Encoder wrapper. Provides a consistent API, independent of
    the enabled encoding backend.
"""
@staticmethod
def job_create(src_file):
"""Create an encoding job. Return the backend used as well as an id.
"""
if current_app.config['ENCODING_BACKEND'] != 'zencoder' or \
current_app.encoding_service_client is None:
log.error('I can only work with Zencoder, check the config file.')
return None
if src_file['backend'] != 'gcs':
log.error('Unable to work with storage backend %r', src_file['backend'])
return None
# Build the specific GCS input url, assuming the file is stored
# in the _ subdirectory
storage_base = "gcs://{0}/_/".format(src_file['project'])
file_input = os.path.join(storage_base, src_file['file_path'])
options = dict(notifications=current_app.config['ZENCODER_NOTIFICATIONS_URL'])
outputs = [{'format': v['format'],
'url': os.path.join(storage_base, v['file_path'])}
for v in src_file['variations']]
r = current_app.encoding_service_client.job.create(file_input,
outputs=outputs,
options=options)
if r.code != 201:
log.error('Error %i creating Zencoder job: %s', r.code, r.body)
return None
return {'process_id': r.body['id'],
'backend': 'zencoder'}
@staticmethod
def job_progress(job_id):
from zencoder import Zencoder
if isinstance(current_app.encoding_service_client, Zencoder):
r = current_app.encoding_service_client.job.progress(int(job_id))
return r.body
else:
return None
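A sketch of the file document shape that Encoder.job_create() reads; only the keys accessed above are shown, and the concrete values are invented:

src_file = {
    'backend': 'gcs',
    'project': '566cb39b21448a1e91c26a68',  # project ObjectId as a string
    'file_path': 'c1ffee0123-source.mov',
    'variations': [
        {'format': 'mp4', 'file_path': 'c1ffee0123-720p.mp4'},
        {'format': 'webm', 'file_path': 'c1ffee0123-720p.webm'},
    ],
}

job = Encoder.job_create(src_file)
# -> {'process_id': <Zencoder job id>, 'backend': 'zencoder'}, or None on error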

pillar/api/utils/gcs.py

@@ -0,0 +1,222 @@
import os
import time
import datetime
import logging
from bson import ObjectId
from gcloud.storage.client import Client
from gcloud.exceptions import NotFound
from flask import current_app, g
from werkzeug.local import LocalProxy
log = logging.getLogger(__name__)
def get_client():
"""Stores the GCS client on the global Flask object.
The GCS client is not user-specific anyway.
"""
_gcs = getattr(g, '_gcs_client', None)
if _gcs is None:
_gcs = g._gcs_client = Client()
return _gcs
# This hides the specifics of how/where we store the GCS client,
# and allows the rest of the code to use 'gcs' as a simple variable
# that does the right thing.
gcs = LocalProxy(get_client)
class GoogleCloudStorageBucket(object):
"""Cloud Storage bucket interface. We create a bucket for every project. In
the bucket we create first level subdirs as follows:
- '_' (will contain hashed assets, and stays on top of default listing)
- 'svn' (svn checkout mirror)
    - 'shared' (any additional folder of static content that is accessed via a
      node of the 'storage' node_type)
:type bucket_name: string
:param bucket_name: Name of the bucket.
:type subdir: string
:param subdir: The local entry point to browse the bucket.
"""
def __init__(self, bucket_name, subdir='_/'):
try:
self.bucket = gcs.get_bucket(bucket_name)
except NotFound:
self.bucket = gcs.bucket(bucket_name)
# Hardcode the bucket location to EU
self.bucket.location = 'EU'
# Optionally enable CORS from * (currently only used for vrview)
# self.bucket.cors = [
# {
# "origin": ["*"],
# "responseHeader": ["Content-Type"],
# "method": ["GET", "HEAD", "DELETE"],
# "maxAgeSeconds": 3600
# }
# ]
self.bucket.create()
self.subdir = subdir
def List(self, path=None):
"""Display the content of a subdir in the project bucket. If the path
points to a file the listing is simply empty.
:type path: string
:param path: The relative path to the directory or asset.
"""
if path and not path.endswith('/'):
path += '/'
prefix = os.path.join(self.subdir, path)
fields_to_return = 'nextPageToken,items(name,size,contentType),prefixes'
req = self.bucket.list_blobs(fields=fields_to_return, prefix=prefix,
delimiter='/')
files = []
for f in req:
filename = os.path.basename(f.name)
if filename != '': # Skip own folder name
files.append(dict(
path=os.path.relpath(f.name, self.subdir),
text=filename,
type=f.content_type))
directories = []
for dir_path in req.prefixes:
directory_name = os.path.basename(os.path.normpath(dir_path))
directories.append(dict(
text=directory_name,
path=os.path.relpath(dir_path, self.subdir),
type='group_storage',
children=True))
# print os.path.basename(os.path.normpath(path))
list_dict = dict(
name=os.path.basename(os.path.normpath(path)),
type='group_storage',
children=files + directories
)
return list_dict
def blob_to_dict(self, blob):
blob.reload()
expiration = datetime.datetime.now() + datetime.timedelta(days=1)
expiration = int(time.mktime(expiration.timetuple()))
return dict(
updated=blob.updated,
name=os.path.basename(blob.name),
size=blob.size,
content_type=blob.content_type,
signed_url=blob.generate_signed_url(expiration),
public_url=blob.public_url)
def Get(self, path, to_dict=True):
"""Get selected file info if the path matches.
:type path: string
:param path: The relative path to the file.
:type to_dict: bool
:param to_dict: Return the object as a dictionary.
"""
path = os.path.join(self.subdir, path)
blob = self.bucket.blob(path)
if blob.exists():
if to_dict:
return self.blob_to_dict(blob)
else:
return blob
else:
return None
def Post(self, full_path, path=None):
"""Create new blob and upload data to it.
"""
path = path if path else os.path.join('_', os.path.basename(full_path))
blob = self.bucket.blob(path)
if blob.exists():
return None
blob.upload_from_filename(full_path)
return blob
# return self.blob_to_dict(blob) # Has issues with threading
def Delete(self, path):
"""Delete blob (when removing an asset or replacing a preview)"""
# We want to get the actual blob to delete
blob = self.Get(path, to_dict=False)
try:
blob.delete()
return True
except NotFound:
return None
def update_name(self, blob, name):
"""Set the ContentDisposition metadata so that when a file is downloaded
it has a human-readable name.
"""
blob.content_disposition = u'attachment; filename="{0}"'.format(name)
blob.patch()
def update_file_name(node):
    """Assign the name of the asset node to the GCS blob. This way, when
    downloading an asset, we get a human-readable filename.
"""
# Process only files that are not processing
if node['properties'].get('status', '') == 'processing':
return
def _format_name(name, override_ext, size=None, map_type=u''):
root, _ = os.path.splitext(name)
size = u'-{}'.format(size) if size else u''
map_type = u'-{}'.format(map_type) if map_type else u''
return u'{}{}{}{}'.format(root, size, map_type, override_ext)
def _update_name(file_id, file_props):
files_collection = current_app.data.driver.db['files']
file_doc = files_collection.find_one({'_id': ObjectId(file_id)})
if file_doc is None or file_doc.get('backend') != 'gcs':
return
# For textures -- the map type should be part of the name.
map_type = file_props.get('map_type', u'')
storage = GoogleCloudStorageBucket(str(node['project']))
blob = storage.Get(file_doc['file_path'], to_dict=False)
# Pick file extension from original filename
_, ext = os.path.splitext(file_doc['filename'])
name = _format_name(node['name'], ext, map_type=map_type)
storage.update_name(blob, name)
# Assign the same name to variations
for v in file_doc.get('variations', []):
_, override_ext = os.path.splitext(v['file_path'])
name = _format_name(node['name'], override_ext, v['size'], map_type=map_type)
blob = storage.Get(v['file_path'], to_dict=False)
if blob is None:
log.info('Unable to find blob for file %s in project %s. This can happen if the '
'video encoding is still processing.', v['file_path'], node['project'])
continue
storage.update_name(blob, name)
    # Currently we search for 'file' and 'files' keys in the object properties.
    # This could become a bit more flexible and rely on a true reference to the
    # file object type from the schema.
if 'file' in node['properties']:
_update_name(node['properties']['file'], {})
if 'files' in node['properties']:
for file_props in node['properties']['files']:
_update_name(file_props['file'], file_props)
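A small usage sketch of the bucket wrapper; the project id and paths are placeholders. List() and Get() resolve paths relative to the subdir passed to the constructor (default '_/'), while Post() defaults to the '_' folder:

storage = GoogleCloudStorageBucket('566cb39b21448a1e91c26a68')

listing = storage.List('renders/')             # dict with file/dir children
info = storage.Get('renders/frame-0001.png')   # dict incl. signed_url and size
blob = storage.Post('/tmp/frame-0002.png')     # upload; returns the blob, or None if it exists
if blob is not None:
    storage.update_name(blob, u'frame-0002.png')  # human-readable download name
storage.Delete('renders/frame-0001.png')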

pillar/api/utils/imaging.py

@@ -0,0 +1,203 @@
import os
import json
import subprocess
from PIL import Image
from flask import current_app
def generate_local_thumbnails(name_base, src):
"""Given a source image, use Pillow to generate thumbnails according to the
application settings.
:param name_base: the thumbnail will get a field 'name': '{basename}-{thumbsize}.jpg'
:type name_base: str
:param src: the path of the image to be thumbnailed
:type src: str
"""
thumbnail_settings = current_app.config['UPLOADS_LOCAL_STORAGE_THUMBNAILS']
thumbnails = []
save_to_base, _ = os.path.splitext(src)
name_base, _ = os.path.splitext(name_base)
for size, settings in thumbnail_settings.iteritems():
dst = '{0}-{1}{2}'.format(save_to_base, size, '.jpg')
name = '{0}-{1}{2}'.format(name_base, size, '.jpg')
if settings['crop']:
resize_and_crop(src, dst, settings['size'])
width, height = settings['size']
else:
im = Image.open(src).convert('RGB')
im.thumbnail(settings['size'])
im.save(dst, "JPEG")
width, height = im.size
thumb_info = {'size': size,
'file_path': name,
'local_path': dst,
'length': os.stat(dst).st_size,
'width': width,
'height': height,
'md5': '',
'content_type': 'image/jpeg'}
if size == 't':
thumb_info['is_public'] = True
thumbnails.append(thumb_info)
return thumbnails
def resize_and_crop(img_path, modified_path, size, crop_type='middle'):
"""
Resize and crop an image to fit the specified size. Thanks to:
https://gist.github.com/sigilioso/2957026
args:
img_path: path for the image to resize.
modified_path: path to store the modified image.
size: `(width, height)` tuple.
crop_type: can be 'top', 'middle' or 'bottom', depending on this
            value, the image will be cropped getting the 'top/left', 'middle' or
'bottom/right' of the image to fit the size.
raises:
        Exception: if the file at img_path cannot be opened or there are
            problems saving the image.
ValueError: if an invalid `crop_type` is provided.
"""
# If height is higher we resize vertically, if not we resize horizontally
img = Image.open(img_path).convert('RGB')
# Get current and desired ratio for the images
img_ratio = img.size[0] / float(img.size[1])
ratio = size[0] / float(size[1])
# The image is scaled/cropped vertically or horizontally depending on the ratio
if ratio > img_ratio:
img = img.resize((size[0], int(round(size[0] * img.size[1] / img.size[0]))),
Image.ANTIALIAS)
# Crop in the top, middle or bottom
if crop_type == 'top':
box = (0, 0, img.size[0], size[1])
elif crop_type == 'middle':
box = (0, int(round((img.size[1] - size[1]) / 2)), img.size[0],
int(round((img.size[1] + size[1]) / 2)))
elif crop_type == 'bottom':
box = (0, img.size[1] - size[1], img.size[0], img.size[1])
else:
raise ValueError('ERROR: invalid value for crop_type')
img = img.crop(box)
elif ratio < img_ratio:
img = img.resize((int(round(size[1] * img.size[0] / img.size[1])), size[1]),
Image.ANTIALIAS)
# Crop in the top, middle or bottom
if crop_type == 'top':
box = (0, 0, size[0], img.size[1])
elif crop_type == 'middle':
box = (int(round((img.size[0] - size[0]) / 2)), 0,
int(round((img.size[0] + size[0]) / 2)), img.size[1])
elif crop_type == 'bottom':
box = (img.size[0] - size[0], 0, img.size[0], img.size[1])
else:
raise ValueError('ERROR: invalid value for crop_type')
img = img.crop(box)
else:
img = img.resize((size[0], size[1]),
Image.ANTIALIAS)
# If the scale is the same, we do not need to crop
img.save(modified_path, "JPEG")
def get_video_data(filepath):
"""Return video duration and resolution given an input file path"""
outdata = None
ffprobe_inspect = [
current_app.config['BIN_FFPROBE'],
'-loglevel',
'error',
'-show_streams',
filepath,
'-print_format',
'json']
    ffprobe_output = json.loads(subprocess.check_output(ffprobe_inspect))
video_stream = None
    # Loop through the audio and video streams, searching for the video stream
    for stream in ffprobe_output['streams']:
if stream['codec_type'] == 'video':
video_stream = stream
if video_stream:
# If video is webm we can't get the duration (seems to be an ffprobe
# issue)
if video_stream['codec_name'] == 'vp8':
duration = None
else:
duration = int(float(video_stream['duration']))
outdata = dict(
duration=duration,
res_x=video_stream['width'],
res_y=video_stream['height'],
)
if video_stream['sample_aspect_ratio'] != '1:1':
print '[warning] Pixel aspect ratio is not square!'
return outdata
def ffmpeg_encode(src, format, res_y=720):
# The specific FFMpeg command, called multiple times
args = []
args.append("-i")
args.append(src)
if format == 'mp4':
# Example mp4 encoding
# ffmpeg -i INPUT -vcodec libx264 -pix_fmt yuv420p -preset fast -crf 20
# -acodec libfdk_aac -ab 112k -ar 44100 -movflags +faststart OUTPUT
args.extend([
'-threads', '1',
'-vf', 'scale=-2:{0}'.format(res_y),
'-vcodec', 'libx264',
'-pix_fmt', 'yuv420p',
'-preset', 'fast',
'-crf', '20',
'-acodec', 'libfdk_aac', '-ab', '112k', '-ar', '44100',
'-movflags', '+faststart'])
elif format == 'webm':
# Example webm encoding
# ffmpeg -i INPUT -vcodec libvpx -g 120 -lag-in-frames 16 -deadline good
# -cpu-used 0 -vprofile 0 -qmax 51 -qmin 11 -slices 4 -b:v 2M -f webm
args.extend([
'-vf', 'scale=-2:{0}'.format(res_y),
'-vcodec', 'libvpx',
'-g', '120',
'-lag-in-frames', '16',
'-deadline', 'good',
'-cpu-used', '0',
'-vprofile', '0',
'-qmax', '51', '-qmin', '11', '-slices', '4', '-b:v', '2M',
# '-acodec', 'libmp3lame', '-ab', '112k', '-ar', '44100',
'-f', 'webm'])
if not os.environ.get('VERBOSE'):
args.extend(['-loglevel', 'quiet'])
dst = os.path.splitext(src)
dst = "{0}-{1}p.{2}".format(dst[0], res_y, format)
args.append(dst)
print "Encoding {0} to {1}".format(src, format)
returncode = subprocess.call([current_app.config['BIN_FFMPEG']] + args)
if returncode == 0:
print "Successfully encoded {0}".format(dst)
else:
print "Error during encode"
print "Code: {0}".format(returncode)
print "Command: {0}".format(current_app.config['BIN_FFMPEG'] + " " + " ".join(args))
dst = None
# return path of the encoded video
return dst
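generate_local_thumbnails() iterates over UPLOADS_LOCAL_STORAGE_THUMBNAILS; from the keys accessed above, each entry maps a size label to a (width, height) tuple and a crop flag, and the 't' variation is additionally marked public. The labels and dimensions below are illustrative only:

UPLOADS_LOCAL_STORAGE_THUMBNAILS = {
    's': {'size': (90, 90), 'crop': True},
    'b': {'size': (160, 160), 'crop': True},
    't': {'size': (160, 160), 'crop': False},   # gets 'is_public': True
    'm': {'size': (320, 320), 'crop': False},
    'l': {'size': (1024, 1024), 'crop': False},
}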

pillar/api/utils/mongo.py

@@ -0,0 +1,29 @@
"""Utility functions for MongoDB stuff."""
from bson import ObjectId
from flask import current_app
from werkzeug.exceptions import NotFound
def find_one_or_404(collection_name, object_id,
projection=None):
"""Returns the found object from the collection, or raises a NotFound exception.
:param collection_name: name of the collection, such as 'users' or 'files'
:type collection_name: str
:param object_id: ID of the object to find.
:type object_id: str or bson.ObjectId
:returns: the found object
:rtype: dict
:raises: werkzeug.exceptions.NotFound
"""
collection = current_app.data.driver.db[collection_name]
found = collection.find_one(ObjectId(object_id),
projection=projection)
if found is None:
raise NotFound()
return found
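Typical use from a view, combined with the jsonify() helper from the first listing (the blueprint and route are illustrative; the pillar.api.utils import path for jsonify assumes that listing is the package __init__):

from flask import Blueprint
from pillar.api.utils import jsonify
from pillar.api.utils.mongo import find_one_or_404

example = Blueprint('example_users', __name__)

@example.route('/users/<user_id>')
def user_info(user_id):
    # Raises werkzeug's NotFound (a 404 response) when the document is missing.
    user = find_one_or_404('users', user_id,
                           projection={'full_name': 1, 'username': 1})
    return jsonify(user)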


@@ -0,0 +1,83 @@
import subprocess
import os
from flask import current_app
from pillar.api.utils.gcs import GoogleCloudStorageBucket
def get_sizedata(filepath):
outdata = dict(
size=int(os.stat(filepath).st_size)
)
return outdata
def rsync(path, remote_dir=''):
BIN_SSH = current_app.config['BIN_SSH']
BIN_RSYNC = current_app.config['BIN_RSYNC']
DRY_RUN = False
arguments = ['--verbose', '--ignore-existing', '--recursive', '--human-readable']
logs_path = current_app.config['CDN_SYNC_LOGS']
storage_address = current_app.config['CDN_STORAGE_ADDRESS']
user = current_app.config['CDN_STORAGE_USER']
rsa_key_path = current_app.config['CDN_RSA_KEY']
known_hosts_path = current_app.config['CDN_KNOWN_HOSTS']
if DRY_RUN:
arguments.append('--dry-run')
folder_arguments = list(arguments)
if rsa_key_path:
folder_arguments.append(
'-e ' + BIN_SSH + ' -i ' + rsa_key_path + ' -o "StrictHostKeyChecking=no"')
# if known_hosts_path:
# folder_arguments.append("-o UserKnownHostsFile " + known_hosts_path)
folder_arguments.append("--log-file=" + logs_path + "/rsync.log")
folder_arguments.append(path)
folder_arguments.append(user + "@" + storage_address + ":/public/" + remote_dir)
# print (folder_arguments)
devnull = open(os.devnull, 'wb')
# DEBUG CONFIG
# print folder_arguments
# proc = subprocess.Popen(['rsync'] + folder_arguments)
# stdout, stderr = proc.communicate()
subprocess.Popen(['nohup', BIN_RSYNC] + folder_arguments, stdout=devnull, stderr=devnull)
def remote_storage_sync(path): # can be both folder and file
if os.path.isfile(path):
filename = os.path.split(path)[1]
rsync(path, filename[:2] + '/')
else:
if os.path.exists(path):
rsync(path)
else:
raise IOError('ERROR: path not found')
def push_to_storage(project_id, full_path, backend='cgs'):
"""Move a file from temporary/processing local storage to a storage endpoint.
By default we store items in a Google Cloud Storage bucket named after the
project id.
"""
def push_single_file(project_id, full_path, backend):
if backend == 'cgs':
storage = GoogleCloudStorageBucket(project_id, subdir='_')
blob = storage.Post(full_path)
# XXX Make public on the fly if it's an image and small preview.
# This should happen by reading the database (push to storage
            # should change to accommodate it).
if blob is not None and full_path.endswith('-t.jpg'):
blob.make_public()
os.remove(full_path)
if os.path.isfile(full_path):
push_single_file(project_id, full_path, backend)
else:
if os.path.exists(full_path):
for root, dirs, files in os.walk(full_path):
for name in files:
push_single_file(project_id, os.path.join(root, name), backend)
else:
raise IOError('ERROR: path not found')
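A typical call after local processing has produced output files for a project; the project id and paths are placeholders:

# Upload everything under the directory to the project's GCS bucket (into the
# '_' folder); '-t.jpg' previews are made public and local copies are removed.
push_to_storage('566cb39b21448a1e91c26a68', '/tmp/processing/566cb39b21448a1e91c26a68/')

# The rsync-based path is also available, for single files or whole directories:
remote_storage_sync('/data/storage/shared/c1/c1ffee0123-t.jpg')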