Google Cloud Storage support

We introduce a new node_type, storage, which supports different file
storage backends and makes them available for browsing via dedicated
entry points in the application. We intend to keep Pillar
storage-agnostic and to support both internal/local storage and
remote/hosted solutions.
This commit is contained in:
Francesco Siddi 2015-10-03 17:07:14 +02:00
parent 89a9509657
commit 53f881e96c
6 changed files with 222 additions and 7 deletions

View File

@ -9,6 +9,9 @@ git \
nano \
zlib1g-dev \
libjpeg-dev \
python-crypto \
python-openssl \
libssl-dev \
software-properties-common
RUN add-apt-repository ppa:mc3man/trusty-media \

View File

@ -1,22 +1,50 @@
import os
import json
from multiprocessing import Process
from bson import ObjectId
from flask import request
from flask import Blueprint
from flask import abort
from flask import jsonify
from application import app
from application import db
from application import post_item
from application.utils.imaging import generate_local_thumbnails
from application.utils.imaging import get_video_data
from application.utils.imaging import ffmpeg_encode
from application.utils.storage import remote_storage_sync
from application.utils.gcs import GoogleCloudStorageBucket
# Blueprint that groups the file storage routes; templates are looked up in
# 'templates' and static files are served from the shared storage folder.
file_storage = Blueprint(
    'file_storage',
    __name__,
    template_folder='templates',
    static_folder='../../static/storage',
)
@file_storage.route('/gcs/<bucket_name>/<subdir>/')
@file_storage.route('/gcs/<bucket_name>/<subdir>/<path:file_path>')
def browse_gcs(bucket_name, subdir, file_path=None):
    """Browse the content of a Google Cloud Storage bucket.

    :type bucket_name: string
    :param bucket_name: Name of the bucket to browse.

    :type subdir: string
    :param subdir: First level subdir of the bucket used as entry point.

    :type file_path: string
    :param file_path: Optional path, relative to ``subdir``, of a file or
        directory. When omitted, the root of ``subdir`` is listed.

    Returns a JSON response: the file properties when ``file_path`` matches
    an existing file, a directory listing in every other case.
    """
    bucket = GoogleCloudStorageBucket(bucket_name, subdir=subdir)

    if file_path:
        # A path was requested: serve the file properties if it is a file.
        properties = bucket.Get(file_path)
        if properties:
            return jsonify(properties)

    # Either no path was given or it did not match a file, so treat it as a
    # directory. We always return a listing (possibly empty) even if the
    # directory does not exist, instead of a 404. This can be changed later.
    return jsonify(bucket.List(file_path or ''))
@file_storage.route('/build_thumbnails/<path:file_path>')
def build_thumbnails(file_path=None, file_id=None):
if file_path:
@ -180,7 +208,7 @@ def process_file(src_file):
p = Process(target=encode, args=(file_abs_path, variations, res_y))
p.start()
if mime_type != 'video':
# Sync the whole subfolder
# Sync the whole subdir
sync_path = os.path.split(file_abs_path)[0]
else:
sync_path = file_abs_path

View File

@ -0,0 +1,112 @@
import os
import time
import datetime
from gcloud.storage.client import Client
from oauth2client.client import SignedJwtAssertionCredentials
from application import app
class GoogleCloudStorageBucket(object):
    """Cloud Storage bucket interface.

    We create a bucket for every project. In the bucket we create first level
    subdirs as follows:

    - '_' (will contain hashed assets, and stays on top of default listing)
    - 'svn' (svn checkout mirror)
    - 'shared' (any additional folder of static folder that is accessed via a
      node of 'storage' node_type)

    :type bucket_name: string
    :param bucket_name: Name of the bucket.

    :type subdir: string
    :param subdir: The local entrypoint to browse the bucket.
    """

    def __init__(self, bucket_name, subdir='_/'):
        # Read every setting up front so a missing key fails before any file
        # is opened. The project key is spelled 'CGS_PROJECT_NAME' in the
        # application config, so that spelling is kept here.
        project_name = app.config['CGS_PROJECT_NAME']
        client_email = app.config['GCS_CLIENT_EMAIL']
        pem_key_path = app.config['GCS_PRIVATE_KEY_PEM']
        p12_key_path = app.config['GCS_PRIVATE_KEY_P12']
        scope = 'https://www.googleapis.com/auth/devstorage.read_write'

        # Load private key in pem format (used by the API)
        with open(pem_key_path) as f:
            private_key_pem = f.read()
        credentials_pem = SignedJwtAssertionCredentials(
            client_email, private_key_pem, scope)

        # Load private key in p12 format (used by the signed urls generator).
        # PKCS#12 is a binary container, so it must be read in binary mode to
        # avoid newline translation or text decoding of the key material.
        with open(p12_key_path, 'rb') as f:
            private_key_pkcs12 = f.read()
        self.credentials_p12 = SignedJwtAssertionCredentials(
            client_email, private_key_pkcs12, scope)

        gcs = Client(project=project_name, credentials=credentials_pem)
        self.bucket = gcs.get_bucket(bucket_name)
        self.subdir = subdir

    def List(self, path=None):
        """Display the content of a subdir in the project bucket. If the path
        points to a file the listing is simply empty.

        :type path: string
        :param path: The relative path to the directory or asset. ``None`` or
            an empty string lists the root of the subdir.

        Returns a dict with the keys ``name``, ``type`` and ``children``
        (files first, then directories).
        """
        # The default (None) means "root of the subdir"; normalise it so the
        # os.path calls below always receive a string.
        path = path or ''
        if path and not path.endswith('/'):
            path += '/'
        prefix = os.path.join(self.subdir, path)

        fields_to_return = 'nextPageToken,items(name,size,contentType),prefixes'
        req = self.bucket.list_blobs(
            fields=fields_to_return, prefix=prefix, delimiter='/')

        # NOTE(review): req.prefixes appears to be filled in while the listing
        # is iterated, so the files loop must run before the directories
        # loop -- confirm against the gcloud documentation.
        files = []
        for f in req:
            filename = os.path.basename(f.name)
            if filename != '':  # Skip own folder name
                files.append(dict(
                    path=os.path.relpath(f.name, self.subdir),
                    text=filename,
                    type=f.content_type))

        directories = []
        for dir_path in req.prefixes:
            directory_name = os.path.basename(os.path.normpath(dir_path))
            directories.append(dict(
                text=directory_name,
                path=os.path.relpath(dir_path, self.subdir),
                type='group_storage',
                children=True))

        return dict(
            name=os.path.basename(os.path.normpath(path)),
            type='group_storage',
            children=files + directories)

    def Get(self, path):
        """Get selected file info if the path matches.

        :type path: string
        :param path: The relative path to the file.

        Returns a dict with the keys ``updated``, ``name``, ``size``,
        ``content_type`` and ``signed_url``, or ``None`` when no file exists
        at the given path.
        """
        f = self.bucket.blob(os.path.join(self.subdir, path))
        if not f.exists():
            return None

        f.reload()
        # The signed url expires one day from now, expressed as a timestamp
        # in whole seconds.
        expiration = datetime.datetime.now() + datetime.timedelta(days=1)
        expiration = int(time.mktime(expiration.timetuple()))
        return dict(
            updated=f.updated,
            name=os.path.basename(f.name),
            size=f.size,
            content_type=f.content_type,
            signed_url=f.generate_signed_url(
                expiration, credentials=self.credentials_p12))

View File

@ -13,13 +13,21 @@ class Config(object):
class Development(Config):
FILE_STORAGE = '{0}/application/static/storage'.format(
os.path.join(os.path.dirname(__file__)))
SHARED_DIR = '/storage/shared'
PORT = 5000
HOST = '0.0.0.0'
DEBUG = True
CDN_USE_URL_SIGNING = False
CDN_SERVICE_DOMAIN_PROTOCOL = 'https'
CDN_SERVICE_DOMAIN = ''
CDN_CONTENT_SUBFOLDER = ''
CDN_URL_SIGNING_KEY = ''
CDN_STORAGE_USER = ''
CDN_STORAGE_ADDRESS = ''
CDN_SYNC_LOGS = ''
CDN_RSA_KEY = ''
CDN_KNOWN_HOSTS = ''
UPLOADS_LOCAL_STORAGE_THUMBNAILS = {
's': {'size': (90, 90), 'crop': True},
@ -29,3 +37,16 @@ class Development(Config):
'l': {'size': (1024, 1024), 'crop': False},
'h': {'size': (2048, 2048), 'crop': False}
}
BIN_FFPROBE ='/usr/bin/ffprobe'
BIN_FFMPEG = '/usr/bin/ffmpeg'
BIN_SSH = '/usr/bin/ssh'
BIN_RSYNC = '/usr/bin/rsync'
GCS_CLIENT_EMAIL = ''
GCS_PRIVATE_KEY_P12 = ''
GCS_PRIVATE_KEY_PEM = ''
CGS_PROJECT_NAME = ''
class Deployment(Development):
    """Deployment settings; inherits every value from Development unchanged."""

View File

@ -636,6 +636,40 @@ def populate_node_types(old_ids={}):
}
}
# Node type describing an entry point into a storage backend.
node_type_storage = {
    'name': 'storage',
    'description': 'Entrypoint to a remote or local storage solution',
    'dyn_schema': {
        # Project the storage belongs to; its ID is used for lookups in the
        # storage backend (with Google Cloud Storage, for example, the
        # project id is the name of the bucket).
        'project': {
            'type': 'objectid',
            'data_relation': {'resource': 'nodes', 'field': '_id'},
        },
        # Subdirectory of the project's main storage used as entry point.
        'subdir': {'type': 'string'},
        # Backend holding the files (gcs, pillar, bam, cdnsun).
        'backend': {'type': 'string'},
    },
    'form_schema': {'subdir': {}, 'project': {}, 'backend': {}},
    'parent': {'node_types': ['group', 'project']},
}
from pymongo import MongoClient
client = MongoClient(MONGO_HOST, 27017)
@ -671,6 +705,7 @@ def populate_node_types(old_ids={}):
# upgrade(comment_node_type, old_ids)
upgrade(project_node_type, old_ids)
upgrade(asset_node_type, old_ids)
upgrade(node_type_storage, old_ids)
@manager.command

View File

@ -1,17 +1,33 @@
Cerberus==0.8
cffi==1.2.1
cryptography==1.0.2
enum34==1.0.4
Eve==0.5.3
Events==0.2.1
Flask==0.10.1
Flask-PyMongo==0.3.0
Flask-Script==2.0.5
gcloud==0.7.1
google-apitools==0.4.11
httplib2==0.9.2
idna==2.0
ipaddress==1.0.14
itsdangerous==0.24
Jinja2==2.7.3
MarkupSafe==0.23
oauth2client==1.5.1
Pillow==2.8.1
Werkzeug==0.10.1
argparse==1.2.1
distribute==0.6.24
itsdangerous==0.24
protobuf==3.0.0a1
protorpc==0.11.1
pyasn1==0.1.9
pyasn1-modules==0.0.8
pycparser==2.14
pycrypto==2.6.1
pymongo==2.8
pyOpenSSL==0.15.1
requests==2.6.0
rsa==3.2
simplejson==3.6.5
wsgiref==0.1.2
six==1.9.0
Werkzeug==0.10.1
wheel==0.24.0