Filename hashing on GCS
This commit is contained in:
parent
401bfeea98
commit
92970d5b6a
@ -2,7 +2,7 @@ import datetime
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import multiprocessing
|
import uuid
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
|
|
||||||
import bson.tz_util
|
import bson.tz_util
|
||||||
@ -12,14 +12,14 @@ from bson import ObjectId
|
|||||||
from eve.methods.patch import patch_internal
|
from eve.methods.patch import patch_internal
|
||||||
from eve.methods.post import post_internal
|
from eve.methods.post import post_internal
|
||||||
from eve.methods.put import put_internal
|
from eve.methods.put import put_internal
|
||||||
from flask import Blueprint, safe_join
|
from flask import Blueprint
|
||||||
from flask import jsonify
|
from flask import jsonify
|
||||||
from flask import request
|
from flask import request
|
||||||
from flask import send_from_directory
|
from flask import send_from_directory
|
||||||
from flask import url_for, helpers
|
from flask import url_for, helpers
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
from flask import g
|
from flask import g
|
||||||
from werkzeug.exceptions import UnprocessableEntity, NotFound, InternalServerError, Forbidden
|
from werkzeug.exceptions import NotFound, InternalServerError
|
||||||
|
|
||||||
from application import utils
|
from application import utils
|
||||||
from application.utils import remove_private_keys
|
from application.utils import remove_private_keys
|
||||||
@ -27,10 +27,7 @@ from application.utils.authorization import require_login
|
|||||||
from application.utils.cdn import hash_file_path
|
from application.utils.cdn import hash_file_path
|
||||||
from application.utils.encoding import Encoder
|
from application.utils.encoding import Encoder
|
||||||
from application.utils.gcs import GoogleCloudStorageBucket
|
from application.utils.gcs import GoogleCloudStorageBucket
|
||||||
from application.utils.imaging import ffmpeg_encode
|
|
||||||
from application.utils.imaging import generate_local_thumbnails
|
from application.utils.imaging import generate_local_thumbnails
|
||||||
from application.utils.imaging import get_video_data
|
|
||||||
from application.utils.storage import push_to_storage
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -139,7 +136,7 @@ def _process_video(gcs, file_id, local_file, src_file):
|
|||||||
log.info('Processing video for file %s', file_id)
|
log.info('Processing video for file %s', file_id)
|
||||||
|
|
||||||
# Create variations
|
# Create variations
|
||||||
root, _ = os.path.splitext(src_file['filename'])
|
root, _ = os.path.splitext(src_file['file_path'])
|
||||||
src_file['variations'] = []
|
src_file['variations'] = []
|
||||||
|
|
||||||
for v in ('mp4', 'webm'):
|
for v in ('mp4', 'webm'):
|
||||||
@ -147,7 +144,7 @@ def _process_video(gcs, file_id, local_file, src_file):
|
|||||||
file_variation = dict(
|
file_variation = dict(
|
||||||
format=v,
|
format=v,
|
||||||
content_type='video/{}'.format(v),
|
content_type='video/{}'.format(v),
|
||||||
file_path='{}.{}'.format(root, v),
|
file_path='{}-{}.{}'.format(root, v, v),
|
||||||
size='',
|
size='',
|
||||||
duration=0,
|
duration=0,
|
||||||
width=0,
|
width=0,
|
||||||
@ -452,7 +449,7 @@ def stream_to_gcs(project_id):
|
|||||||
if not project:
|
if not project:
|
||||||
raise NotFound('Project %s does not exist' % project_id)
|
raise NotFound('Project %s does not exist' % project_id)
|
||||||
|
|
||||||
file_id, fname, status = create_file_doc_for_upload(project['_id'], uploaded_file)
|
file_id, internal_fname, status = create_file_doc_for_upload(project['_id'], uploaded_file)
|
||||||
|
|
||||||
if uploaded_file.content_type.startswith('image/'):
|
if uploaded_file.content_type.startswith('image/'):
|
||||||
# We need to do local thumbnailing, so we have to write the stream
|
# We need to do local thumbnailing, so we have to write the stream
|
||||||
@ -468,7 +465,7 @@ def stream_to_gcs(project_id):
|
|||||||
# Upload the file to GCS.
|
# Upload the file to GCS.
|
||||||
try:
|
try:
|
||||||
gcs = GoogleCloudStorageBucket(project_id)
|
gcs = GoogleCloudStorageBucket(project_id)
|
||||||
blob = gcs.bucket.blob('_/' + fname, chunk_size=256 * 1024 * 2)
|
blob = gcs.bucket.blob('_/' + internal_fname, chunk_size=256 * 1024 * 2)
|
||||||
blob.upload_from_file(stream_for_gcs,
|
blob.upload_from_file(stream_for_gcs,
|
||||||
content_type=uploaded_file.mimetype,
|
content_type=uploaded_file.mimetype,
|
||||||
size=uploaded_file.content_length)
|
size=uploaded_file.content_length)
|
||||||
@ -482,7 +479,7 @@ def stream_to_gcs(project_id):
|
|||||||
blob.reload()
|
blob.reload()
|
||||||
update_file_doc(file_id,
|
update_file_doc(file_id,
|
||||||
status='queued_for_processing',
|
status='queued_for_processing',
|
||||||
file_path=fname,
|
file_path=internal_fname,
|
||||||
length=blob.size)
|
length=blob.size)
|
||||||
|
|
||||||
process_file(gcs, file_id, local_file)
|
process_file(gcs, file_id, local_file)
|
||||||
@ -491,7 +488,7 @@ def stream_to_gcs(project_id):
|
|||||||
if local_file is not None:
|
if local_file is not None:
|
||||||
local_file.close()
|
local_file.close()
|
||||||
|
|
||||||
log.debug('Handled uploaded file id=%s, fname=%s, size=%i', file_id, fname, blob.size)
|
log.debug('Handled uploaded file id=%s, fname=%s, size=%i', file_id, internal_fname, blob.size)
|
||||||
|
|
||||||
# Status is 200 if the file already existed, and 201 if it was newly created.
|
# Status is 200 if the file already existed, and 201 if it was newly created.
|
||||||
return jsonify(status='ok', file_id=str(file_id)), status
|
return jsonify(status='ok', file_id=str(file_id)), status
|
||||||
@ -514,19 +511,24 @@ def create_file_doc_for_upload(project_id, uploaded_file):
|
|||||||
|
|
||||||
:param uploaded_file: file from request.files['form-key']
|
:param uploaded_file: file from request.files['form-key']
|
||||||
:type uploaded_file: werkzeug.datastructures.FileStorage
|
:type uploaded_file: werkzeug.datastructures.FileStorage
|
||||||
:returns: a tuple (file_id, filename, status), where 'filename' is the secured
|
:returns: a tuple (file_id, filename, status), where 'filename' is the internal
|
||||||
name stored in file_doc['filename'].
|
filename used on GCS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
project_id = ObjectId(project_id)
|
project_id = ObjectId(project_id)
|
||||||
|
|
||||||
# TODO: hash the filename with path info to get the internal name.
|
# Hash the filename with path info to get the internal name. This should
|
||||||
internal_filename = uploaded_file.filename
|
# be unique for the project.
|
||||||
|
# internal_filename = uploaded_file.filename
|
||||||
|
_, ext = os.path.splitext(uploaded_file.filename)
|
||||||
|
internal_filename = uuid.uuid4().hex + ext
|
||||||
|
|
||||||
# See if we can find a pre-existing file doc
|
# For now, we don't support overwriting files, and create a new one every time.
|
||||||
files = current_app.data.driver.db['files']
|
# # See if we can find a pre-existing file doc.
|
||||||
file_doc = files.find_one({'project': project_id,
|
# files = current_app.data.driver.db['files']
|
||||||
'name': internal_filename})
|
# file_doc = files.find_one({'project': project_id,
|
||||||
|
# 'name': internal_filename})
|
||||||
|
file_doc = None
|
||||||
|
|
||||||
# TODO: at some point do name-based and content-based content-type sniffing.
|
# TODO: at some point do name-based and content-based content-type sniffing.
|
||||||
new_props = {'filename': uploaded_file.filename,
|
new_props = {'filename': uploaded_file.filename,
|
||||||
@ -548,7 +550,7 @@ def create_file_doc_for_upload(project_id, uploaded_file):
|
|||||||
status, file_fields)
|
status, file_fields)
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
return file_fields['_id'], uploaded_file.filename, status
|
return file_fields['_id'], internal_filename, status
|
||||||
|
|
||||||
|
|
||||||
def setup_app(app, url_prefix):
|
def setup_app(app, url_prefix):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user