WIP: Switch between FS storage and S3 per static asset #104417

Draft
Anna Sirota wants to merge 4 commits from alternative-storage into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
7 changed files with 174 additions and 10 deletions

View File

@ -1,11 +1,16 @@
"""Custom file storage classes.""" """Custom file storage classes."""
import logging import logging
from botocore.client import Config
from django.conf import settings from django.conf import settings
from django.db import models
from django.db.models.fields.files import FieldFile
from botocore.client import Config
import boto3 import boto3
import botocore.exceptions import botocore.exceptions
import nginx_secure_links.storages
from storages.backends.s3boto3 import S3Boto3Storage from storages.backends.s3boto3 import S3Boto3Storage
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -49,7 +54,7 @@ def _get_s3_client():
) )
def get_s3_url(path, expires_in_seconds=3600): def get_s3_url(path, expires_in_seconds=settings.FILE_LINK_EXPIRE_SECONDS):
"""Generate a pre-signed S3 URL to a given path.""" """Generate a pre-signed S3 URL to a given path."""
global _s3_client global _s3_client
if not _s3_client: if not _s3_client:
@ -94,7 +99,7 @@ def get_s3_post_url_and_fields(
bucket=settings.AWS_STORAGE_BUCKET_NAME, bucket=settings.AWS_STORAGE_BUCKET_NAME,
fields=None, fields=None,
conditions=None, conditions=None,
expires_in_seconds=3600, expires_in_seconds=settings.FILE_LINK_EXPIRE_SECONDS,
): ):
"""Generate a presigned URL S3 POST request to upload a file to a given bucket and path. """Generate a presigned URL S3 POST request to upload a file to a given bucket and path.
@ -123,3 +128,36 @@ def get_s3_post_url_and_fields(
# The response contains the presigned URL and required fields # The response contains the presigned URL and required fields
return response return response
def _storage_for_instance(instance):
    """Return the storage backend selected by ``instance.source_storage``.

    ``None`` selects ``S3Boto3CustomStorage`` (S3 is the default) and ``'fs'``
    selects ``nginx_secure_links.storages.FileStorage``.

    Raises:
        NotImplementedError: for any other value. It is a ``RuntimeError``
            subclass, so it stays compatible with the ``RuntimeError`` that
            the previous bare ``raise`` produced, but it names the bad value.
    """
    source_storage = instance.source_storage
    if source_storage is None:  # S3 is default
        return S3Boto3CustomStorage()
    if source_storage == 'fs':
        return nginx_secure_links.storages.FileStorage()
    raise NotImplementedError(f'Unsupported source_storage: {source_storage!r}')


class DynamicStorageFieldFile(FieldFile):
    """Defines which storage the file is located at."""

    def __init__(self, instance, *args, **kwargs):
        """Choose between S3 and file system storage depending on `source_storage`."""
        super().__init__(instance, *args, **kwargs)
        # Override whatever storage the parent constructor assigned.
        self.storage = _storage_for_instance(instance)


class CustomFileField(models.FileField):
    """Defines which storage the file field is located at."""

    attr_class = DynamicStorageFieldFile

    def pre_save(self, model_instance, add):
        """Choose between S3 and file system storage depending on `source_storage`."""
        storage = _storage_for_instance(model_instance)
        # NOTE(review): this assigns to the field object itself, not only to the
        # file of this instance; presumably the field is shared between model
        # instances, so confirm this is safe when instances use different storages.
        self.storage = storage
        # Look the file up by this field's own attribute name instead of a
        # hard-coded `source`, so the field also works under another name.
        getattr(model_instance, self.attname).storage = storage
        # TODO: do the same for thumbnail?
        return super().pre_save(model_instance, add)

View File

@ -29,6 +29,7 @@ django-background-tasks-updated @ git+https://projects.blender.org/infrastructur
django-countries==7.5.1 django-countries==7.5.1
django-loginas==0.3.11 django-loginas==0.3.11
django-nested-admin==4.0.2 django-nested-admin==4.0.2
django-nginx-secure-links==0.0.7
django-pipeline==3.1.0 django-pipeline==3.1.0
django-s3direct==2.0.3 django-s3direct==2.0.3
django-storages[google]==1.11.1 django-storages[google]==1.11.1

View File

@ -66,7 +66,7 @@ class StaticAssetAdmin(AdminUserDefaultMixin, nested_admin.NestedModelAdmin):
{ {
'fields': [ 'fields': [
'id', 'id',
'source', ('source', 'source_storage'),
'original_filename', 'original_filename',
'size_bytes', 'size_bytes',
('source_type', 'content_type'), ('source_type', 'content_type'),

View File

@ -0,0 +1,55 @@
# noqa: D100
import logging
import os.path
from django.core.files.storage import FileSystemStorage
from django.core.management.base import BaseCommand
from static_assets.models.static_assets import StaticAsset
# Storage instance the command uses to check for and write downloaded files.
file_system_storage = FileSystemStorage()
logger = logging.getLogger(__name__)
# Set this module's logger level to DEBUG at import time.
logger.setLevel(logging.DEBUG)
class Command(BaseCommand):
    """Download static asset files of given IDs to file system storage."""

    help = "Download static asset files of given IDs to file system storage."

    def add_arguments(self, parser):
        """Add range of IDs to command options."""
        parser.add_argument(
            '--min-id', type=int, help='Lowest static asset ID to process (inclusive).'
        )
        parser.add_argument(
            '--mex-id', type=int, help='Upper bound of static asset IDs to process (exclusive).'
        )

    def handle(self, *args, **options):  # noqa: D102
        id_min = options.get('min_id')
        id_mex = options.get('mex_id')
        assets = StaticAsset.objects.order_by('id')
        # Only apply the bounds that were actually given, so an omitted option
        # leaves that side of the range open instead of passing None to the lookup.
        if id_min is not None:
            assets = assets.filter(id__gte=id_min)
        if id_mex is not None:
            assets = assets.filter(id__lt=id_mex)
        for sa in assets:
            # TODO: optionally also update `source_storage` field with `fs`
            self._download_to_file_system_storage(sa)

    def _download_to_file_system_storage(self, sa: StaticAsset):
        """Save the thumbnail, video variations/tracks and source of one asset."""
        if sa.thumbnail:
            self._save(sa.thumbnail, prefix='public')
        # Keep the `try` limited to the lookup that can raise for non-video assets.
        try:
            video = sa.video
        except StaticAsset.video.RelatedObjectDoesNotExist:
            video = None
        if video is not None:
            for variation in video.variations.all():
                self._save(variation.source)
            for track in video.tracks.all():
                self._save(track.source)
        # sa.image has no extra files
        self._save(sa.source)

    def _save(self, field, prefix=''):
        """Copy the file behind `field` to file system storage unless already there.

        `prefix` is prepended to the file's name to build the output path.
        """
        if not field:
            # No file is associated with this field, so there is nothing to download.
            logger.debug('Skipping empty file field')
            return
        output_path = os.path.join(prefix, field.name)
        logger.info('Downloading %s to path %s', field, output_path)
        if file_system_storage.exists(output_path):
            logger.warning('%s exists', output_path)
            return
        # The context manager closes the file even if saving fails.
        with field.open() as f:
            file_system_storage.save(output_path, f)
        logger.info('Downloaded %s to path %s', field, output_path)

View File

@ -0,0 +1,57 @@
# Generated by Django 4.2.13 on 2024-07-09 10:06
import common.storage
import common.upload_paths
from django.db import migrations, models
# Models that gain a `source_storage` column and whose `source` field is altered,
# in the order the operations are applied.
_MODEL_NAMES = ('staticasset', 'videotrack', 'videovariation')


def _source_storage_field():
    """Build a fresh `source_storage` field definition."""
    return models.CharField(
        blank=True,
        choices=[(None, 'S3'), ('fs', 'File System'), ('fsp', 'File System Public')],
        max_length=3,
        null=True,
    )


def _source_field():
    """Build a fresh `source` field definition."""
    return common.storage.CustomFileField(
        blank=True, max_length=256, upload_to=common.upload_paths.get_upload_to_hashed_path
    )


class Migration(migrations.Migration):
    """Add `source_storage` to three models and switch `source` to `CustomFileField`."""

    dependencies = [('static_assets', '0012_allow_blank_license')]

    operations = [
        # First add the new column on every model...
        *(
            migrations.AddField(
                model_name=model_name, name='source_storage', field=_source_storage_field()
            )
            for model_name in _MODEL_NAMES
        ),
        # ...then alter each model's `source` field.
        *(
            migrations.AlterField(model_name=model_name, name='source', field=_source_field())
            for model_name in _MODEL_NAMES
        ),
    ]

View File

@ -14,14 +14,20 @@ from django.utils.text import slugify
import looper.model_mixins import looper.model_mixins
from common import mixins from common import mixins
from common.storage import CustomFileField
from common.upload_paths import get_upload_to_hashed_path from common.upload_paths import get_upload_to_hashed_path
from static_assets.models import License from static_assets.models import License
from static_assets.tasks import create_video_processing_job, create_video_transcribing_job from static_assets.tasks import create_video_processing_job, create_video_transcribing_job
import common.storage
User = get_user_model() User = get_user_model()
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Choices for the `source_storage` model fields; `None` is the value labelled 'S3'.
# NOTE(review): storage selection in `common.storage` only handles `None` and 'fs';
# 'fsp' is offered here but currently ends in the error branch there.
STORAGE_CHOICES = [
    (None, 'S3'),
    ('fs', 'File System'),
    ('fsp', 'File System Public'),
]
def _get_default_license_id() -> Optional[int]: def _get_default_license_id() -> Optional[int]:
cc_by = License.objects.filter(slug='cc-by').first() cc_by = License.objects.filter(slug='cc-by').first()
@ -44,12 +50,12 @@ class StaticAsset(
class Meta: class Meta:
ordering = ['-date_created'] ordering = ['-date_created']
source = models.FileField( source = CustomFileField(
upload_to=get_upload_to_hashed_path, upload_to=get_upload_to_hashed_path,
storage=common.storage.S3Boto3CustomStorage(),
blank=True, blank=True,
max_length=256, max_length=256,
) )
source_storage = models.CharField(max_length=3, null=True, blank=True, choices=STORAGE_CHOICES)
source_type = models.CharField( source_type = models.CharField(
choices=StaticAssetFileTypeChoices.choices, choices=StaticAssetFileTypeChoices.choices,
max_length=5, max_length=5,
@ -316,7 +322,8 @@ class VideoVariation(models.Model):
height = models.PositiveIntegerField(blank=True, null=True) height = models.PositiveIntegerField(blank=True, null=True)
width = models.PositiveIntegerField(blank=True, null=True) width = models.PositiveIntegerField(blank=True, null=True)
resolution_label = models.CharField(max_length=32, blank=True) resolution_label = models.CharField(max_length=32, blank=True)
source = models.FileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256) source = CustomFileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256)
source_storage = models.CharField(max_length=3, null=True, blank=True, choices=STORAGE_CHOICES)
size_bytes = models.BigIntegerField(editable=False) size_bytes = models.BigIntegerField(editable=False)
content_type = models.CharField(max_length=256, blank=True) content_type = models.CharField(max_length=256, blank=True)
@ -356,7 +363,8 @@ class VideoTrack(models.Model):
language = models.CharField( language = models.CharField(
blank=False, null=False, max_length=5, choices=VideoTrackLanguageCodeChoices.choices blank=False, null=False, max_length=5, choices=VideoTrackLanguageCodeChoices.choices
) )
source = models.FileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256) source = CustomFileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256)
source_storage = models.CharField(max_length=3, null=True, blank=True, choices=STORAGE_CHOICES)
@property @property
def url(self) -> str: def url(self) -> str:

View File

@ -75,6 +75,7 @@ INSTALLED_APPS = [
'rest_framework', 'rest_framework',
'rest_framework.authtoken', 'rest_framework.authtoken',
's3direct', 's3direct',
'nginx_secure_links',
] ]
AUTH_USER_MODEL = 'users.User' AUTH_USER_MODEL = 'users.User'
@ -335,7 +336,6 @@ SITE_ID = 1
# Required by Django Debug Toolbar # Required by Django Debug Toolbar
INTERNAL_IPS = ['127.0.0.1'] INTERNAL_IPS = ['127.0.0.1']
TAGGIT_CASE_INSENSITIVE = True TAGGIT_CASE_INSENSITIVE = True
DEFAULT_AUTO_FIELD = 'django.db.models.AutoField' DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'
@ -692,3 +692,8 @@ STRIPE_CHECKOUT_SUBMIT_TYPE = 'pay'
# Maximum number of attempts for failing background tasks # Maximum number of attempts for failing background tasks
MAX_ATTEMPTS = 3 MAX_ATTEMPTS = 3
# Default `expires_in_seconds` for the pre-signed S3 URL helpers in `common.storage`.
FILE_LINK_EXPIRE_SECONDS = 3600
# Presumably consumed by the `nginx_secure_links` app — TODO confirm setting names.
SECURE_LINK_SECRET_KEY = _get('SECURE_LINK_SECRET_KEY')
# Reuse the same lifetime as above for secure links.
SECURE_LINK_EXPIRATION_SECONDS = FILE_LINK_EXPIRE_SECONDS
SECURE_LINK_PUBLIC_PREFIXES = ['public']