WIP: Switch between FS storage and S3 per static asset #104417

Draft
Anna Sirota wants to merge 4 commits from alternative-storage into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
5 changed files with 173 additions and 7 deletions
Showing only changes of commit 672b3bf7f3 - Show all commits

View File

@ -1,8 +1,12 @@
"""Custom file storage classes."""
import logging
from botocore.client import Config
from django.conf import settings
from django.core.files.storage import FileSystemStorage
from django.db import models
from django.db.models.fields.files import FieldFile
from botocore.client import Config
import boto3
import botocore.exceptions
@ -123,3 +127,36 @@ def get_s3_post_url_and_fields(
# The response contains the presigned URL and required fields
return response
class DynamicStorageFieldFile(FieldFile):
    """File wrapper whose storage backend is chosen per model instance.

    The backend is selected from the owning instance's ``source_storage``
    value: ``None`` selects S3 (the default) and ``'fs'`` selects the file
    system storage.
    """

    def __init__(self, instance, *args, **kwargs):
        """Choose between S3 and file system storage depending on `source_storage`.

        :param instance: model instance owning the file; must expose ``source_storage``.
        :raises ValueError: if ``source_storage`` is neither ``None`` nor ``'fs'``.
        """
        super().__init__(instance, *args, **kwargs)
        source_storage = instance.source_storage
        if source_storage is None:  # S3 is default
            self.storage = S3Boto3CustomStorage()
        elif source_storage == 'fs':
            self.storage = FileSystemStorage()
        else:
            # A bare `raise` has no active exception to re-raise here and would only
            # surface as an opaque RuntimeError; name the offending value instead.
            raise ValueError(f'Unsupported source_storage value: {source_storage!r}')
class CustomFileField(models.FileField):
    """File field whose storage backend is chosen per model instance.

    The owning model is expected to have a ``source_storage`` attribute:
    ``None`` selects S3 (the default) and ``'fs'`` selects the file system
    storage.
    """

    attr_class = DynamicStorageFieldFile

    def pre_save(self, model_instance, add):
        """Choose between S3 and file system storage depending on `source_storage`.

        :param model_instance: instance being saved; must expose ``source_storage``.
        :param add: passed through unchanged to ``FileField.pre_save``.
        :raises ValueError: if ``source_storage`` is neither ``None`` nor ``'fs'``.
        :return: whatever ``FileField.pre_save`` returns for this instance.
        """
        source_storage = model_instance.source_storage
        if source_storage is None:  # S3 is default
            storage = S3Boto3CustomStorage()
        elif source_storage == 'fs':
            storage = FileSystemStorage()
        else:
            # A bare `raise` has no active exception to re-raise here and would only
            # surface as an opaque RuntimeError; name the offending value instead.
            raise ValueError(f'Unsupported source_storage value: {source_storage!r}')
        # NOTE(review): the field object is shared by every instance of the model, so
        # this assignment is visible to all of them -- confirm this is intended.
        self.storage = storage
        # Look the file up by this field's own attribute name instead of a hard-coded
        # `source`, so the field keeps working if it is declared under another name.
        getattr(model_instance, self.attname).storage = storage
        # TODO: do the same for thumbnail?
        return super().pre_save(model_instance, add)

View File

@ -66,7 +66,7 @@ class StaticAssetAdmin(AdminUserDefaultMixin, nested_admin.NestedModelAdmin):
{
'fields': [
'id',
'source',
('source', 'source_storage'),
'original_filename',
'size_bytes',
('source_type', 'content_type'),

View File

@ -0,0 +1,55 @@
# noqa: D100
import logging
import os.path
from django.core.files.storage import FileSystemStorage
from django.core.management.base import BaseCommand
from static_assets.models.static_assets import StaticAsset
# Destination storage shared by the whole command; created with no arguments,
# so Django's default file system storage settings apply.
file_system_storage = FileSystemStorage()
logger = logging.getLogger(__name__)
# Set this module's logger threshold to DEBUG.
logger.setLevel(logging.DEBUG)
class Command(BaseCommand):
    """Download static asset files of given IDs to file system storage."""

    help = "Download static asset files of given IDs to file system storage."

    def add_arguments(self, parser):
        """Add range of IDs to command options."""
        parser.add_argument(
            '--min-id',
            type=int,
            help='Lowest static asset ID to process (inclusive). Omit for no lower bound.',
        )
        parser.add_argument(
            '--mex-id',
            type=int,
            help='Upper bound of static asset IDs to process (exclusive). Omit for no upper bound.',
        )

    def handle(self, *args, **options):
        """Download the files of every static asset whose ID is in ``[min_id, mex_id)``."""
        id_min = options.get('min_id')
        id_mex = options.get('mex_id')
        assets = StaticAsset.objects.order_by('id')
        # `None` is not a valid lookup value, so only filter on the bounds actually given.
        if id_min is not None:
            assets = assets.filter(id__gte=id_min)
        if id_mex is not None:
            assets = assets.filter(id__lt=id_mex)
        for sa in assets:
            # TODO: optionally also update `source_storage` field with `fs`
            self._download_to_file_system_storage(sa)

    def _download_to_file_system_storage(self, sa: StaticAsset):
        """Copy the thumbnail, video variations/tracks and source of `sa` to the file system."""
        if sa.thumbnail:
            self._save(sa.thumbnail, prefix='thumbnails')
        # Keep the `try` limited to the lookup that can legitimately fail, so that
        # errors raised while copying files are never swallowed by accident.
        try:
            video = sa.video
        except StaticAsset.video.RelatedObjectDoesNotExist:
            pass
        else:
            for variation in video.variations.all():
                self._save(variation.source)
            for track in video.tracks.all():
                self._save(track.source)
        # sa.image has no extra files
        self._save(sa.source)

    def _save(self, field, prefix=''):
        """Copy the file behind `field` to file system storage under `prefix`.

        Fields with no file and files already present at the destination are skipped.
        """
        if not field:
            # File fields may be blank; there is nothing to download for them.
            logger.debug('Skipping a file field with no file associated')
            return
        output_path = os.path.join(prefix, field.name)
        logger.info('Downloading %s to path %s', field, output_path)
        if file_system_storage.exists(output_path):
            logger.warning('%s exists', output_path)
            return
        # The context manager closes the source file even if saving fails.
        with field.open() as f:
            file_system_storage.save(output_path, f)
        logger.info('Downloaded %s to path %s', field, output_path)

View File

@ -0,0 +1,57 @@
# Generated by Django 4.2.13 on 2024-07-09 10:06
import common.storage
import common.upload_paths
from django.db import migrations, models
# Adds a nullable `source_storage` column to `staticasset`, `videotrack` and
# `videovariation`, and switches the `source` field of the same three models
# to `common.storage.CustomFileField`.
class Migration(migrations.Migration):
dependencies = [
('static_assets', '0012_allow_blank_license'),
]
operations = [
# New `source_storage` columns: NULL is labelled 'S3', 'fs' is labelled 'File System'.
migrations.AddField(
model_name='staticasset',
name='source_storage',
field=models.CharField(
blank=True, choices=[(None, 'S3'), ('fs', 'File System')], max_length=3, null=True
),
),
migrations.AddField(
model_name='videotrack',
name='source_storage',
field=models.CharField(
blank=True, choices=[(None, 'S3'), ('fs', 'File System')], max_length=3, null=True
),
),
migrations.AddField(
model_name='videovariation',
name='source_storage',
field=models.CharField(
blank=True, choices=[(None, 'S3'), ('fs', 'File System')], max_length=3, null=True
),
),
# `source` fields are redefined as CustomFileField; no `storage` argument is passed here.
migrations.AlterField(
model_name='staticasset',
name='source',
field=common.storage.CustomFileField(
blank=True, max_length=256, upload_to=common.upload_paths.get_upload_to_hashed_path
),
),
migrations.AlterField(
model_name='videotrack',
name='source',
field=common.storage.CustomFileField(
blank=True, max_length=256, upload_to=common.upload_paths.get_upload_to_hashed_path
),
),
migrations.AlterField(
model_name='videovariation',
name='source',
field=common.storage.CustomFileField(
blank=True, max_length=256, upload_to=common.upload_paths.get_upload_to_hashed_path
),
),
]

View File

@ -14,10 +14,10 @@ from django.utils.text import slugify
import looper.model_mixins
from common import mixins
from common.storage import CustomFileField
from common.upload_paths import get_upload_to_hashed_path
from static_assets.models import License
from static_assets.tasks import create_video_processing_job, create_video_transcribing_job
import common.storage
User = get_user_model()
log = logging.getLogger(__name__)
@ -44,12 +44,17 @@ class StaticAsset(
class Meta:
ordering = ['-date_created']
source = models.FileField(
source = CustomFileField(
upload_to=get_upload_to_hashed_path,
storage=common.storage.S3Boto3CustomStorage(),
blank=True,
max_length=256,
)
source_storage = models.CharField(
max_length=3,
null=True,
blank=True,
choices=[(None, 'S3'), ('fs', 'File System')],
)
source_type = models.CharField(
choices=StaticAssetFileTypeChoices.choices,
max_length=5,
@ -316,7 +321,13 @@ class VideoVariation(models.Model):
height = models.PositiveIntegerField(blank=True, null=True)
width = models.PositiveIntegerField(blank=True, null=True)
resolution_label = models.CharField(max_length=32, blank=True)
source = models.FileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256)
source = CustomFileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256)
source_storage = models.CharField(
max_length=3,
null=True,
blank=True,
choices=[(None, 'S3'), ('fs', 'File System')],
)
size_bytes = models.BigIntegerField(editable=False)
content_type = models.CharField(max_length=256, blank=True)
@ -356,7 +367,13 @@ class VideoTrack(models.Model):
language = models.CharField(
blank=False, null=False, max_length=5, choices=VideoTrackLanguageCodeChoices.choices
)
source = models.FileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256)
source = CustomFileField(upload_to=get_upload_to_hashed_path, blank=True, max_length=256)
source_storage = models.CharField(
max_length=3,
null=True,
blank=True,
choices=[(None, 'S3'), ('fs', 'File System')],
)
@property
def url(self) -> str: