Extra validation of the uploaded ZIP #73

Merged
Anna Sirota merged 13 commits from validation-single-theme-xml into main 2024-04-11 12:32:50 +02:00
14 changed files with 202 additions and 65 deletions

View File

@ -58,6 +58,7 @@ EXTENSION_TYPE_PLURAL = {
EXTENSION_TYPE_CHOICES.THEME: _('Themes'), EXTENSION_TYPE_CHOICES.THEME: _('Themes'),
} }
EXTENSION_SLUGS_PATH = '|'.join(EXTENSION_TYPE_SLUGS.values()) EXTENSION_SLUGS_PATH = '|'.join(EXTENSION_TYPE_SLUGS.values())
EXTENSION_SLUG_TYPES = {v: k for k, v in EXTENSION_TYPE_SLUGS_SINGULAR.items()}
ALLOWED_EXTENSION_MIMETYPES = ('application/zip', ) ALLOWED_EXTENSION_MIMETYPES = ('application/zip', )
# FIXME: this controls the initial widget rendered server-side, and server-side validation # FIXME: this controls the initial widget rendered server-side, and server-side validation

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -74,6 +74,14 @@ class CreateFileTest(TestCase):
version = combined_meta_data.get("version", "0.1.0") version = combined_meta_data.get("version", "0.1.0")
extension_id = combined_meta_data.get("id", "foobar").strip() extension_id = combined_meta_data.get("id", "foobar").strip()
type_slug = combined_meta_data['type']
init_path = None
if type_slug == 'add-on':
# Add the required __init__.py file
init_path = os.path.join(self.temp_directory, '__init__.py')
with open(init_path, 'w') as init_file:
init_file.write('')
with open(manifest_path, "w") as manifest_file: with open(manifest_path, "w") as manifest_file:
toml.dump(combined_meta_data, manifest_file) toml.dump(combined_meta_data, manifest_file)
@ -81,6 +89,10 @@ class CreateFileTest(TestCase):
with zipfile.ZipFile(output_path, "w") as my_zip: with zipfile.ZipFile(output_path, "w") as my_zip:
arcname = f"{extension_id}-{version}/{os.path.basename(manifest_path)}" arcname = f"{extension_id}-{version}/{os.path.basename(manifest_path)}"
my_zip.write(manifest_path, arcname=arcname) my_zip.write(manifest_path, arcname=arcname)
if init_path:
# Write the __init__.py file too
arcname = f"{extension_id}-{version}/{os.path.basename(init_path)}"
my_zip.write(init_path, arcname=arcname)
os.remove(manifest_path) os.remove(manifest_path)
return output_path return output_path
@ -259,7 +271,7 @@ class ValidateManifestTest(CreateFileTest):
self.client.force_login(user) self.client.force_login(user)
file_data = { file_data = {
"id": "<b>id-with-hyphens</b>", "id": "id-with-hyphens",
} }
bad_file = self._create_file_from_data("theme.zip", file_data, self.user) bad_file = self._create_file_from_data("theme.zip", file_data, self.user)
@ -271,7 +283,7 @@ class ValidateManifestTest(CreateFileTest):
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
error = response.context['form'].errors.get('source') error = response.context['form'].errors.get('source')
self.assertEqual(len(error), 1) self.assertEqual(len(error), 1)
self.assertIn('"&lt;b&gt;id-with-hyphens&lt;/b&gt;"', error[0]) self.assertIn('"id-with-hyphens"', error[0])
class ValidateManifestFields(TestCase): class ValidateManifestFields(TestCase):

View File

@ -59,6 +59,27 @@ EXPECTED_EXTENSION_DATA = {
'version_str': '1.0.8', 'version_str': '1.0.8',
}, },
} }
EXPECTED_VALIDATION_ERRORS = {
'empty.txt': {'source': ['Only .zip files are accepted.']},
'empty.zip': {'source': ['The submitted file is empty.']},
'invalid-archive.zip': {'source': ['Only .zip files are accepted.']},
'invalid-manifest-path.zip': {
'source': [
'The manifest file should be at the top level of the archive, or one level deep.',
],
},
'invalid-addon-no-init.zip': {
'source': ['An add-on should have an __init__.py file.'],
},
'invalid-addon-dir-no-init.zip': {
'source': ['An add-on should have an __init__.py file.'],
},
'invalid-no-manifest.zip': {
'source': ['The manifest file is missing.'],
},
'invalid-manifest-toml.zip': {'source': ['Could not parse the manifest file.']},
'invalid-theme-multiple-xmls.zip': {'source': ['A theme should have exactly one XML file.']},
}
class SubmitFileTest(TestCase): class SubmitFileTest(TestCase):
@ -116,46 +137,28 @@ class SubmitFileTest(TestCase):
{'agreed_with_terms': ['This field is required.']}, {'agreed_with_terms': ['This field is required.']},
) )
def test_validation_errors_invalid_extension(self): def test_validation_errors(self):
self.assertEqual(Extension.objects.count(), 0) self.assertEqual(Extension.objects.count(), 0)
user = UserFactory() user = UserFactory()
self.client.force_login(user) self.client.force_login(user)
with open(TEST_FILES_DIR / 'empty.txt', 'rb') as fp: for test_archive, extected_errors in EXPECTED_VALIDATION_ERRORS.items():
with self.subTest(test_archive=test_archive):
with open(TEST_FILES_DIR / test_archive, 'rb') as fp:
response = self.client.post(self.url, {'source': fp, 'agreed_with_terms': True}) response = self.client.post(self.url, {'source': fp, 'agreed_with_terms': True})
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertDictEqual( self.assertDictEqual(response.context['form'].errors, extected_errors)
response.context['form'].errors,
{'source': ['Only .zip files are accepted.']},
)
def test_validation_errors_empty_file(self): def test_addon_without_top_level_directory(self):
self.assertEqual(Extension.objects.count(), 0) self.assertEqual(Extension.objects.count(), 0)
user = UserFactory() user = UserFactory()
self.client.force_login(user) self.client.force_login(user)
with open(TEST_FILES_DIR / 'empty.zip', 'rb') as fp: with open(TEST_FILES_DIR / 'addon-without-dir.zip', 'rb') as fp:
response = self.client.post(self.url, {'source': fp, 'agreed_with_terms': True}) response = self.client.post(self.url, {'source': fp, 'agreed_with_terms': True})
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 302)
self.assertDictEqual(
response.context['form'].errors,
{'source': ['The submitted file is empty.']},
)
def test_validation_errors_not_actually_a_zip(self):
self.assertEqual(Extension.objects.count(), 0)
user = UserFactory()
self.client.force_login(user)
with open(TEST_FILES_DIR / 'not_a.zip', 'rb') as fp:
response = self.client.post(self.url, {'source': fp, 'agreed_with_terms': True})
self.assertDictEqual(
response.context['form'].errors,
{'source': ['Only .zip files are accepted.']},
)
def test_theme_file(self): def test_theme_file(self):
self.assertEqual(File.objects.count(), 0) self.assertEqual(File.objects.count(), 0)

View File

@ -12,10 +12,7 @@ from .validators import (
FileMIMETypeValidator, FileMIMETypeValidator,
ManifestValidator, ManifestValidator,
) )
from constants.base import ( from constants.base import EXTENSION_SLUG_TYPES, ALLOWED_EXTENSION_MIMETYPES
EXTENSION_TYPE_SLUGS_SINGULAR,
ALLOWED_EXTENSION_MIMETYPES,
)
import files.models import files.models
import files.utils as utils import files.utils as utils
@ -28,6 +25,20 @@ logger = logging.getLogger(__name__)
class FileForm(forms.ModelForm): class FileForm(forms.ModelForm):
msg_only_zip_files = _('Only .zip files are accepted.') msg_only_zip_files = _('Only .zip files are accepted.')
# Mimicking how django.forms.fields.Field handles validation error messages.
# TODO: maybe this should be a custom SourceFileField with all these validators and messages
error_messages = {
'invalid_manifest_path': _(
'The manifest file should be at the top level of the archive, or one level deep.'
),
# TODO: surface TOML parsing errors?
'invalid_manifest_toml': _('Could not parse the manifest file.'),
'invalid_missing_init': _('An add-on should have an __init__.py file.'),
'missing_or_multiple_theme_xml': _('A theme should have exactly one XML file.'),
'invalid_zip_archive': msg_only_zip_files,
'missing_manifest_toml': _('The manifest file is missing.'),
}
class Meta: class Meta:
model = files.models.File model = files.models.File
fields = ('source', 'type', 'metadata', 'agreed_with_terms', 'user') fields = ('source', 'type', 'metadata', 'agreed_with_terms', 'user')
@ -38,7 +49,7 @@ class FileForm(forms.ModelForm):
validators=[ validators=[
FileMIMETypeValidator( FileMIMETypeValidator(
allowed_mimetypes=ALLOWED_EXTENSION_MIMETYPES, allowed_mimetypes=ALLOWED_EXTENSION_MIMETYPES,
message=msg_only_zip_files, message=error_messages['invalid_zip_archive'],
), ),
], ],
widget=forms.ClearableFileInput( widget=forms.ClearableFileInput(
@ -128,22 +139,19 @@ class FileForm(forms.ModelForm):
errors = [] errors = []
if not zipfile.is_zipfile(file_path): if not zipfile.is_zipfile(file_path):
errors.append('File is not .zip') raise forms.ValidationError(self.error_messages['invalid_zip_archive'])
manifest = utils.read_manifest_from_zip(file_path) manifest, error_codes = utils.read_manifest_from_zip(file_path)
for code in error_codes:
errors.append(forms.ValidationError(self.error_messages[code]))
if errors:
self.add_error('source', errors)
if manifest is None: if manifest:
errors.append('A valid manifest file could not be found')
else:
ManifestValidator(manifest) ManifestValidator(manifest)
ExtensionIDManifestValidator(manifest, self.extension) ExtensionIDManifestValidator(manifest, self.extension)
extension_types = {v: k for k, v in EXTENSION_TYPE_SLUGS_SINGULAR.items()}
if errors:
raise forms.ValidationError({'source': errors}, code='invalid')
self.cleaned_data['metadata'] = manifest self.cleaned_data['metadata'] = manifest
# TODO: Error handling self.cleaned_data['type'] = EXTENSION_SLUG_TYPES[manifest['type']]
self.cleaned_data['type'] = extension_types[manifest['type']]
return self.cleaned_data return self.cleaned_data

View File

@ -1,6 +1,6 @@
from django.test import TestCase from django.test import TestCase
from files.utils import find_file_inside_zip_list from files.utils import find_path_by_name, find_exact_path, filter_paths_by_ext
class UtilsTest(TestCase): class UtilsTest(TestCase):
@ -10,7 +10,7 @@ class UtilsTest(TestCase):
name_list = [ name_list = [
"blender_manifest.toml", "blender_manifest.toml",
] ]
manifest_file = find_file_inside_zip_list(self.manifest, name_list) manifest_file = find_path_by_name(name_list, self.manifest)
self.assertEqual(manifest_file, "blender_manifest.toml") self.assertEqual(manifest_file, "blender_manifest.toml")
def test_find_manifest_nested(self): def test_find_manifest_nested(self):
@ -23,21 +23,21 @@ class UtilsTest(TestCase):
"foobar-1.0.3/manifest.toml", "foobar-1.0.3/manifest.toml",
"foobar-1.0.3/manifest.json", "foobar-1.0.3/manifest.json",
] ]
manifest_file = find_file_inside_zip_list(self.manifest, name_list) manifest_file = find_path_by_name(name_list, self.manifest)
self.assertEqual(manifest_file, "foobar-1.0.3/blender_manifest.toml") self.assertEqual(manifest_file, "foobar-1.0.3/blender_manifest.toml")
def test_find_manifest_no_zipped_folder(self): def test_find_manifest_no_zipped_folder(self):
name_list = [ name_list = [
"foobar-1.0.3/blender_manifest.toml", "foobar-1.0.3/blender_manifest.toml",
] ]
manifest_file = find_file_inside_zip_list(self.manifest, name_list) manifest_file = find_path_by_name(name_list, self.manifest)
self.assertEqual(manifest_file, "foobar-1.0.3/blender_manifest.toml") self.assertEqual(manifest_file, "foobar-1.0.3/blender_manifest.toml")
def test_find_manifest_no_manifest(self): def test_find_manifest_no_manifest(self):
name_list = [ name_list = [
"foobar-1.0.3/", "foobar-1.0.3/",
] ]
manifest_file = find_file_inside_zip_list(self.manifest, name_list) manifest_file = find_path_by_name(name_list, self.manifest)
self.assertEqual(manifest_file, None) self.assertEqual(manifest_file, None)
def test_find_manifest_with_space(self): def test_find_manifest_with_space(self):
@ -47,5 +47,54 @@ class UtilsTest(TestCase):
"foobar-1.0.3/blender_manifest.toml.txt", "foobar-1.0.3/blender_manifest.toml.txt",
"blender_manifest.toml/my_files.py", "blender_manifest.toml/my_files.py",
] ]
manifest_file = find_file_inside_zip_list(self.manifest, name_list) manifest_file = find_path_by_name(name_list, self.manifest)
self.assertEqual(manifest_file, None) self.assertEqual(manifest_file, None)
def test_find_exact_path_found(self):
name_list = [
'foobar-1.0.3/theme.xml',
'foobar-1.0.3/theme1.xml',
'foobar-1.0.3/theme2.txt',
'foobar-1.0.3/__init__.py',
'foobar-1.0.3/foobar/__init__.py',
'foobar-1.0.3/foobar-1.0.3/__init__.py',
'blender_manifest.toml',
]
path = find_exact_path(name_list, 'foobar-1.0.3/__init__.py')
self.assertEqual(path, 'foobar-1.0.3/__init__.py')
def test_find_exact_path_nothing_found(self):
name_list = [
'foobar-1.0.3/theme.xml',
'foobar-1.0.3/theme1.xml',
'foobar-1.0.3/theme2.txt',
'foobar-1.0.3/foobar/__init__.py',
'foobar-1.0.3/foobar-1.0.3/__init__.py',
'blender_manifest.toml',
]
path = find_exact_path(name_list, 'foobar-1.0.3/__init__.py')
self.assertIsNone(path)
def test_filter_paths_by_ext_found(self):
name_list = [
'foobar-1.0.3/theme.xml',
'foobar-1.0.3/theme1.xml',
'foobar-1.0.3/theme2.txt',
'foobar-1.0.3/__init__.py',
'foobar-1.0.3/foobar-1.0.3/__init__.py',
'blender_manifest.toml',
]
paths = filter_paths_by_ext(name_list, '.xml')
self.assertEqual(list(paths), ['foobar-1.0.3/theme.xml', 'foobar-1.0.3/theme1.xml'])
def test_filter_paths_by_ext_nothing_found(self):
name_list = [
'foobar-1.0.3/theme.xml',
'foobar-1.0.3/theme1.md.xml',
'foobar-1.0.3/theme2.txt',
'foobar-1.0.3/__init__.py',
'foobar-1.0.3/foobar-1.0.3/__init__.py',
'blender_manifest.toml',
]
paths = filter_paths_by_ext(name_list, '.md')
self.assertEqual(list(paths), [])

View File

@ -5,6 +5,7 @@ import logging
import mimetypes import mimetypes
import os import os
import toml import toml
import typing
import zipfile import zipfile
from lxml import etree from lxml import etree
@ -48,41 +49,104 @@ def get_sha256_from_value(value: str):
return hash_.hexdigest() return hash_.hexdigest()
def find_file_inside_zip_list(file_to_read: str, name_list: list) -> str: def find_path_by_name(paths: typing.List[str], name: str) -> typing.Optional[str]:
"""Return the first occurance of file_to_read insize a zip name_list""" """Return the first occurrence of file name in a given list of paths."""
for file_path in name_list: for file_path in paths:
# Remove leading/trailing whitespace from file path # Remove leading/trailing whitespace from file path
file_path_stripped = file_path.strip() file_path_stripped = file_path.strip()
# Check if the basename of the stripped path is equal to the target file name # Check if the basename of the stripped path is equal to the target file name
if os.path.basename(file_path_stripped) == file_to_read: if os.path.basename(file_path_stripped) == name:
return file_path_stripped return file_path_stripped
return None return None
def find_exact_path(paths: typing.List[str], exact_path: str) -> typing.Optional[str]:
"""Return a first path equal to a given one if it exists in a given list of paths."""
matching_paths = (path for path in paths if path == exact_path)
return next(matching_paths, None)
def filter_paths_by_ext(paths: typing.List[str], ext: str) -> typing.Iterable[str]:
"""Generate a list of paths having a given extension from a given list of paths."""
for file_path in paths:
# Get file path's extension
_, file_path_ext = os.path.splitext(file_path)
# Check if this file's extension matches the extension we are looking for
if file_path_ext.lower() == ext.lower():
yield file_path
def read_manifest_from_zip(archive_path): def read_manifest_from_zip(archive_path):
file_to_read = 'blender_manifest.toml' """Read and validate extension's manifest file and contents of the archive.
In any extension archive, a valid `blender_manifest.toml` file is expected
to be found at the top level of the archive, or inside a single nested directory.
Additionally, depending on the extension type defined in the manifest,
the archive is expected to have a particular file structure:
* for themes, a single XML file is expected next to the manifest;
* for add-ons, the following structure is expected:
```
some-addon.zip
an-optional-dir
blender_manifest.toml
__init__.py
(...)
```
"""
manifest_name = 'blender_manifest.toml'
error_codes = []
try: try:
with zipfile.ZipFile(archive_path) as myzip: with zipfile.ZipFile(archive_path) as myzip:
manifest_filepath = find_file_inside_zip_list(file_to_read, myzip.namelist()) bad_file = myzip.testzip()
if bad_file is not None:
logger.error('Bad file in ZIP')
error_codes.append('invalid_zip_archive')
return None, error_codes
file_list = myzip.namelist()
manifest_filepath = find_path_by_name(file_list, manifest_name)
if manifest_filepath is None: if manifest_filepath is None:
logger.info(f"File '{file_to_read}' not found in the archive.") logger.info(f"File '{manifest_name}' not found in the archive.")
return None error_codes.append('missing_manifest_toml')
return None, error_codes
# Manifest file is expected to be no deeper than one directory down
if os.path.dirname(os.path.dirname(manifest_filepath)) != '':
error_codes.append('invalid_manifest_path')
return None, error_codes
# Extract the file content # Extract the file content
with myzip.open(manifest_filepath) as file_content: with myzip.open(manifest_filepath) as file_content:
# TODO: handle TOML loading error
toml_content = toml.loads(file_content.read().decode()) toml_content = toml.loads(file_content.read().decode())
return toml_content
# If manifest was parsed successfully, do additional type-specific validation
type_slug = toml_content['type']
if type_slug == 'theme':
theme_xmls = filter_paths_by_ext(file_list, '.xml')
if len(list(theme_xmls)) != 1:
error_codes.append('missing_or_multiple_theme_xml')
elif type_slug == 'add-on':
# __init__.py is expected to be next to the manifest
expected_init_path = os.path.join(os.path.dirname(manifest_filepath), '__init__.py')
init_filepath = find_exact_path(file_list, expected_init_path)
if not init_filepath:
error_codes.append('invalid_missing_init')
return toml_content, error_codes
except toml.decoder.TomlDecodeError as e: except toml.decoder.TomlDecodeError as e:
# TODO: This error should be propagate to the user
logger.error(f"Manifest Error: {e.msg}") logger.error(f"Manifest Error: {e.msg}")
error_codes.append('invalid_manifest_toml')
except Exception as e: except Exception as e:
logger.error(f"Error extracting from archive: {e}") logger.error(f"Error extracting from archive: {e}")
error_codes.append('invalid_zip_archive')
return None return None, error_codes
def guess_mimetype_from_ext(file_name: str) -> str: def guess_mimetype_from_ext(file_name: str) -> str: