File scanning: validate wheel digests against pypi.org #199
@ -23,8 +23,8 @@ def _record_changes(
|
|||||||
def schedule_scan(file: files.models.File) -> None:
|
def schedule_scan(file: files.models.File) -> None:
|
||||||
"""Schedule a scan of a given file."""
|
"""Schedule a scan of a given file."""
|
||||||
logger.info('Scheduling a scan for file pk=%s', file.pk)
|
logger.info('Scheduling a scan for file pk=%s', file.pk)
|
||||||
verbose_name = f'clamdscan of "{file.source.name}"'
|
verbose_name = f'scan of "{file.source.name}"'
|
||||||
files.tasks.clamdscan(file_id=file.pk, creator=file, verbose_name=verbose_name)
|
files.tasks.scan_file(file_id=file.pk, creator=file, verbose_name=verbose_name)
|
||||||
|
|
||||||
|
|
||||||
@receiver(post_save, sender=files.models.File)
|
@receiver(post_save, sender=files.models.File)
|
||||||
|
@ -12,21 +12,23 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
@background(schedule={'action': TaskSchedule.RESCHEDULE_EXISTING})
|
@background(schedule={'action': TaskSchedule.RESCHEDULE_EXISTING})
|
||||||
def clamdscan(file_id: int):
|
def scan_file(file_id: int):
|
||||||
"""Run a scan of a given file and save its output as a FileValidation record."""
|
"""Run a scan of a given file and save its output as a FileValidation record."""
|
||||||
file = files.models.File.objects.get(pk=file_id)
|
file = files.models.File.objects.get(pk=file_id)
|
||||||
abs_path = os.path.join(settings.MEDIA_ROOT, file.source.path)
|
abs_path = os.path.join(settings.MEDIA_ROOT, file.source.path)
|
||||||
scan_status, scan_found = files.utils.run_clamdscan(abs_path)
|
clamd_scan_status, clamd_scan_found = files.utils.run_clamdscan(abs_path)
|
||||||
logger.info('File pk=%s scanned: %s', file.pk, (scan_status, scan_found))
|
logger.info('File pk=%s scanned by clamd: %s', file.pk, (clamd_scan_status, clamd_scan_found))
|
||||||
scan_result = {'clamdscan': [scan_status, scan_found]}
|
scan_result = {'clamdscan': [clamd_scan_status, clamd_scan_found]}
|
||||||
is_ok = scan_status == 'OK'
|
is_ok = clamd_scan_status == 'OK'
|
||||||
file_validation, is_new = files.models.FileValidation.objects.get_or_create(
|
if is_ok and (wheels := files.utils.get_wheels_from_manifest(file.metadata)):
|
||||||
|
if invalid_wheels := files.utils.validate_wheels(abs_path, wheels):
|
||||||
|
logger.info('File pk=%s has invalid wheels: %s', file.pk, invalid_wheels)
|
||||||
|
is_ok = False
|
||||||
|
scan_result['invalid_wheels'] = invalid_wheels
|
||||||
|
|
||||||
|
files.models.FileValidation.objects.update_or_create(
|
||||||
file=file, defaults={'results': scan_result, 'is_ok': is_ok}
|
file=file, defaults={'results': scan_result, 'is_ok': is_ok}
|
||||||
)
|
)
|
||||||
if not is_new:
|
|
||||||
file_validation.results = scan_result
|
|
||||||
file_validation.is_ok = is_ok
|
|
||||||
file_validation.save(update_fields={'results', 'is_ok', 'date_modified'})
|
|
||||||
|
|
||||||
|
|
||||||
@background(schedule={'action': TaskSchedule.RESCHEDULE_EXISTING})
|
@background(schedule={'action': TaskSchedule.RESCHEDULE_EXISTING})
|
||||||
|
@ -38,12 +38,12 @@ class FileScanTest(TestCase):
|
|||||||
# A background task should have been created
|
# A background task should have been created
|
||||||
task = Task.objects.created_by(creator=file).first()
|
task = Task.objects.created_by(creator=file).first()
|
||||||
self.assertIsNotNone(task)
|
self.assertIsNotNone(task)
|
||||||
self.assertEqual(task.task_name, 'files.tasks.clamdscan')
|
self.assertEqual(task.task_name, 'files.tasks.scan_file')
|
||||||
self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]')
|
self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]')
|
||||||
|
|
||||||
# Actually run the task as if by background runner
|
# Actually run the task as if by background runner
|
||||||
task_args, task_kwargs = task.params()
|
task_args, task_kwargs = task.params()
|
||||||
files.tasks.clamdscan.task_function(*task_args, **task_kwargs)
|
files.tasks.scan_file.task_function(*task_args, **task_kwargs)
|
||||||
|
|
||||||
file.refresh_from_db()
|
file.refresh_from_db()
|
||||||
self.assertFalse(file.validation.is_ok)
|
self.assertFalse(file.validation.is_ok)
|
||||||
@ -68,12 +68,12 @@ class FileScanTest(TestCase):
|
|||||||
# A background task should have been created
|
# A background task should have been created
|
||||||
task = Task.objects.created_by(creator=file).first()
|
task = Task.objects.created_by(creator=file).first()
|
||||||
self.assertIsNotNone(task)
|
self.assertIsNotNone(task)
|
||||||
self.assertEqual(task.task_name, 'files.tasks.clamdscan')
|
self.assertEqual(task.task_name, 'files.tasks.scan_file')
|
||||||
self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]')
|
self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]')
|
||||||
|
|
||||||
# Actually run the task as if by background runner
|
# Actually run the task as if by background runner
|
||||||
task_args, task_kwargs = task.params()
|
task_args, task_kwargs = task.params()
|
||||||
files.tasks.clamdscan.task_function(*task_args, **task_kwargs)
|
files.tasks.scan_file.task_function(*task_args, **task_kwargs)
|
||||||
|
|
||||||
self.assertFalse(file.validation.is_ok)
|
self.assertFalse(file.validation.is_ok)
|
||||||
file.validation.refresh_from_db()
|
file.validation.refresh_from_db()
|
||||||
@ -95,12 +95,12 @@ class FileScanTest(TestCase):
|
|||||||
# A background task should have been created
|
# A background task should have been created
|
||||||
task = Task.objects.created_by(creator=file).first()
|
task = Task.objects.created_by(creator=file).first()
|
||||||
self.assertIsNotNone(task)
|
self.assertIsNotNone(task)
|
||||||
self.assertEqual(task.task_name, 'files.tasks.clamdscan')
|
self.assertEqual(task.task_name, 'files.tasks.scan_file')
|
||||||
self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]')
|
self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]')
|
||||||
|
|
||||||
# Actually run the task as if by background runner
|
# Actually run the task as if by background runner
|
||||||
task_args, task_kwargs = task.params()
|
task_args, task_kwargs = task.params()
|
||||||
files.tasks.clamdscan.task_function(*task_args, **task_kwargs)
|
files.tasks.scan_file.task_function(*task_args, **task_kwargs)
|
||||||
|
|
||||||
file.refresh_from_db()
|
file.refresh_from_db()
|
||||||
self.assertTrue(file.validation.is_ok)
|
self.assertTrue(file.validation.is_ok)
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import patch, ANY
|
from unittest.mock import patch, ANY
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
import io
|
||||||
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import zipfile
|
||||||
|
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
@ -11,8 +14,10 @@ from files.utils import (
|
|||||||
find_exact_path,
|
find_exact_path,
|
||||||
find_path_by_name,
|
find_path_by_name,
|
||||||
get_thumbnail_upload_to,
|
get_thumbnail_upload_to,
|
||||||
|
get_wheels_from_manifest,
|
||||||
make_thumbnails,
|
make_thumbnails,
|
||||||
validate_file_list,
|
validate_file_list,
|
||||||
|
validate_wheels,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Reusing test files from the extensions app
|
# Reusing test files from the extensions app
|
||||||
@ -290,3 +295,62 @@ class UtilsTest(TestCase):
|
|||||||
validate_file_list(test.toml_content, test.manifest_filepath, test.file_list),
|
validate_file_list(test.toml_content, test.manifest_filepath, test.file_list),
|
||||||
test.name,
|
test.name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_get_wheels_from_manifest(self):
    """Check which list of wheels is picked from a parsed manifest.

    Covers a manifest without wheels, one with top-level `wheels` only,
    and one where `build.generated.wheels` is present next to top-level `wheels`.
    """

    @dataclasses.dataclass
    class Case:
        name: str
        toml_content: dict
        expected: list

    cases = (
        # Nothing declared anywhere: the lookup yields None
        Case(name='no wheels', toml_content={'type': 'add-on'}, expected=None),
        # Only the top-level list exists, so it is returned as is
        Case(
            name='top-level wheels',
            toml_content={
                'type': 'add-on',
                'wheels': ['./wheels/1.whl', './wheels/2.whl'],
            },
            expected=['./wheels/1.whl', './wheels/2.whl'],
        ),
        # Both lists exist: the build.generated one wins
        Case(
            name='build.generated wheels',
            toml_content={
                'type': 'add-on',
                'wheels': ['./wheels/1.whl', './wheels/2.whl'],
                'build': {'generated': {'wheels': ['./wheels/1.whl']}},
            },
            expected=['./wheels/1.whl'],
        ),
    )

    for case in cases:
        with self.subTest(**dataclasses.asdict(case)):
            actual = get_wheels_from_manifest(case.toml_content)
            self.assertEqual(case.expected, actual, case.name)
|
||||||
|
|
||||||
|
@patch(
    'files.utils.get_wheel_sha256_from_pypi',
    lambda _, __: ('blahblah', None),
)
def test_validate_wheels(self):
    """A wheel whose digest differs from the (patched) pypi digest must be reported."""
    wheel = 'wheels/1.whl'
    # The wheel is written with empty content, hence the sha256 of an empty input
    expected_message = (
        'digest in archive=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
        ', digest on pypi=blahblah'
    )

    # Assemble the archive in memory first: a manifest plus one empty wheel
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode='w') as zip_file:
        zip_file.writestr('blender_manifest.toml', b'wheels = ["wheels/1.whl"]')
        zip_file.writestr(wheel, b'')

    with tempfile.TemporaryDirectory() as output_dir:
        test_file_path = os.path.join(output_dir, 'test_file.zip')
        with open(test_file_path, 'wb') as f:
            f.write(archive.getvalue())

        invalid_wheels = validate_wheels(test_file_path, [wheel])
        self.assertEqual(invalid_wheels.get(wheel), expected_message)
|
||||||
|
@ -11,6 +11,7 @@ import toml
|
|||||||
import typing
|
import typing
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
|
from packaging.utils import InvalidWheelFilename, parse_wheel_filename
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.files.storage import default_storage
|
from django.core.files.storage import default_storage
|
||||||
@ -18,6 +19,7 @@ from ffmpeg import FFmpeg, FFmpegFileNotFound, FFmpegInvalidCommand, FFmpegError
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
import clamd
|
import clamd
|
||||||
import magic
|
import magic
|
||||||
|
import requests
|
||||||
|
|
||||||
from constants.base import THUMBNAIL_FORMAT, THUMBNAIL_SIZES, THUMBNAIL_QUALITY
|
from constants.base import THUMBNAIL_FORMAT, THUMBNAIL_SIZES, THUMBNAIL_QUALITY
|
||||||
|
|
||||||
@ -29,6 +31,7 @@ FORBIDDEN_FILEPATHS = [
|
|||||||
'Thumbs.db',
|
'Thumbs.db',
|
||||||
'ehthumbs.db',
|
'ehthumbs.db',
|
||||||
]
|
]
|
||||||
|
MANIFEST_NAME = 'blender_manifest.toml'
|
||||||
MODULE_DIR = Path(__file__).resolve().parent
|
MODULE_DIR = Path(__file__).resolve().parent
|
||||||
THEME_SCHEMA = []
|
THEME_SCHEMA = []
|
||||||
|
|
||||||
@ -113,7 +116,6 @@ def read_manifest_from_zip(archive_path):
|
|||||||
└─ (...)
|
└─ (...)
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
manifest_name = 'blender_manifest.toml'
|
|
||||||
error_codes = []
|
error_codes = []
|
||||||
file_list = []
|
file_list = []
|
||||||
manifest_content = None
|
manifest_content = None
|
||||||
@ -127,10 +129,10 @@ def read_manifest_from_zip(archive_path):
|
|||||||
return None, error_codes
|
return None, error_codes
|
||||||
|
|
||||||
file_list = myzip.namelist()
|
file_list = myzip.namelist()
|
||||||
manifest_filepath = find_path_by_name(file_list, manifest_name)
|
manifest_filepath = find_path_by_name(file_list, MANIFEST_NAME)
|
||||||
|
|
||||||
if manifest_filepath is None:
|
if manifest_filepath is None:
|
||||||
logger.info(f"File '{manifest_name}' not found in the archive.")
|
logger.info(f"File '{MANIFEST_NAME}' not found in the archive.")
|
||||||
error_codes.append('missing_manifest_toml')
|
error_codes.append('missing_manifest_toml')
|
||||||
return None, error_codes
|
return None, error_codes
|
||||||
|
|
||||||
@ -169,6 +171,19 @@ def find_forbidden_filepaths(file_list):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def get_wheels_from_manifest(manifest):
    """Return the list of wheels declared in a parsed manifest, if any.

    `build.generated.wheels` takes precedence whenever that key exists (even if its
    value is empty); otherwise the top-level `wheels` value is returned.

    Returns None when the manifest is missing/empty or declares no wheels.
    Intermediate values that are not mappings (e.g. `build = None`) are treated
    the same as an absent `build.generated.wheels` instead of raising.
    """
    # A missing or empty manifest cannot declare anything.
    if not manifest:
        return None
    try:
        return manifest['build']['generated']['wheels']
    except (KeyError, TypeError):
        # KeyError: one of the nested keys is absent.
        # TypeError: `build` or `build.generated` is not subscriptable by key.
        return manifest.get('wheels')
|
||||||
|
|
||||||
|
|
||||||
def validate_file_list(toml_content, manifest_filepath, file_list):
|
def validate_file_list(toml_content, manifest_filepath, file_list):
|
||||||
"""Check the files in in the archive against manifest."""
|
"""Check the files in in the archive against manifest."""
|
||||||
error_codes = []
|
error_codes = []
|
||||||
@ -194,16 +209,7 @@ def validate_file_list(toml_content, manifest_filepath, file_list):
|
|||||||
init_filepath = find_exact_path(file_list, expected_init_path)
|
init_filepath = find_exact_path(file_list, expected_init_path)
|
||||||
if not init_filepath:
|
if not init_filepath:
|
||||||
error_codes.append('invalid_missing_init')
|
error_codes.append('invalid_missing_init')
|
||||||
wheels = None
|
if wheels := get_wheels_from_manifest(toml_content):
|
||||||
if (
|
|
||||||
'build' in toml_content
|
|
||||||
and 'generated' in toml_content['build']
|
|
||||||
and 'wheels' in toml_content['build']['generated']
|
|
||||||
):
|
|
||||||
wheels = toml_content['build']['generated']['wheels']
|
|
||||||
else:
|
|
||||||
wheels = toml_content.get('wheels')
|
|
||||||
if wheels:
|
|
||||||
for wheel in wheels:
|
for wheel in wheels:
|
||||||
expected_wheel_path = _canonical_path(wheel, manifest_filepath)
|
expected_wheel_path = _canonical_path(wheel, manifest_filepath)
|
||||||
wheel_filepath = find_exact_path(file_list, expected_wheel_path)
|
wheel_filepath = find_exact_path(file_list, expected_wheel_path)
|
||||||
@ -363,3 +369,45 @@ def extract_frame(source_path: str, output_path: str, at_time: str = '00:00:00.0
|
|||||||
except (FFmpegError, FFmpegFileNotFound, FFmpegInvalidCommand) as e:
|
except (FFmpegError, FFmpegFileNotFound, FFmpegInvalidCommand) as e:
|
||||||
logger.exception(f'Failed to extract a frame: {e.message}, {" ".join(ffmpeg.arguments)}')
|
logger.exception(f'Failed to extract a frame: {e.message}, {" ".join(ffmpeg.arguments)}')
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def get_wheel_sha256_from_pypi(wheel_name, session):
    """Look up the sha256 digest published on pypi.org for a given wheel file name.

    `wheel_name` is the bare file name of the wheel; the project name and version
    are parsed out of it to build the pypi.org JSON API URL.
    `session` is the object used to perform the GET request (its `get()` is called
    with `headers` and `timeout` keyword arguments).

    Returns a `(digest, error)` tuple in which exactly one element is None:
    the error is set when the file name is not a valid wheel name, when pypi.org
    responds with 404, or when the release lists no matching wheel file.

    Raises Exception on any other non-200 response: such a response says nothing
    about the wheel itself (server error, rate limiting, etc.), so it must not be
    reported as a validation failure of the wheel.
    """
    try:
        name, version, *_ = parse_wheel_filename(wheel_name)
    except InvalidWheelFilename:
        return (None, 'invalid wheel filename')

    url = f'https://pypi.org/pypi/{name}/{version}/json'
    r = session.get(
        url,
        headers={'User-Agent': 'extensions.blender.org <extensions@blender.org>'},
        timeout=10,
    )
    if r.status_code == 404:
        return (None, f'wheel not found: {url}')
    if r.status_code != 200:
        # Don't attempt to parse a body of an unexpected response.
        raise Exception(f'{url} returned {r.status_code} error')

    data = r.json()
    for item in data.get('urls', []):
        # A release lists files of several package types, only the exact wheel is of interest.
        if item.get('filename') == wheel_name and item.get('packagetype') == 'bdist_wheel':
            return (item['digests']['sha256'], None)
    return (None, 'no matching $.urls item in json response')
|
||||||
|
|
||||||
|
|
||||||
|
def validate_wheels(archive_path, wheels):
    """Compare digests of wheels bundled in an archive against digests published on pypi.org.

    `archive_path` is the path to the zip archive containing the manifest and the wheels.
    `wheels` is an iterable of wheel paths, as they are listed in the manifest.

    Returns a dict mapping each wheel that could not be validated to a description
    of the problem: either the lookup error or the pair of mismatching digests.
    Wheels whose digests match are not included, so an empty dict means that
    nothing was flagged.
    """
    results = {}
    # Both the archive and the HTTP session are released on exit, also in case of an error.
    with zipfile.ZipFile(archive_path) as myzip, requests.Session() as session:
        manifest_filepath = find_path_by_name(myzip.namelist(), MANIFEST_NAME)
        for wheel in wheels:
            # Wheel paths come from the manifest, so they are resolved against its location.
            wheel_path_in_archive = _canonical_path(wheel, manifest_filepath)
            with myzip.open(wheel_path_in_archive) as wheel_file:
                wheel_digest = get_sha256(wheel_file)

            # Only the file name is used for the lookup on pypi.org.
            wheel_name = os.path.basename(wheel)
            pypi_digest, err = get_wheel_sha256_from_pypi(wheel_name, session)
            if err:
                results[wheel] = err
                continue
            if pypi_digest != wheel_digest:
                results[wheel] = f'digest in archive={wheel_digest}, digest on pypi={pypi_digest}'
    return results
|
||||||
|
@ -39,6 +39,7 @@ maxminddb==2.2.0
|
|||||||
mistune==2.0.4
|
mistune==2.0.4
|
||||||
multidict==6.0.2
|
multidict==6.0.2
|
||||||
oauthlib==3.2.0
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
Pillow==9.2.0
|
Pillow==9.2.0
|
||||||
python-ffmpeg==2.0.12
|
python-ffmpeg==2.0.12
|
||||||
python-magic==0.4.27
|
python-magic==0.4.27
|
||||||
|
Loading…
Reference in New Issue
Block a user