File scanning: validate wheel digests against pypi.org #199
@ -23,8 +23,8 @@ def _record_changes(
|
||||
def schedule_scan(file: files.models.File) -> None:
    """Schedule a scan of a given file.

    Calls the ``files.tasks.scan_file`` task with the file's primary key,
    passing the file itself as ``creator`` and a human-readable
    ``verbose_name`` built from the name of the file's source.
    """
    logger.info('Scheduling a scan for file pk=%s', file.pk)
    # Only the combined ``scan_file`` task is scheduled; it supersedes the
    # former clamd-only ``clamdscan`` task.
    verbose_name = f'scan of "{file.source.name}"'
    files.tasks.scan_file(file_id=file.pk, creator=file, verbose_name=verbose_name)
|
||||
|
||||
|
||||
@receiver(post_save, sender=files.models.File)
|
||||
|
@ -12,21 +12,24 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@background(schedule={'action': TaskSchedule.RESCHEDULE_EXISTING})
def scan_file(file_id: int):
    """Run a scan of a given file and save its output as a FileValidation record.

    The scan has two stages:

    1. ``files.utils.run_clamdscan`` is run on the file's absolute path; the
       file is considered OK only when the returned status is ``'OK'``.
    2. If that passed and the file's metadata lists ``wheels``, they are
       checked with ``files.utils.validate_wheels``. Any wheel reported as
       invalid marks the file as not OK and is stored under the
       ``'invalid_wheels'`` key of the results.

    The outcome is written with ``update_or_create`` keyed on the file, so
    running the scan again updates the same record.
    """
    file = files.models.File.objects.get(pk=file_id)
    abs_path = os.path.join(settings.MEDIA_ROOT, file.source.path)

    clamd_scan_status, clamd_scan_found = files.utils.run_clamdscan(abs_path)
    logger.info('File pk=%s scanned by clamd: %s', file.pk, (clamd_scan_status, clamd_scan_found))
    scan_result = {'clamdscan': [clamd_scan_status, clamd_scan_found]}
    is_ok = clamd_scan_status == 'OK'

    # Wheels are only validated when the clamd scan passed and the metadata
    # actually lists some.
    if is_ok and (wheels := file.metadata.get('wheels', None)):
        invalid_wheels = files.utils.validate_wheels(abs_path, wheels)
        if invalid_wheels:
            logger.info('File pk=%s has invalid wheels: %s', file.pk, invalid_wheels)
            is_ok = False
            scan_result['invalid_wheels'] = invalid_wheels

    files.models.FileValidation.objects.update_or_create(
        file=file, defaults={'results': scan_result, 'is_ok': is_ok}
    )
|
||||
|
||||
|
||||
@background(schedule={'action': TaskSchedule.RESCHEDULE_EXISTING})
|
||||
|
@ -11,6 +11,7 @@ import toml
|
||||
import typing
|
||||
import zipfile
|
||||
|
||||
from packaging.utils import InvalidWheelFilename, parse_wheel_filename
|
||||
from PIL import Image
|
||||
from django.conf import settings
|
||||
from django.core.files.storage import default_storage
|
||||
@ -18,6 +19,7 @@ from ffmpeg import FFmpeg, FFmpegFileNotFound, FFmpegInvalidCommand, FFmpegError
|
||||
from lxml import etree
|
||||
import clamd
|
||||
import magic
|
||||
import requests
|
||||
|
||||
from constants.base import THUMBNAIL_FORMAT, THUMBNAIL_SIZES, THUMBNAIL_QUALITY
|
||||
|
||||
@ -29,6 +31,7 @@ FORBIDDEN_FILEPATHS = [
|
||||
'Thumbs.db',
|
||||
'ehthumbs.db',
|
||||
]
|
||||
MANIFEST_NAME = 'blender_manifest.toml'
|
||||
MODULE_DIR = Path(__file__).resolve().parent
|
||||
THEME_SCHEMA = []
|
||||
|
||||
@ -113,7 +116,6 @@ def read_manifest_from_zip(archive_path):
|
||||
└─ (...)
|
||||
```
|
||||
"""
|
||||
manifest_name = 'blender_manifest.toml'
|
||||
error_codes = []
|
||||
file_list = []
|
||||
manifest_content = None
|
||||
@ -127,10 +129,10 @@ def read_manifest_from_zip(archive_path):
|
||||
return None, error_codes
|
||||
|
||||
file_list = myzip.namelist()
|
||||
manifest_filepath = find_path_by_name(file_list, manifest_name)
|
||||
manifest_filepath = find_path_by_name(file_list, MANIFEST_NAME)
|
||||
|
||||
if manifest_filepath is None:
|
||||
logger.info(f"File '{manifest_name}' not found in the archive.")
|
||||
logger.info(f"File '{MANIFEST_NAME}' not found in the archive.")
|
||||
error_codes.append('missing_manifest_toml')
|
||||
return None, error_codes
|
||||
|
||||
@ -352,3 +354,45 @@ def extract_frame(source_path: str, output_path: str, at_time: str = '00:00:00.0
|
||||
except (FFmpegError, FFmpegFileNotFound, FFmpegInvalidCommand) as e:
|
||||
logger.exception(f'Failed to extract a frame: {e.message}, {" ".join(ffmpeg.arguments)}')
|
||||
raise
|
||||
|
||||
|
||||
def get_wheel_sha256_from_pypi(wheel_name, session):
    """Look up the sha256 digest that pypi.org publishes for a wheel file.

    The project name and version are parsed out of ``wheel_name`` and used to
    query ``https://pypi.org/pypi/<name>/<version>/json``; the digest is taken
    from the ``urls`` entry whose filename equals ``wheel_name`` and whose
    package type is ``bdist_wheel``.

    :param wheel_name: file name of the wheel (no directory part).
    :param session: object with a ``get(url, headers=..., timeout=...)``
        method returning a response that has ``status_code`` and ``json()``;
        callers in this module pass a ``requests.Session``.
    :return: ``(sha256, None)`` on success, or ``(None, reason)`` when the
        file name is not a valid wheel name, the release is not found (404),
        or the response has no matching ``urls`` item.
    :raises Exception: when pypi.org answers with any other error status
        (>= 400), because in that case nothing can be concluded about the
        wheel.
    """
    try:
        name, version, *_ = parse_wheel_filename(wheel_name)
    except InvalidWheelFilename:
        return (None, 'invalid wheel filename')

    url = f'https://pypi.org/pypi/{name}/{version}/json'
    r = session.get(
        url,
        headers={'User-Agent': 'extensions.blender.org <extensions@blender.org>'},
        timeout=10,
    )
    if r.status_code == 404:
        return (None, f'wheel not found: {url}')
    # Any other error reply (5xx, but also 403/429 and the like) says nothing
    # about the wheel itself: fail loudly instead of trying to parse the body
    # and possibly misreporting the wheel as invalid.
    if r.status_code >= 400:
        raise Exception(f'{url} returned {r.status_code} error')

    data = r.json()
    for item in data.get('urls', []):
        # .get() keeps a malformed entry from aborting the search.
        if item.get('filename') == wheel_name and item.get('packagetype') == 'bdist_wheel':
            return (item['digests']['sha256'], None)
    return (None, 'no matching $.urls item in json response')
|
||||
|
||||
|
||||
def validate_wheels(archive_path, wheels):
    """Compare digests of wheels bundled in an archive with those on pypi.org.

    For every entry of ``wheels`` the wheel is opened inside the zip archive
    (its path is resolved with ``_canonical_path`` against the location of the
    manifest), hashed with ``get_sha256`` and compared with the digest
    returned by ``get_wheel_sha256_from_pypi`` for the wheel's base name.

    :param archive_path: path of the zip archive containing the wheels.
    :param wheels: iterable of wheel paths, as listed in the manifest.
    :return: dict mapping each problematic wheel path to a description of the
        problem (lookup error or digest mismatch); empty when all wheels
        match.
    :raises KeyError: propagated from ``ZipFile.open`` when a listed wheel is
        not a member of the archive.
    """
    results = {}
    # One HTTP session is shared by all lookups and is closed on exit, so its
    # pooled connections are not leaked.
    with zipfile.ZipFile(archive_path) as myzip, requests.Session() as session:
        manifest_filepath = find_path_by_name(myzip.namelist(), MANIFEST_NAME)
        for wheel in wheels:
            wheel_path_in_archive = _canonical_path(wheel, manifest_filepath)
            with myzip.open(wheel_path_in_archive) as wheel_file:
                wheel_digest = get_sha256(wheel_file)

            wheel_name = os.path.basename(wheel)
            pypi_digest, err = get_wheel_sha256_from_pypi(wheel_name, session)
            if err:
                results[wheel] = err
                continue
            if pypi_digest != wheel_digest:
                results[wheel] = f'digest in archive={wheel_digest}, digest on pypi={pypi_digest}'
    return results
|
||||
|
@ -39,6 +39,7 @@ maxminddb==2.2.0
|
||||
mistune==2.0.4
|
||||
multidict==6.0.2
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
Pillow==9.2.0
|
||||
python-ffmpeg==2.0.12
|
||||
python-magic==0.4.27
|
||||
|
Loading…
Reference in New Issue
Block a user