diff --git a/extensions/signals.py b/extensions/signals.py index e6f2d682..780abdd9 100644 --- a/extensions/signals.py +++ b/extensions/signals.py @@ -5,7 +5,6 @@ from django.dispatch import receiver import django.dispatch import extensions.models -import extensions.tasks import files.models version_changed = django.dispatch.Signal() diff --git a/extensions/tasks.py b/extensions/tasks.py deleted file mode 100644 index e69de29b..00000000 diff --git a/extensions/templates/extensions/detail.html b/extensions/templates/extensions/detail.html index 912d9e70..7c0ed8e4 100644 --- a/extensions/templates/extensions/detail.html +++ b/extensions/templates/extensions/detail.html @@ -5,6 +5,12 @@ {% block page_title %}{{ extension.name }}{% endblock page_title %} {% block content %} + {% if extension.latest_version %} + {% with latest=extension.latest_version %} + {% include "files/components/scan_details.html" with file=latest.file %} + {% endwith %} + {% endif %} + {% has_maintainer extension as is_maintainer %} {% with latest=extension.latest_version %} diff --git a/files/admin.py b/files/admin.py index e6306157..8c8fa1bc 100644 --- a/files/admin.py +++ b/files/admin.py @@ -1,6 +1,28 @@ from django.contrib import admin +import background_task.admin +import background_task.models -from .models import File +from .models import File, FileValidation +import files.signals + + +def scan_selected_files(self, request, queryset): + """Scan selected files.""" + for instance in queryset: + files.signals.schedule_scan(instance) + + +class FileValidationInlineAdmin(admin.StackedInline): + model = FileValidation + readonly_fields = ('date_created', 'date_modified', 'is_ok', 'results') + extra = 0 + + def _nope(self, request, obj): + return False + + has_add_permission = _nope + has_change_permission = _nope + has_delete_permission = _nope @admin.register(File) @@ -9,13 +31,14 @@ class FileAdmin(admin.ModelAdmin): save_on_top = True list_filter = ( + 'validation__is_ok', 'type', 'status', 'date_status_changed', 'date_approved', 'date_deleted', ) - list_display = ('original_name', 'extension', 'user', 'date_created', 'type', 'status') + list_display = ('original_name', 'extension', 'user', 'date_created', 'type', 'status', 'is_ok') list_select_related = ('version__extension', 'user') @@ -77,3 +100,56 @@ class FileAdmin(admin.ModelAdmin): }, ), ) + + inlines = [FileValidationInlineAdmin] + actions = [scan_selected_files] + + def is_ok(self, obj): + return obj.validation.is_ok if hasattr(obj, 'validation') else None + + is_ok.boolean = True + + +try: + admin.site.unregister(background_task.models.Task) + admin.site.unregister(background_task.models.CompletedTask) +except admin.site.NotRegistered: + pass + + +class TaskMixin: + """Modify a few properties of background tasks displayed in admin.""" + + def no_errors(self, obj): + """Replace background_task's "has_error". + + Make Django's red/green boolean icons less confusing + in the context of "there's an error during task run". + """ + return not bool(obj.last_error) + + no_errors.boolean = True + + +@admin.register(background_task.models.Task) +@admin.register(background_task.models.CompletedTask) +class TaskAdmin(background_task.admin.TaskAdmin, TaskMixin): + date_hierarchy = 'run_at' + list_display = [ + 'run_at', + 'task_name', + 'task_params', + 'attempts', + 'no_errors', + 'locked_by', + 'locked_by_pid_running', + ] + list_filter = ( + 'task_name', + 'run_at', + 'failed_at', + 'locked_at', + 'attempts', + 'creator_content_type', + ) + search_fields = ['task_name', 'task_params', 'last_error', 'verbose_name'] diff --git a/files/migrations/0005_rename_validation_filevalidation_results_and_more.py b/files/migrations/0005_rename_validation_filevalidation_results_and_more.py new file mode 100644 index 00000000..21d01245 --- /dev/null +++ b/files/migrations/0005_rename_validation_filevalidation_results_and_more.py @@ -0,0 +1,40 @@ +# Generated by Django 4.2.11 on 2024-04-12 09:05 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('files', '0004_alter_file_status'), + ] + + operations = [ + migrations.RenameField( + model_name='filevalidation', + old_name='validation', + new_name='results', + ), + migrations.AlterField( + model_name='filevalidation', + name='results', + field=models.JSONField(), + ), + migrations.RemoveField( + model_name='filevalidation', + name='errors', + ), + migrations.RemoveField( + model_name='filevalidation', + name='notices', + ), + migrations.RemoveField( + model_name='filevalidation', + name='warnings', + ), + migrations.RenameField( + model_name='filevalidation', + old_name='is_valid', + new_name='is_ok', + ), + ] diff --git a/files/models.py b/files/models.py index 246542cd..1d6f2d02 100644 --- a/files/models.py +++ b/files/models.py @@ -206,11 +206,8 @@ class File(CreatedModifiedMixin, TrackChangesMixin, SoftDeleteMixin, models.Mode class FileValidation(CreatedModifiedMixin, TrackChangesMixin, models.Model): - track_changes_to_fields = {'is_valid', 'errors', 'warnings', 'notices', 'validation'} + track_changes_to_fields = {'is_ok', 'results'} file = models.OneToOneField(File, related_name='validation', on_delete=models.CASCADE) - is_valid = models.BooleanField(default=False) - errors = models.IntegerField(default=0) - warnings = models.IntegerField(default=0) - notices = models.IntegerField(default=0) - validation = models.TextField() + is_ok = models.BooleanField(default=False) + results = models.JSONField() diff --git a/files/signals.py b/files/signals.py index 239506c9..3388efad 100644 --- a/files/signals.py +++ b/files/signals.py @@ -1,7 +1,12 @@ -from django.db.models.signals import pre_save +import logging + +from django.db.models.signals import pre_save, post_save from django.dispatch import receiver import files.models +import files.tasks + +logger = logging.getLogger(__name__) @receiver(pre_save, sender=files.models.File) @@ -9,3 +14,19 @@ def _record_changes(sender: object, instance: files.models.File, **kwargs: objec was_changed, old_state = instance.pre_save_record() instance.record_status_change(was_changed, old_state, **kwargs) + + +def schedule_scan(file: files.models.File) -> None: + """Schedule a scan of a given file.""" + logger.info('Scheduling a scan for file pk=%s', file.pk) + files.tasks.clamdscan(file_id=file.pk, creator=file, verbose_name=file.source.name) + + +@receiver(post_save, sender=files.models.File) +def _scan_new_file( + sender: object, instance: files.models.File, created: bool, **kwargs: object +) -> None: + if not created: + return + + schedule_scan(instance) diff --git a/files/tasks.py b/files/tasks.py new file mode 100644 index 00000000..8468d384 --- /dev/null +++ b/files/tasks.py @@ -0,0 +1,36 @@ +import logging +import os.path + +from background_task import background +from background_task.tasks import TaskSchedule +from django.conf import settings + +import files.models +import files.utils + +logger = logging.getLogger(__name__) + + +@background(schedule={'action': TaskSchedule.RESCHEDULE_EXISTING}) +def clamdscan(file_id: int): + """Run a scan of a given file and save its output as a FileValidation record.""" + file = files.models.File.objects.get(pk=file_id) + abs_path = os.path.join(settings.MEDIA_ROOT, file.source.path) + completed_process = files.utils.run_clamdscan(abs_path) + logger.info('File pk=%s scanned: exit code %s', file.pk, completed_process.returncode) + scan_result = { + 'clamdscan': { + 'args': completed_process.args, + 'stdout': completed_process.stdout.decode(), + 'stderr': completed_process.stderr.decode(), + 'returncode': completed_process.returncode, + } + } + is_ok = completed_process.returncode == 0 + file_validation, is_new = files.models.FileValidation.objects.get_or_create( + file=file, defaults={'results': scan_result, 'is_ok': is_ok} + ) + if not is_new: + file_validation.results = scan_result + file_validation.is_ok = is_ok + file_validation.save(update_fields={'results', 'is_ok'}) diff --git a/files/templates/files/components/scan_details.html b/files/templates/files/components/scan_details.html new file mode 100644 index 00000000..4a83d374 --- /dev/null +++ b/files/templates/files/components/scan_details.html @@ -0,0 +1,21 @@ +{% load common i18n %} +{# FIXME: we might want to rephrase is_moderator in terms of Django's (group) permissions #} +{% if perms.files.view_file or request.user.is_moderator %} + {% with file_validation=file.validation %} + {% if file_validation and not file_validation.is_ok %} +
+
+

⚠ {% trans "Suspicious upload" %}

+ {% blocktrans asvar alert_text %}Scan of the {{ file }} indicates malicious content.{% endblocktrans %} +

+ {{ alert_text }} + {% if perms.files.view_file %}{# Moderators don't necessarily have access to the admin #} + {% url 'admin:files_file_change' file.pk as admin_file_url %} + {% trans "See details" %} + {% endif %} +

+
+
+ {% endif %} + {% endwith %} +{% endif %} diff --git a/files/templates/files/components/scan_details_flag.html b/files/templates/files/components/scan_details_flag.html new file mode 100644 index 00000000..7b896fce --- /dev/null +++ b/files/templates/files/components/scan_details_flag.html @@ -0,0 +1,10 @@ +{% load common i18n %} +{# FIXME: we might want to rephrase is_moderator in terms of Django's (group) permissions #} +{% if perms.files.view_file or request.user.is_moderator %} + {% with file_validation=file.validation %} + {% if file_validation and not file_validation.is_ok %} + {% blocktrans asvar alert_text %}Scan of the {{ file }} indicates malicious content.{% endblocktrans %} + + {% endif %} + {% endwith %} +{% endif %} diff --git a/files/tests/test_signals.py b/files/tests/test_signals.py new file mode 100644 index 00000000..a2d73122 --- /dev/null +++ b/files/tests/test_signals.py @@ -0,0 +1,112 @@ +import os +import shutil +import tempfile +import unittest + +from background_task.models import Task +from django.conf import settings +from django.test import TestCase, override_settings + +from common.tests.factories.files import FileFactory +import files.models +import files.tasks + + +@unittest.skipUnless(shutil.which('clamdscan'), 'requires clamdscan') +@override_settings(MEDIA_ROOT='/tmp/') +class FileScanTest(TestCase): + def setUp(self): + super().setUp() + self.temp_directory = tempfile.mkdtemp(prefix=settings.MEDIA_ROOT) + + def tearDown(self): + super().tearDown() + shutil.rmtree(self.temp_directory) + + def test_scan_flags_found_invalid(self): + test_file_path = os.path.join(self.temp_directory, 'test_file.zip') + test_content = ( + b'X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*' # noqa: W605 + ) + with open(test_file_path, 'wb+') as test_file: + test_file.write(test_content) + + file = FileFactory(source=test_file_path) + self.assertFalse(hasattr(file, 'validation')) + + # A background task should have been created + task = Task.objects.created_by(creator=file).first() + self.assertIsNotNone(task) + self.assertEqual(task.task_name, 'files.tasks.clamdscan') + self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]') + + # Actually run the task as if by background runner + task_args, task_kwargs = task.params() + files.tasks.clamdscan.task_function(*task_args, **task_kwargs) + + file.refresh_from_db() + self.assertFalse(file.validation.is_ok) + result = file.validation.results['clamdscan'] + self.assertEqual(result['returncode'], 1) + stdout_lines = result['stdout'].split('\n') + self.assertIn(f'{file.source.name}: Win.Test.EICAR_HDB-1 FOUND', stdout_lines[0]) + self.assertEqual(result['stderr'], '') + + def test_scan_flags_found_invalid_updates_existing_validation(self): + test_file_path = os.path.join(self.temp_directory, 'test_file.zip') + test_content = ( + b'X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*' # noqa: W605 + ) + with open(test_file_path, 'wb+') as test_file: + test_file.write(test_content) + + file = FileFactory(source=test_file_path) + # Make sure validation record exists before scanner runs + existing_validation = files.models.FileValidation(file=file, results={}) + existing_validation.save() + self.assertTrue(hasattr(file, 'validation')) + + # A background task should have been created + task = Task.objects.created_by(creator=file).first() + self.assertIsNotNone(task) + self.assertEqual(task.task_name, 'files.tasks.clamdscan') + self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]') + + # Actually run the task as if by background runner + task_args, task_kwargs = task.params() + files.tasks.clamdscan.task_function(*task_args, **task_kwargs) + + self.assertFalse(file.validation.is_ok) + file.validation.refresh_from_db() + result = file.validation.results['clamdscan'] + self.assertEqual(result['returncode'], 1) + stdout_lines = result['stdout'].split('\n') + self.assertIn(f'{file.source.name}: Win.Test.EICAR_HDB-1 FOUND', stdout_lines[0]) + self.assertEqual(result['stderr'], '') + self.assertEqual(existing_validation.pk, file.validation.pk) + + def test_scan_flags_nothing_found_valid(self): + test_file_path = os.path.join(self.temp_directory, 'test_file.zip') + with open(test_file_path, 'wb+') as test_file: + test_file.write(b'some file') + + file = FileFactory(source=test_file_path) + self.assertFalse(hasattr(file, 'validation')) + + # A background task should have been created + task = Task.objects.created_by(creator=file).first() + self.assertIsNotNone(task) + self.assertEqual(task.task_name, 'files.tasks.clamdscan') + self.assertEqual(task.task_params, f'[[], {{"file_id": {file.pk}}}]') + + # Actually run the task as if by background runner + task_args, task_kwargs = task.params() + files.tasks.clamdscan.task_function(*task_args, **task_kwargs) + + file.refresh_from_db() + self.assertTrue(file.validation.is_ok) + result = file.validation.results['clamdscan'] + self.assertEqual(result['returncode'], 0) + stdout_lines = result['stdout'].split('\n') + self.assertIn(f'{file.source.name}: OK', stdout_lines[0]) + self.assertEqual(result['stderr'], '') diff --git a/files/utils.py b/files/utils.py index 3c4f413f..c0bb26ec 100644 --- a/files/utils.py +++ b/files/utils.py @@ -4,6 +4,8 @@ import io import logging import mimetypes import os +import os.path +import subprocess import toml import typing import zipfile @@ -161,3 +163,12 @@ def guess_mimetype_from_content(file_obj) -> str: # This file might be read again by validation or other utilities file_obj.seek(0) return mimetype_from_bytes + + +def run_clamdscan(abs_path: str) -> 'subprocess.CompletedProcess': + logger.info('Scanning file at path=%s', abs_path) + scan_args = ['clamdscan', '--fdpass', abs_path] + logger.info('Running %s', scan_args) + completed_process = subprocess.run(scan_args, capture_output=True) + logger.info('File at path=%s scanned: exit code %s', abs_path, completed_process.returncode) + return completed_process diff --git a/playbooks/install.yaml b/playbooks/install.yaml index 44840ccd..c625ae40 100644 --- a/playbooks/install.yaml +++ b/playbooks/install.yaml @@ -9,6 +9,8 @@ - name: Installing required packages ansible.builtin.apt: name={{ item }} state=present with_items: + - clamav-daemon + - clamdscan - git - libpq-dev - nginx-full diff --git a/reviewers/templates/reviewers/components/review_list_item.html b/reviewers/templates/reviewers/components/review_list_item.html index 0bbe5b15..8de1cfb5 100644 --- a/reviewers/templates/reviewers/components/review_list_item.html +++ b/reviewers/templates/reviewers/components/review_list_item.html @@ -22,6 +22,7 @@ {{ extension.review_activity.all.last.date_created|naturaltime_compact }} {% endif %} + {% include "files/components/scan_details_flag.html" with file=extension.latest_version.file %}