extensions-website/files/utils.py

from pathlib import Path
import hashlib
import io
import logging
import mimetypes
import os
import toml
import zipfile

from lxml import etree
import magic

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Absolute, symlink-resolved directory that contains this source file.
MODULE_DIR = Path(__file__).resolve().parent
# Lazily-filled cache used by `_get_theme_schema()`: it starts empty and
# receives the compiled schema on first use. A list is used so it can be
# mutated in place without a `global` statement.
THEME_SCHEMA = []


def _get_theme_schema():
    """Return the XML schema compiled from `theme.xsd`, building it on first use.

    The compiled schema is stored as the first item of the module-level
    `THEME_SCHEMA` list, so the XSD file next to this module is only read
    and parsed when that list is still empty.
    """
    if THEME_SCHEMA:
        # Already compiled by an earlier call.
        return THEME_SCHEMA[0]

    xsd_path = MODULE_DIR / 'theme.xsd'
    with open(xsd_path, 'rb') as xsd_file:
        compiled = etree.XMLSchema(etree.XML(xsd_file.read()))
        THEME_SCHEMA.append(compiled)
    return THEME_SCHEMA[0]


def get_sha256(file_obj):
    """Return the hexadecimal sha256 digest of the contents of `file_obj`.

    `file_obj` is either an open binary file-like object (the caller stays
    responsible for closing it) or a django File-like object exposing a
    callable chunks() method that yields its contents piece by piece.

    A plain file-like object is read from its current position. In both
    cases `file_obj` is rewound to position 0 before returning.
    """
    digest = hashlib.sha256()
    chunks_method = getattr(file_obj, 'chunks', None)
    if callable(chunks_method):
        for piece in chunks_method():
            digest.update(piece)
    else:
        # Read fixed-size blocks until read() returns an empty bytes object.
        while True:
            piece = file_obj.read(io.DEFAULT_BUFFER_SIZE)
            if piece == b'':
                break
            digest.update(piece)
    # Rewind: validation or other utilities might read this file again.
    file_obj.seek(0)
    return digest.hexdigest()


def get_sha256_from_value(value: str):
    """Return the hexadecimal sha256 digest of `value`.

    `value` is converted with str() and encoded with the default encoding
    of str.encode() before hashing, so non-string values are accepted too.
    """
    encoded = str(value).encode()
    return hashlib.sha256(encoded).hexdigest()


def find_file_inside_zip_list(file_to_read: str, name_list: list) -> str:
    """Return the first path in `name_list` whose basename is `file_to_read`.

    Every entry has its leading/trailing whitespace stripped before its
    basename is compared, and it is the stripped path that gets returned.
    None is returned when no entry matches.
    """
    stripped_entries = (entry.strip() for entry in name_list)
    matching_entries = (
        entry for entry in stripped_entries
        if os.path.basename(entry) == file_to_read
    )
    # Entries are examined lazily and in order, so the first match wins.
    return next(matching_entries, None)


def read_manifest_from_zip(archive_path):
    """Read and parse `blender_manifest.toml` from the zip at `archive_path`.

    The archive's name list is searched for the first entry whose basename
    is `blender_manifest.toml`; that entry is decoded and parsed as TOML.

    Returns the parsed TOML content, or None when the manifest is missing,
    is not valid TOML, or any other error occurs while reading the archive.
    Every failure is logged instead of being raised.
    """
    file_to_read = 'blender_manifest.toml'
    try:
        with zipfile.ZipFile(archive_path) as archive:
            member_name = find_file_inside_zip_list(file_to_read, archive.namelist())

            if member_name is not None:
                # Read the manifest member and parse it as TOML.
                with archive.open(member_name) as manifest_file:
                    manifest_text = manifest_file.read().decode()
                    return toml.loads(manifest_text)

            logger.info(f"File '{file_to_read}' not found in the archive.")

    except toml.decoder.TomlDecodeError as e:
        # TODO: This error should be propagated to the user
        logger.error(f"Manifest Error: {e.msg}")

    except Exception as e:
        logger.error(f"Error extracting from archive: {e}")

    return None


def guess_mimetype_from_ext(file_name: str) -> str:
    """Return the MIME-type suggested by the extension of `file_name`.

    Only the type part of the `mimetypes.guess_type()` result is returned,
    which is None when the type cannot be guessed.
    """
    guessed = mimetypes.guess_type(file_name)
    return guessed[0]


def guess_mimetype_from_content(file_obj) -> str:
    """Return the MIME-type detected from the leading bytes of `file_obj`.

    Up to 2048 bytes are read from the current position and handed to
    `magic.from_buffer()`; `file_obj` is then rewound to position 0.
    """
    leading_bytes = file_obj.read(2048)
    detected_mimetype = magic.from_buffer(leading_bytes, mime=True)
    # Rewind: validation or other utilities might read this file again.
    file_obj.seek(0)
    return detected_mimetype