This repository has been archived on 2023-10-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
blender-archive/source/blender/blenlib/intern/filereader_zstd.c
2021-08-24 12:49:00 +10:00

336 lines
9.7 KiB
C

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* The Original Code is Copyright (C) 2021 Blender Foundation
* All rights reserved.
*/
/** \file
* \ingroup bli
*/
#include <string.h>
#include <zstd.h>
#include "BLI_blenlib.h"
#include "BLI_endian_switch.h"
#include "BLI_filereader.h"
#include "BLI_math_base.h"
#include "MEM_guardedalloc.h"
typedef struct {
FileReader reader;
FileReader *base;
ZSTD_DCtx *ctx;
ZSTD_inBuffer in_buf;
size_t in_buf_max_size;
struct {
int num_frames;
size_t *compressed_ofs;
size_t *uncompressed_ofs;
char *cached_content;
int cached_frame;
} seek;
} ZstdReader;
static bool zstd_read_u32(FileReader *base, uint32_t *val)
{
if (base->read(base, val, sizeof(uint32_t)) != sizeof(uint32_t)) {
return false;
}
#ifdef __BIG_ENDIAN__
BLI_endian_switch_uint32(val);
#endif
return true;
}
static bool zstd_read_seek_table(ZstdReader *zstd)
{
FileReader *base = zstd->base;
/* The seek table frame is at the end of the file, so seek there
* and verify that there is enough data. */
if (base->seek(base, -4, SEEK_END) < 13) {
return false;
}
uint32_t magic;
if (!zstd_read_u32(base, &magic) || magic != 0x8F92EAB1) {
return false;
}
uint8_t flags;
if (base->seek(base, -5, SEEK_END) < 0 || base->read(base, &flags, 1) != 1) {
return false;
}
/* Bit 7 indicates check-sums. Bits 5 and 6 must be zero. */
bool has_checksums = (flags & 0x80);
if (flags & 0x60) {
return false;
}
uint32_t num_frames;
if (base->seek(base, -9, SEEK_END) < 0 || !zstd_read_u32(base, &num_frames)) {
return false;
}
/* Each frame has either 2 or 3 uint32_t, and after that we have
* num_frames, flags and magic for another 9 bytes. */
uint32_t expected_frame_length = num_frames * (has_checksums ? 12 : 8) + 9;
/* The frame starts with another magic number and its length, but these
* two fields are not included when counting length. */
off64_t frame_start_ofs = 8 + expected_frame_length;
/* Sanity check: Before the start of the seek table frame,
* there must be num_frames frames, each of which at least 8 bytes long. */
off64_t seek_frame_start = base->seek(base, -frame_start_ofs, SEEK_END);
if (seek_frame_start < num_frames * 8) {
return false;
}
if (!zstd_read_u32(base, &magic) || magic != 0x184D2A5E) {
return false;
}
uint32_t frame_length;
if (!zstd_read_u32(base, &frame_length) || frame_length != expected_frame_length) {
return false;
}
zstd->seek.num_frames = num_frames;
zstd->seek.compressed_ofs = MEM_malloc_arrayN(num_frames + 1, sizeof(size_t), __func__);
zstd->seek.uncompressed_ofs = MEM_malloc_arrayN(num_frames + 1, sizeof(size_t), __func__);
size_t compressed_ofs = 0;
size_t uncompressed_ofs = 0;
for (int i = 0; i < num_frames; i++) {
uint32_t compressed_size, uncompressed_size;
if (!zstd_read_u32(base, &compressed_size) || !zstd_read_u32(base, &uncompressed_size)) {
break;
}
if (has_checksums && base->seek(base, 4, SEEK_CUR) < 0) {
break;
}
zstd->seek.compressed_ofs[i] = compressed_ofs;
zstd->seek.uncompressed_ofs[i] = uncompressed_ofs;
compressed_ofs += compressed_size;
uncompressed_ofs += uncompressed_size;
}
zstd->seek.compressed_ofs[num_frames] = compressed_ofs;
zstd->seek.uncompressed_ofs[num_frames] = uncompressed_ofs;
/* Seek to the end of the previous frame for the following #BHead frame detection. */
if (seek_frame_start != compressed_ofs || base->seek(base, seek_frame_start, SEEK_SET) < 0) {
MEM_freeN(zstd->seek.compressed_ofs);
MEM_freeN(zstd->seek.uncompressed_ofs);
memset(&zstd->seek, 0, sizeof(zstd->seek));
return false;
}
zstd->seek.cached_frame = -1;
return true;
}
/* Find out which frame contains the given position in the uncompressed stream.
* Basically just bisection. */
static int zstd_frame_from_pos(ZstdReader *zstd, size_t pos)
{
int low = 0, high = zstd->seek.num_frames;
if (pos >= zstd->seek.uncompressed_ofs[zstd->seek.num_frames]) {
return -1;
}
while (low + 1 < high) {
int mid = low + ((high - low) >> 1);
if (zstd->seek.uncompressed_ofs[mid] <= pos) {
low = mid;
}
else {
high = mid;
}
}
return low;
}
/* Ensure that the currently loaded frame is the correct one. */
static const char *zstd_ensure_cache(ZstdReader *zstd, int frame)
{
if (zstd->seek.cached_frame == frame) {
/* Cached frame matches, so just return it. */
return zstd->seek.cached_content;
}
/* Cached frame doesn't match, so discard it and cache the wanted one instead. */
MEM_SAFE_FREE(zstd->seek.cached_content);
size_t compressed_size = zstd->seek.compressed_ofs[frame + 1] - zstd->seek.compressed_ofs[frame];
size_t uncompressed_size = zstd->seek.uncompressed_ofs[frame + 1] -
zstd->seek.uncompressed_ofs[frame];
char *uncompressed_data = MEM_mallocN(uncompressed_size, __func__);
char *compressed_data = MEM_mallocN(compressed_size, __func__);
if (zstd->base->seek(zstd->base, zstd->seek.compressed_ofs[frame], SEEK_SET) < 0 ||
zstd->base->read(zstd->base, compressed_data, compressed_size) < compressed_size) {
MEM_freeN(compressed_data);
MEM_freeN(uncompressed_data);
return NULL;
}
size_t res = ZSTD_decompressDCtx(
zstd->ctx, uncompressed_data, uncompressed_size, compressed_data, compressed_size);
MEM_freeN(compressed_data);
if (ZSTD_isError(res) || res < uncompressed_size) {
MEM_freeN(uncompressed_data);
return NULL;
}
zstd->seek.cached_frame = frame;
zstd->seek.cached_content = uncompressed_data;
return uncompressed_data;
}
static ssize_t zstd_read_seekable(FileReader *reader, void *buffer, size_t size)
{
ZstdReader *zstd = (ZstdReader *)reader;
size_t end_offset = zstd->reader.offset + size, read_len = 0;
while (zstd->reader.offset < end_offset) {
int frame = zstd_frame_from_pos(zstd, zstd->reader.offset);
if (frame < 0) {
/* EOF is reached, so return as much as we can. */
break;
}
const char *framedata = zstd_ensure_cache(zstd, frame);
if (framedata == NULL) {
/* Error while reading the frame, so return as much as we can. */
break;
}
size_t frame_end_offset = min_zz(zstd->seek.uncompressed_ofs[frame + 1], end_offset);
size_t frame_read_len = frame_end_offset - zstd->reader.offset;
size_t offset_in_frame = zstd->reader.offset - zstd->seek.uncompressed_ofs[frame];
memcpy((char *)buffer + read_len, framedata + offset_in_frame, frame_read_len);
read_len += frame_read_len;
zstd->reader.offset = frame_end_offset;
}
return read_len;
}
static off64_t zstd_seek(FileReader *reader, off64_t offset, int whence)
{
ZstdReader *zstd = (ZstdReader *)reader;
off64_t new_pos;
if (whence == SEEK_SET) {
new_pos = offset;
}
else if (whence == SEEK_END) {
new_pos = zstd->seek.uncompressed_ofs[zstd->seek.num_frames] + offset;
}
else {
new_pos = zstd->reader.offset + offset;
}
if (new_pos < 0 || new_pos > zstd->seek.uncompressed_ofs[zstd->seek.num_frames]) {
return -1;
}
zstd->reader.offset = new_pos;
return zstd->reader.offset;
}
static ssize_t zstd_read(FileReader *reader, void *buffer, size_t size)
{
ZstdReader *zstd = (ZstdReader *)reader;
ZSTD_outBuffer output = {buffer, size, 0};
while (output.pos < output.size) {
if (zstd->in_buf.pos == zstd->in_buf.size) {
/* Ran out of buffered input data, read some more. */
zstd->in_buf.pos = 0;
ssize_t readsize = zstd->base->read(
zstd->base, (char *)zstd->in_buf.src, zstd->in_buf_max_size);
if (readsize > 0) {
/* We got some data, so mark the buffer as refilled. */
zstd->in_buf.size = readsize;
}
else {
/* The underlying file is EOF, so return as much as we can. */
break;
}
}
if (ZSTD_isError(ZSTD_decompressStream(zstd->ctx, &output, &zstd->in_buf))) {
break;
}
}
zstd->reader.offset += output.pos;
return output.pos;
}
static void zstd_close(FileReader *reader)
{
ZstdReader *zstd = (ZstdReader *)reader;
ZSTD_freeDCtx(zstd->ctx);
if (zstd->reader.seek) {
MEM_freeN(zstd->seek.uncompressed_ofs);
MEM_freeN(zstd->seek.compressed_ofs);
MEM_freeN(zstd->seek.cached_content);
}
else {
MEM_freeN((void *)zstd->in_buf.src);
}
zstd->base->close(zstd->base);
MEM_freeN(zstd);
}
FileReader *BLI_filereader_new_zstd(FileReader *base)
{
ZstdReader *zstd = MEM_callocN(sizeof(ZstdReader), __func__);
zstd->ctx = ZSTD_createDCtx();
zstd->base = base;
if (zstd_read_seek_table(zstd)) {
zstd->reader.read = zstd_read_seekable;
zstd->reader.seek = zstd_seek;
}
else {
zstd->reader.read = zstd_read;
zstd->reader.seek = NULL;
zstd->in_buf_max_size = ZSTD_DStreamInSize();
zstd->in_buf.src = MEM_mallocN(zstd->in_buf_max_size, "zstd in buf");
zstd->in_buf.size = zstd->in_buf_max_size;
/* This signals that the buffer has run out,
* which will make the read function refill it on the first call. */
zstd->in_buf.pos = zstd->in_buf_max_size;
}
zstd->reader.close = zstd_close;
return (FileReader *)zstd;
}