ffmpeg: optimize ffmpeg_postprocess #116309

Merged
Aras Pranckevicius merged 4 commits from aras_p/blender:ffmpeg-threaded-decode-conv into main 2023-12-19 18:29:01 +01:00
3 changed files with 73 additions and 58 deletions

View File

@ -39,9 +39,11 @@ enum {
FFMPEG_PRESET_AV1 = 8,
};
struct AVFrame;
struct RenderData;
struct ReportList;
struct Scene;
struct SwsContext;
int BKE_ffmpeg_start(void *context_v,
const Scene *scene,
@ -73,4 +75,8 @@ bool BKE_ffmpeg_alpha_channel_is_supported(const RenderData *rd);
void *BKE_ffmpeg_context_create(void);
void BKE_ffmpeg_context_free(void *context_v);
SwsContext *BKE_ffmpeg_sws_get_context(
int width, int height, int av_src_format, int av_dst_format, int sws_flags);
void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *src);
Review

I am a bit sad that you can't use AVPixelFormat type. You should be able to add an include file with the type to BKE_writeffmpeg.hh, but it says that it can't find the file, which is weird to me.

I am a bit sad that you can't use the `AVPixelFormat` type. You should be able to add an include file with the type to `BKE_writeffmpeg.hh`, but it says that it can't find the file, which is weird to me.

That's because at least the blenloader library that includes BKE_writeffmpeg.hh does not set up FFMPEG include directories for itself. I'd rather keep it that way, i.e. sure, it would be nicer to use AVPixelFormat here, but the downside is that any users of this header file would now need to take care of setting up FFMPEG directories. Sounds like a larger hassle.

That's because at least the `blenloader` library that includes `BKE_writeffmpeg.hh` does not set up FFMPEG include directories for itself. I'd rather keep it that way, i.e. sure, it would be nicer to use `AVPixelFormat` here, but the downside is that any users of this header file would now need to take care of setting up FFMPEG directories. Sounds like a larger hassle.
Review

Thanks for checking this. IMO the FFMpegCodecData::type enum belongs to DNA. I would say that BLO code should have no business caring about the libs we build with, unless it uses them to process some data.

Thanks for checking this. IMO the `FFMpegCodecData::type` enum belongs to DNA. I would say that BLO code should have no business caring about the libs we build with, unless it uses them to process some data.
#endif

View File

@ -420,17 +420,7 @@ static AVFrame *generate_video_frame(FFMpegContext *context, const uint8_t *pixe
/* Convert to the output pixel format, if it's different than Blender's internal one. */
if (context->img_convert_frame != nullptr) {
BLI_assert(context->img_convert_ctx != NULL);
# if defined(FFMPEG_SWSCALE_THREADING)
sws_scale_frame(context->img_convert_ctx, context->current_frame, rgb_frame);
# else
sws_scale(context->img_convert_ctx,
(const uint8_t *const *)rgb_frame->data,
rgb_frame->linesize,
0,
codec->height,
context->current_frame->data,
context->current_frame->linesize);
# endif
BKE_ffmpeg_sws_scale_frame(context->img_convert_ctx, context->current_frame, rgb_frame);
}
return context->current_frame;
@ -677,10 +667,8 @@ static const AVCodec *get_av1_encoder(
return codec;
}
static SwsContext *get_threaded_sws_context(int width,
int height,
AVPixelFormat src_format,
AVPixelFormat dst_format)
SwsContext *BKE_ffmpeg_sws_get_context(
int width, int height, int av_src_format, int av_dst_format, int sws_flags)
{
# if defined(FFMPEG_SWSCALE_THREADING)
/* sws_getContext does not allow passing flags that ask for multi-threaded
@ -691,11 +679,11 @@ static SwsContext *get_threaded_sws_context(int width,
}
av_opt_set_int(c, "srcw", width, 0);
av_opt_set_int(c, "srch", height, 0);
av_opt_set_int(c, "src_format", src_format, 0);
av_opt_set_int(c, "src_format", av_src_format, 0);
av_opt_set_int(c, "dstw", width, 0);
av_opt_set_int(c, "dsth", height, 0);
av_opt_set_int(c, "dst_format", dst_format, 0);
av_opt_set_int(c, "sws_flags", SWS_BICUBIC, 0);
av_opt_set_int(c, "dst_format", av_dst_format, 0);
av_opt_set_int(c, "sws_flags", sws_flags, 0);
av_opt_set_int(c, "threads", BLI_system_thread_count(), 0);
if (sws_init_context(c, nullptr, nullptr) < 0) {
@ -705,11 +693,11 @@ static SwsContext *get_threaded_sws_context(int width,
# else
SwsContext *c = sws_getContext(width,
height,
src_format,
AVPixelFormat(av_src_format),
width,
height,
dst_format,
SWS_BICUBIC,
AVPixelFormat(av_dst_format),
sws_flags,
nullptr,
nullptr,
nullptr);
@ -717,6 +705,14 @@ static SwsContext *get_threaded_sws_context(int width,
return c;
}
/* Convert/scale `src` into `dst` using the given swscale context `ctx`.
 * When built with FFMPEG_SWSCALE_THREADING, uses the frame-based
 * sws_scale_frame() API (paired with the multi-threaded context set up by
 * BKE_ffmpeg_sws_get_context); otherwise falls back to the plane-pointer
 * based sws_scale() call, converting all of `src` (rows 0..src->height). */
void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *src)
{
# if defined(FFMPEG_SWSCALE_THREADING)
sws_scale_frame(ctx, dst, src);
# else
sws_scale(ctx, src->data, src->linesize, 0, src->height, dst->data, dst->linesize);
# endif
}
/* prepare a video stream for the output file */
@ -955,8 +951,8 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
else {
/* Output pixel format is different, allocate frame for conversion. */
context->img_convert_frame = alloc_picture(AV_PIX_FMT_RGBA, c->width, c->height);
context->img_convert_ctx = get_threaded_sws_context(
c->width, c->height, AV_PIX_FMT_RGBA, c->pix_fmt);
context->img_convert_ctx = BKE_ffmpeg_sws_get_context(
c->width, c->height, AV_PIX_FMT_RGBA, c->pix_fmt, SWS_BICUBIC);
}
avcodec_parameters_from_context(st->codecpar, c);

View File

@ -63,6 +63,7 @@
#ifdef WITH_FFMPEG
# include "BKE_global.h" /* ENDIAN_ORDER */
# include "BKE_writeffmpeg.hh"
extern "C" {
# include <libavcodec/avcodec.h>
@ -694,16 +695,12 @@ static int startffmpeg(anim *anim)
1);
}
anim->img_convert_ctx = sws_getContext(anim->x,
anim->y,
anim->pCodecCtx->pix_fmt,
anim->x,
anim->y,
AV_PIX_FMT_RGBA,
SWS_BILINEAR | SWS_PRINT_INFO | SWS_FULL_CHR_H_INT,
nullptr,
nullptr,
nullptr);
anim->img_convert_ctx = BKE_ffmpeg_sws_get_context(anim->x,
anim->y,
anim->pCodecCtx->pix_fmt,
AV_PIX_FMT_RGBA,
SWS_BILINEAR | SWS_PRINT_INFO |
SWS_FULL_CHR_H_INT);
if (!anim->img_convert_ctx) {
fprintf(stderr, "Can't transform color space??? Bailing out...\n");
@ -846,32 +843,48 @@ static void ffmpeg_postprocess(anim *anim, AVFrame *input, ImBuf *ibuf)
}
}
sws_scale(anim->img_convert_ctx,
(const uint8_t *const *)input->data,
input->linesize,
0,
anim->y,
anim->pFrameRGB->data,
anim->pFrameRGB->linesize);
/* If final destination image layout matches that of decoded RGB frame (including
* any line padding done by ffmpeg for SIMD alignment), we can directly
* decode into that, doing the vertical flip in the same step. Otherwise have
* to do a separate flip. */
const int ibuf_linesize = ibuf->x * 4;
const int rgb_linesize = anim->pFrameRGB->linesize[0];
bool scale_to_ibuf = (rgb_linesize == ibuf_linesize);
/* swscale on arm64 before ffmpeg 6.0 (libswscale major version 7)
* could not handle negative line sizes. That has been fixed in all major
* ffmpeg releases in early 2023, but easier to just check for "below 7". */
# if (defined(__aarch64__) || defined(_M_ARM64)) && (LIBSWSCALE_VERSION_MAJOR < 7)
scale_to_ibuf = false;
# endif
uint8_t *rgb_data = anim->pFrameRGB->data[0];
if (scale_to_ibuf) {
/* Decode RGB and do vertical flip directly into destination image, by using negative
* line size. */
anim->pFrameRGB->linesize[0] = -ibuf_linesize;
anim->pFrameRGB->data[0] = ibuf->byte_buffer.data + (ibuf->y - 1) * ibuf_linesize;
BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
anim->pFrameRGB->linesize[0] = rgb_linesize;
anim->pFrameRGB->data[0] = rgb_data;
}
else {
/* Decode, then do vertical flip into destination. */
BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
/* Use negative line size to do vertical image flip. */
const int src_linesize[4] = {-rgb_linesize, 0, 0, 0};
const uint8_t *const src[4] = {
rgb_data + (anim->y - 1) * rgb_linesize, nullptr, nullptr, nullptr};
int dst_size = av_image_get_buffer_size(AVPixelFormat(anim->pFrameRGB->format),
anim->pFrameRGB->width,
anim->pFrameRGB->height,
1);
av_image_copy_to_buffer(
ibuf->byte_buffer.data, dst_size, src, src_linesize, AV_PIX_FMT_RGBA, anim->x, anim->y, 1);
}
/* Copy the valid bytes from the aligned buffer vertically flipped into ImBuf */
int aligned_stride = anim->pFrameRGB->linesize[0];
const uint8_t *const src[4] = {
anim->pFrameRGB->data[0] + (anim->y - 1) * aligned_stride, nullptr, nullptr, nullptr};
/* NOTE: Negative linesize is used to copy and flip image at once with function
* `av_image_copy_to_buffer`. This could cause issues in future and image may need to be flipped
* explicitly. */
const int src_linesize[4] = {-anim->pFrameRGB->linesize[0], 0, 0, 0};
int dst_size = av_image_get_buffer_size(
AVPixelFormat(anim->pFrameRGB->format), anim->pFrameRGB->width, anim->pFrameRGB->height, 1);
av_image_copy_to_buffer((uint8_t *)ibuf->byte_buffer.data,
dst_size,
src,
src_linesize,
AV_PIX_FMT_RGBA,
anim->x,
anim->y,
1);
if (filter_y) {
IMB_filtery(ibuf);
}