diff --git a/source/blender/blenkernel/BKE_writeffmpeg.hh b/source/blender/blenkernel/BKE_writeffmpeg.hh
index 171d30f3100..7910371e9a3 100644
--- a/source/blender/blenkernel/BKE_writeffmpeg.hh
+++ b/source/blender/blenkernel/BKE_writeffmpeg.hh
@@ -39,9 +39,11 @@ enum {
   FFMPEG_PRESET_AV1 = 8,
 };
 
+struct AVFrame;
 struct RenderData;
 struct ReportList;
 struct Scene;
+struct SwsContext;
 
 int BKE_ffmpeg_start(void *context_v,
                      const Scene *scene,
@@ -73,4 +75,8 @@ bool BKE_ffmpeg_alpha_channel_is_supported(const RenderData *rd);
 void *BKE_ffmpeg_context_create(void);
 void BKE_ffmpeg_context_free(void *context_v);
 
+SwsContext *BKE_ffmpeg_sws_get_context(
+    int width, int height, int av_src_format, int av_dst_format, int sws_flags);
+void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *src);
+
 #endif
diff --git a/source/blender/blenkernel/intern/writeffmpeg.cc b/source/blender/blenkernel/intern/writeffmpeg.cc
index 76304e8beda..bf530be42ea 100644
--- a/source/blender/blenkernel/intern/writeffmpeg.cc
+++ b/source/blender/blenkernel/intern/writeffmpeg.cc
@@ -420,17 +420,7 @@ static AVFrame *generate_video_frame(FFMpegContext *context, const uint8_t *pixe
   /* Convert to the output pixel format, if it's different that Blender's internal one. */
   if (context->img_convert_frame != nullptr) {
     BLI_assert(context->img_convert_ctx != NULL);
-#  if defined(FFMPEG_SWSCALE_THREADING)
-    sws_scale_frame(context->img_convert_ctx, context->current_frame, rgb_frame);
-#  else
-    sws_scale(context->img_convert_ctx,
-              (const uint8_t *const *)rgb_frame->data,
-              rgb_frame->linesize,
-              0,
-              codec->height,
-              context->current_frame->data,
-              context->current_frame->linesize);
-#  endif
+    BKE_ffmpeg_sws_scale_frame(context->img_convert_ctx, context->current_frame, rgb_frame);
   }
 
   return context->current_frame;
@@ -677,10 +667,8 @@ static const AVCodec *get_av1_encoder(
   return codec;
 }
 
-static SwsContext *get_threaded_sws_context(int width,
-                                            int height,
-                                            AVPixelFormat src_format,
-                                            AVPixelFormat dst_format)
+SwsContext *BKE_ffmpeg_sws_get_context(
+    int width, int height, int av_src_format, int av_dst_format, int sws_flags)
 {
 #  if defined(FFMPEG_SWSCALE_THREADING)
   /* sws_getContext does not allow passing flags that ask for multi-threaded
@@ -691,11 +679,11 @@ static SwsContext *get_threaded_sws_context(int width,
   }
   av_opt_set_int(c, "srcw", width, 0);
   av_opt_set_int(c, "srch", height, 0);
-  av_opt_set_int(c, "src_format", src_format, 0);
+  av_opt_set_int(c, "src_format", av_src_format, 0);
   av_opt_set_int(c, "dstw", width, 0);
   av_opt_set_int(c, "dsth", height, 0);
-  av_opt_set_int(c, "dst_format", dst_format, 0);
-  av_opt_set_int(c, "sws_flags", SWS_BICUBIC, 0);
+  av_opt_set_int(c, "dst_format", av_dst_format, 0);
+  av_opt_set_int(c, "sws_flags", sws_flags, 0);
   av_opt_set_int(c, "threads", BLI_system_thread_count(), 0);
 
   if (sws_init_context(c, nullptr, nullptr) < 0) {
@@ -705,11 +693,11 @@ static SwsContext *get_threaded_sws_context(int width,
 #  else
   SwsContext *c = sws_getContext(width,
                                  height,
-                                 src_format,
+                                 AVPixelFormat(av_src_format),
                                  width,
                                  height,
-                                 dst_format,
-                                 SWS_BICUBIC,
+                                 AVPixelFormat(av_dst_format),
+                                 sws_flags,
                                  nullptr,
                                  nullptr,
                                  nullptr);
@@ -717,6 +705,14 @@ static SwsContext *get_threaded_sws_context(int width,
 
   return c;
 }
+void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *src)
+{ /* Convert all of `src` into `dst` using `ctx`. */
+#  if defined(FFMPEG_SWSCALE_THREADING)
+  sws_scale_frame(ctx, dst, src); /* Frame-based API, used with the threaded context. */
+#  else /* !FFMPEG_SWSCALE_THREADING */
+  sws_scale(ctx, src->data, src->linesize, 0, src->height, dst->data, dst->linesize);
+#  endif /* FFMPEG_SWSCALE_THREADING */
+}
 
 /* prepare a video stream for the output file */
 
@@ -955,8 +951,8 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
   else {
     /* Output pixel format is different, allocate frame for conversion. */
     context->img_convert_frame = alloc_picture(AV_PIX_FMT_RGBA, c->width, c->height);
-    context->img_convert_ctx = get_threaded_sws_context(
-        c->width, c->height, AV_PIX_FMT_RGBA, c->pix_fmt);
+    context->img_convert_ctx = BKE_ffmpeg_sws_get_context(
+        c->width, c->height, AV_PIX_FMT_RGBA, c->pix_fmt, SWS_BICUBIC);
   }
 
   avcodec_parameters_from_context(st->codecpar, c);
diff --git a/source/blender/imbuf/intern/anim_movie.cc b/source/blender/imbuf/intern/anim_movie.cc
index 1f2eff42c59..66f173f0816 100644
--- a/source/blender/imbuf/intern/anim_movie.cc
+++ b/source/blender/imbuf/intern/anim_movie.cc
@@ -63,6 +63,7 @@
 
 #ifdef WITH_FFMPEG
 #  include "BKE_global.h" /* ENDIAN_ORDER */
+#  include "BKE_writeffmpeg.hh"
 
 extern "C" {
 #  include <libavcodec/avcodec.h>
@@ -694,16 +695,12 @@ static int startffmpeg(anim *anim)
         1);
   }
 
-  anim->img_convert_ctx = sws_getContext(anim->x,
-                                         anim->y,
-                                         anim->pCodecCtx->pix_fmt,
-                                         anim->x,
-                                         anim->y,
-                                         AV_PIX_FMT_RGBA,
-                                         SWS_BILINEAR | SWS_PRINT_INFO | SWS_FULL_CHR_H_INT,
-                                         nullptr,
-                                         nullptr,
-                                         nullptr);
+  anim->img_convert_ctx = BKE_ffmpeg_sws_get_context(anim->x,
+                                                     anim->y,
+                                                     anim->pCodecCtx->pix_fmt,
+                                                     AV_PIX_FMT_RGBA,
+                                                     SWS_BILINEAR | SWS_PRINT_INFO |
+                                                         SWS_FULL_CHR_H_INT);
 
   if (!anim->img_convert_ctx) {
     fprintf(stderr, "Can't transform color space??? Bailing out...\n");
@@ -846,32 +843,48 @@ static void ffmpeg_postprocess(anim *anim, AVFrame *input, ImBuf *ibuf)
     }
   }
 
-  sws_scale(anim->img_convert_ctx,
-            (const uint8_t *const *)input->data,
-            input->linesize,
-            0,
-            anim->y,
-            anim->pFrameRGB->data,
-            anim->pFrameRGB->linesize);
+  /* If final destination image layout matches that of decoded RGB frame (including
+   * any line padding done by ffmpeg for SIMD alignment), we can directly
+   * decode into that, doing the vertical flip in the same step. Otherwise we have
+   * to do a separate flip. */
+  const int ibuf_linesize = ibuf->x * 4;
+  const int rgb_linesize = anim->pFrameRGB->linesize[0];
+  bool scale_to_ibuf = (rgb_linesize == ibuf_linesize);
+  /* swscale on arm64 before ffmpeg 6.0 (libswscale major version 7)
+   * could not handle negative line sizes. That has been fixed in all major
+   * ffmpeg releases in early 2023, but it is easier to just check for "below 7". */
+#  if (defined(__aarch64__) || defined(_M_ARM64)) && (LIBSWSCALE_VERSION_MAJOR < 7)
+  scale_to_ibuf = false;
+#  endif
+  uint8_t *rgb_data = anim->pFrameRGB->data[0];
+
+  if (scale_to_ibuf) {
+    /* Decode RGB and do vertical flip directly into destination image, by using negative
+     * line size. */
+    anim->pFrameRGB->linesize[0] = -ibuf_linesize;
+    anim->pFrameRGB->data[0] = ibuf->byte_buffer.data + (ibuf->y - 1) * ibuf_linesize;
+
+    BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
+
+    anim->pFrameRGB->linesize[0] = rgb_linesize;
+    anim->pFrameRGB->data[0] = rgb_data;
+  }
+  else {
+    /* Decode, then do vertical flip into destination. */
+    BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
+
+    /* Use negative line size to do vertical image flip. */
+    const int src_linesize[4] = {-rgb_linesize, 0, 0, 0};
+    const uint8_t *const src[4] = {
+        rgb_data + (anim->y - 1) * rgb_linesize, nullptr, nullptr, nullptr};
+    int dst_size = av_image_get_buffer_size(AVPixelFormat(anim->pFrameRGB->format),
+                                            anim->pFrameRGB->width,
+                                            anim->pFrameRGB->height,
+                                            1);
+    av_image_copy_to_buffer(
+        ibuf->byte_buffer.data, dst_size, src, src_linesize, AV_PIX_FMT_RGBA, anim->x, anim->y, 1);
+  }
 
-  /* Copy the valid bytes from the aligned buffer vertically flipped into ImBuf */
-  int aligned_stride = anim->pFrameRGB->linesize[0];
-  const uint8_t *const src[4] = {
-      anim->pFrameRGB->data[0] + (anim->y - 1) * aligned_stride, nullptr, nullptr, nullptr};
-  /* NOTE: Negative linesize is used to copy and flip image at once with function
-   * `av_image_copy_to_buffer`. This could cause issues in future and image may need to be flipped
-   * explicitly. */
-  const int src_linesize[4] = {-anim->pFrameRGB->linesize[0], 0, 0, 0};
-  int dst_size = av_image_get_buffer_size(
-      AVPixelFormat(anim->pFrameRGB->format), anim->pFrameRGB->width, anim->pFrameRGB->height, 1);
-  av_image_copy_to_buffer((uint8_t *)ibuf->byte_buffer.data,
-                          dst_size,
-                          src,
-                          src_linesize,
-                          AV_PIX_FMT_RGBA,
-                          anim->x,
-                          anim->y,
-                          1);
   if (filter_y) {
     IMB_filtery(ibuf);
   }