FFmpeg
ops_dispatch.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/mathematics.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavutil/refstruct.h"
27 
28 #include "ops.h"
29 #include "ops_internal.h"
30 #include "ops_dispatch.h"
31 #include "swscale_internal.h"
32 
33 typedef struct SwsOpPass {
37  size_t num_blocks;
42  int planes_in;
46  int idx_in[4];
47  int idx_out[4];
48  int *offsets_y;
52  bool memcpy_out;
53  size_t tail_blocks;
54  uint8_t *tail_buf; /* extra memory for fixing unpadded tails */
55  unsigned int tail_buf_size;
56 } SwsOpPass;
57 
58 static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
59  const SwsOpList *ops, SwsCompiledOp *out)
60 {
61  SwsOpList *copy;
62  SwsCompiledOp compiled = {0};
63  int ret = 0;
64 
66  if (!copy)
67  return AVERROR(ENOMEM);
68 
69  /* Ensure these are always set during compilation */
71 
72  ret = backend->compile(ctx, copy, &compiled);
73  if (ret < 0) {
74  int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
75  av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
76  backend->name, av_err2str(ret));
77  goto fail;
78  }
79 
80  compiled.backend = backend;
81  *out = compiled;
82 
83  av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
84  "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
85  backend->name, out->block_size, out->over_read, out->over_write,
86  out->cpu_flags);
87 
89 
90 fail:
92  return ret;
93 }
94 
96  const SwsOpList *ops, SwsCompiledOp *out)
97 {
98  if (backend)
99  return compile_backend(ctx, backend, ops, out);
100 
101  const SwsBackend enabled = ff_sws_enabled_backends(ctx);
102  for (int n = 0; ff_sws_op_backends[n]; n++) {
103  const SwsOpBackend *backend = ff_sws_op_backends[n];
104  if (ops->src.hw_format != backend->hw_format ||
105  ops->dst.hw_format != backend->hw_format ||
106  !(enabled & backend->flags))
107  continue;
108  if (compile_backend(ctx, backend, ops, out) < 0)
109  continue;
110 
111  return 0;
112  }
113 
114  return AVERROR(ENOTSUP);
115 }
116 
118 {
119  if (comp->free)
120  comp->free(comp->priv);
121 
122  *comp = (SwsCompiledOp) {0};
123 }
124 
125 static void op_pass_free(void *ptr)
126 {
127  SwsOpPass *p = ptr;
128  if (!p)
129  return;
130 
131  ff_sws_compiled_op_unref(&p->comp);
132  av_refstruct_unref(&p->offsets_y);
133  av_free(p->exec_base.in_bump_y);
134  av_free(p->exec_base.in_offset_x);
135  av_free(p->tail_buf);
136  av_free(p);
137 }
138 
139 static inline void get_row_data(const SwsOpPass *p, const int y_dst,
140  const uint8_t *in[4], uint8_t *out[4])
141 {
142  const SwsOpExec *base = &p->exec_base;
143  const int y_src = p->offsets_y ? p->offsets_y[y_dst] : y_dst;
144  for (int i = 0; i < p->planes_in; i++)
145  in[i] = base->in[i] + (y_src >> base->in_sub_y[i]) * base->in_stride[i];
146  for (int i = 0; i < p->planes_out; i++)
147  out[i] = base->out[i] + (y_dst >> base->out_sub_y[i]) * base->out_stride[i];
148 }
149 
150 static inline int get_lines_in(const SwsOpPass *p, const int y, const int h,
151  const int plane)
152 {
153  const SwsOpExec *base = &p->exec_base;
154  if (!p->offsets_y)
155  return h >> base->in_sub_y[plane];
156 
157  const int y0 = p->offsets_y[y] >> base->in_sub_y[plane];
158  const int y1 = p->offsets_y[y + h - 1] >> base->in_sub_y[plane];
159  return y1 - y0 + 1;
160 }
161 
162 static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
163  enum AVRounding rounding)
164 {
165  const uint64_t bits = (uint64_t) pixels * pixel_bits;
166  switch (rounding) {
167  case AV_ROUND_ZERO:
168  case AV_ROUND_DOWN:
169  return bits >> 3;
170  case AV_ROUND_INF:
171  case AV_ROUND_UP:
172  return (bits + 7) >> 3;
173  default:
174  av_unreachable("Invalid rounding mode");
175  return (size_t) -1;
176  }
177 }
178 
/**
 * Bytes of a line that remain once `plane_pad` bytes are reserved.
 *
 * The magnitude of `linesize` is used, so the sign of the stride does not
 * matter. The result is clamped to zero when the pad exceeds the line.
 */
static size_t safe_bytes_pad(int linesize, int plane_pad)
{
    av_assert1(linesize);
    const int64_t line_bytes = FFABS((int64_t) linesize);
    const int64_t usable     = line_bytes - plane_pad;
    return usable > 0 ? usable : 0;
}
185 
/**
 * Count how many leading blocks only reference offsets within a safe limit.
 *
 * Starting from `num_blocks`, blocks are dropped from the end for as long as
 * the offset of the last pixel of the remaining range exceeds `safe_offset`.
 *
 * @param num_blocks    total number of blocks in a line
 * @param block_size    pixels per block
 * @param safe_offset   largest byte offset that is still safe to access
 * @param offset_bytes  per-pixel byte offsets, at least
 *                      num_blocks * block_size entries
 * @return number of blocks (possibly 0) that stay within the limit
 */
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
                                 ptrdiff_t safe_offset,
                                 const int32_t *offset_bytes)
{
    size_t remaining;
    for (remaining = num_blocks; remaining > 0; remaining--) {
        const size_t last_px = remaining * block_size - 1;
        if (offset_bytes[last_px] <= safe_offset)
            break;
    }
    return remaining;
}
195 
/**
 * Prepare the SwsOpPass stored in pass->priv for processing `in` -> `out`.
 *
 * Fills p->exec_base with the per-plane pointers, strides, bumps and
 * subsampling shifts of both frames, and determines how many leading blocks
 * of each line ("safe blocks") can be processed directly on the frame
 * buffers given the kernel's over_read / over_write requirements. If any
 * plane lacks the required padding, the remaining tail blocks are routed
 * through a scratch buffer (p->tail_buf) described by p->exec_tail.
 *
 * @return 0 on success, AVERROR(EINVAL) if the block-aligned width
 *         overflows, AVERROR(ENOMEM) if the tail buffer cannot be allocated
 */
196 static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
197  const SwsPass *pass)
198 {
199  const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format);
200  const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
201  const bool float_in = indesc->flags & AV_PIX_FMT_FLAG_FLOAT;
202 
203  SwsOpPass *p = pass->priv;
204  SwsOpExec *exec = &p->exec_base;
205  const SwsCompiledOp *comp = &p->comp;
206 
207  /* Set up main loop parameters */
208  const unsigned block_size = comp->block_size;
209  const size_t num_blocks = (pass->width + block_size - 1) / block_size;
210  const size_t aligned_w = num_blocks * block_size;
211  if (aligned_w < pass->width) /* overflow */
212  return AVERROR(EINVAL);
213  p->num_blocks = num_blocks;
214  p->memcpy_first = false;
215  p->memcpy_last = false;
216  p->memcpy_out = false;
217 
 /* Number of leading blocks per line that may touch the frame buffers
  * directly; only ever lowered by planes with insufficient padding */
218  size_t safe_blocks = num_blocks;
219  for (int i = 0; i < p->planes_in; i++) {
220  int idx = p->idx_in[i];
221  int chroma = idx == 1 || idx == 2;
222  int sub_x = chroma ? indesc->log2_chroma_w : 0;
223  int sub_y = chroma ? indesc->log2_chroma_h : 0;
224 
225  size_t input_bytes = in->linesize[idx];
226  if (p->filter_size_h && float_in) {
227  /* Floating point inputs may contain NaN / Infinity in the padding */
228  const int plane_w = AV_CEIL_RSHIFT(in->width, sub_x);
229  input_bytes = pixel_bytes(plane_w, p->pixel_bits_in, AV_ROUND_UP);
230  }
231 
232  size_t safe_bytes = safe_bytes_pad(input_bytes, comp->over_read);
233  size_t safe_blocks_in;
234  if (exec->in_offset_x) {
 /* With an offset map, a block is safe only if its last mapped
  * offset plus the filter footprint stays within safe_bytes */
235  size_t filter_size = pixel_bytes(p->filter_size_h, p->pixel_bits_in,
236  AV_ROUND_UP);
237  safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
238  safe_bytes - filter_size,
239  exec->in_offset_x);
240  } else {
241  safe_blocks_in = safe_bytes / exec->block_size_in;
242  }
243 
244  if (safe_blocks_in < num_blocks) {
 /* A negative linesize flags the first line of the slice range as
  * needing a copy, a positive one the last line (see op_pass_run) */
245  p->memcpy_first |= in->linesize[idx] < 0;
246  p->memcpy_last |= in->linesize[idx] > 0;
247  safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
248  }
249 
250  size_t loop_size = num_blocks * exec->block_size_in;
251  exec->in[i] = in->data[idx];
252  exec->in_stride[i] = in->linesize[idx];
253  exec->in_bump[i] = in->linesize[idx] - loop_size;
254  exec->in_sub_y[i] = sub_y;
255  exec->in_sub_x[i] = sub_x;
256  }
257 
258  for (int i = 0; i < p->planes_out; i++) {
259  int idx = p->idx_out[i];
260  int chroma = idx == 1 || idx == 2;
261  int sub_x = chroma ? outdesc->log2_chroma_w : 0;
262  int sub_y = chroma ? outdesc->log2_chroma_h : 0;
263  size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
264  size_t safe_blocks_out = safe_bytes / exec->block_size_out;
265  if (safe_blocks_out < num_blocks) {
266  p->memcpy_out = true;
267  safe_blocks = FFMIN(safe_blocks, safe_blocks_out);
268  }
269 
270  size_t loop_size = num_blocks * exec->block_size_out;
271  exec->out[i] = out->data[idx];
272  exec->out_stride[i] = out->linesize[idx];
273  exec->out_bump[i] = out->linesize[idx] - loop_size;
274  exec->out_sub_y[i] = sub_y;
275  exec->out_sub_x[i] = sub_x;
276  }
277 
 /* Nothing needs copying: every block is safe and no tail state is set up */
278  const bool memcpy_in = p->memcpy_first || p->memcpy_last;
279  if (!memcpy_in && !p->memcpy_out) {
280  av_assert0(safe_blocks == num_blocks);
281  return 0;
282  }
283 
284  /* Set-up tail section parameters and buffers */
285  SwsOpExec *tail = &p->exec_tail;
286  const int align = av_cpu_max_align();
287  size_t alloc_size = 0;
288  *tail = *exec;
289 
290  const size_t safe_width = safe_blocks * block_size;
291  const size_t tail_size = pass->width - safe_width;
292  p->tail_off_out = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
293  p->tail_size_out = pixel_bytes(tail_size, p->pixel_bits_out, AV_ROUND_UP);
294  p->tail_blocks = num_blocks - safe_blocks;
295 
296  if (exec->in_offset_x) {
297  p->tail_off_in = exec->in_offset_x[safe_width];
298  p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
299  p->tail_size_in += pixel_bytes(p->filter_size_h, p->pixel_bits_in, AV_ROUND_UP);
300  } else {
301  p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN);
302  p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP);
303  }
304 
 /* First pass over the planes: compute scratch strides and the total
  * allocation size; pointers are assigned after the allocation below */
305  const size_t alloc_width = aligned_w - safe_width;
306  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
307  size_t needed_size;
308  if (exec->in_offset_x) {
309  /* The input offset map is already padded to multiples of the block
310  * size, and clamps the input offsets to the image boundaries; so
311  * we just need to compensate for the comp->over_read */
312  needed_size = p->tail_size_in;
313  } else {
314  needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
315  }
316  size_t loop_size = p->tail_blocks * exec->block_size_in;
317  tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
318  tail->in_bump[i] = tail->in_stride[i] - loop_size;
319  alloc_size += tail->in_stride[i] * in->height;
320  }
321 
322  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
323  size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
324  size_t loop_size = p->tail_blocks * exec->block_size_out;
325  tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
326  tail->out_bump[i] = tail->out_stride[i] - loop_size;
327  alloc_size += tail->out_stride[i] * out->height;
328  }
329 
330  if (memcpy_in && exec->in_offset_x) {
331  /* `in_offset_x` is indexed relative to the line start, not the start
332  * of the section being processed; so we need to over-allocate this
333  * array to the full width of the image, even though we will only
334  * partially fill in the offsets relevant to the tail region */
335  alloc_size += aligned_w * sizeof(*exec->in_offset_x);
336  }
337 
338  av_fast_mallocz(&p->tail_buf, &p->tail_buf_size, alloc_size);
339  if (!p->tail_buf)
340  return AVERROR(ENOMEM);
341 
 /* Carve up tail_buf in order: input planes, output planes, then the
  * rebased copy of the horizontal offset map (when present) */
342  uint8_t *tail_buf = p->tail_buf;
343  for (int i = 0; memcpy_in && i < p->planes_in; i++) {
344  tail->in[i] = tail_buf;
345  tail_buf += tail->in_stride[i] * in->height;
346  }
347 
348  for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
349  tail->out[i] = tail_buf;
350  tail_buf += tail->out_stride[i] * out->height;
351  }
352 
353  if (memcpy_in && exec->in_offset_x) {
354  tail->in_offset_x = (int32_t *) tail_buf;
 /* Offsets become relative to the start of the copied tail region */
355  for (int i = safe_width; i < aligned_w; i++)
356  tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in;
357  }
358 
359  return 0;
360 }
361 
/**
 * Copy `h` rows of `bytes` bytes each from `src` to `dst`.
 *
 * Strides are signed byte distances between the starts of consecutive rows.
 * Callers pass frame strides, which are negative for bottom-up (flipped)
 * planes; taking them as an unsigned type would turn a negative stride into
 * a huge positive offset and make the pointer arithmetic undefined.
 *
 * Each row address is derived from the base pointer, so no pointer past the
 * last copied row is ever formed. Source and destination rows must not
 * overlap. Nothing is copied when `h` is zero or negative.
 *
 * @param dst         first destination row
 * @param dst_stride  byte distance between destination rows (may be negative)
 * @param src         first source row
 * @param src_stride  byte distance between source rows (may be negative)
 * @param h           number of rows to copy
 * @param bytes       number of bytes to copy per row
 */
static void copy_lines(uint8_t *dst, const ptrdiff_t dst_stride,
                       const uint8_t *src, const ptrdiff_t src_stride,
                       const int h, const size_t bytes)
{
    for (int y = 0; y < h; y++)
        memcpy(dst + y * dst_stride, src + y * src_stride, bytes);
}
372 
/**
 * Run the compiled kernel over the slice of `h` lines starting at line `y`.
 *
 * When neither input nor output needs copying, the kernel is invoked once
 * over all blocks. Otherwise the leading (num_blocks - tail_blocks) blocks
 * are processed directly on the frame buffers, and the trailing tail_blocks
 * are processed through p->exec_tail: tail input is first copied into the
 * scratch buffer when required, and tail output is copied back to the frame
 * afterwards when p->memcpy_out is set.
 *
 * NOTE(review): comp->func appears to take (exec, priv, first block,
 * first line, end block, end line), judging from the three call sites
 * below; confirm against the SwsCompiledOp declaration.
 */
373 static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
374  const int h, const SwsPass *pass)
375 {
376  const SwsOpPass *p = pass->priv;
377  const SwsCompiledOp *comp = &p->comp;
378 
379  /* Fill exec metadata for this slice */
380  DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
381  exec.slice_y = y;
382  exec.slice_h = h;
383 
384  /**
385  * To ensure safety, we need to consider the following:
386  *
387  * 1. We can overread the input, unless this is the last line of an
388  * unpadded buffer. All defined operations can handle arbitrary pixel
389  * input, so overread of arbitrary data is fine. For flipped images,
390  * this condition is actually *inverted* to where the first line is
391  * the one at the end of the buffer.
392  *
393  * 2. We can overwrite the output, as long as we don't write more than the
394  * amount of pixels that fit into one linesize. So we always need to
395  * memcpy the last column on the output side if unpadded.
396  */
397 
 /* Input only needs copying for the slice that contains the frame's last
  * line (memcpy_last) or its first line (memcpy_first) */
398  const bool memcpy_in = p->memcpy_last && y + h == pass->height ||
399  p->memcpy_first && y == 0;
400  const bool memcpy_out = p->memcpy_out;
401  const size_t num_blocks = p->num_blocks;
402  const size_t tail_blocks = p->tail_blocks;
403 
404  get_row_data(p, y, exec.in, exec.out);
405  if (!memcpy_in && !memcpy_out) {
406  /* Fast path (fully aligned/padded inputs and outputs) */
407  comp->func(&exec, comp->priv, 0, y, num_blocks, y + h);
408  return;
409  }
410 
411  /* Non-aligned case (slow path); process main blocks as normal, and
412  * a separate tail (via memcpy into an appropriately padded buffer) */
413  if (num_blocks > tail_blocks) {
414  for (int i = 0; i < 4; i++) {
415  /* We process fewer blocks, so the in_bump needs to be increased
416  * to reflect that the plane pointers are left on the last block,
417  * not the end of the processed line, after each loop iteration */
418  exec.in_bump[i] += exec.block_size_in * tail_blocks;
419  exec.out_bump[i] += exec.block_size_out * tail_blocks;
420  }
421 
422  comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
423  }
424 
425  DECLARE_ALIGNED_32(SwsOpExec, tail) = p->exec_tail;
426  tail.slice_y = y;
427  tail.slice_h = h;
428 
 /* Advance the frame pointers to the start of the tail region, and the
  * scratch pointers to the rows belonging to this slice */
429  for (int i = 0; i < p->planes_in; i++) {
430  /* Input offsets are relative to the base pointer */
431  if (!exec.in_offset_x || memcpy_in)
432  exec.in[i] += p->tail_off_in;
433  tail.in[i] += y * tail.in_stride[i];
434  }
435  for (int i = 0; i < p->planes_out; i++) {
436  exec.out[i] += p->tail_off_out;
437  tail.out[i] += y * tail.out_stride[i];
438  }
439 
440  for (int i = 0; i < p->planes_in; i++) {
441  if (memcpy_in) {
442  const int lines = get_lines_in(p, y, h, i);
443  copy_lines((uint8_t *) tail.in[i], tail.in_stride[i],
444  exec.in[i], exec.in_stride[i], lines, p->tail_size_in);
445  } else {
446  /* Reuse input pointers directly */
447  const size_t loop_size = tail_blocks * exec.block_size_in;
448  tail.in[i] = exec.in[i];
449  tail.in_stride[i] = exec.in_stride[i];
450  tail.in_bump[i] = exec.in_stride[i] - loop_size;
451  }
452  }
453 
454  for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
455  /* Reuse output pointers directly */
456  const size_t loop_size = tail_blocks * exec.block_size_out;
457  tail.out[i] = exec.out[i];
458  tail.out_stride[i] = exec.out_stride[i];
459  tail.out_bump[i] = exec.out_stride[i] - loop_size;
460  }
461 
462  /* Dispatch kernel over tail */
463  av_assert1(tail_blocks > 0);
464  comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
465 
 /* Copy the valid part of the tail output from the scratch buffer back
  * into the destination frame */
466  for (int i = 0; memcpy_out && i < p->planes_out; i++) {
467  const int lines = h >> tail.out_sub_y[i];
468  copy_lines(exec.out[i], exec.out_stride[i],
469  tail.out[i], tail.out_stride[i], lines, p->tail_size_out);
470  }
471 }
472 
473 static int rw_planes(const SwsOp *op)
474 {
475  return op->rw.packed ? 1 : op->rw.elems;
476 }
477 
478 static int rw_pixel_bits(const SwsOp *op)
479 {
480  const int elems = op->rw.packed ? op->rw.elems : 1;
481  const int size = ff_sws_pixel_type_size(op->type);
482  const int bits = 8 >> op->rw.frac;
483  av_assert1(bits >= 1);
484  return elems * size * bits;
485 }
486 
487 static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
488 {
489  if (!pass)
490  return;
491 
492  /* Add at least as many pixels as needed to cover the padding requirement */
493  const int pad = (over_rw * 8 + pixel_bits - 1) / pixel_bits;
494 
495  SwsPassBuffer *buf = pass->output;
496  buf->width_align = FFMAX(buf->width_align, block_size);
497  buf->width_pad = FFMAX(buf->width_pad, pad);
498 }
499 
500 static int compile(SwsGraph *graph, const SwsOpBackend *backend,
501  const SwsOpList *ops, SwsPass *input, SwsPass **output)
502 {
503  SwsContext *ctx = graph->ctx;
504  SwsOpPass *p = av_mallocz(sizeof(*p));
505  if (!p)
506  return AVERROR(ENOMEM);
507 
508  int ret = ff_sws_ops_compile(ctx, backend, ops, &p->comp);
509  if (ret < 0)
510  goto fail;
511  else if (!output)
512  goto fail; /* nothing to do, just return */
513 
514  const SwsCompiledOp *comp = &p->comp;
515  const SwsFormat *dst = &ops->dst;
516  if (p->comp.opaque) {
517  SwsCompiledOp c = *comp;
518  av_free(p);
519  ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
520  input, c.slice_align, c.func_opaque,
521  NULL, c.priv, c.free, output);
522  if (ret >= 0)
523  (*output)->backend = comp->backend->flags;
524  return ret;
525  }
526 
527  const SwsOp *read = ff_sws_op_list_input(ops);
528  const SwsOp *write = ff_sws_op_list_output(ops);
529  p->planes_in = rw_planes(read);
530  p->planes_out = rw_planes(write);
531  p->pixel_bits_in = rw_pixel_bits(read);
532  p->pixel_bits_out = rw_pixel_bits(write);
533  p->exec_base = (SwsOpExec) {
534  .width = dst->width,
535  .height = dst->height,
536  };
537 
538  const int64_t block_bits_in = (int64_t) comp->block_size * p->pixel_bits_in;
539  const int64_t block_bits_out = (int64_t) comp->block_size * p->pixel_bits_out;
540  if (block_bits_in & 0x7 || block_bits_out & 0x7) {
541  av_log(ctx, AV_LOG_ERROR, "Block size must be a multiple of the pixel size.\n");
542  ret = AVERROR(EINVAL);
543  goto fail;
544  }
545 
546  p->exec_base.block_size_in = block_bits_in >> 3;
547  p->exec_base.block_size_out = block_bits_out >> 3;
548 
549  for (int i = 0; i < 4; i++) {
550  p->idx_in[i] = i < p->planes_in ? ops->plane_src[i] : -1;
551  p->idx_out[i] = i < p->planes_out ? ops->plane_dst[i] : -1;
552  }
553 
554  const SwsFilterWeights *filter = read->rw.kernel;
555  if (read->rw.filter == SWS_OP_FILTER_V) {
556  p->offsets_y = av_refstruct_ref(filter->offsets);
557 
558  /* Compute relative pointer bumps for each output line */
559  int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
560  if (!bump) {
561  ret = AVERROR(ENOMEM);
562  goto fail;
563  }
564 
565  int line = filter->offsets[0];
566  for (int y = 0; y < filter->dst_size - 1; y++) {
567  int next = filter->offsets[y + 1];
568  bump[y] = next - line - 1;
569  line = next;
570  }
571  bump[filter->dst_size - 1] = 0;
572  p->exec_base.in_bump_y = bump;
573  } else if (read->rw.filter == SWS_OP_FILTER_H) {
574  /* Compute pixel offset map for each output line */
575  const int pixels = FFALIGN(filter->dst_size, p->comp.block_size);
576  int32_t *offset = av_malloc_array(pixels, sizeof(*offset));
577  if (!offset) {
578  ret = AVERROR(ENOMEM);
579  goto fail;
580  }
581  p->exec_base.in_offset_x = offset;
582 
583  for (int x = 0; x < filter->dst_size; x++) {
584  /* Sanity check; if the tap would land on a half-pixel, we cannot
585  * reasonably expect the implementation to know about this. Just
586  * error out in such (theoretical) cases. */
587  int64_t bits = (int64_t) filter->offsets[x] * p->pixel_bits_in;
588  if ((bits & 0x7) || (bits >> 3) > INT32_MAX) {
589  ret = AVERROR(EINVAL);
590  goto fail;
591  }
592  offset[x] = bits >> 3;
593  }
594  for (int x = filter->dst_size; x < pixels; x++)
595  offset[x] = offset[filter->dst_size - 1];
596  p->exec_base.block_size_in = 0; /* ptr does not advance */
597  p->filter_size_h = filter->filter_size;
598  }
599 
600  ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
601  input, comp->slice_align, op_pass_run,
603  if (ret < 0)
604  return ret;
605 
606  (*output)->backend = comp->backend->flags;
607  align_pass(input, comp->block_size, comp->over_read, p->pixel_bits_in);
608  align_pass(*output, comp->block_size, comp->over_write, p->pixel_bits_out);
609  return 0;
610 
611 fail:
612  op_pass_free(p);
613  return ret;
614 }
615 
616 int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend,
617  SwsOpList **pops, int flags, SwsPass *input,
618  SwsPass **output)
619 {
620  const int passes_orig = graph->num_passes;
621  SwsContext *ctx = graph->ctx;
622  SwsOpList *ops = *pops;
623  int ret = 0;
624 
625  /* Check if the whole operation graph is an end-to-end no-op */
626  if (ff_sws_op_list_is_noop(ops)) {
627  if (output)
628  *output = input;
629  goto out;
630  }
631 
632  const SwsOp *read = ff_sws_op_list_input(ops);
633  const SwsOp *write = ff_sws_op_list_output(ops);
634  if (!read || !write) {
635  av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
636  "and write, respectively.\n");
637  ret = AVERROR(EINVAL);
638  goto out;
639  }
640 
641  if (flags & SWS_OP_FLAG_OPTIMIZE) {
643  if (ret < 0)
644  goto out;
645  av_log(ctx, AV_LOG_DEBUG, "Operation list after optimizing:\n");
647  }
648 
649  ret = compile(graph, backend, ops, input, output);
650  if (ret != AVERROR(ENOTSUP))
651  goto out;
652 
653  av_log(ctx, AV_LOG_DEBUG, "Retrying with separated filter passes.\n");
654  SwsPass *prev = input;
655  bool first = true;
656  while (ops) {
657  SwsOpList *rest;
658  ret = ff_sws_op_list_subpass(ops, &rest);
659  if (ret < 0)
660  goto out;
661 
662  if (first && !rest) {
663  /* No point in compiling an unsplit pass again */
664  ret = AVERROR(ENOTSUP);
665  goto out;
666  }
667 
668  ret = compile(graph, backend, ops, prev, output ? &prev : NULL);
669  if (ret < 0) {
670  ff_sws_op_list_free(&rest);
671  goto out;
672  }
673 
674  ff_sws_op_list_free(&ops);
675  first = false;
676  ops = rest;
677  }
678 
679  if (output) {
680  /* Return last subpass successfully compiled */
681  av_log(ctx, AV_LOG_VERBOSE, "Using %d separate passes.\n",
682  graph->num_passes - passes_orig);
683  *output = prev;
684  }
685 
686 out:
687  if (ret == AVERROR(ENOTSUP)) {
688  av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
690  }
691  if (ret < 0)
692  ff_sws_graph_rollback(graph, passes_orig);
693  ff_sws_op_list_free(&ops);
694  *pops = NULL;
695  return ret;
696 }
flags
const SwsFlags flags[]
Definition: swscale.c:85
SwsOpPass::tail_buf
uint8_t * tail_buf
Definition: ops_dispatch.c:54
copy_lines
static void copy_lines(uint8_t *dst, const size_t dst_stride, const uint8_t *src, const size_t src_stride, const int h, const size_t bytes)
Definition: ops_dispatch.c:362
AV_ROUND_UP
@ AV_ROUND_UP
Round toward +infinity.
Definition: mathematics.h:134
SwsOpPass::tail_buf_size
unsigned int tail_buf_size
Definition: ops_dispatch.c:55
rw_planes
static int rw_planes(const SwsOp *op)
Definition: ops_dispatch.c:473
ff_sws_op_list_free
void ff_sws_op_list_free(SwsOpList **p_ops)
Definition: ops.c:620
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:216
SwsGraph::ctx
SwsContext * ctx
Definition: graph.h:123
SwsPass
Represents a single filter pass in the scaling graph.
Definition: graph.h:75
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsOpPass::idx_in
int idx_in[4]
Definition: ops_dispatch.c:46
SwsOpPass::tail_size_out
int tail_size_out
Definition: ops_dispatch.c:41
ff_sws_op_list_duplicate
SwsOpList * ff_sws_op_list_duplicate(const SwsOpList *ops)
Returns a duplicate of ops, or NULL on OOM.
Definition: ops.c:634
mem_internal.h
out
static FILE * out
Definition: movenc.c:55
SwsOpPass::exec_tail
SwsOpExec exec_tail
Definition: ops_dispatch.c:36
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
SwsOpBackend::flags
SwsBackend flags
Definition: ops_dispatch.h:135
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Pointer bump, difference between stride and processed line size.
Definition: ops_dispatch.h:51
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:671
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
SwsOpExec::out_stride
ptrdiff_t out_stride[4]
Definition: ops_dispatch.h:42
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_dispatch.h:37
int64_t
long long int64_t
Definition: coverity.c:34
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:226
AV_PIX_FMT_FLAG_FLOAT
#define AV_PIX_FMT_FLAG_FLOAT
The pixel format contains IEEE-754 floating point values.
Definition: pixdesc.h:158
ops.h
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
SwsOpExec::block_size_in
int32_t block_size_in
Definition: ops_dispatch.h:57
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
AV_ROUND_ZERO
@ AV_ROUND_ZERO
Round toward zero.
Definition: mathematics.h:131
AVRounding
AVRounding
Rounding methods.
Definition: mathematics.h:130
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
base
uint8_t base
Definition: vp3data.h:128
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
SwsFrame::width
int width
Dimensions and format.
Definition: format.h:229
mathematics.h
ops_dispatch.h
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
SwsOpPass::tail_blocks
size_t tail_blocks
Definition: ops_dispatch.c:53
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:134
SwsOpPass::idx_out
int idx_out[4]
Definition: ops_dispatch.c:47
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
cpu.h
SwsPass::width
int width
Definition: graph.h:86
ff_sws_op_list_subpass
int ff_sws_op_list_subpass(SwsOpList *ops, SwsOpList **out_rest)
Eliminate SWS_OP_FILTER_* operations by merging them with prior SWS_OP_READ operations.
Definition: ops_optimizer.c:948
SwsOpList::plane_dst
uint8_t plane_dst[4]
Definition: ops.h:266
ff_sws_op_list_print
void ff_sws_op_list_print(void *log, int lev, int lev_extra, const SwsOpList *ops)
Print out the contents of an operation list.
Definition: ops.c:961
ff_sws_op_backends
const SwsOpBackend *const ff_sws_op_backends[]
Definition: ops.c:45
SwsFrame::data
uint8_t * data[4]
Definition: format.h:223
SwsOpBackend::hw_format
enum AVPixelFormat hw_format
If NONE, backend only supports software frames.
Definition: ops_dispatch.h:150
SwsOpPass::memcpy_last
bool memcpy_last
Definition: ops_dispatch.c:51
refstruct.h
get_row_data
static void get_row_data(const SwsOpPass *p, const int y_dst, const uint8_t *in[4], uint8_t *out[4])
Definition: ops_dispatch.c:139
safe_blocks_offset
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size, ptrdiff_t safe_offset, const int32_t *offset_bytes)
Definition: ops_dispatch.c:186
SwsFrame
Represents a view into a single field of frame data.
Definition: format.h:221
SwsBackend
SwsBackend
Definition: swscale.h:110
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
avassert.h
AV_LOG_TRACE
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
Definition: log.h:236
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
SwsFrame::format
enum AVPixelFormat format
Definition: format.h:230
SwsPass::priv
void * priv
Definition: graph.h:111
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
bits
uint8_t bits
Definition: vp3data.h:128
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
AV_LOG_DEBUG
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:231
SwsGraph::num_passes
int num_passes
Definition: graph.h:134
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
AVPixFmtDescriptor::log2_chroma_w
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:80
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:680
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:61
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SwsOpPass::comp
SwsCompiledOp comp
Definition: ops_dispatch.c:34
SwsOpBackend
Definition: ops_dispatch.h:133
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
if
if(ret)
Definition: filter_design.txt:179
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
fail
#define fail
Definition: test.h:478
ff_sws_op_list_is_noop
bool ff_sws_op_list_is_noop(const SwsOpList *ops)
Returns whether an op list represents a true no-op operation, i.e.
Definition: ops.c:719
op_pass_free
static void op_pass_free(void *ptr)
Definition: ops_dispatch.c:125
NULL
#define NULL
Definition: coverity.c:32
ff_sws_compiled_op_unref
void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
Definition: ops_dispatch.c:117
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
av_fast_mallocz
void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size)
Allocate and clear a buffer, reusing the given one if large enough.
Definition: mem.c:562
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:62
rw_pixel_bits
static int rw_pixel_bits(const SwsOp *op)
Definition: ops_dispatch.c:478
compile
static int compile(SwsGraph *graph, const SwsOpBackend *backend, const SwsOpList *ops, SwsPass *input, SwsPass **output)
Definition: ops_dispatch.c:500
AVPixFmtDescriptor::flags
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
Definition: pixdesc.h:94
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SwsOpPass::filter_size_h
int filter_size_h
Definition: ops_dispatch.c:49
SwsOpBackend::compile
int(* compile)(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Compile an operation list to an implementation chain.
Definition: ops_dispatch.h:143
AV_ROUND_DOWN
@ AV_ROUND_DOWN
Round toward -infinity.
Definition: mathematics.h:133
SwsPass::height
int height
Definition: graph.h:86
SwsOpExec::block_size_out
int32_t block_size_out
Definition: ops_dispatch.h:58
copy
static void copy(const float *p1, float *p2, const int length)
Definition: vf_vaguedenoiser.c:186
ff_sws_enabled_backends
SwsBackend ff_sws_enabled_backends(const SwsContext *ctx)
Definition: utils.c:71
SwsFrame::height
int height
Definition: format.h:229
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
SwsOpExec::in_sub_x
uint8_t in_sub_x[4]
Definition: ops_dispatch.h:62
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
av_err2str
#define av_err2str(errnum)
Convenience macro, the return value should be used only directly in function arguments but never stan...
Definition: error.h:122
size
int size
Definition: twinvq_data.h:10344
op_pass_setup
static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, const SwsPass *pass)
Definition: ops_dispatch.c:196
SwsOpPass::offsets_y
int * offsets_y
Definition: ops_dispatch.c:48
SwsOpList::src
SwsFormat src
Definition: ops.h:263
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:341
compile_backend
static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Definition: ops_dispatch.c:58
SwsFormat
Definition: format.h:77
SwsCompiledOp::backend
const struct SwsOpBackend * backend
Definition: ops_dispatch.h:115
align
static const uint8_t *BS_FUNC() align(BSCTX *bc)
Skip bits to a byte boundary.
Definition: bitstream_template.h:419
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
SwsPass::output
SwsPassBuffer * output
Filter output buffer.
Definition: graph.h:99
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
line
Definition: graph2dot.c:48
SWS_OP_FLAG_OPTIMIZE
@ SWS_OP_FLAG_OPTIMIZE
Definition: ops.h:342
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
SwsOpPass::planes_in
int planes_in
Definition: ops_dispatch.c:42
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
SwsOpExec::out
uint8_t * out[4]
Definition: ops_dispatch.h:38
get_lines_in
static int get_lines_in(const SwsOpPass *p, const int y, const int h, const int plane)
Definition: ops_dispatch.c:150
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more ef...
Definition: ops_optimizer.c:350
SwsPassBuffer::width_align
int width_align
Definition: graph.h:66
SwsOpPass::pixel_bits_out
int pixel_bits_out
Definition: ops_dispatch.c:45
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
SwsOpPass::planes_out
int planes_out
Definition: ops_dispatch.c:43
AV_ROUND_INF
@ AV_ROUND_INF
Round away from zero.
Definition: mathematics.h:132
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
SwsOpPass::tail_size_in
int tail_size_in
Definition: ops_dispatch.c:40
av_cpu_max_align
size_t av_cpu_max_align(void)
Get the maximum data alignment that may be required by FFmpeg.
Definition: cpu.c:287
av_assert1
#define av_assert1(cond)
assert() equivalent for checks that are not located in speed-critical code.
Definition: avassert.h:58
swscale_internal.h
DECLARE_ALIGNED_32
#define DECLARE_ALIGNED_32(t, v)
Definition: mem_internal.h:113
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ops_internal.h
SwsOpPass
Copyright (C) 2025 Niklas Haas.
Definition: ops_dispatch.c:33
pixel_bytes
static size_t pixel_bytes(size_t pixels, int pixel_bits, enum AVRounding rounding)
Definition: ops_dispatch.c:162
SwsOp
Definition: ops.h:208
SwsOpExec::out_sub_y
uint8_t out_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpExec::out_sub_x
uint8_t out_sub_x[4]
Definition: ops_dispatch.h:62
SwsOpPass::memcpy_first
bool memcpy_first
Definition: ops_dispatch.c:50
ff_sws_graph_add_pass
int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, int width, int height, SwsPass *input, int align, SwsPassFunc run, SwsPassSetup setup, void *priv, void(*free_cb)(void *priv), SwsPass **out_pass)
Allocate and add a new pass to the filter graph.
Definition: graph.c:175
ret
ret
Definition: filter_design.txt:187
SwsOpList::dst
SwsFormat dst
Definition: ops.h:263
SwsCompiledOp
Definition: ops_dispatch.h:100
SwsPassBuffer::width_pad
int width_pad
Definition: graph.h:67
SwsFormat::hw_format
enum AVPixelFormat hw_format
Definition: format.h:81
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
SwsOpPass::num_blocks
size_t num_blocks
Definition: ops_dispatch.c:37
safe_bytes_pad
static size_t safe_bytes_pad(int linesize, int plane_pad)
Definition: ops_dispatch.c:179
SwsOpPass::exec_base
SwsOpExec exec_base
Definition: ops_dispatch.c:35
ff_sws_compile_pass
int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend, SwsOpList **pops, int flags, SwsPass *input, SwsPass **output)
Resolves an operation list to a graph pass.
Definition: ops_dispatch.c:616
SwsOpExec::in_sub_y
uint8_t in_sub_y[4]
Definition: ops_dispatch.h:61
SwsOpPass::pixel_bits_in
int pixel_bits_in
Definition: ops_dispatch.c:44
SwsOpPass::tail_off_in
int tail_off_in
Definition: ops_dispatch.c:38
SwsOpPass::memcpy_out
bool memcpy_out
Definition: ops_dispatch.c:52
mem.h
SwsGraph
Filter graph, which represents a 'baked' pixel format conversion.
Definition: graph.h:122
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
align_pass
static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
Definition: ops_dispatch.c:487
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
op_pass_run
static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, const int h, const SwsPass *pass)
Definition: ops_dispatch.c:373
int32_t
int32_t
Definition: audioconvert.c:56
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
SwsPassBuffer
Represents an output buffer for a filter pass.
Definition: graph.h:59
h
h
Definition: vp9dsp_template.c:2070
width
#define width
Definition: dsp.h:89
SwsOpList::plane_src
uint8_t plane_src[4]
Definition: ops.h:266
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:258
SwsContext
Main external API structure.
Definition: swscale.h:229
SwsOpPass::tail_off_out
int tail_off_out
Definition: ops_dispatch.c:39
SwsFrame::linesize
int linesize[4]
Definition: format.h:224
AVPixFmtDescriptor::log2_chroma_h
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:89
src
#define src
Definition: vp8dsp.c:248
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_dispatch.h:52
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
ff_sws_ops_compile
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Attempt to compile a list of operations using a specific backend, or the best available backend if ba...
Definition: ops_dispatch.c:95
ff_sws_graph_rollback
void ff_sws_graph_rollback(SwsGraph *graph, int since_idx)
Remove all passes added since the given index.
Definition: graph.c:909