FFmpeg
ops_optimizer.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/bswap.h"
23 #include "libavutil/rational.h"
24 
25 #include "ops.h"
26 #include "ops_internal.h"
27 
/*
 * Evaluate x once, store the result in the enclosing function's `ret`
 * variable, and return it from the enclosing function if it is negative.
 */
#define RET(x)              \
    do {                    \
        ret = (x);          \
        if (ret < 0)        \
            return ret;     \
    } while (0)
33 
34 /**
35  * Try to commute a clear op with the next operation. Makes any adjustments
36  * to the operations as needed, but does not perform the actual commutation.
37  *
38  * Returns whether successful.
39  */
40 static bool op_commute_clear(SwsOp *op, SwsOp *next)
41 {
42  SwsOp tmp;
43 
44  av_assert1(op->op == SWS_OP_CLEAR);
45  switch (next->op) {
46  case SWS_OP_CONVERT:
47  op->type = next->convert.to;
48  /* fall through */
49  case SWS_OP_LSHIFT:
50  case SWS_OP_RSHIFT:
51  case SWS_OP_DITHER:
52  case SWS_OP_MIN:
53  case SWS_OP_MAX:
54  case SWS_OP_SCALE:
55  case SWS_OP_READ:
56  case SWS_OP_SWIZZLE:
57  ff_sws_apply_op_q(next, op->c.q4);
58  return true;
59  case SWS_OP_SWAP_BYTES:
60  switch (next->type) {
61  case SWS_PIXEL_U16:
62  ff_sws_apply_op_q(next, op->c.q4); /* always works */
63  return true;
64  case SWS_PIXEL_U32:
65  for (int i = 0; i < 4; i++) {
66  uint32_t v = av_bswap32(op->c.q4[i].num);
67  if (v > INT_MAX)
68  return false; /* can't represent as AVRational anymore */
69  tmp.c.q4[i] = Q(v);
70  }
71  op->c = tmp.c;
72  return true;
73  default:
74  return false;
75  }
76  case SWS_OP_INVALID:
77  case SWS_OP_WRITE:
78  case SWS_OP_LINEAR:
79  case SWS_OP_PACK:
80  case SWS_OP_UNPACK:
81  case SWS_OP_CLEAR:
82  return false;
83  case SWS_OP_TYPE_NB:
84  break;
85  }
86 
87  av_unreachable("Invalid operation type!");
88  return false;
89 }
90 
91  /**
92  * Try to commute a swizzle op with the next operation. Makes any adjustments
93  * to the operations as needed, but does not perform the actual commutation.
94  *
95  * Returns whether successful.
96  */
97 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
98 {
99  bool seen[4] = {0};
100 
101  av_assert1(op->op == SWS_OP_SWIZZLE);
102  switch (next->op) {
103  case SWS_OP_CONVERT:
104  op->type = next->convert.to;
105  /* fall through */
106  case SWS_OP_SWAP_BYTES:
107  case SWS_OP_LSHIFT:
108  case SWS_OP_RSHIFT:
109  case SWS_OP_SCALE:
110  return true;
111 
112  /**
113  * We can commute per-channel ops only if the per-channel constants are the
114  * same for all duplicated channels; e.g.:
115  * SWIZZLE {0, 0, 0, 3}
116  * NEXT {x, x, x, w}
117  * ->
118  * NEXT {x, _, _, w}
119  * SWIZZLE {0, 0, 0, 3}
120  */
121  case SWS_OP_MIN:
122  case SWS_OP_MAX: {
123  const SwsConst c = next->c;
124  for (int i = 0; i < 4; i++) {
125  if (next->comps.unused[i])
126  continue;
127  const int j = op->swizzle.in[i];
128  if (seen[j] && av_cmp_q(next->c.q4[j], c.q4[i]))
129  return false;
130  next->c.q4[j] = c.q4[i];
131  seen[j] = true;
132  }
133  return true;
134  }
135 
136  case SWS_OP_DITHER: {
137  const SwsDitherOp d = next->dither;
138  for (int i = 0; i < 4; i++) {
139  if (next->comps.unused[i])
140  continue;
141  const int j = op->swizzle.in[i];
142  if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
143  return false;
144  next->dither.y_offset[j] = d.y_offset[i];
145  seen[j] = true;
146  }
147  return true;
148  }
149 
150  case SWS_OP_INVALID:
151  case SWS_OP_READ:
152  case SWS_OP_WRITE:
153  case SWS_OP_SWIZZLE:
154  case SWS_OP_CLEAR:
155  case SWS_OP_LINEAR:
156  case SWS_OP_PACK:
157  case SWS_OP_UNPACK:
158  return false;
159  case SWS_OP_TYPE_NB:
160  break;
161  }
162 
163  av_unreachable("Invalid operation type!");
164  return false;
165 }
166 
/*
 * Returns p if x == (1 << p) for some p, or 0 otherwise. Note that x == 1
 * also yields 0, so a zero result does not distinguish "not a power of two"
 * from "2^0".
 */
static int exact_log2(const int x)
{
    if (x > 0) {
        const int p = av_log2(x);
        if (x == (1 << p))
            return p;
    }

    return 0;
}
176 
177 static int exact_log2_q(const AVRational x)
178 {
179  if (x.den == 1)
180  return exact_log2(x.num);
181  else if (x.num == 1)
182  return -exact_log2(x.den);
183  else
184  return 0;
185 }
186 
187 /**
188  * If a linear operation can be reduced to a scalar multiplication, returns
189  * the corresponding scaling factor, or 0 otherwise.
190  */
191 static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next,
192  SwsConst *out_scale)
193 {
194  SwsConst scale = {0};
195 
196  /* There are components not on the main diagonal */
197  if (c->mask & ~SWS_MASK_DIAG4)
198  return false;
199 
200  for (int i = 0; i < 4; i++) {
201  const AVRational s = c->m[i][i];
202  if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i])
203  continue;
204  if (scale.q.den && av_cmp_q(s, scale.q))
205  return false;
206  scale.q = s;
207  }
208 
209  if (scale.q.den)
210  *out_scale = scale;
211  return scale.q.den;
212 }
213 
214 /* Extracts an integer clear operation (subset) from the given linear op. */
216  SwsConst *out_clear)
217 {
218  SwsConst clear = {0};
219  bool ret = false;
220 
221  for (int i = 0; i < 4; i++) {
222  bool const_row = c->m[i][4].den == 1; /* offset is integer */
223  for (int j = 0; j < 4; j++) {
224  const_row &= c->m[i][j].num == 0 || /* scalar is zero */
225  (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
226  }
227  if (const_row && (c->mask & SWS_MASK_ROW(i))) {
228  clear.q4[i] = c->m[i][4];
229  for (int j = 0; j < 5; j++)
230  c->m[i][j] = Q(i == j);
231  c->mask &= ~SWS_MASK_ROW(i);
232  ret = true;
233  }
234  }
235 
236  if (ret)
237  *out_clear = clear;
238  return ret;
239 }
240 
241 /* Unswizzle a linear operation by aligning single-input rows with
242  * their corresponding diagonal */
243 static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
244 {
245  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
246  SwsLinearOp c = *op;
247 
248  /* Find non-zero coefficients in the main 4x4 matrix */
249  uint32_t nonzero = 0;
250  for (int i = 0; i < 4; i++) {
251  for (int j = 0; j < 4; j++) {
252  if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO))
253  continue;
254  nonzero |= SWS_MASK(i, j);
255  }
256  }
257 
258  /* If a value is unique in its row and the target column is
259  * empty, move it there and update the input swizzle */
260  for (int i = 0; i < 4; i++) {
261  if (nonzero & SWS_MASK_COL(i))
262  continue; /* target column is not empty */
263  for (int j = 0; j < 4; j++) {
264  if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
265  /* Move coefficient to the diagonal */
266  c.m[i][i] = c.m[i][j];
267  c.m[i][j] = Q(0);
268  swiz.in[i] = j;
269  break;
270  }
271  }
272  }
273 
274  if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
275  return false; /* no swizzle was identified */
276 
277  c.mask = ff_sws_linear_mask(c);
278  *out_swiz = swiz;
279  *op = c;
280  return true;
281 }
282 
284 {
285  int ret;
286 
287 retry:
289 
290  /* Apply all in-place optimizations (that do not re-order the list) */
291  for (int n = 0; n < ops->num_ops; n++) {
292  SwsOp dummy = {0};
293  SwsOp *op = &ops->ops[n];
294  SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
295  SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
296 
297  /* common helper variable */
298  bool noop = true;
299 
300  if (next->comps.unused[0] && next->comps.unused[1] &&
301  next->comps.unused[2] && next->comps.unused[3])
302  {
303  /* Remove completely unused operations */
304  ff_sws_op_list_remove_at(ops, n, 1);
305  goto retry;
306  }
307 
308  switch (op->op) {
309  case SWS_OP_READ:
310  /* "Compress" planar reads where not all components are needed */
311  if (!op->rw.packed) {
312  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
313  int nb_planes = 0;
314  for (int i = 0; i < op->rw.elems; i++) {
315  if (next->comps.unused[i]) {
316  swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
317  continue;
318  }
319 
320  const int idx = nb_planes++;
321  av_assert1(idx <= i);
322  ops->order_src.in[idx] = ops->order_src.in[i];
323  swiz.in[i] = idx;
324  }
325 
326  if (nb_planes < op->rw.elems) {
327  op->rw.elems = nb_planes;
328  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
329  .op = SWS_OP_SWIZZLE,
330  .type = op->type,
331  .swizzle = swiz,
332  }));
333  goto retry;
334  }
335  }
336  break;
337 
338  case SWS_OP_SWAP_BYTES:
339  /* Redundant (double) swap */
340  if (next->op == SWS_OP_SWAP_BYTES) {
341  ff_sws_op_list_remove_at(ops, n, 2);
342  goto retry;
343  }
344  break;
345 
346  case SWS_OP_UNPACK:
347  /* Redundant unpack+pack */
348  if (next->op == SWS_OP_PACK && next->type == op->type &&
349  next->pack.pattern[0] == op->pack.pattern[0] &&
350  next->pack.pattern[1] == op->pack.pattern[1] &&
351  next->pack.pattern[2] == op->pack.pattern[2] &&
352  next->pack.pattern[3] == op->pack.pattern[3])
353  {
354  ff_sws_op_list_remove_at(ops, n, 2);
355  goto retry;
356  }
357  break;
358 
359  case SWS_OP_LSHIFT:
360  case SWS_OP_RSHIFT:
361  /* Two shifts in the same direction */
362  if (next->op == op->op) {
363  op->c.u += next->c.u;
364  ff_sws_op_list_remove_at(ops, n + 1, 1);
365  goto retry;
366  }
367 
368  /* No-op shift */
369  if (!op->c.u) {
370  ff_sws_op_list_remove_at(ops, n, 1);
371  goto retry;
372  }
373  break;
374 
375  case SWS_OP_CLEAR:
376  for (int i = 0; i < 4; i++) {
377  if (!op->c.q4[i].den)
378  continue;
379 
380  if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
381  !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
382  op->c.q4[i].num == 0)
383  {
384  /* Redundant clear-to-zero of zero component */
385  op->c.q4[i].den = 0;
386  } else if (next->comps.unused[i]) {
387  /* Unnecessary clear of unused component */
388  op->c.q4[i] = (AVRational) {0, 0};
389  } else if (op->c.q4[i].den) {
390  noop = false;
391  }
392  }
393 
394  if (noop) {
395  ff_sws_op_list_remove_at(ops, n, 1);
396  goto retry;
397  }
398 
399  /* Transitive clear */
400  if (next->op == SWS_OP_CLEAR) {
401  for (int i = 0; i < 4; i++) {
402  if (next->c.q4[i].den)
403  op->c.q4[i] = next->c.q4[i];
404  }
405  ff_sws_op_list_remove_at(ops, n + 1, 1);
406  goto retry;
407  }
408  break;
409 
410  case SWS_OP_SWIZZLE:
411  for (int i = 0; i < 4; i++) {
412  if (next->comps.unused[i])
413  continue;
414  if (op->swizzle.in[i] != i)
415  noop = false;
416  }
417 
418  /* Identity swizzle */
419  if (noop) {
420  ff_sws_op_list_remove_at(ops, n, 1);
421  goto retry;
422  }
423 
424  /* Transitive swizzle */
425  if (next->op == SWS_OP_SWIZZLE) {
426  const SwsSwizzleOp orig = op->swizzle;
427  for (int i = 0; i < 4; i++)
428  op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
429  ff_sws_op_list_remove_at(ops, n + 1, 1);
430  goto retry;
431  }
432 
433  /* Swizzle planes instead of components, if possible */
434  if (prev->op == SWS_OP_READ && !prev->rw.packed) {
435  for (int dst = 0; dst < prev->rw.elems; dst++) {
436  const int src = op->swizzle.in[dst];
437  if (src > dst && src < prev->rw.elems) {
438  FFSWAP(int, ops->order_src.in[dst], ops->order_src.in[src]);
439  for (int i = dst; i < 4; i++) {
440  if (op->swizzle.in[i] == dst)
441  op->swizzle.in[i] = src;
442  else if (op->swizzle.in[i] == src)
443  op->swizzle.in[i] = dst;
444  }
445  goto retry;
446  }
447  }
448  }
449 
450  if (next->op == SWS_OP_WRITE && !next->rw.packed) {
451  for (int dst = 0; dst < next->rw.elems; dst++) {
452  const int src = op->swizzle.in[dst];
453  if (src > dst && src < next->rw.elems) {
454  FFSWAP(int, ops->order_dst.in[dst], ops->order_dst.in[src]);
455  FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
456  goto retry;
457  }
458  }
459  }
460  break;
461 
462  case SWS_OP_CONVERT:
463  /* No-op conversion */
464  if (op->type == op->convert.to) {
465  ff_sws_op_list_remove_at(ops, n, 1);
466  goto retry;
467  }
468 
469  /* Transitive conversion */
470  if (next->op == SWS_OP_CONVERT &&
471  op->convert.expand == next->convert.expand)
472  {
473  av_assert1(op->convert.to == next->type);
474  op->convert.to = next->convert.to;
475  ff_sws_op_list_remove_at(ops, n + 1, 1);
476  goto retry;
477  }
478 
479  /* Conversion followed by integer expansion */
480  if (next->op == SWS_OP_SCALE && !op->convert.expand &&
481  !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to)))
482  {
483  op->convert.expand = true;
484  ff_sws_op_list_remove_at(ops, n + 1, 1);
485  goto retry;
486  }
487  break;
488 
489  case SWS_OP_MIN:
490  for (int i = 0; i < 4; i++) {
491  if (next->comps.unused[i] || !op->c.q4[i].den)
492  continue;
493  if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0)
494  noop = false;
495  }
496 
497  if (noop) {
498  ff_sws_op_list_remove_at(ops, n, 1);
499  goto retry;
500  }
501  break;
502 
503  case SWS_OP_MAX:
504  for (int i = 0; i < 4; i++) {
505  if (next->comps.unused[i] || !op->c.q4[i].den)
506  continue;
507  if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0)
508  noop = false;
509  }
510 
511  if (noop) {
512  ff_sws_op_list_remove_at(ops, n, 1);
513  goto retry;
514  }
515  break;
516 
517  case SWS_OP_DITHER:
518  for (int i = 0; i < 4; i++) {
519  noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) ||
520  next->comps.unused[i];
521  }
522 
523  if (noop) {
524  ff_sws_op_list_remove_at(ops, n, 1);
525  goto retry;
526  }
527  break;
528 
529  case SWS_OP_LINEAR: {
530  SwsSwizzleOp swizzle;
531  SwsConst c;
532 
533  /* No-op (identity) linear operation */
534  if (!op->lin.mask) {
535  ff_sws_op_list_remove_at(ops, n, 1);
536  goto retry;
537  }
538 
539  if (next->op == SWS_OP_LINEAR) {
540  /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
541  const SwsLinearOp m1 = op->lin;
542  const SwsLinearOp m2 = next->lin;
543  for (int i = 0; i < 4; i++) {
544  for (int j = 0; j < 5; j++) {
545  AVRational sum = Q(0);
546  for (int k = 0; k < 4; k++)
547  sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
548  if (j == 4) /* m1.m[4][j] == 1 */
549  sum = av_add_q(sum, m2.m[i][4]);
550  op->lin.m[i][j] = sum;
551  }
552  }
553  op->lin.mask = ff_sws_linear_mask(op->lin);
554  ff_sws_op_list_remove_at(ops, n + 1, 1);
555  goto retry;
556  }
557 
558  /* Optimize away zero columns */
559  for (int j = 0; j < 4; j++) {
560  const uint32_t col = SWS_MASK_COL(j);
561  if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
562  continue;
563  for (int i = 0; i < 4; i++)
564  op->lin.m[i][j] = Q(i == j);
565  op->lin.mask &= ~col;
566  goto retry;
567  }
568 
569  /* Optimize away unused rows */
570  for (int i = 0; i < 4; i++) {
571  const uint32_t row = SWS_MASK_ROW(i);
572  if (!next->comps.unused[i] || !(op->lin.mask & row))
573  continue;
574  for (int j = 0; j < 5; j++)
575  op->lin.m[i][j] = Q(i == j);
576  op->lin.mask &= ~row;
577  goto retry;
578  }
579 
580  /* Convert constant rows to explicit clear instruction */
581  if (extract_constant_rows(&op->lin, prev->comps, &c)) {
582  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
583  .op = SWS_OP_CLEAR,
584  .type = op->type,
585  .comps = op->comps,
586  .c = c,
587  }));
588  goto retry;
589  }
590 
591  /* Multiplication by scalar constant */
592  if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) {
593  op->op = SWS_OP_SCALE;
594  op->c = c;
595  goto retry;
596  }
597 
598  /* Swizzle by fixed pattern */
599  if (extract_swizzle(&op->lin, prev->comps, &swizzle)) {
600  RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
601  .op = SWS_OP_SWIZZLE,
602  .type = op->type,
603  .swizzle = swizzle,
604  }));
605  goto retry;
606  }
607  break;
608  }
609 
610  case SWS_OP_SCALE: {
611  const int factor2 = exact_log2_q(op->c.q);
612 
613  /* No-op scaling */
614  if (op->c.q.num == 1 && op->c.q.den == 1) {
615  ff_sws_op_list_remove_at(ops, n, 1);
616  goto retry;
617  }
618 
619  /* Scaling by exact power of two */
620  if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
621  op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
622  op->c.u = FFABS(factor2);
623  goto retry;
624  }
625  break;
626  }
627  }
628  }
629 
630  /* Push clears to the back to void any unused components */
631  for (int n = 0; n < ops->num_ops - 1; n++) {
632  SwsOp *op = &ops->ops[n];
633  SwsOp *next = &ops->ops[n + 1];
634 
635  switch (op->op) {
636  case SWS_OP_CLEAR:
637  if (op_commute_clear(op, next)) {
638  FFSWAP(SwsOp, *op, *next);
639  goto retry;
640  }
641  break;
642  }
643  }
644 
645  /* Apply any remaining preferential re-ordering optimizations; do these
646  * last because they are more likely to block other optimizations if done
647  * too aggressively */
648  for (int n = 0; n < ops->num_ops - 1; n++) {
649  SwsOp *op = &ops->ops[n];
650  SwsOp *next = &ops->ops[n + 1];
651 
652  switch (op->op) {
653  case SWS_OP_SWIZZLE: {
654  /* Try to push swizzles towards the output */
655  if (op_commute_swizzle(op, next)) {
656  FFSWAP(SwsOp, *op, *next);
657  goto retry;
658  }
659  break;
660  }
661 
662  case SWS_OP_SCALE:
663  /* Scaling by integer before conversion to int */
664  if (op->c.q.den == 1 && next->op == SWS_OP_CONVERT &&
666  {
667  op->type = next->convert.to;
668  FFSWAP(SwsOp, *op, *next);
669  goto retry;
670  }
671  break;
672  }
673  }
674 
675  return 0;
676 }
677 
678 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
679  int size, uint8_t clear_val,
680  int *read_bytes, int *write_bytes)
681 {
682  if (!ops->num_ops)
683  return AVERROR(EINVAL);
684 
685  const SwsOp read = ops->ops[0];
686  const int read_size = ff_sws_pixel_type_size(read.type);
687  uint32_t mask[4] = {0};
688 
689  if (read.op != SWS_OP_READ || read.rw.frac ||
690  (!read.rw.packed && read.rw.elems > 1))
691  return AVERROR(ENOTSUP);
692 
693  for (int i = 0; i < read.rw.elems; i++)
694  mask[i] = 0x01010101 * i * read_size + 0x03020100;
695 
696  for (int opidx = 1; opidx < ops->num_ops; opidx++) {
697  const SwsOp *op = &ops->ops[opidx];
698  switch (op->op) {
699  case SWS_OP_SWIZZLE: {
700  uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
701  for (int i = 0; i < 4; i++)
702  mask[i] = orig[op->swizzle.in[i]];
703  break;
704  }
705 
706  case SWS_OP_SWAP_BYTES:
707  for (int i = 0; i < 4; i++) {
708  switch (ff_sws_pixel_type_size(op->type)) {
709  case 2: mask[i] = av_bswap16(mask[i]); break;
710  case 4: mask[i] = av_bswap32(mask[i]); break;
711  }
712  }
713  break;
714 
715  case SWS_OP_CLEAR:
716  for (int i = 0; i < 4; i++) {
717  if (!op->c.q4[i].den)
718  continue;
719  if (op->c.q4[i].num != 0 || !clear_val)
720  return AVERROR(ENOTSUP);
721  mask[i] = 0x1010101ul * clear_val;
722  }
723  break;
724 
725  case SWS_OP_CONVERT: {
726  if (!op->convert.expand)
727  return AVERROR(ENOTSUP);
728  for (int i = 0; i < 4; i++) {
729  switch (ff_sws_pixel_type_size(op->type)) {
730  case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
731  case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
732  }
733  }
734  break;
735  }
736 
737  case SWS_OP_WRITE: {
738  if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
739  return AVERROR(ENOTSUP);
740 
741  /* Initialize to no-op */
742  memset(shuffle, clear_val, size);
743 
744  const int write_size = ff_sws_pixel_type_size(op->type);
745  const int read_chunk = read.rw.elems * read_size;
746  const int write_chunk = op->rw.elems * write_size;
747  const int num_groups = size / FFMAX(read_chunk, write_chunk);
748  for (int n = 0; n < num_groups; n++) {
749  const int base_in = n * read_chunk;
750  const int base_out = n * write_chunk;
751  for (int i = 0; i < op->rw.elems; i++) {
752  const int offset = base_out + i * write_size;
753  for (int b = 0; b < write_size; b++) {
754  const uint8_t idx = mask[i] >> (b * 8);
755  if (idx != clear_val)
756  shuffle[offset + b] = base_in + idx;
757  }
758  }
759  }
760 
761  *read_bytes = num_groups * read_chunk;
762  *write_bytes = num_groups * write_chunk;
763  return num_groups;
764  }
765 
766  default:
767  return AVERROR(ENOTSUP);
768  }
769  }
770 
771  return AVERROR(EINVAL);
772 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:47
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:33
SwsComps::flags
unsigned flags[4]
Definition: ops.h:90
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:50
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:55
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:53
SwsSwizzleOp::mask
uint32_t mask
Definition: ops.h:126
SwsConst
Definition: ops.h:79
SWS_COMP_ZERO
@ SWS_COMP_ZERO
Definition: ops.h:75
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:59
ff_sws_linear_mask
uint32_t ff_sws_linear_mask(const SwsLinearOp c)
Definition: ops.c:595
SwsOp::swizzle
SwsSwizzleOp swizzle
Definition: ops.h:193
SwsLinearOp::m
AVRational m[4][5]
Generalized 5x5 affine transformation: [ Out.x ] = [ A B C D E ] [ Out.y ] = [ F G H I J ] * [ x y z ...
Definition: ops.h:158
SwsComps::unused
bool unused[4]
Definition: ops.h:91
SwsOp::convert
SwsConvertOp convert
Definition: ops.h:194
rational.h
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:191
ops.h
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:67
read_bytes
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:442
b
#define b
Definition: input.c:42
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more ef...
Definition: ops_optimizer.c:283
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:34
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:69
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:65
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:164
SwsComps::max
AVRational max[4]
Definition: ops.h:95
SwsOpList::num_ops
int num_ops
Definition: ops.h:224
SWS_MASK_COL
#define SWS_MASK_COL(J)
Definition: ops.h:165
SwsDitherOp
Definition: ops.h:139
dummy
int dummy
Definition: motion.c:64
SwsOp::c
SwsConst c
Definition: ops.h:196
SwsSwizzleOp
Definition: ops.h:120
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:80
AVRational::num
int num
Numerator.
Definition: rational.h:59
SwsOp::op
SwsOpType op
Definition: ops.h:187
Q
#define Q(q)
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:63
avassert.h
SwsDitherOp::y_offset
uint8_t y_offset[4]
Definition: ops.h:142
s
#define s(width, name)
Definition: cbs_vp9.c:198
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:132
SwsComps::min
AVRational min[4]
Definition: ops.h:95
read_chunk
static int read_chunk(AVFormatContext *s)
Definition: dhav.c:173
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:61
exact_log2_q
static int exact_log2_q(const AVRational x)
Definition: ops_optimizer.c:177
ff_sws_pixel_expand
static AVRational ff_sws_pixel_expand(SwsPixelType from, SwsPixelType to)
Definition: ops_internal.h:30
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:66
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:54
SwsOp::dither
SwsDitherOp dither
Definition: ops.h:195
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
SWS_COMP_GARBAGE
@ SWS_COMP_GARBAGE
Definition: ops.h:73
SwsConvertOp::to
SwsPixelType to
Definition: ops.h:135
ff_sws_op_list_remove_at
void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count)
Definition: ops.c:524
RET
#define RET(x)
Copyright (C) 2025 Niklas Haas.
Definition: ops_optimizer.c:28
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:162
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_apply_op_q
void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
Apply an operation to an AVRational.
Definition: ops.c:108
SwsConvertOp::expand
bool expand
Definition: ops.h:136
SwsOpList::order_dst
SwsSwizzleOp order_dst
Definition: ops.h:227
SwsPackOp::pattern
uint8_t pattern[4]
Packed bits are assumed to be LSB-aligned within the underlying integer type; i.e.
Definition: ops.h:117
SwsConst::q
AVRational q
Definition: ops.h:82
extract_constant_rows
static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, SwsConst *out_clear)
Definition: ops_optimizer.c:215
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
av_bswap32
#define av_bswap32
Definition: bswap.h:47
SwsOp::type
SwsPixelType type
Definition: ops.h:188
ff_sws_op_list_insert_at
int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
Definition: ops.c:534
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:56
SwsOp::lin
SwsLinearOp lin
Definition: ops.h:190
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:44
extract_scalar
static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next, SwsConst *out_scale)
If a linear operation can be reduced to a scalar multiplication, returns the corresponding scaling fa...
Definition: ops_optimizer.c:191
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:225
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:48
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
SwsOp::comps
SwsComps comps
Metadata about the operation's input/output components.
Definition: ops.h:206
SwsLinearOp
Definition: ops.h:145
noop
#define noop(a)
Definition: h264chroma_template.c:71
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
extract_swizzle
static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz)
Definition: ops_optimizer.c:243
SwsOpList::ops
SwsOp * ops
Definition: ops.h:223
SwsOpList::order_src
SwsSwizzleOp order_src
Definition: ops.h:227
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SwsConst::q4
AVRational q4[4]
Definition: ops.h:81
ops_internal.h
SwsOp
Definition: ops.h:186
write_bytes
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:484
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
ret
ret
Definition: filter_design.txt:187
bswap.h
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:62
op_commute_swizzle
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
Try to commute a swizzle op with the next operation.
Definition: ops_optimizer.c:97
SwsComps
Definition: ops.h:89
SwsConst::u
unsigned u
Definition: ops.h:83
AVRational::den
int den
Denominator.
Definition: rational.h:60
SwsReadWriteOp::packed
bool packed
Definition: ops.h:101
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:49
ff_sws_solve_shuffle
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
Definition: ops_optimizer.c:678
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
SWS_COMP_EXACT
@ SWS_COMP_EXACT
Definition: ops.h:74
SwsReadWriteOp::elems
uint8_t elems
Definition: ops.h:99
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
av_add_q
AVRational av_add_q(AVRational b, AVRational c)
Add two rationals.
Definition: rational.c:93
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:178
SwsSwizzleOp::in
uint8_t in[4]
Definition: ops.h:127
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:60
op_commute_clear
static bool op_commute_clear(SwsOp *op, SwsOp *next)
Try to commute a clear op with the next operation.
Definition: ops_optimizer.c:40
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:222
av_bswap16
#define av_bswap16
Definition: bswap.h:28
SwsOp::pack
SwsPackOp pack
Definition: ops.h:192
shuffle
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
Definition: des.c:179
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
src
#define src
Definition: vp8dsp.c:248
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
exact_log2
static int exact_log2(const int x)
Definition: ops_optimizer.c:168