FFmpeg
ops_optimizer.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "libavutil/avassert.h"
23 #include "libavutil/bswap.h"
24 #include "libavutil/rational.h"
25 
26 #include "ops.h"
27 #include "ops_internal.h"
28 
/* Evaluate x into the enclosing function's `ret` variable and, if the
 * result is negative, return it from the enclosing function right away. */
#define RET(x)                                                                  \
    do {                                                                        \
        ret = (x);                                                              \
        if (ret < 0)                                                            \
            return ret;                                                         \
    } while (0)
34 
35 /**
36  * Try to commute a clear op with the next operation. Makes any adjustments
37  * to the operations as needed, but does not perform the actual commutation.
38  *
39  * Returns whether successful.
40  */
41 static bool op_commute_clear(SwsOp *op, SwsOp *next)
42 {
43  SwsClearOp tmp = {0};
44 
45  av_assert1(op->op == SWS_OP_CLEAR);
46  switch (next->op) {
47  case SWS_OP_CONVERT:
48  op->type = next->convert.to;
50  case SWS_OP_LSHIFT:
51  case SWS_OP_RSHIFT:
52  case SWS_OP_DITHER:
53  case SWS_OP_MIN:
54  case SWS_OP_MAX:
55  case SWS_OP_SCALE:
56  case SWS_OP_READ:
57  ff_sws_apply_op_q(next, op->clear.value);
58  return true;
59  case SWS_OP_FILTER_H:
60  case SWS_OP_FILTER_V:
61  op->type = next->filter.type;
62  return true;
63  case SWS_OP_SWIZZLE:
64  ff_sws_comp_mask_swizzle(&op->clear.mask, &next->swizzle);
65  ff_sws_apply_op_q(next, op->clear.value);
66  return true;
67  case SWS_OP_SWAP_BYTES:
68  switch (next->type) {
69  case SWS_PIXEL_U16:
70  ff_sws_apply_op_q(next, op->clear.value); /* always works */
71  return true;
72  case SWS_PIXEL_U32:
73  for (int i = 0; i < 4; i++) {
74  if (!SWS_COMP_TEST(op->clear.mask, i))
75  continue;
76  uint32_t v = av_bswap32(op->clear.value[i].num);
77  if (v > INT_MAX)
78  return false; /* can't represent as AVRational anymore */
79  tmp.value[i] = Q(v);
80  }
81  op->clear = tmp;
82  return true;
83  default:
84  return false;
85  }
86  case SWS_OP_INVALID:
87  case SWS_OP_WRITE:
88  case SWS_OP_LINEAR:
89  case SWS_OP_PACK:
90  case SWS_OP_UNPACK:
91  case SWS_OP_CLEAR:
92  return false;
93  case SWS_OP_TYPE_NB:
94  break;
95  }
96 
97  av_unreachable("Invalid operation type!");
98  return false;
99 }
100 
101  /**
102  * Try to commute a swizzle op with the next operation. Makes any adjustments
103  * to the operations as needed, but does not perform the actual commutation.
104  *
105  * Returns whether successful.
106  */
107 static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
108 {
109  bool seen[4] = {0};
110 
111  av_assert1(op->op == SWS_OP_SWIZZLE);
112  switch (next->op) {
113  case SWS_OP_CONVERT:
114  op->type = next->convert.to;
116  case SWS_OP_SWAP_BYTES:
117  case SWS_OP_LSHIFT:
118  case SWS_OP_RSHIFT:
119  case SWS_OP_SCALE:
120  return true;
121  case SWS_OP_FILTER_H:
122  case SWS_OP_FILTER_V:
123  op->type = next->filter.type;
124  return true;
125 
126  /**
127  * We can commute per-channel ops only if the per-channel constants are the
128  * same for all duplicated channels; e.g.:
129  * SWIZZLE {0, 0, 0, 3}
130  * NEXT {x, x, x, w}
131  * ->
132  * NEXT {x, _, _, w}
133  * SWIZZLE {0, 0, 0, 3}
134  */
135  case SWS_OP_MIN:
136  case SWS_OP_MAX: {
137  const SwsClampOp c = next->clamp;
138  for (int i = 0; i < 4; i++) {
139  if (!SWS_OP_NEEDED(op, i))
140  continue;
141  const int j = op->swizzle.in[i];
142  if (seen[j] && av_cmp_q(next->clamp.limit[j], c.limit[i]))
143  return false;
144  next->clamp.limit[j] = c.limit[i];
145  seen[j] = true;
146  }
147  return true;
148  }
149 
150  case SWS_OP_DITHER: {
151  const SwsDitherOp d = next->dither;
152  for (int i = 0; i < 4; i++) {
153  if (!SWS_OP_NEEDED(op, i))
154  continue;
155  const int j = op->swizzle.in[i];
156  if (seen[j] && next->dither.y_offset[j] != d.y_offset[i])
157  return false;
158  next->dither.y_offset[j] = d.y_offset[i];
159  seen[j] = true;
160  }
161  return true;
162  }
163 
164  case SWS_OP_INVALID:
165  case SWS_OP_READ:
166  case SWS_OP_WRITE:
167  case SWS_OP_SWIZZLE:
168  case SWS_OP_CLEAR:
169  case SWS_OP_LINEAR:
170  case SWS_OP_PACK:
171  case SWS_OP_UNPACK:
172  return false;
173  case SWS_OP_TYPE_NB:
174  break;
175  }
176 
177  av_unreachable("Invalid operation type!");
178  return false;
179 }
180 
181 /**
182  * Try to commute a filter op with the previous operation. Makes any
183  * adjustments to the operations as needed, but does not perform the actual
184  * commutation.
185  *
186  * Returns whether successful.
187  */
188 static bool op_commute_filter(SwsOp *op, SwsOp *prev)
189 {
190  av_assert0(!ff_sws_pixel_type_is_int(op->filter.type));
191 
192  switch (prev->op) {
193  case SWS_OP_SWIZZLE:
194  case SWS_OP_SCALE:
195  case SWS_OP_LINEAR:
196  case SWS_OP_DITHER:
197  prev->type = op->filter.type;
198  return true;
199  case SWS_OP_CONVERT:
200  case SWS_OP_INVALID:
201  case SWS_OP_READ:
202  case SWS_OP_WRITE:
203  case SWS_OP_SWAP_BYTES:
204  case SWS_OP_UNPACK:
205  case SWS_OP_PACK:
206  case SWS_OP_LSHIFT:
207  case SWS_OP_RSHIFT:
208  case SWS_OP_CLEAR:
209  case SWS_OP_MIN:
210  case SWS_OP_MAX:
211  case SWS_OP_FILTER_H:
212  case SWS_OP_FILTER_V:
213  return false;
214  case SWS_OP_TYPE_NB:
215  break;
216  }
217 
218  av_unreachable("Invalid operation type!");
219  return false;
220 }
221 
/* Returns log2(x) if x is a power of two, and 0 for every other input
 * (including non-positive values). Note that x == 1 also yields 0. */
static int exact_log2(const int x)
{
    /* Reject non-positive values and values with more than one bit set */
    if (x <= 0 || (x & (x - 1)))
        return 0;

    return av_log2(x);
}
231 
232 static int exact_log2_q(const AVRational x)
233 {
234  if (x.den == 1)
235  return exact_log2(x.num);
236  else if (x.num == 1)
237  return -exact_log2(x.den);
238  else
239  return 0;
240 }
241 
242 /**
243  * If a linear operation can be reduced to a scalar multiplication, returns
244  * the corresponding scaling factor, or 0 otherwise.
245  */
246 static bool extract_scalar(const SwsLinearOp *c,
247  const SwsComps *comps, const SwsComps *prev,
248  SwsScaleOp *out_scale)
249 {
250  SwsScaleOp scale = {0};
251 
252  /* There are components not on the main diagonal */
253  if (c->mask & ~SWS_MASK_DIAG4)
254  return false;
255 
256  for (int i = 0; i < 4; i++) {
257  const AVRational s = c->m[i][i];
258  if ((prev->flags[i] & SWS_COMP_ZERO) ||
259  (comps->flags[i] & SWS_COMP_GARBAGE))
260  continue;
261  if (scale.factor.den && av_cmp_q(s, scale.factor))
262  return false;
263  scale.factor = s;
264  }
265 
266  if (scale.factor.den)
267  *out_scale = scale;
268  return scale.factor.den;
269 }
270 
271 /* Extracts an integer clear operation (subset) from the given linear op. */
272 static bool extract_constant_rows(SwsLinearOp *c, const SwsComps *prev,
273  SwsClearOp *out_clear)
274 {
275  SwsClearOp clear = {0};
276  bool ret = false;
277 
278  for (int i = 0; i < 4; i++) {
279  bool const_row = c->m[i][4].den == 1; /* offset is integer */
280  for (int j = 0; j < 4; j++) {
281  const_row &= c->m[i][j].num == 0 || /* scalar is zero */
282  (prev->flags[j] & SWS_COMP_ZERO); /* input is zero */
283  }
284  if (const_row && (c->mask & SWS_MASK_ROW(i))) {
285  clear.mask |= SWS_COMP(i);
286  clear.value[i] = c->m[i][4];
287  for (int j = 0; j < 5; j++)
288  c->m[i][j] = Q(i == j);
289  c->mask &= ~SWS_MASK_ROW(i);
290  ret = true;
291  }
292  }
293 
294  if (ret)
295  *out_clear = clear;
296  return ret;
297 }
298 
299 /* Unswizzle a linear operation by aligning single-input rows with
300  * their corresponding diagonal */
301 static bool extract_swizzle(SwsLinearOp *op, const SwsComps *prev,
302  SwsSwizzleOp *out_swiz)
303 {
304  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
305  SwsLinearOp c = *op;
306 
307  /* Find non-zero coefficients in the main 4x4 matrix */
308  uint32_t nonzero = 0;
309  for (int i = 0; i < 4; i++) {
310  for (int j = 0; j < 4; j++) {
311  if (!c.m[i][j].num || (prev->flags[j] & SWS_COMP_ZERO))
312  continue;
313  nonzero |= SWS_MASK(i, j);
314  }
315  }
316 
317  /* If a value is unique in its row and the target column is
318  * empty, move it there and update the input swizzle */
319  for (int i = 0; i < 4; i++) {
320  if (nonzero & SWS_MASK_COL(i))
321  continue; /* target column is not empty */
322  for (int j = 0; j < 4; j++) {
323  if ((nonzero & SWS_MASK_ROW(i)) == SWS_MASK(i, j)) {
324  /* Move coefficient to the diagonal */
325  c.m[i][i] = c.m[i][j];
326  c.m[i][j] = Q(0);
327  swiz.in[i] = j;
328  break;
329  }
330  }
331  }
332 
333  if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask)
334  return false; /* no swizzle was identified */
335 
336  c.mask = ff_sws_linear_mask(&c);
337  *out_swiz = swiz;
338  *op = c;
339  return true;
340 }
341 
342 static int op_result_is_exact(const SwsOp *op)
343 {
344  for (int i = 0; i < 4; i++) {
345  if (SWS_OP_NEEDED(op, i) && !(op->comps.flags[i] & SWS_COMP_EXACT))
346  return false;
347  }
348 
349  return true;
350 }
351 
353 {
354  int ret;
355 
356 retry:
358 
359  /* Try to push filters towards the input; do this first to unblock
360  * in-place optimizations like linear op fusion */
361  for (int n = 1; n < ops->num_ops; n++) {
362  SwsOp *op = &ops->ops[n];
363  SwsOp *prev = &ops->ops[n - 1];
364 
365  switch (op->op) {
366  case SWS_OP_FILTER_H:
367  case SWS_OP_FILTER_V:
368  if (op_commute_filter(op, prev)) {
369  FFSWAP(SwsOp, *op, *prev);
370  goto retry;
371  }
372 
373  /* Merge filter with prior conversion */
374  if (prev->op == SWS_OP_CONVERT && !prev->convert.expand) {
375  int size_from = ff_sws_pixel_type_size(prev->type);
376  int size_to = ff_sws_pixel_type_size(op->type);
377  av_assert1(prev->convert.to == op->type);
378  if (size_from < size_to) {
379  op->type = prev->type;
380  ff_sws_op_list_remove_at(ops, n - 1, 1);
381  goto retry;
382  }
383  }
384  break;
385  }
386  }
387 
388  /* Apply all in-place optimizations (that do not re-order the list) */
389  for (int n = 0; n < ops->num_ops; n++) {
390  SwsOp dummy = {0};
391  SwsOp *op = &ops->ops[n];
392  SwsOp *prev = n ? &ops->ops[n - 1] : &dummy;
393  SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy;
394 
395  /* common helper variable */
397  bool noop = true;
398 
399  if (!needed && op->op != SWS_OP_WRITE) {
400  /* Remove any operation whose output is not needed */
401  ff_sws_op_list_remove_at(ops, n, 1);
402  goto retry;
403  }
404 
405  switch (op->op) {
406  case SWS_OP_READ:
407  /* "Compress" planar reads where not all components are needed */
408  if (op->rw.mode == SWS_RW_PLANAR) {
409  SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3);
410  int nb_planes = 0;
411  for (int i = 0; i < op->rw.elems; i++) {
412  if (!SWS_OP_NEEDED(op, i)) {
413  swiz.in[i] = 3 - (i - nb_planes); /* map to unused plane */
414  continue;
415  }
416 
417  const int idx = nb_planes++;
418  av_assert1(idx <= i);
419  ops->plane_src[idx] = ops->plane_src[i];
420  swiz.in[i] = idx;
421  }
422 
423  if (nb_planes < op->rw.elems) {
424  op->rw.elems = nb_planes;
425  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
426  .op = SWS_OP_SWIZZLE,
427  .type = op->rw.filter.op ? op->rw.filter.type : op->type,
428  .swizzle = swiz,
429  }));
430  goto retry;
431  }
432  }
433  break;
434 
435  case SWS_OP_SWAP_BYTES:
436  /* Redundant (double) swap */
437  if (next->op == SWS_OP_SWAP_BYTES) {
438  ff_sws_op_list_remove_at(ops, n, 2);
439  goto retry;
440  }
441  break;
442 
443  case SWS_OP_UNPACK:
444  /* Redundant unpack+pack */
445  if (next->op == SWS_OP_PACK && next->type == op->type &&
446  next->pack.pattern[0] == op->pack.pattern[0] &&
447  next->pack.pattern[1] == op->pack.pattern[1] &&
448  next->pack.pattern[2] == op->pack.pattern[2] &&
449  next->pack.pattern[3] == op->pack.pattern[3])
450  {
451  ff_sws_op_list_remove_at(ops, n, 2);
452  goto retry;
453  }
454  break;
455 
456  case SWS_OP_LSHIFT:
457  case SWS_OP_RSHIFT:
458  /* Two shifts in the same direction */
459  if (next->op == op->op) {
460  op->shift.amount += next->shift.amount;
461  ff_sws_op_list_remove_at(ops, n + 1, 1);
462  goto retry;
463  }
464 
465  /* No-op shift */
466  if (!op->shift.amount) {
467  ff_sws_op_list_remove_at(ops, n, 1);
468  goto retry;
469  }
470  break;
471 
472  case SWS_OP_CLEAR:
473  for (int i = 0; i < 4; i++) {
474  if (!SWS_COMP_TEST(op->clear.mask, i))
475  continue;
476 
477  if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
478  !(prev->comps.flags[i] & SWS_COMP_GARBAGE) &&
479  op->clear.value[i].num == 0)
480  {
481  /* Redundant clear-to-zero of zero component */
482  op->clear.mask ^= SWS_COMP(i);
483  } else if (!SWS_OP_NEEDED(op, i)) {
484  /* Unnecessary clear of unused component */
485  op->clear.mask ^= SWS_COMP(i);
486  } else {
487  noop = false;
488  }
489  }
490 
491  if (noop) {
492  ff_sws_op_list_remove_at(ops, n, 1);
493  goto retry;
494  }
495 
496  /* Transitive clear */
497  if (next->op == SWS_OP_CLEAR) {
498  for (int i = 0; i < 4; i++) {
499  if (SWS_COMP_TEST(next->clear.mask, i))
500  op->clear.value[i] = next->clear.value[i];
501  }
502  op->clear.mask |= next->clear.mask;
503  ff_sws_op_list_remove_at(ops, n + 1, 1);
504  goto retry;
505  }
506  break;
507 
508  case SWS_OP_SWIZZLE:
509  for (int i = 0; i < 4; i++) {
510  if (!SWS_OP_NEEDED(op, i))
511  continue;
512  if (op->swizzle.in[i] != i)
513  noop = false;
514  }
515 
516  /* Identity swizzle */
517  if (noop) {
518  ff_sws_op_list_remove_at(ops, n, 1);
519  goto retry;
520  }
521 
522  /* Transitive swizzle */
523  if (next->op == SWS_OP_SWIZZLE) {
524  const SwsSwizzleOp orig = op->swizzle;
525  for (int i = 0; i < 4; i++)
526  op->swizzle.in[i] = orig.in[next->swizzle.in[i]];
527  ff_sws_op_list_remove_at(ops, n + 1, 1);
528  goto retry;
529  }
530 
531  /* Swizzle planes instead of components, if possible */
532  if (prev->op == SWS_OP_READ && prev->rw.mode == SWS_RW_PLANAR) {
533  for (int dst = 0; dst < prev->rw.elems; dst++) {
534  const int src = op->swizzle.in[dst];
535  if (src > dst && src < prev->rw.elems) {
536  FFSWAP(int, ops->plane_src[dst], ops->plane_src[src]);
537  for (int i = dst; i < 4; i++) {
538  if (op->swizzle.in[i] == dst)
539  op->swizzle.in[i] = src;
540  else if (op->swizzle.in[i] == src)
541  op->swizzle.in[i] = dst;
542  }
543  goto retry;
544  }
545  }
546  }
547 
548  if (next->op == SWS_OP_WRITE && next->rw.mode == SWS_RW_PLANAR) {
549  for (int dst = 0; dst < next->rw.elems; dst++) {
550  const int src = op->swizzle.in[dst];
551  if (src > dst && src < next->rw.elems) {
552  FFSWAP(int, ops->plane_dst[dst], ops->plane_dst[src]);
553  FFSWAP(int, op->swizzle.in[dst], op->swizzle.in[src]);
554  goto retry;
555  }
556  }
557  }
558  break;
559 
560  case SWS_OP_CONVERT:
561  /* No-op conversion */
562  if (op->type == op->convert.to) {
563  ff_sws_op_list_remove_at(ops, n, 1);
564  goto retry;
565  }
566 
567  /* Transitive conversion */
568  if (next->op == SWS_OP_CONVERT &&
569  op->convert.expand == next->convert.expand)
570  {
571  av_assert1(op->convert.to == next->type);
572  op->convert.to = next->convert.to;
573  ff_sws_op_list_remove_at(ops, n + 1, 1);
574  goto retry;
575  }
576 
577  /* Conversion followed by integer expansion */
578  if (next->op == SWS_OP_SCALE && !op->convert.expand &&
579  ff_sws_pixel_type_is_int(op->type) &&
580  ff_sws_pixel_type_is_int(op->convert.to) &&
581  !av_cmp_q(next->scale.factor,
582  ff_sws_pixel_expand(op->type, op->convert.to)))
583  {
584  op->convert.expand = true;
585  ff_sws_op_list_remove_at(ops, n + 1, 1);
586  goto retry;
587  }
588  break;
589 
590  case SWS_OP_MIN:
591  for (int i = 0; i < 4; i++) {
592  if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
593  continue;
594  if (av_cmp_q(op->clamp.limit[i], prev->comps.max[i]) < 0)
595  noop = false;
596  }
597 
598  if (noop) {
599  ff_sws_op_list_remove_at(ops, n, 1);
600  goto retry;
601  }
602  break;
603 
604  case SWS_OP_MAX:
605  for (int i = 0; i < 4; i++) {
606  if (!SWS_OP_NEEDED(op, i) || !op->clamp.limit[i].den)
607  continue;
608  if (av_cmp_q(prev->comps.min[i], op->clamp.limit[i]) < 0)
609  noop = false;
610  }
611 
612  if (noop) {
613  ff_sws_op_list_remove_at(ops, n, 1);
614  goto retry;
615  }
616  break;
617 
618  case SWS_OP_DITHER:
619  for (int i = 0; i < 4; i++) {
620  if (op->dither.y_offset[i] < 0)
621  continue;
622  if (!SWS_OP_NEEDED(op, i) || (prev->comps.flags[i] & SWS_COMP_EXACT)) {
623  op->dither.y_offset[i] = -1; /* unnecessary dither */
624  goto retry;
625  } else {
626  noop = false;
627  }
628  }
629 
630  if (noop) {
631  ff_sws_op_list_remove_at(ops, n, 1);
632  goto retry;
633  }
634  break;
635 
636  case SWS_OP_LINEAR: {
637  SwsSwizzleOp swizzle;
638  SwsClearOp clear;
640 
641  /* No-op (identity) linear operation */
642  if (!op->lin.mask) {
643  ff_sws_op_list_remove_at(ops, n, 1);
644  goto retry;
645  }
646 
647  if (next->op == SWS_OP_LINEAR) {
648  /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */
649  const SwsLinearOp m1 = op->lin;
650  const SwsLinearOp m2 = next->lin;
651  for (int i = 0; i < 4; i++) {
652  for (int j = 0; j < 5; j++) {
653  AVRational sum = Q(0);
654  for (int k = 0; k < 4; k++)
655  sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j]));
656  if (j == 4) /* m1.m[4][j] == 1 */
657  sum = av_add_q(sum, m2.m[i][4]);
658  op->lin.m[i][j] = sum;
659  }
660  }
661  op->lin.mask = ff_sws_linear_mask(&op->lin);
662  ff_sws_op_list_remove_at(ops, n + 1, 1);
663  goto retry;
664  }
665 
666  /* Optimize away zero columns */
667  for (int j = 0; j < 4; j++) {
668  const uint32_t col = SWS_MASK_COL(j);
669  if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col))
670  continue;
671  for (int i = 0; i < 4; i++)
672  op->lin.m[i][j] = Q(i == j);
673  op->lin.mask &= ~col;
674  goto retry;
675  }
676 
677  /* Optimize away unused rows */
678  for (int i = 0; i < 4; i++) {
679  const uint32_t row = SWS_MASK_ROW(i);
680  if (SWS_OP_NEEDED(op, i) || !(op->lin.mask & row))
681  continue;
682  for (int j = 0; j < 5; j++)
683  op->lin.m[i][j] = Q(i == j);
684  op->lin.mask &= ~row;
685  goto retry;
686  }
687 
688  /* Convert constant rows to explicit clear instruction */
689  if (extract_constant_rows(&op->lin, &prev->comps, &clear)) {
690  RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) {
691  .op = SWS_OP_CLEAR,
692  .type = op->type,
693  .comps = op->comps,
694  .clear = clear,
695  }));
696  goto retry;
697  }
698 
699  /* Multiplication by scalar constant */
700  if (extract_scalar(&op->lin, &op->comps, &prev->comps, &scale)) {
701  op->op = SWS_OP_SCALE;
702  op->scale = scale;
703  goto retry;
704  }
705 
706  /* Swizzle by fixed pattern */
707  if (extract_swizzle(&op->lin, &prev->comps, &swizzle)) {
708  RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) {
709  .op = SWS_OP_SWIZZLE,
710  .type = op->type,
711  .swizzle = swizzle,
712  }));
713  goto retry;
714  }
715  break;
716  }
717 
718  case SWS_OP_SCALE: {
719  const int factor2 = exact_log2_q(op->scale.factor);
720 
721  /* No-op scaling */
722  if (op->scale.factor.num == 1 && op->scale.factor.den == 1) {
723  ff_sws_op_list_remove_at(ops, n, 1);
724  goto retry;
725  }
726 
727  /* Merge consecutive scaling operations (that don't overflow) */
728  if (next->op == SWS_OP_SCALE) {
729  int64_t p = op->scale.factor.num * (int64_t) next->scale.factor.num;
730  int64_t q = op->scale.factor.den * (int64_t) next->scale.factor.den;
731  if (FFABS(p) <= INT_MAX && FFABS(q) <= INT_MAX) {
732  av_reduce(&op->scale.factor.num, &op->scale.factor.den, p, q, INT_MAX);
733  ff_sws_op_list_remove_at(ops, n + 1, 1);
734  goto retry;
735  }
736  }
737 
738  /* Scaling by exact power of two */
739  if (factor2 && ff_sws_pixel_type_is_int(op->type)) {
740  op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT;
741  op->shift.amount = FFABS(factor2);
742  goto retry;
743  }
744  break;
745  }
746 
747  case SWS_OP_FILTER_H:
748  case SWS_OP_FILTER_V:
749  /* Merge with prior simple planar read */
750  if (prev->op == SWS_OP_READ && !prev->rw.filter.op &&
751  prev->rw.mode == SWS_RW_PLANAR && !prev->rw.frac) {
752  prev->rw.filter.op = op->op;
753  prev->rw.filter.kernel = av_refstruct_ref(op->filter.kernel);
754  prev->rw.filter.type = op->filter.type;
755  ff_sws_op_list_remove_at(ops, n, 1);
756  goto retry;
757  }
758  break;
759  }
760  }
761 
762  /* Push clears to the back to void any unused components */
763  for (int n = 0; n < ops->num_ops - 1; n++) {
764  SwsOp *op = &ops->ops[n];
765  SwsOp *next = &ops->ops[n + 1];
766 
767  switch (op->op) {
768  case SWS_OP_CLEAR:
769  if (op_commute_clear(op, next)) {
770  FFSWAP(SwsOp, *op, *next);
771  goto retry;
772  }
773  break;
774  }
775  }
776 
777  /* Apply any remaining preferential re-ordering optimizations; do these
778  * last because they are more likely to block other optimizations if done
779  * too aggressively */
780  for (int n = 0; n < ops->num_ops - 1; n++) {
781  SwsOp *op = &ops->ops[n];
782  SwsOp *next = &ops->ops[n + 1];
783 
784  switch (op->op) {
785  case SWS_OP_SWIZZLE: {
786  /* Try to push swizzles towards the output */
787  if (op_commute_swizzle(op, next)) {
788  FFSWAP(SwsOp, *op, *next);
789  goto retry;
790  }
791  break;
792  }
793 
794  case SWS_OP_SCALE:
795  /* Exact integer multiplication */
796  if (op->scale.factor.den == 1 && next->op == SWS_OP_CONVERT &&
799  {
800  op->type = next->convert.to;
801  FFSWAP(SwsOp, *op, *next);
802  goto retry;
803  }
804  break;
805  }
806  }
807 
808  return 0;
809 }
810 
811 int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
812  int size, uint8_t clear_val,
813  int *read_bytes, int *write_bytes)
814 {
815  if (!ops->num_ops)
816  return AVERROR(EINVAL);
817 
818  const SwsOp *read = ff_sws_op_list_input(ops);
819  if (!read || read->rw.frac || read->rw.filter.op || ff_sws_rw_op_planes(read) > 1)
820  return AVERROR(ENOTSUP);
821 
822  const int read_size = ff_sws_pixel_type_size(read->type);
823  uint32_t mask[4] = {0};
824  for (int i = 0; i < read->rw.elems; i++)
825  mask[i] = 0x01010101 * i * read_size + 0x03020100;
826 
827  for (int opidx = 1; opidx < ops->num_ops; opidx++) {
828  const SwsOp *op = &ops->ops[opidx];
829  switch (op->op) {
830  case SWS_OP_SWIZZLE: {
831  uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
832  for (int i = 0; i < 4; i++)
833  mask[i] = orig[op->swizzle.in[i]];
834  break;
835  }
836 
837  case SWS_OP_SWAP_BYTES:
838  for (int i = 0; i < 4; i++) {
839  switch (ff_sws_pixel_type_size(op->type)) {
840  case 2: mask[i] = av_bswap16(mask[i]); break;
841  case 4: mask[i] = av_bswap32(mask[i]); break;
842  }
843  }
844  break;
845 
846  case SWS_OP_CLEAR:
847  for (int i = 0; i < 4; i++) {
848  if (!SWS_COMP_TEST(op->clear.mask, i))
849  continue;
850  if (op->clear.value[i].num != 0 || !clear_val)
851  return AVERROR(ENOTSUP);
852  mask[i] = 0x1010101ul * clear_val;
853  }
854  break;
855 
856  case SWS_OP_CONVERT: {
857  if (!op->convert.expand)
858  return AVERROR(ENOTSUP);
859  for (int i = 0; i < 4; i++) {
860  switch (ff_sws_pixel_type_size(op->type)) {
861  case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break;
862  case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
863  }
864  }
865  break;
866  }
867 
868  case SWS_OP_WRITE: {
869  if (op->rw.frac || op->rw.filter.op || ff_sws_rw_op_planes(op) > 1)
870  return AVERROR(ENOTSUP);
871 
872  /* Initialize to no-op */
873  memset(shuffle, clear_val, size);
874 
875  const int write_size = ff_sws_pixel_type_size(op->type);
876  const int read_chunk = read->rw.elems * read_size;
877  const int write_chunk = op->rw.elems * write_size;
878  const int num_groups = size / FFMAX(read_chunk, write_chunk);
879  for (int n = 0; n < num_groups; n++) {
880  const int base_in = n * read_chunk;
881  const int base_out = n * write_chunk;
882  for (int i = 0; i < op->rw.elems; i++) {
883  const int offset = base_out + i * write_size;
884  for (int b = 0; b < write_size; b++) {
885  const uint8_t idx = mask[i] >> (b * 8);
886  if (idx != clear_val)
887  shuffle[offset + b] = base_in + idx;
888  }
889  }
890  }
891 
892  *read_bytes = num_groups * read_chunk;
893  *write_bytes = num_groups * write_chunk;
894  return num_groups;
895  }
896 
897  default:
898  return AVERROR(ENOTSUP);
899  }
900  }
901 
902  return AVERROR(EINVAL);
903 }
904 
905 /**
906  * Determine a suitable intermediate buffer format for a given combination
907  * of pixel types and number of planes. The exact interpretation of these
908  * formats does not matter at all; since they will only ever be used as
909  * temporary intermediate buffers. We still need to pick *some* format as
910  * a consequence of ff_sws_graph_add_pass() taking an AVPixelFormat for the
911  * output buffer.
912  */
913 static enum AVPixelFormat get_planar_fmt(SwsPixelType type, int nb_planes)
914 {
915  switch (ff_sws_pixel_type_size(type)) {
916  case 1:
917  switch (nb_planes) {
918  case 1: return AV_PIX_FMT_GRAY8;
919  case 2: return AV_PIX_FMT_YUV444P; // FIXME: no 2-plane planar fmt
920  case 3: return AV_PIX_FMT_YUV444P;
921  case 4: return AV_PIX_FMT_YUVA444P;
922  }
923  break;
924  case 2:
925  switch (nb_planes) {
926  case 1: return AV_PIX_FMT_GRAY16;
927  case 2: return AV_PIX_FMT_YUV444P16; // FIXME: no 2-plane planar fmt
928  case 3: return AV_PIX_FMT_YUV444P16;
929  case 4: return AV_PIX_FMT_YUVA444P16;
930  }
931  break;
932  case 4:
933  switch (nb_planes) {
934  case 1: return AV_PIX_FMT_GRAYF32;
935  case 2: return AV_PIX_FMT_GBRPF32; // FIXME: no 2-plane planar fmt
936  case 3: return AV_PIX_FMT_GBRPF32;
937  case 4: return AV_PIX_FMT_GBRAPF32;
938  }
939  break;
940  }
941 
942  av_unreachable("Invalid pixel type or number of planes?");
943  return AV_PIX_FMT_NONE;
944 }
945 
946 static void get_input_size(const SwsOpList *ops, SwsFormat *fmt)
947 {
948  fmt->width = ops->src.width;
949  fmt->height = ops->src.height;
950 
951  const SwsOp *read = ff_sws_op_list_input(ops);
952  if (read && read->rw.filter.op == SWS_OP_FILTER_V) {
953  fmt->height = read->rw.filter.kernel->dst_size;
954  } else if (read && read->rw.filter.op == SWS_OP_FILTER_H) {
955  fmt->width = read->rw.filter.kernel->dst_size;
956  }
957 }
958 
960 {
961  const SwsOp *op;
962  int ret, idx;
963 
964  for (idx = 0; idx < ops1->num_ops; idx++) {
965  op = &ops1->ops[idx];
966  if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V)
967  break;
968  }
969 
970  if (idx == ops1->num_ops) {
971  *out_rest = NULL;
972  return 0;
973  }
974 
975  av_assert0(idx > 0);
976  const SwsOp *prev = &ops1->ops[idx - 1];
977 
978  SwsOpList *ops2 = ff_sws_op_list_duplicate(ops1);
979  if (!ops2)
980  return AVERROR(ENOMEM);
981 
982  /**
983  * Not all components may be needed; but we need the ones that *are*
984  * used to be contiguous for the write/read operations. So, first
985  * compress them into a linearly ascending list of components
986  */
987  int nb_planes = 0;
988  SwsSwizzleOp swiz_wr = SWS_SWIZZLE(0, 1, 2, 3);
989  SwsSwizzleOp swiz_rd = SWS_SWIZZLE(0, 1, 2, 3);
990  for (int i = 0; i < 4; i++) {
991  if (SWS_OP_NEEDED(prev, i)) {
992  const int o = nb_planes++;
993  swiz_wr.in[o] = i;
994  swiz_rd.in[i] = o;
995  }
996  }
997 
998  /* Determine metadata for the intermediate format */
999  const SwsPixelType type = op->type;
1000  ops2->src.format = get_planar_fmt(type, nb_planes);
1001  ops2->src.desc = av_pix_fmt_desc_get(ops2->src.format);
1002  get_input_size(ops1, &ops2->src);
1003  ops1->dst = ops2->src;
1004 
1005  for (int i = 0; i < nb_planes; i++) {
1006  const int idx = swiz_wr.in[i];
1007  ops1->plane_dst[i] = ops2->plane_src[i] = i;
1008  ops2->comps_src.flags[i] = prev->comps.flags[idx];
1009  ops2->comps_src.min[i] = prev->comps.min[idx];
1010  ops2->comps_src.max[i] = prev->comps.max[idx];
1011  }
1012 
1013  ff_sws_op_list_remove_at(ops1, idx, ops1->num_ops - idx);
1014  ff_sws_op_list_remove_at(ops2, 0, idx);
1015  op = NULL; /* the above command may invalidate op */
1016 
1017  if (swiz_wr.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
1018  ret = ff_sws_op_list_append(ops1, &(SwsOp) {
1019  .op = SWS_OP_SWIZZLE,
1020  .type = type,
1021  .swizzle = swiz_wr,
1022  });
1023  if (ret < 0)
1024  goto fail;
1025  }
1026 
1027  ret = ff_sws_op_list_append(ops1, &(SwsOp) {
1028  .op = SWS_OP_WRITE,
1029  .type = type,
1030  .rw.elems = nb_planes,
1031  });
1032  if (ret < 0)
1033  goto fail;
1034 
1035  ret = ff_sws_op_list_insert_at(ops2, 0, &(SwsOp) {
1036  .op = SWS_OP_READ,
1037  .type = type,
1038  .rw.elems = nb_planes,
1039  });
1040  if (ret < 0)
1041  goto fail;
1042 
1043  if (swiz_rd.mask != SWS_SWIZZLE(0, 1, 2, 3).mask) {
1044  ret = ff_sws_op_list_insert_at(ops2, 1, &(SwsOp) {
1045  .op = SWS_OP_SWIZZLE,
1046  .type = type,
1047  .swizzle = swiz_rd,
1048  });
1049  if (ret < 0)
1050  goto fail;
1051  }
1052 
1053  ret = ff_sws_op_list_optimize(ops1);
1054  if (ret < 0)
1055  goto fail;
1056 
1057  ret = ff_sws_op_list_optimize(ops2);
1058  if (ret < 0)
1059  goto fail;
1060 
1061  *out_rest = ops2;
1062  return 0;
1063 
1064 fail:
1065  ff_sws_op_list_free(&ops2);
1066  return ret;
1067 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:38
ff_sws_op_list_free
void ff_sws_op_list_free(SwsOpList **p_ops)
Definition: ops.c:634
ff_sws_rw_op_planes
int ff_sws_rw_op_planes(const SwsOp *op)
Return the number of planes involved in a read/write operation.
Definition: ops.c:170
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:41
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsClearOp::value
AVRational value[4]
Definition: ops.h:159
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:46
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:44
ff_sws_op_list_duplicate
SwsOpList * ff_sws_op_list_duplicate(const SwsOpList *ops)
Returns a duplicate of ops, or NULL on OOM.
Definition: ops.c:648
SwsClearOp
Definition: ops.h:157
SWS_RW_PLANAR
@ SWS_RW_PLANAR
Note: 1-component reads are either SWS_RW_PLANAR or SWS_RW_PACKED, depending on the underlying interp...
Definition: ops.h:97
extract_scalar
static bool extract_scalar(const SwsLinearOp *c, const SwsComps *comps, const SwsComps *prev, SwsScaleOp *out_scale)
If a linear operation can be reduced to a scalar multiplication, returns the corresponding scaling fa...
Definition: ops_optimizer.c:246
SwsSwizzleOp::mask
uint32_t mask
Definition: ops.h:144
extract_constant_rows
static bool extract_constant_rows(SwsLinearOp *c, const SwsComps *prev, SwsClearOp *out_clear)
Definition: ops_optimizer.c:272
SwsOpList::comps_src
SwsComps comps_src
Source component metadata associated with pixel values from each corresponding component (in plane/me...
Definition: ops.h:302
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:685
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
SWS_COMP_ZERO
@ SWS_COMP_ZERO
Definition: ops.h:75
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:50
SwsOp::swizzle
SwsSwizzleOp swizzle
Definition: ops.h:235
SwsLinearOp::m
AVRational m[4][5]
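Generalized 5x5 affine transformation: $\begin{bmatrix} \mathrm{Out.x} \\ \mathrm{Out.y} \\ \mathrm{Out.z} \\ \mathrm{Out.w} \end{bmatrix} = \begin{bmatrix} A & B & C & D & E \\ F & G & H & I & J \\ K & L & M & N & O \\ P & Q & R & S & T \end{bmatrix} \cdot \begin{bmatrix} x \\ y \\ z \\ w \\ 1 \end{bmatrix}$ ...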
Definition: ops.h:195
SwsOp::convert
SwsConvertOp convert
Definition: ops.h:238
rational.h
int64_t
long long int64_t
Definition: coverity.c:34
ff_sws_op_list_append
int ff_sws_op_list_append(SwsOpList *ops, SwsOp *op)
These will take over ownership of op and set it to {0}, even on failure.
Definition: ops.c:728
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:233
ops.h
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:58
read_bytes
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
Definition: vf_nnedi.c:442
b
#define b
Definition: input.c:43
get_input_size
static void get_input_size(const SwsOpList *ops, SwsFormat *fmt)
Definition: ops_optimizer.c:946
ff_sws_op_list_optimize
int ff_sws_op_list_optimize(SwsOpList *ops)
Fuse compatible and eliminate redundant operations, as well as replacing some operations with more ef...
Definition: ops_optimizer.c:352
SwsClampOp::limit
AVRational limit[4]
Definition: ops.h:168
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:64
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
dummy
static int dummy
Definition: ffplay.c:3751
ff_sws_comp_mask_needed
SwsCompMask ff_sws_comp_mask_needed(const SwsOp *op)
Definition: ops.c:160
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:201
SwsComps::max
AVRational max[4]
Definition: ops.h:84
SwsOpList::plane_dst
uint8_t plane_dst[4]
Definition: ops.h:291
SwsClearOp::mask
SwsCompMask mask
Definition: ops.h:158
SWS_COMP_TEST
#define SWS_COMP_TEST(mask, X)
Definition: uops.h:71
SwsOpList::num_ops
int num_ops
Definition: ops.h:285
SWS_MASK_COL
#define SWS_MASK_COL(J)
Definition: ops.h:202
SwsDitherOp
Definition: ops.h:175
AV_PIX_FMT_YUVA444P16
#define AV_PIX_FMT_YUVA444P16
Definition: pixfmt.h:597
SwsSwizzleOp
Definition: ops.h:138
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:92
type
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this, just let it vf type
Definition: writing_filters.txt:86
AV_PIX_FMT_GRAY16
#define AV_PIX_FMT_GRAY16
Definition: pixfmt.h:522
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:215
av_reduce
int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max)
Reduce a fraction.
Definition: rational.c:35
AVRational::num
int num
Numerator.
Definition: rational.h:59
SwsOp::op
SwsOpType op
Definition: ops.h:229
Q
#define Q(q)
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:54
avassert.h
SwsOp::clear
SwsClearOp clear
Definition: ops.h:237
SwsFormat::height
int height
Definition: format.h:78
SWS_OP_NEEDED
#define SWS_OP_NEEDED(op, idx)
Definition: ops.h:255
SwsScaleOp::factor
AVRational factor
Definition: ops.h:172
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:552
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:150
SwsComps::min
AVRational min[4]
Definition: ops.h:84
read_chunk
static int read_chunk(AVFormatContext *s)
Definition: dhav.c:173
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:52
exact_log2_q
static int exact_log2_q(const AVRational x)
Definition: ops_optimizer.c:232
extract_swizzle
static bool extract_swizzle(SwsLinearOp *op, const SwsComps *prev, SwsSwizzleOp *out_swiz)
Definition: ops_optimizer.c:301
ff_sws_pixel_expand
static AVRational ff_sws_pixel_expand(SwsPixelType from, SwsPixelType to)
Definition: ops_internal.h:31
SwsCompMask
uint8_t SwsCompMask
Bit-mask of components.
Definition: uops.h:61
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:57
op_commute_filter
static bool op_commute_filter(SwsOp *op, SwsOp *prev)
Try to commute a filter op with the previous operation.
Definition: ops_optimizer.c:188
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:61
AV_PIX_FMT_GRAYF32
#define AV_PIX_FMT_GRAYF32
Definition: pixfmt.h:582
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
FFABS
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:45
SwsOp::dither
SwsDitherOp dither
Definition: ops.h:241
SwsReadWriteOp::kernel
SwsFilterWeights * kernel
Definition: ops.h:125
fail
#define fail
Definition: test.h:478
NULL
#define NULL
Definition: coverity.c:32
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
SwsReadWriteOp::frac
uint8_t frac
Definition: ops.h:114
av_fallthrough
#define av_fallthrough
Definition: attributes.h:67
SWS_COMP_GARBAGE
@ SWS_COMP_GARBAGE
Definition: ops.h:73
SwsConvertOp::to
SwsPixelType to
Definition: ops.h:163
ff_sws_op_list_subpass
int ff_sws_op_list_subpass(SwsOpList *ops1, SwsOpList **out_rest)
Eliminate SWS_OP_FILTER_* operations by merging them with prior SWS_OP_READ operations.
Definition: ops_optimizer.c:959
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:62
SwsOp::clamp
SwsClampOp clamp
Definition: ops.h:239
ff_sws_op_list_remove_at
void ff_sws_op_list_remove_at(SwsOpList *ops, int index, int count)
Definition: ops.c:703
attributes.h
RET
#define RET(x)
Copyright (C) 2025 Niklas Haas.
Definition: ops_optimizer.c:29
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:199
SwsPixelType
SwsPixelType
Definition: uops.h:38
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_apply_op_q
void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
Apply an operation to an AVRational.
Definition: ops.c:194
SwsConvertOp::expand
bool expand
Definition: ops.h:164
SwsPackOp::pattern
uint8_t pattern[4]
Packed bits are assumed to be LSB-aligned within the underlying integer type; i.e.
Definition: ops.h:135
ff_sws_comp_mask_swizzle
void ff_sws_comp_mask_swizzle(SwsCompMask *mask, const SwsSwizzleOp *swiz)
Definition: ops.c:147
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
SwsClampOp
Definition: ops.h:167
av_bswap32
#define av_bswap32
Definition: bswap.h:47
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsOp::type
SwsPixelType type
Definition: ops.h:230
AV_PIX_FMT_GBRPF32
#define AV_PIX_FMT_GBRPF32
Definition: pixfmt.h:578
ff_sws_op_list_insert_at
int ff_sws_op_list_insert_at(SwsOpList *ops, int index, SwsOp *op)
Definition: ops.c:714
ff_sws_linear_mask
uint32_t ff_sws_linear_mask(const SwsLinearOp *c)
Definition: ops.c:773
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:47
SwsOp::lin
SwsLinearOp lin
Definition: ops.h:232
SwsOpList::src
SwsFormat src
Definition: ops.h:288
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:35
ff_sws_op_list_update_comps
void ff_sws_op_list_update_comps(SwsOpList *ops)
Infer + propagate known information about components.
Definition: ops.c:355
SwsFormat
Definition: format.h:77
SwsShiftOp::amount
uint8_t amount
Definition: ops.h:154
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:39
SWS_COMP
#define SWS_COMP(X)
Definition: uops.h:70
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: uops.h:42
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
AV_PIX_FMT_YUVA444P
@ AV_PIX_FMT_YUVA444P
planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
Definition: pixfmt.h:174
offset
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this, just let it vf offset
Definition: writing_filters.txt:86
SwsOp::comps
SwsComps comps
Metadata about the operation's input/output components.
Definition: ops.h:252
SwsScaleOp
Definition: ops.h:171
SwsLinearOp
Definition: ops.h:182
get_planar_fmt
static enum AVPixelFormat get_planar_fmt(SwsPixelType type, int nb_planes)
Determine a suitable intermediate buffer format for a given combination of pixel types and number of ...
Definition: ops_optimizer.c:913
noop
#define noop(a)
Definition: h264chroma_template.c:71
SwsReadWriteOp::op
SwsOpType op
Definition: ops.h:124
SwsFormat::format
enum AVPixelFormat format
Definition: format.h:81
SwsOp::filter
SwsFilterOp filter
Definition: ops.h:242
SwsOpList::ops
SwsOp * ops
Definition: ops.h:284
SwsFilterOp::type
SwsPixelType type
Definition: ops.h:225
SwsReadWriteOp::type
SwsPixelType type
Definition: ops.h:126
SwsFormat::desc
const AVPixFmtDescriptor * desc
Definition: format.h:86
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
needed
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is needed
Definition: filter_design.txt:212
ops_internal.h
SwsFormat::width
int width
Definition: format.h:78
SwsOp
Definition: ops.h:228
write_bytes
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
Definition: vf_nnedi.c:484
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
SwsComps::flags
SwsCompFlags flags[4]
Definition: ops.h:80
ret
ret
Definition: filter_design.txt:187
bswap.h
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
SwsOpList::dst
SwsFormat dst
Definition: ops.h:288
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:53
SwsReadWriteOp::filter
struct SwsReadWriteOp::@571 filter
Filter kernel to apply to each plane while sampling.
op_commute_swizzle
static bool op_commute_swizzle(SwsOp *op, SwsOp *next)
Try to commute a swizzle op with the next operation.
Definition: ops_optimizer.c:107
SwsComps
Definition: ops.h:79
AVRational::den
int den
Denominator.
Definition: rational.h:60
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_PIX_FMT_GBRAPF32
#define AV_PIX_FMT_GBRAPF32
Definition: pixfmt.h:579
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:40
op_result_is_exact
static int op_result_is_exact(const SwsOp *op)
Definition: ops_optimizer.c:342
SwsOp::shift
SwsShiftOp shift
Definition: ops.h:236
ff_sws_solve_shuffle
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
Definition: ops_optimizer.c:811
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
SWS_COMP_EXACT
@ SWS_COMP_EXACT
Definition: ops.h:74
SwsReadWriteOp::elems
uint8_t elems
Definition: ops.h:113
SwsDitherOp::y_offset
int8_t y_offset[4]
Definition: ops.h:179
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
av_add_q
AVRational av_add_q(AVRational b, AVRational c)
Add two rationals.
Definition: rational.c:93
SwsSwizzleOp::in
uint8_t in[4]
Definition: ops.h:145
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:51
SwsOp::scale
SwsScaleOp scale
Definition: ops.h:240
op_commute_clear
static bool op_commute_clear(SwsOp *op, SwsOp *next)
Try to commute a clear op with the next operation.
Definition: ops_optimizer.c:41
SwsReadWriteOp::mode
SwsReadWriteMode mode
Examples: rgba = 4x u8 packed; yuv444p = 3x u8; rgb565 = 1x u16 (<- use SWS_OP_UNPACK to unpack); monow = ...
Definition: ops.h:112
SwsOpList::plane_src
uint8_t plane_src[4]
Definition: ops.h:291
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:283
av_bswap16
#define av_bswap16
Definition: bswap.h:28
SwsOp::pack
SwsPackOp pack
Definition: ops.h:234
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: uops.h:41
shuffle
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
Definition: des.c:179
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
src
#define src
Definition: vp8dsp.c:248
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239
exact_log2
static int exact_log2(const int x)
Definition: ops_optimizer.c:223