FFmpeg
sw_ops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
#include <string.h>

#include "libavutil/avassert.h"
#include "libavutil/mem_internal.h"
#include "libavutil/refstruct.h"

#include "libswscale/ops.h"
#include "libswscale/ops_internal.h"

#include "checkasm.h"
31 
/* Dimensions of the scratch buffers every test in this file operates on. */
enum {
    PIXELS    = 64, /* pixels per line */
    LINES     = 2,  /* lines per plane */
    NB_PLANES = 4,  /* planes per buffer */
};
37 
38 enum {
43 };
44 
/**
 * Format a string into an anonymous 256-byte buffer (a compound literal, so
 * it only lives as long as the enclosing block) and yield a pointer to it.
 * At least one argument must follow the format string.
 */
#define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__)

/**
 * printf-style formatting into a caller-provided buffer.
 *
 * @param buf   destination buffer
 * @param size  capacity of `buf` in bytes, including the terminator
 * @param fmt   printf-style format string
 * @return `buf`, so the call can be used directly as an expression
 */
static const char *tprintf(char buf[], size_t size, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vsnprintf(buf, size, fmt, args);
    va_end(args);

    return buf;
}
54 
55 static int rw_pixel_bits(const SwsOp *op)
56 {
57  const int elems = op->rw.packed ? op->rw.elems : 1;
58  const int size = ff_sws_pixel_type_size(op->type);
59  const int bits = 8 >> op->rw.frac;
60  av_assert1(bits >= 1);
61  return elems * size * bits;
62 }
63 
/**
 * Return a random float built from a random 32-bit pattern, retrying until
 * the pattern is a normal number as classified by isnormal().
 */
static float rndf(void)
{
    union { uint32_t u; float f; } pun;

    for (;;) {
        pun.u = rnd();
        if (isnormal(pun.f))
            return pun.f;
    }
}
72 
/**
 * Fill `num` floats with random data.
 *
 * With a non-zero `range`, each value is a random 32-bit integer scaled by
 * range / UINT32_MAX; with `range` == 0, each value comes from rndf().
 */
static void fill32f(float *line, int num, unsigned range)
{
    const float scale = (float) range / UINT32_MAX;

    if (!range) {
        for (int n = 0; n < num; n++)
            line[n] = rndf();
        return;
    }

    for (int n = 0; n < num; n++)
        line[n] = scale * rnd();
}
79 
/**
 * Fill `num` 32-bit words with random data.
 *
 * A `range` strictly between 0 and UINT_MAX limits each value to
 * [0, range]; 0 and UINT_MAX both leave the random value unrestricted.
 */
static void fill32(uint32_t *line, int num, unsigned range)
{
    const int bounded = range && range < UINT_MAX;

    for (int n = 0; n < num; n++) {
        unsigned value = rnd();
        if (bounded)
            value %= range + 1;
        line[n] = value;
    }
}
85 
/**
 * Fill `num` 16-bit words with random data.
 *
 * @param line   destination; when unrestricted data is requested the buffer
 *               is filled in 32-bit units, so it must have room for `num`
 *               rounded up to a multiple of 2 elements
 * @param num    number of elements to fill
 * @param range  largest value to generate; 0 or UINT_MAX requests
 *               unrestricted random data (same convention as fill32())
 */
static void fill16(uint16_t *line, int num, unsigned range)
{
    /* range == UINT_MAX would make `range + 1` wrap to 0 and divide by zero */
    if (!range || range == UINT_MAX) {
        fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 1), 0);
    } else {
        for (int i = 0; i < num; i++)
            line[i] = rnd() % (range + 1);
    }
}
95 
/**
 * Fill `num` bytes with random data.
 *
 * @param line   destination; when unrestricted data is requested the buffer
 *               is filled in 32-bit units, so it must have room for `num`
 *               rounded up to a multiple of 4 elements
 * @param num    number of elements to fill
 * @param range  largest value to generate; 0 or UINT_MAX requests
 *               unrestricted random data (same convention as fill32())
 */
static void fill8(uint8_t *line, int num, unsigned range)
{
    /* range == UINT_MAX would make `range + 1` wrap to 0 and divide by zero */
    if (!range || range == UINT_MAX) {
        fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 2), 0);
    } else {
        for (int i = 0; i < num; i++)
            line[i] = rnd() % (range + 1);
    }
}
105 
106 static void check_ops(const char *report, const unsigned ranges[NB_PLANES],
107  const SwsOp *ops)
108 {
110  SwsCompiledOp comp_ref = {0}, comp_new = {0};
111  const SwsOpBackend *backend_new = NULL;
112  SwsOpList oplist = { .ops = (SwsOp *) ops };
113  const SwsOp *read_op, *write_op;
114  static const unsigned def_ranges[4] = {0};
115  if (!ranges)
116  ranges = def_ranges;
117 
118  declare_func(void, const SwsOpExec *, const void *, int bx, int y, int bx_end, int y_end);
119 
120  DECLARE_ALIGNED_64(char, src0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
121  DECLARE_ALIGNED_64(char, src1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
122  DECLARE_ALIGNED_64(char, dst0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
123  DECLARE_ALIGNED_64(char, dst1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
124 
125  if (!ctx)
126  return;
128 
129  read_op = &ops[0];
130  for (oplist.num_ops = 0; ops[oplist.num_ops].op; oplist.num_ops++)
131  write_op = &ops[oplist.num_ops];
132 
133  const int read_size = PIXELS * rw_pixel_bits(read_op) >> 3;
134  const int write_size = PIXELS * rw_pixel_bits(write_op) >> 3;
135 
136  for (int p = 0; p < NB_PLANES; p++) {
137  void *plane = src0[p];
138  switch (read_op->type) {
139  case U8: fill8(plane, sizeof(src0[p]) / sizeof(uint8_t), ranges[p]); break;
140  case U16: fill16(plane, sizeof(src0[p]) / sizeof(uint16_t), ranges[p]); break;
141  case U32: fill32(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
142  case F32: fill32f(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
143  }
144  }
145 
146  memcpy(src1, src0, sizeof(src0));
147  memset(dst0, 0, sizeof(dst0));
148  memset(dst1, 0, sizeof(dst1));
149 
150  /* Compile `ops` using both the asm and c backends */
151  for (int n = 0; ff_sws_op_backends[n]; n++) {
152  const SwsOpBackend *backend = ff_sws_op_backends[n];
153  const bool is_ref = !strcmp(backend->name, "c");
154  if (is_ref || !comp_new.func) {
156  int ret = ff_sws_ops_compile_backend(ctx, backend, &oplist, &comp);
157  if (ret == AVERROR(ENOTSUP))
158  continue;
159  else if (ret < 0)
160  fail();
161  else if (PIXELS % comp.block_size != 0)
162  fail();
163 
164  if (is_ref)
165  comp_ref = comp;
166  if (!comp_new.func) {
167  comp_new = comp;
168  backend_new = backend;
169  }
170  }
171  }
172 
173  av_assert0(comp_ref.func && comp_new.func);
174 
175  SwsOpExec exec = {0};
176  exec.width = PIXELS;
177  exec.height = exec.slice_h = 1;
178  for (int i = 0; i < NB_PLANES; i++) {
179  exec.in_stride[i] = sizeof(src0[i][0]);
180  exec.out_stride[i] = sizeof(dst0[i][0]);
181  exec.in_bump[i] = exec.in_stride[i] - read_size;
182  exec.out_bump[i] = exec.out_stride[i] - write_size;
183  }
184 
185  /**
186  * Don't use check_func() because the actual function pointer may be a
187  * wrapper shared by multiple implementations. Instead, take a hash of both
188  * the backend pointer and the active CPU flags.
189  */
190  uintptr_t id = (uintptr_t) backend_new;
191  id ^= (id << 6) + (id >> 2) + 0x9e3779b97f4a7c15 + comp_new.cpu_flags;
192 
194  if (checkasm_check_func((void *) id, "%s", report)) {
195  func_new = comp_new.func;
196  func_ref = comp_ref.func;
197 
198  exec.block_size_in = comp_ref.block_size * rw_pixel_bits(read_op) >> 3;
199  exec.block_size_out = comp_ref.block_size * rw_pixel_bits(write_op) >> 3;
200  for (int i = 0; i < NB_PLANES; i++) {
201  exec.in[i] = (void *) src0[i];
202  exec.out[i] = (void *) dst0[i];
203  }
204  call_ref(&exec, comp_ref.priv, 0, 0, PIXELS / comp_ref.block_size, LINES);
205 
206  exec.block_size_in = comp_new.block_size * rw_pixel_bits(read_op) >> 3;
207  exec.block_size_out = comp_new.block_size * rw_pixel_bits(write_op) >> 3;
208  for (int i = 0; i < NB_PLANES; i++) {
209  exec.in[i] = (void *) src1[i];
210  exec.out[i] = (void *) dst1[i];
211  }
212  call_new(&exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
213 
214  for (int i = 0; i < NB_PLANES; i++) {
215  const char *name = FMT("%s[%d]", report, i);
216  const int stride = sizeof(dst0[i][0]);
217 
218  switch (write_op->type) {
219  case U8:
220  checkasm_check(uint8_t, (void *) dst0[i], stride,
221  (void *) dst1[i], stride,
222  write_size, LINES, name);
223  break;
224  case U16:
225  checkasm_check(uint16_t, (void *) dst0[i], stride,
226  (void *) dst1[i], stride,
227  write_size >> 1, LINES, name);
228  break;
229  case U32:
230  checkasm_check(uint32_t, (void *) dst0[i], stride,
231  (void *) dst1[i], stride,
232  write_size >> 2, LINES, name);
233  break;
234  case F32:
235  checkasm_check(float_ulp, (void *) dst0[i], stride,
236  (void *) dst1[i], stride,
237  write_size >> 2, LINES, name, 0);
238  break;
239  }
240 
241  if (write_op->rw.packed)
242  break;
243  }
244 
245  bench_new(&exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
246  }
247 
248  if (comp_new.func != comp_ref.func && comp_new.free)
249  comp_new.free(comp_new.priv);
250  if (comp_ref.free)
251  comp_ref.free(comp_ref.priv);
253 }
254 
/**
 * Run check_ops() on the list READ(IN, N_IN) -> <ops...> -> WRITE(OUT, N_OUT)
 * with explicit per-plane input ranges. The variadic arguments are the SwsOp
 * initializers placed between the read and the write.
 */
#define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT, ...)                   \
  do {                                                                          \
      check_ops(NAME, RANGES, (SwsOp[]) {                                       \
        {                                                                       \
            .op = SWS_OP_READ,                                                  \
            .type = IN,                                                         \
            .rw.elems = N_IN,                                                   \
        },                                                                      \
        __VA_ARGS__,                                                            \
        {                                                                       \
            .op = SWS_OP_WRITE,                                                 \
            .type = OUT,                                                        \
            .rw.elems = N_OUT,                                                  \
        }, {0}                                                                  \
    });                                                                         \
  } while (0)

/* Build a ranges array that uses the same range R for all four planes */
#define MK_RANGES(R) ((const unsigned[]) { R, R, R, R })

/* Like CHECK_RANGES(), with one range shared by all planes */
#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...)                     \
    CHECK_RANGES(NAME, MK_RANGES(RANGE), N_IN, N_OUT, IN, OUT, __VA_ARGS__)

/**
 * Run the same ops for four component layouts: 1->1, 3->3 and 4->4
 * components, plus 4->2 with a trailing swizzle. Wrapped in
 * do { } while (0) so the expansion is a single statement and stays correct
 * inside an unbraced if/else or loop body.
 */
#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...)                           \
  do {                                                                          \
    CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__);      \
    CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__);      \
    CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__);      \
    CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, {     \
        .op = SWS_OP_SWIZZLE,                                                   \
        .type = OUT,                                                            \
        .swizzle = SWS_SWIZZLE(0, 3, 1, 2),                                     \
    });                                                                         \
  } while (0)

/* CHECK_RANGE() with unrestricted random input (range 0) */
#define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...) \
    CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__)

/* CHECK_COMMON_RANGE() with unrestricted random input (range 0) */
#define CHECK_COMMON(NAME, IN, OUT, ...) \
    CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__)
291 
292 static void check_read_write(void)
293 {
294  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
295  const char *type = ff_sws_pixel_type_name(t);
296  for (int i = 1; i <= 4; i++) {
297  /* Test N->N planar read/write */
298  for (int o = 1; o <= i; o++) {
299  check_ops(FMT("rw_%d_%d_%s", i, o, type), NULL, (SwsOp[]) {
300  {
301  .op = SWS_OP_READ,
302  .type = t,
303  .rw.elems = i,
304  }, {
305  .op = SWS_OP_WRITE,
306  .type = t,
307  .rw.elems = o,
308  }, {0}
309  });
310  }
311 
312  /* Test packed read/write */
313  if (i == 1)
314  continue;
315 
316  check_ops(FMT("read_packed%d_%s", i, type), NULL, (SwsOp[]) {
317  {
318  .op = SWS_OP_READ,
319  .type = t,
320  .rw.elems = i,
321  .rw.packed = true,
322  }, {
323  .op = SWS_OP_WRITE,
324  .type = t,
325  .rw.elems = i,
326  }, {0}
327  });
328 
329  check_ops(FMT("write_packed%d_%s", i, type), NULL, (SwsOp[]) {
330  {
331  .op = SWS_OP_READ,
332  .type = t,
333  .rw.elems = i,
334  }, {
335  .op = SWS_OP_WRITE,
336  .type = t,
337  .rw.elems = i,
338  .rw.packed = true,
339  }, {0}
340  });
341  }
342  }
343 
344  /* Test fractional reads/writes */
345  for (int frac = 1; frac <= 3; frac++) {
346  const int bits = 8 >> frac;
347  const int range = (1 << bits) - 1;
348  if (bits == 2)
349  continue; /* no 2 bit packed formats currently exist */
350 
351  check_ops(FMT("read_frac%d", frac), NULL, (SwsOp[]) {
352  {
353  .op = SWS_OP_READ,
354  .type = U8,
355  .rw.elems = 1,
356  .rw.frac = frac,
357  }, {
358  .op = SWS_OP_WRITE,
359  .type = U8,
360  .rw.elems = 1,
361  }, {0}
362  });
363 
364  check_ops(FMT("write_frac%d", frac), MK_RANGES(range), (SwsOp[]) {
365  {
366  .op = SWS_OP_READ,
367  .type = U8,
368  .rw.elems = 1,
369  }, {
370  .op = SWS_OP_WRITE,
371  .type = U8,
372  .rw.elems = 1,
373  .rw.frac = frac,
374  }, {0}
375  });
376  }
377 }
378 
379 static void check_swap_bytes(void)
380 {
381  CHECK_COMMON("swap_bytes_16", U16, U16, {
382  .op = SWS_OP_SWAP_BYTES,
383  .type = U16,
384  });
385 
386  CHECK_COMMON("swap_bytes_32", U32, U32, {
387  .op = SWS_OP_SWAP_BYTES,
388  .type = U32,
389  });
390 }
391 
392 static void check_pack_unpack(void)
393 {
394  const struct {
396  SwsPackOp op;
397  } patterns[] = {
398  { U8, {{ 3, 3, 2 }}},
399  { U8, {{ 2, 3, 3 }}},
400  { U8, {{ 1, 2, 1 }}},
401  {U16, {{ 5, 6, 5 }}},
402  {U16, {{ 5, 5, 5 }}},
403  {U16, {{ 4, 4, 4 }}},
404  {U32, {{ 2, 10, 10, 10 }}},
405  {U32, {{10, 10, 10, 2 }}},
406  };
407 
408  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
409  const SwsPixelType type = patterns[i].type;
410  const SwsPackOp pack = patterns[i].op;
411  const int num = pack.pattern[3] ? 4 : 3;
412  const char *pat = FMT("%d%d%d%d", pack.pattern[0], pack.pattern[1],
413  pack.pattern[2], pack.pattern[3]);
414  const int total = pack.pattern[0] + pack.pattern[1] +
415  pack.pattern[2] + pack.pattern[3];
416  const unsigned ranges[4] = {
417  (1 << pack.pattern[0]) - 1,
418  (1 << pack.pattern[1]) - 1,
419  (1 << pack.pattern[2]) - 1,
420  (1 << pack.pattern[3]) - 1,
421  };
422 
423  CHECK_RANGES(FMT("pack_%s", pat), ranges, num, 1, type, type, {
424  .op = SWS_OP_PACK,
425  .type = type,
426  .pack = pack,
427  });
428 
429  CHECK_RANGE(FMT("unpack_%s", pat), (1 << total) - 1, 1, num, type, type, {
430  .op = SWS_OP_UNPACK,
431  .type = type,
432  .pack = pack,
433  });
434  }
435 }
436 
438 {
439  const unsigned num = rnd();
440  if (ff_sws_pixel_type_is_int(t)) {
441  const unsigned mask = (1 << (ff_sws_pixel_type_size(t) * 8)) - 1;
442  return (AVRational) { num & mask, 1 };
443  } else {
444  const unsigned den = rnd();
445  return (AVRational) { num, den ? den : 1 };
446  }
447 }
448 
449 static void check_clear(void)
450 {
451  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
452  const char *type = ff_sws_pixel_type_name(t);
453  const int bits = ff_sws_pixel_type_size(t) * 8;
454 
455  /* TODO: AVRational can't fit 32 bit constants */
456  if (bits < 32) {
457  const AVRational chroma = (AVRational) { 1 << (bits - 1), 1};
458  const AVRational alpha = (AVRational) { (1 << bits) - 1, 1};
459  const AVRational zero = (AVRational) { 0, 1};
460  const AVRational none = {0};
461 
462  const SwsConst patterns[] = {
463  /* Zero only */
464  {.q4 = { none, none, none, zero }},
465  {.q4 = { zero, none, none, none }},
466  /* Alpha only */
467  {.q4 = { none, none, none, alpha }},
468  {.q4 = { alpha, none, none, none }},
469  /* Chroma only */
470  {.q4 = { chroma, chroma, none, none }},
471  {.q4 = { none, chroma, chroma, none }},
472  {.q4 = { none, none, chroma, chroma }},
473  {.q4 = { chroma, none, chroma, none }},
474  {.q4 = { none, chroma, none, chroma }},
475  /* Alpha+chroma */
476  {.q4 = { chroma, chroma, none, alpha }},
477  {.q4 = { none, chroma, chroma, alpha }},
478  {.q4 = { alpha, none, chroma, chroma }},
479  {.q4 = { chroma, none, chroma, alpha }},
480  {.q4 = { alpha, chroma, none, chroma }},
481  /* Random values */
482  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
483  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
484  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
485  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
486  };
487 
488  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
489  CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, {
490  .op = SWS_OP_CLEAR,
491  .type = t,
492  .c = patterns[i],
493  });
494  }
495  } else if (!ff_sws_pixel_type_is_int(t)) {
496  /* Floating point YUV doesn't exist, only alpha needs to be cleared */
497  CHECK(FMT("clear_alpha_%s", type), 4, 4, t, t, {
498  .op = SWS_OP_CLEAR,
499  .type = t,
500  .c.q4[3] = { 0, 1 },
501  });
502  }
503  }
504 }
505 
506 static void check_shift(void)
507 {
508  for (SwsPixelType t = U16; t < SWS_PIXEL_TYPE_NB; t++) {
509  const char *type = ff_sws_pixel_type_name(t);
510  if (!ff_sws_pixel_type_is_int(t))
511  continue;
512 
513  for (int shift = 1; shift <= 8; shift++) {
514  CHECK_COMMON(FMT("lshift%d_%s", shift, type), t, t, {
515  .op = SWS_OP_LSHIFT,
516  .type = t,
517  .c.u = shift,
518  });
519 
520  CHECK_COMMON(FMT("rshift%d_%s", shift, type), t, t, {
521  .op = SWS_OP_RSHIFT,
522  .type = t,
523  .c.u = shift,
524  });
525  }
526  }
527 }
528 
529 static void check_swizzle(void)
530 {
531  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
532  const char *type = ff_sws_pixel_type_name(t);
533  static const int patterns[][4] = {
534  /* Pure swizzle */
535  {3, 0, 1, 2},
536  {3, 0, 2, 1},
537  {2, 1, 0, 3},
538  {3, 2, 1, 0},
539  {3, 1, 0, 2},
540  {3, 2, 0, 1},
541  {1, 2, 0, 3},
542  {1, 0, 2, 3},
543  {2, 0, 1, 3},
544  {2, 3, 1, 0},
545  {2, 1, 3, 0},
546  {1, 2, 3, 0},
547  {1, 3, 2, 0},
548  {0, 2, 1, 3},
549  {0, 2, 3, 1},
550  {0, 3, 1, 2},
551  {3, 1, 2, 0},
552  {0, 3, 2, 1},
553  /* Luma expansion */
554  {0, 0, 0, 3},
555  {3, 0, 0, 0},
556  {0, 0, 0, 1},
557  {1, 0, 0, 0},
558  };
559 
560  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
561  const int x = patterns[i][0], y = patterns[i][1],
562  z = patterns[i][2], w = patterns[i][3];
563  CHECK(FMT("swizzle_%d%d%d%d_%s", x, y, z, w, type), 4, 4, t, t, {
564  .op = SWS_OP_SWIZZLE,
565  .type = t,
566  .swizzle = SWS_SWIZZLE(x, y, z, w),
567  });
568  }
569  }
570 }
571 
572 static void check_convert(void)
573 {
574  for (SwsPixelType i = U8; i < SWS_PIXEL_TYPE_NB; i++) {
575  const char *itype = ff_sws_pixel_type_name(i);
576  const int isize = ff_sws_pixel_type_size(i);
577  for (SwsPixelType o = U8; o < SWS_PIXEL_TYPE_NB; o++) {
578  const char *otype = ff_sws_pixel_type_name(o);
579  const int osize = ff_sws_pixel_type_size(o);
580  const char *name = FMT("convert_%s_%s", itype, otype);
581  if (i == o)
582  continue;
583 
584  if (isize < osize || !ff_sws_pixel_type_is_int(o)) {
585  CHECK_COMMON(name, i, o, {
586  .op = SWS_OP_CONVERT,
587  .type = i,
588  .convert.to = o,
589  });
590  } else if (isize > osize || !ff_sws_pixel_type_is_int(i)) {
591  uint32_t range = (1 << osize * 8) - 1;
593  .op = SWS_OP_CONVERT,
594  .type = i,
595  .convert.to = o,
596  });
597  }
598  }
599  }
600 
601  /* Check expanding conversions */
602  CHECK_COMMON("expand16", U8, U16, {
603  .op = SWS_OP_CONVERT,
604  .type = U8,
605  .convert.to = U16,
606  .convert.expand = true,
607  });
608 
609  CHECK_COMMON("expand32", U8, U32, {
610  .op = SWS_OP_CONVERT,
611  .type = U8,
612  .convert.to = U32,
613  .convert.expand = true,
614  });
615 }
616 
617 static void check_dither(void)
618 {
619  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
620  const char *type = ff_sws_pixel_type_name(t);
622  continue;
623 
624  /* Test all sizes up to 256x256 */
625  for (int size_log2 = 0; size_log2 <= 8; size_log2++) {
626  const int size = 1 << size_log2;
628  if (!matrix) {
629  fail();
630  return;
631  }
632 
633  if (size == 1) {
634  matrix[0] = (AVRational) { 1, 2 };
635  } else {
636  for (int i = 0; i < size * size; i++)
637  matrix[i] = rndq(t);
638  }
639 
640  CHECK_COMMON(FMT("dither_%dx%d_%s", size, size, type), t, t, {
641  .op = SWS_OP_DITHER,
642  .type = t,
643  .dither.size_log2 = size_log2,
644  .dither.matrix = matrix,
645  });
646 
648  }
649  }
650 }
651 
652 static void check_min_max(void)
653 {
654  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
655  const char *type = ff_sws_pixel_type_name(t);
656  CHECK_COMMON(FMT("min_%s", type), t, t, {
657  .op = SWS_OP_MIN,
658  .type = t,
659  .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
660  });
661 
662  CHECK_COMMON(FMT("max_%s", type), t, t, {
663  .op = SWS_OP_MAX,
664  .type = t,
665  .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
666  });
667  }
668 }
669 
670 static void check_linear(void)
671 {
672  static const struct {
673  const char *name;
674  uint32_t mask;
675  } patterns[] = {
676  { "noop", 0 },
677  { "luma", SWS_MASK_LUMA },
678  { "alpha", SWS_MASK_ALPHA },
679  { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA },
680  { "dot3", 0x7 },
681  { "dot4", 0xF },
682  { "row0", SWS_MASK_ROW(0) },
683  { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
684  { "off3", SWS_MASK_OFF3 },
685  { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA },
686  { "diag3", SWS_MASK_DIAG3 },
687  { "diag4", SWS_MASK_DIAG4 },
688  { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
689  { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
690  { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
691  { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
692  { "matrix3", SWS_MASK_MAT3 },
693  { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 },
694  { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
695  { "matrix4", SWS_MASK_MAT4 },
696  { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 },
697  };
698 
699  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
700  const char *type = ff_sws_pixel_type_name(t);
702  continue;
703 
704  for (int p = 0; p < FF_ARRAY_ELEMS(patterns); p++) {
705  const uint32_t mask = patterns[p].mask;
706  SwsLinearOp lin = { .mask = mask };
707 
708  for (int i = 0; i < 4; i++) {
709  for (int j = 0; j < 5; j++) {
710  if (mask & SWS_MASK(i, j)) {
711  lin.m[i][j] = rndq(t);
712  } else {
713  lin.m[i][j] = (AVRational) { i == j, 1 };
714  }
715  }
716  }
717 
718  CHECK(FMT("linear_%s_%s", patterns[p].name, type), 4, 4, t, t, {
719  .op = SWS_OP_LINEAR,
720  .type = t,
721  .lin = lin,
722  });
723  }
724  }
725 }
726 
727 static void check_scale(void)
728 {
729  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
730  const char *type = ff_sws_pixel_type_name(t);
731  const int bits = ff_sws_pixel_type_size(t) * 8;
732  if (ff_sws_pixel_type_is_int(t)) {
733  /* Ensure the result won't exceed the value range */
734  const unsigned max = (1 << bits) - 1;
735  const unsigned scale = rnd() & max;
736  const unsigned range = max / (scale ? scale : 1);
737  CHECK_COMMON_RANGE(FMT("scale_%s", type), range, t, t, {
738  .op = SWS_OP_SCALE,
739  .type = t,
740  .c.q = { scale, 1 },
741  });
742  } else {
743  CHECK_COMMON(FMT("scale_%s", type), t, t, {
744  .op = SWS_OP_SCALE,
745  .type = t,
746  .c.q = rndq(t),
747  });
748  }
749  }
750 }
751 
/**
 * checkasm entry point: run every test group in this file and report each
 * group under its own name.
 */
void checkasm_check_sw_ops(void)
{
    check_read_write();
    report("read_write");
    check_swap_bytes();
    report("swap_bytes");
    check_pack_unpack();
    report("pack_unpack");
    check_clear();
    report("clear");
    check_shift();
    report("shift");
    check_swizzle();
    report("swizzle");
    check_convert();
    report("convert");
    check_dither();
    report("dither");
    check_min_max();
    report("min_max");
    check_linear();
    report("linear");
    check_scale();
    report("scale");
}
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:48
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:33
name
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:58
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsCompiledOp::func
SwsOpFunc func
Definition: ops_internal.h:91
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:56
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:51
mem_internal.h
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
SWS_MASK_ALPHA
@ SWS_MASK_ALPHA
Definition: ops.h:163
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Definition: ops_internal.h:66
SwsConst
Definition: ops.h:77
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:55
CHECK_COMMON
#define CHECK_COMMON(NAME, IN, OUT,...)
Definition: sw_ops.c:289
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_internal.h:58
SwsOpExec::out_stride
ptrdiff_t out_stride[4]
Definition: ops_internal.h:63
SwsLinearOp::m
AVRational m[4][5]
Generalized 5x5 affine transformation: [ Out.x ] = [ A B C D E ] [ Out.y ] = [ F G H I J ] * [ x y z ...
Definition: ops.h:151
matrix
Definition: vc1dsp.c:43
src1
const pixel * src1
Definition: h264pred_template.c:420
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:184
check_min_max
static void check_min_max(void)
Definition: sw_ops.c:652
ops.h
w
uint8_t w
Definition: llviddspenc.c:38
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:60
checkasm_check_sw_ops
void checkasm_check_sw_ops(void)
Definition: sw_ops.c:752
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:156
SwsOpExec::block_size_in
int32_t block_size_in
Definition: ops_internal.h:72
check_convert
static void check_convert(void)
Definition: sw_ops.c:572
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
func_ref
static av_unused void * func_ref
Definition: checkasm.h:186
check_swap_bytes
static void check_swap_bytes(void)
Definition: sw_ops.c:379
CHECK_COMMON_RANGE
#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT,...)
Definition: sw_ops.c:276
check_read_write
static void check_read_write(void)
Definition: sw_ops.c:292
max
#define max(a, b)
Definition: cuda_runtime.h:33
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:34
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_internal.h:62
call_ref
#define call_ref(...)
Definition: checkasm.h:206
check_linear
static void check_linear(void)
Definition: sw_ops.c:670
SwsOpBackend::name
const char * name
Definition: ops_internal.h:104
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:64
rndf
static float rndf(void)
Definition: sw_ops.c:64
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:157
DECLARE_ALIGNED_64
#define DECLARE_ALIGNED_64(t, v)
Definition: mem_internal.h:114
check_clear
static void check_clear(void)
Definition: sw_ops.c:449
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:30
CHECK_RANGE
#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:273
func_new
static av_unused void * func_new
Definition: checkasm.h:186
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:171
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: ops.h:35
U16
@ U16
Definition: sw_ops.c:40
ff_sws_op_backends
const SwsOpBackend *const ff_sws_op_backends[]
Definition: ops.c:34
check_scale
static void check_scale(void)
Definition: sw_ops.c:727
fail
#define fail()
Definition: checkasm.h:200
SwsOpList::num_ops
int num_ops
Definition: ops.h:211
checkasm.h
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: ops.h:32
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:79
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
ff_sws_ops_compile_backend
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Attempt to compile a list of operations using a specific backend.
refstruct.h
SwsLinearOp::mask
uint32_t mask
Definition: ops.h:152
av_refstruct_allocz
static void * av_refstruct_allocz(size_t size)
Equivalent to av_refstruct_alloc_ext(size, 0, NULL, NULL)
Definition: refstruct.h:105
SwsOp::op
SwsOpType op
Definition: ops.h:180
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:64
avassert.h
rnd
#define rnd()
Definition: checkasm.h:184
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
SWS_MASK_MAT4
@ SWS_MASK_MAT4
Definition: ops.h:173
float
float
Definition: af_crystalizer.c:122
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:126
F32
@ F32
Definition: sw_ops.c:42
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1415
SWS_MASK_OFF4
@ SWS_MASK_OFF4
Definition: ops.h:172
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
bits
uint8_t bits
Definition: vp3data.h:128
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:41
U8
@ U8
Definition: sw_ops.c:39
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:65
SWS_MASK_MAT3
@ SWS_MASK_MAT3
Definition: ops.h:167
fill32
static void fill32(uint32_t *line, int num, unsigned range)
Definition: sw_ops.c:80
MK_RANGES
#define MK_RANGES(R)
Definition: sw_ops.c:272
ctx
AVFormatContext * ctx
Definition: movenc.c:49
FMT
#define FMT(fmt,...)
Definition: sw_ops.c:45
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:63
SwsOpBackend
Definition: ops_internal.h:103
SWS_MASK_DIAG3
@ SWS_MASK_DIAG3
Definition: ops.h:165
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:52
SwsOpExec::height
int32_t height
Definition: ops_internal.h:70
SwsOpExec
Global execution context for all compiled functions.
Definition: ops_internal.h:56
fill16
static void fill16(uint16_t *line, int num, unsigned range)
Definition: sw_ops.c:86
rw_pixel_bits
static int rw_pixel_bits(const SwsOp *op)
Definition: sw_ops.c:55
call_new
#define call_new(...)
Definition: checkasm.h:309
NULL
#define NULL
Definition: coverity.c:32
tprintf
static const char * tprintf(char buf[], size_t size, const char *fmt,...)
Definition: sw_ops.c:46
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
checkasm_save_context
#define checkasm_save_context()
Definition: checkasm.h:76
SwsOpExec::slice_h
int32_t slice_h
Definition: ops_internal.h:71
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:155
SwsPackOp::pattern
uint8_t pattern[4]
Definition: ops.h:111
f
f
Definition: af_crystalizer.c:122
SwsOpExec::block_size_out
int32_t block_size_out
Definition: ops_internal.h:73
sws_alloc_context
SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext and set its fields to default values.
Definition: utils.c:1018
shift
static int shift(int a, int b)
Definition: bonk.c:261
SwsOp::type
SwsPixelType type
Definition: ops.h:181
SWS_MASK_OFF3
@ SWS_MASK_OFF3
Definition: ops.h:166
check_ops
static void check_ops(const char *report, const unsigned ranges[NB_PLANES], const SwsOp *ops)
Definition: sw_ops.c:106
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:57
SWS_MASK_LUMA
@ SWS_MASK_LUMA
Definition: ops.h:162
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2594
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:49
line
Definition: graph2dot.c:48
SwsLinearOp
Definition: ops.h:138
zero
static int zero(InterplayACMContext *s, unsigned ind, unsigned col)
Definition: interplayacm.c:121
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
fill32f
static void fill32f(float *line, int num, unsigned range)
Definition: sw_ops.c:73
SwsOpExec::out
uint8_t * out[4]
Definition: ops_internal.h:59
report
#define report
Definition: checkasm.h:203
U32
@ U32
Definition: sw_ops.c:41
bench_new
#define bench_new(...)
Definition: checkasm.h:394
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
checkasm_check_func
void * checkasm_check_func(void *func, const char *name,...)
Definition: checkasm.c:1047
SwsOpList::ops
SwsOp * ops
Definition: ops.h:210
SwsPackOp
Definition: ops.h:110
vsnprintf
#define vsnprintf
Definition: snprintf.h:36
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:57
SwsConst::q4
AVRational q4[4]
Definition: ops.h:79
ops_internal.h
SwsOp
Definition: ops.h:179
SwsOpExec::width
int32_t width
Definition: ops_internal.h:70
SwsCompiledOp::priv
void * priv
Definition: ops_internal.h:99
stride
#define stride
Definition: h264pred_template.c:536
SwsCompiledOp::block_size
int block_size
Definition: ops_internal.h:93
ret
ret
Definition: filter_design.txt:187
check_shift
static void check_shift(void)
Definition: sw_ops.c:506
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:66
SwsCompiledOp
Definition: ops_internal.h:90
check_swizzle
static void check_swizzle(void)
Definition: sw_ops.c:529
SWS_PIXEL_TYPE_NB
@ SWS_PIXEL_TYPE_NB
Definition: ops.h:36
SwsReadWriteOp::packed
bool packed
Definition: ops.h:99
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:49
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:50
NB_PLANES
@ NB_PLANES
Definition: sw_ops.c:34
PIXELS
@ PIXELS
Definition: sw_ops.c:35
src0
const pixel *const src0
Definition: h264pred_template.c:419
rndq
static AVRational rndq(SwsPixelType t)
Definition: sw_ops.c:437
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:195
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:273
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
LINES
@ LINES
Definition: sw_ops.c:33
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:59
fill8
static void fill8(uint8_t *line, int num, unsigned range)
Definition: sw_ops.c:96
sws_free_context
void sws_free_context(SwsContext **ctx)
Free the context and everything associated with it, and write NULL to the provided pointer.
Definition: utils.c:2326
check_pack_unpack
static void check_pack_unpack(void)
Definition: sw_ops.c:392
checkasm_check
#define checkasm_check(prefix,...)
Definition: checkasm.h:441
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:209
SwsContext
Main external API structure.
Definition: swscale.h:189
CHECK
#define CHECK(NAME, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:286
CHECK_RANGES
#define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:255
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_internal.h:67
check_dither
static void check_dither(void)
Definition: sw_ops.c:617
SwsCompiledOp::free
void(* free)(void *priv)
Definition: ops_internal.h:100