Go to the documentation of this file.
26 #include "../ops_chain.h"
28 #define DECL_ENTRY(TYPE, MASK, NAME, ...) \
29 static const SwsOpEntry op_##NAME = { \
30 .type = SWS_PIXEL_##TYPE, \
35 #define DECL_ASM(TYPE, MASK, NAME, ...) \
36 void ff_##NAME(void); \
37 DECL_ENTRY(TYPE, MASK, NAME, \
41 #define DECL_PATTERN(TYPE, NAME, X, Y, Z, W, ...) \
42 DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), p##X##Y##Z##W##_##NAME, \
/*
 * Expands to the address of the static entry op_p<X><Y><Z><W>_<NAME>, i.e.
 * the identifier that DECL_PATTERN generates for the same NAME and X/Y/Z/W
 * arguments. Intended for use inside an entry list initializer.
 */
46 #define REF_PATTERN(NAME, X, Y, Z, W) \
47 &op_p##X##Y##Z##W##_##NAME
/*
 * Instantiates DECL_PATTERN four times, once for each of the component
 * patterns (1,0,0,0), (1,0,0,1), (1,1,1,0) and (1,1,1,1). TYPE, NAME and any
 * extra arguments are forwarded unchanged to every instantiation.
 * REF_COMMON_PATTERNS lists references for the same four patterns.
 *
 * NOTE(review): the last line of the definition ends in a line continuation,
 * so whatever is on the line directly after it becomes part of the macro;
 * that line needs to stay empty.
 */
49 #define DECL_COMMON_PATTERNS(TYPE, NAME, ...) \
50 DECL_PATTERN(TYPE, NAME, 1, 0, 0, 0, __VA_ARGS__); \
51 DECL_PATTERN(TYPE, NAME, 1, 0, 0, 1, __VA_ARGS__); \
52 DECL_PATTERN(TYPE, NAME, 1, 1, 1, 0, __VA_ARGS__); \
53 DECL_PATTERN(TYPE, NAME, 1, 1, 1, 1, __VA_ARGS__) \
/*
 * Expands to a comma-separated list of four REF_PATTERN references for NAME,
 * covering the component patterns (1,0,0,0), (1,0,0,1), (1,1,1,0) and
 * (1,1,1,1) in that order. No trailing comma is emitted; the caller supplies
 * it when more list items follow.
 */
55 #define REF_COMMON_PATTERNS(NAME) \
56 REF_PATTERN(NAME, 1, 0, 0, 0), \
57 REF_PATTERN(NAME, 1, 0, 0, 1), \
58 REF_PATTERN(NAME, 1, 1, 1, 0), \
59 REF_PATTERN(NAME, 1, 1, 1, 1)
66 if (
op->rw.packed &&
op->rw.elems == 3) {
76 #define DECL_RW(EXT, TYPE, NAME, OP, ELEMS, PACKED, FRAC) \
77 DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##EXT, \
79 .rw = { .elems = ELEMS, .packed = PACKED, .frac = FRAC }, \
/*
 * Declares the packed READ and WRITE entries for the TYPE argument U<DEPTH>,
 * with 2, 3 and 4 elements each, by invoking DECL_RW with PACKED = true and
 * FRAC = 0. The base names passed on are read<DEPTH>_packed and
 * write<DEPTH>_packed; DECL_RW appends the element count and EXT to them.
 *
 * NOTE(review): the last line of the definition ends in a line continuation,
 * so the line directly after it becomes part of the macro and needs to stay
 * empty.
 */
83 #define DECL_PACKED_RW(EXT, DEPTH) \
84 DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed, READ, 2, true, 0) \
85 DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed, READ, 3, true, 0) \
86 DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed, READ, 4, true, 0) \
87 DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 2, true, 0) \
88 DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 3, true, 0) \
89 DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 4, true, 0) \
91 #define DECL_PACK_UNPACK(EXT, TYPE, X, Y, Z, W) \
92 DECL_ASM(TYPE, SWS_COMP(0), pack_##X##Y##Z##W##EXT, \
94 .pack.pattern = {X, Y, Z, W}, \
97 DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), unpack_##X##Y##Z##W##EXT, \
98 .op = SWS_OP_UNPACK, \
99 .pack.pattern = {X, Y, Z, W}, \
105 for (
int i = 0;
i < 16;
i++)
110 #define DECL_SWAP_BYTES(EXT, TYPE, X, Y, Z, W) \
111 DECL_ENTRY(TYPE, SWS_COMP_MASK(X, Y, Z, W), \
112 p##X##Y##Z##W##_swap_bytes_##TYPE##EXT, \
113 .op = SWS_OP_SWAP_BYTES, \
114 .func = ff_p##X##Y##Z##W##_shuffle##EXT, \
115 .setup = setup_swap_bytes, \
118 #define DECL_CLEAR_ALPHA(EXT, IDX) \
119 DECL_ASM(U8, SWS_COMP_ALL, clear_alpha##IDX##EXT, \
120 .op = SWS_OP_CLEAR, \
121 .clear.mask = SWS_COMP(IDX), \
122 .clear.value[IDX] = { -1, 1 }, \
125 #define DECL_CLEAR_ZERO(EXT, IDX) \
126 DECL_ASM(U8, SWS_COMP_ALL, clear_zero##IDX##EXT, \
127 .op = SWS_OP_CLEAR, \
128 .clear.mask = SWS_COMP(IDX), \
129 .clear.value[IDX] = { 0, 1 }, \
135 for (
int i = 0;
i < 4;
i++)
136 out->priv.u32[
i] = (uint32_t)
op->clear.value[
i].num;
140 #define DECL_CLEAR(EXT, X, Y, Z, W) \
141 DECL_ASM(U8, SWS_COMP_ALL, p##X##Y##Z##W##_clear##EXT, \
142 .op = SWS_OP_CLEAR, \
143 .setup = setup_clear, \
144 .clear.mask = SWS_COMP_MASK(X, Y, Z, W), \
147 #define DECL_SWIZZLE(EXT, X, Y, Z, W) \
148 DECL_ASM(U8, SWS_COMP_ALL, swizzle_##X##Y##Z##W##EXT, \
149 .op = SWS_OP_SWIZZLE, \
150 .swizzle.in = {X, Y, Z, W}, \
153 #define DECL_CONVERT(EXT, FROM, TO) \
154 DECL_COMMON_PATTERNS(FROM, convert_##FROM##_##TO##EXT, \
155 .op = SWS_OP_CONVERT, \
156 .convert.to = SWS_PIXEL_##TO, \
159 #define DECL_EXPAND(EXT, FROM, TO) \
160 DECL_COMMON_PATTERNS(FROM, expand_##FROM##_##TO##EXT, \
161 .op = SWS_OP_CONVERT, \
162 .convert.to = SWS_PIXEL_##TO, \
163 .convert.expand = true, \
172 #define DECL_SHIFT16(EXT) \
173 DECL_COMMON_PATTERNS(U16, lshift16##EXT, \
174 .op = SWS_OP_LSHIFT, \
175 .setup = setup_shift, \
179 DECL_COMMON_PATTERNS(U16, rshift16##EXT, \
180 .op = SWS_OP_RSHIFT, \
181 .setup = setup_shift, \
185 #define DECL_MIN_MAX(EXT) \
186 DECL_COMMON_PATTERNS(F32, min##EXT, \
188 .setup = ff_sws_setup_clamp, \
191 DECL_COMMON_PATTERNS(F32, max##EXT, \
193 .setup = ff_sws_setup_clamp, \
196 #define DECL_SCALE(EXT) \
197 DECL_COMMON_PATTERNS(F32, scale##EXT, \
198 .op = SWS_OP_SCALE, \
199 .setup = ff_sws_setup_scale, \
203 #define DECL_EXPAND_BITS(EXT, BITS) \
204 DECL_ASM(U##BITS, SWS_COMP(0), expand_bits##BITS##EXT, \
205 .op = SWS_OP_SCALE, \
206 .scale = { .num = ((1 << (BITS)) - 1), .den = 1 }, \
213 if (!
op->dither.size_log2) {
219 const int size = 1 <<
op->dither.size_log2;
220 const int8_t *off =
op->dither.y_offset;
222 for (
int i = 0;
i < 4;
i++) {
224 max_offset =
FFMAX(max_offset, off[
i] & (
size - 1));
232 const int num_rows =
size + max_offset;
239 matrix[
i] = (
float)
op->dither.matrix[
i].num /
op->dither.matrix[
i].den;
244 static_assert(
sizeof(
out->priv.ptr) <=
sizeof(int16_t[4]),
245 ">8 byte pointers not supported");
246 assert(max_offset *
stride <= INT16_MAX);
247 int16_t *off_out = &
out->priv.i16[4];
248 for (
int i = 0;
i < 4;
i++)
249 off_out[
i] = off[
i] >= 0 ? (off[
i] & (
size - 1)) *
stride : -1;
254 #define DECL_DITHER0(EXT) \
255 DECL_COMMON_PATTERNS(F32, dither0##EXT, \
256 .op = SWS_OP_DITHER, \
257 .setup = setup_dither, \
260 #define DECL_DITHER(EXT, SIZE) \
261 DECL_ASM(F32, SWS_COMP_ALL, dither##SIZE##EXT, \
262 .op = SWS_OP_DITHER, \
263 .setup = setup_dither, \
264 .dither_size = SIZE, \
276 for (
int y = 0; y < 4; y++) {
277 for (
int x = 0; x < 5; x++)
278 matrix[y * 5 + x] = (
float)
op->lin.m[y][x].num /
op->lin.m[y][x].den;
284 #define DECL_LINEAR(EXT, NAME, MASK) \
285 DECL_ASM(F32, SWS_COMP_ALL, NAME##EXT, \
286 .op = SWS_OP_LINEAR, \
287 .setup = setup_linear, \
288 .linear_mask = (MASK), \
306 for (
int i = 0;
i <
op->rw.elems;
i++) {
319 static_assert(
sizeof(
out->priv.ptr) <=
sizeof(
int32_t[2]),
320 ">8 byte pointers not supported");
327 for (
int i = 0;
i <
filter->num_weights;
i++)
357 const int taps_align =
sizeof(
int32_t) / pixel_size;
358 const int filter_size =
filter->filter_size;
360 const size_t aligned_size =
FFALIGN(filter_size, taps_align);
361 const size_t line_size =
FFALIGN(
filter->dst_size, block_size);
363 if (aligned_size > INT_MAX)
380 const int mmsize = block_size * 2;
381 const int gather_size = mmsize /
sizeof(
int32_t);
382 for (
size_t x = 0; x < line_size; x += block_size) {
383 const int elems =
FFMIN(block_size,
filter->dst_size - x);
384 for (
int j = 0; j < filter_size; j++) {
385 const int jb = j & ~(taps_align - 1);
386 const int ji = j - jb;
387 const size_t idx_base = x * aligned_size + jb * block_size + ji;
388 for (
int i = 0;
i < elems;
i++) {
389 const int w =
filter->weights[(x +
i) * filter_size + j];
390 size_t idx = idx_base;
402 const int gather_base =
i & ~(gather_size - 1);
403 const int gather_pos =
i - gather_base;
404 const int lane_idx = gather_pos >> 2;
405 const int pos_in_lane = gather_pos & 3;
406 idx += gather_base * 4
407 + (pos_in_lane >> 1) * (mmsize / 2)
409 + (pos_in_lane & 1) * 4;
411 idx +=
i * taps_align;
424 out->priv.uptr[1] = aligned_size;
426 out->over_read = (aligned_size - filter_size) * pixel_size;
461 const int taps_align = 16 / sizeof_weights;
462 const int pixels_align = 4;
463 const int filter_size =
filter->filter_size;
464 const size_t aligned_size =
FFALIGN(filter_size, taps_align);
490 for (
int x = 0; x <
filter->dst_size; x++) {
491 for (
int j = 0; j < filter_size; j++) {
492 const int xb = x & ~(pixels_align - 1);
493 const int jb = j & ~(taps_align - 1);
494 const int xi = x - xb, ji = j - jb;
495 const int w =
filter->weights[x * filter_size + j];
496 const int idx = xb * aligned_size + jb * pixels_align +
xi * taps_align + ji;
507 out->priv.uptr[1] = aligned_size * sizeof_weights;
509 out->over_read = (aligned_size - filter_size) * pixel_size;
513 #define DECL_FILTER(EXT, TYPE, DIR, NAME, ELEMS, ...) \
514 DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##_##TYPE##EXT, \
517 .rw.filter = SWS_OP_FILTER_##DIR, \
/*
 * Instantiates DECL_FILTER once for each element count from 1 to 4, passing
 * EXT, TYPE, DIR, NAME and any extra arguments through unchanged.
 */
521 #define DECL_FILTERS(EXT, TYPE, DIR, NAME, ...) \
522 DECL_FILTER(EXT, TYPE, DIR, NAME, 1, __VA_ARGS__) \
523 DECL_FILTER(EXT, TYPE, DIR, NAME, 2, __VA_ARGS__) \
524 DECL_FILTER(EXT, TYPE, DIR, NAME, 3, __VA_ARGS__) \
525 DECL_FILTER(EXT, TYPE, DIR, NAME, 4, __VA_ARGS__)
/*
 * Declares four filter families for the given TYPE, each through
 * DECL_FILTERS (i.e. for 1 to 4 elements):
 *   - filter_v:     direction V, .setup = setup_filter_v
 *   - filter_fma_v: direction V, .setup = setup_filter_v,
 *                   additionally .check = check_filter_fma
 *   - filter_h:     direction H, .setup = setup_filter_h
 *   - filter_4x4_h: direction H, .setup = setup_filter_4x4_h,
 *                   additionally .check = check_filter_4x4_h
 */
527 #define DECL_FILTERS_GENERIC(EXT, TYPE) \
528 DECL_FILTERS(EXT, TYPE, V, filter_v, .setup = setup_filter_v) \
529 DECL_FILTERS(EXT, TYPE, V, filter_fma_v, .setup = setup_filter_v, \
530 .check = check_filter_fma) \
531 DECL_FILTERS(EXT, TYPE, H, filter_h, .setup = setup_filter_h) \
532 DECL_FILTERS(EXT, TYPE, H, filter_4x4_h, .setup = setup_filter_4x4_h, \
533 .check = check_filter_4x4_h)
/*
 * Expands to a comma-separated list of the addresses of op_<NAME>1<SUFFIX>
 * through op_<NAME>4<SUFFIX>, in ascending element-count order. With SUFFIX
 * of the form _<TYPE><EXT> these are the identifiers produced by DECL_FILTER
 * (NAME##ELEMS##_##TYPE##EXT). No trailing comma is emitted.
 */
535 #define REF_FILTERS(NAME, SUFFIX) \
536 &op_##NAME##1##SUFFIX, \
537 &op_##NAME##2##SUFFIX, \
538 &op_##NAME##3##SUFFIX, \
539 &op_##NAME##4##SUFFIX
541 #define DECL_FUNCS_8(SIZE, EXT, FLAG) \
542 DECL_RW(EXT, U8, read_planar, READ, 1, false, 0) \
543 DECL_RW(EXT, U8, read_planar, READ, 2, false, 0) \
544 DECL_RW(EXT, U8, read_planar, READ, 3, false, 0) \
545 DECL_RW(EXT, U8, read_planar, READ, 4, false, 0) \
546 DECL_RW(EXT, U8, write_planar, WRITE, 1, false, 0) \
547 DECL_RW(EXT, U8, write_planar, WRITE, 2, false, 0) \
548 DECL_RW(EXT, U8, write_planar, WRITE, 3, false, 0) \
549 DECL_RW(EXT, U8, write_planar, WRITE, 4, false, 0) \
550 DECL_RW(EXT, U8, read_nibbles, READ, 1, false, 1) \
551 DECL_RW(EXT, U8, read_bits, READ, 1, false, 3) \
552 DECL_RW(EXT, U8, write_bits, WRITE, 1, false, 3) \
553 DECL_EXPAND_BITS(EXT, 8) \
554 DECL_PACKED_RW(EXT, 8) \
555 DECL_PACK_UNPACK(EXT, U8, 1, 2, 1, 0) \
556 DECL_PACK_UNPACK(EXT, U8, 3, 3, 2, 0) \
557 DECL_PACK_UNPACK(EXT, U8, 2, 3, 3, 0) \
558 void ff_p1000_shuffle##EXT(void); \
559 void ff_p1001_shuffle##EXT(void); \
560 void ff_p1110_shuffle##EXT(void); \
561 void ff_p1111_shuffle##EXT(void); \
562 DECL_SWIZZLE(EXT, 3, 0, 1, 2) \
563 DECL_SWIZZLE(EXT, 3, 0, 2, 1) \
564 DECL_SWIZZLE(EXT, 2, 1, 0, 3) \
565 DECL_SWIZZLE(EXT, 3, 2, 1, 0) \
566 DECL_SWIZZLE(EXT, 3, 1, 0, 2) \
567 DECL_SWIZZLE(EXT, 3, 2, 0, 1) \
568 DECL_SWIZZLE(EXT, 1, 2, 0, 3) \
569 DECL_SWIZZLE(EXT, 1, 0, 2, 3) \
570 DECL_SWIZZLE(EXT, 2, 0, 1, 3) \
571 DECL_SWIZZLE(EXT, 2, 3, 1, 0) \
572 DECL_SWIZZLE(EXT, 2, 1, 3, 0) \
573 DECL_SWIZZLE(EXT, 1, 2, 3, 0) \
574 DECL_SWIZZLE(EXT, 1, 3, 2, 0) \
575 DECL_SWIZZLE(EXT, 0, 2, 1, 3) \
576 DECL_SWIZZLE(EXT, 0, 2, 3, 1) \
577 DECL_SWIZZLE(EXT, 0, 3, 1, 2) \
578 DECL_SWIZZLE(EXT, 3, 1, 2, 0) \
579 DECL_SWIZZLE(EXT, 0, 3, 2, 1) \
580 DECL_SWIZZLE(EXT, 0, 0, 0, 3) \
581 DECL_SWIZZLE(EXT, 3, 0, 0, 0) \
582 DECL_SWIZZLE(EXT, 0, 0, 0, 1) \
583 DECL_SWIZZLE(EXT, 1, 0, 0, 0) \
584 DECL_CLEAR_ALPHA(EXT, 0) \
585 DECL_CLEAR_ALPHA(EXT, 1) \
586 DECL_CLEAR_ALPHA(EXT, 3) \
587 DECL_CLEAR_ZERO(EXT, 0) \
588 DECL_CLEAR_ZERO(EXT, 1) \
589 DECL_CLEAR_ZERO(EXT, 3) \
590 DECL_CLEAR(EXT, 0, 0, 0, 1) \
591 DECL_CLEAR(EXT, 1, 0, 0, 0) \
592 DECL_CLEAR(EXT, 1, 1, 0, 0) \
593 DECL_CLEAR(EXT, 0, 1, 1, 0) \
594 DECL_CLEAR(EXT, 0, 0, 1, 1) \
595 DECL_CLEAR(EXT, 1, 0, 1, 0) \
596 DECL_CLEAR(EXT, 0, 1, 0, 1) \
597 DECL_CLEAR(EXT, 0, 1, 1, 1) \
598 DECL_CLEAR(EXT, 1, 0, 1, 1) \
599 DECL_CLEAR(EXT, 1, 1, 0, 1) \
601 static const SwsOpTable ops8##EXT = { \
602 .cpu_flags = AV_CPU_FLAG_##FLAG, \
603 .block_size = SIZE, \
605 &op_read_planar1##EXT, \
606 &op_read_planar2##EXT, \
607 &op_read_planar3##EXT, \
608 &op_read_planar4##EXT, \
609 &op_write_planar1##EXT, \
610 &op_write_planar2##EXT, \
611 &op_write_planar3##EXT, \
612 &op_write_planar4##EXT, \
613 &op_read8_packed2##EXT, \
614 &op_read8_packed3##EXT, \
615 &op_read8_packed4##EXT, \
616 &op_write8_packed2##EXT, \
617 &op_write8_packed3##EXT, \
618 &op_write8_packed4##EXT, \
619 &op_read_nibbles1##EXT, \
620 &op_read_bits1##EXT, \
621 &op_write_bits1##EXT, \
622 &op_expand_bits8##EXT, \
623 &op_pack_1210##EXT, \
624 &op_pack_3320##EXT, \
625 &op_pack_2330##EXT, \
626 &op_unpack_1210##EXT, \
627 &op_unpack_3320##EXT, \
628 &op_unpack_2330##EXT, \
629 &op_swizzle_3012##EXT, \
630 &op_swizzle_3021##EXT, \
631 &op_swizzle_2103##EXT, \
632 &op_swizzle_3210##EXT, \
633 &op_swizzle_3102##EXT, \
634 &op_swizzle_3201##EXT, \
635 &op_swizzle_1203##EXT, \
636 &op_swizzle_1023##EXT, \
637 &op_swizzle_2013##EXT, \
638 &op_swizzle_2310##EXT, \
639 &op_swizzle_2130##EXT, \
640 &op_swizzle_1230##EXT, \
641 &op_swizzle_1320##EXT, \
642 &op_swizzle_0213##EXT, \
643 &op_swizzle_0231##EXT, \
644 &op_swizzle_0312##EXT, \
645 &op_swizzle_3120##EXT, \
646 &op_swizzle_0321##EXT, \
647 &op_swizzle_0003##EXT, \
648 &op_swizzle_0001##EXT, \
649 &op_swizzle_3000##EXT, \
650 &op_swizzle_1000##EXT, \
651 &op_clear_alpha0##EXT, \
652 &op_clear_alpha1##EXT, \
653 &op_clear_alpha3##EXT, \
654 &op_clear_zero0##EXT, \
655 &op_clear_zero1##EXT, \
656 &op_clear_zero3##EXT, \
657 REF_PATTERN(clear##EXT, 0, 0, 0, 1), \
658 REF_PATTERN(clear##EXT, 1, 0, 0, 0), \
659 REF_PATTERN(clear##EXT, 1, 1, 0, 0), \
660 REF_PATTERN(clear##EXT, 0, 1, 1, 0), \
661 REF_PATTERN(clear##EXT, 0, 0, 1, 1), \
662 REF_PATTERN(clear##EXT, 1, 0, 1, 0), \
663 REF_PATTERN(clear##EXT, 0, 1, 0, 1), \
664 REF_PATTERN(clear##EXT, 0, 1, 1, 1), \
665 REF_PATTERN(clear##EXT, 1, 0, 1, 1), \
666 REF_PATTERN(clear##EXT, 1, 1, 0, 1), \
671 #define DECL_FUNCS_16(SIZE, EXT, FLAG) \
672 DECL_PACKED_RW(EXT, 16) \
673 DECL_EXPAND_BITS(EXT, 16) \
674 DECL_PACK_UNPACK(EXT, U16, 4, 4, 4, 0) \
675 DECL_PACK_UNPACK(EXT, U16, 5, 5, 5, 0) \
676 DECL_PACK_UNPACK(EXT, U16, 5, 6, 5, 0) \
677 DECL_SWAP_BYTES(EXT, U16, 1, 0, 0, 0) \
678 DECL_SWAP_BYTES(EXT, U16, 1, 0, 0, 1) \
679 DECL_SWAP_BYTES(EXT, U16, 1, 1, 1, 0) \
680 DECL_SWAP_BYTES(EXT, U16, 1, 1, 1, 1) \
682 DECL_CONVERT(EXT, U8, U16) \
683 DECL_CONVERT(EXT, U16, U8) \
684 DECL_EXPAND(EXT, U8, U16) \
686 static const SwsOpTable ops16##EXT = { \
687 .cpu_flags = AV_CPU_FLAG_##FLAG, \
688 .block_size = SIZE, \
690 &op_read16_packed2##EXT, \
691 &op_read16_packed3##EXT, \
692 &op_read16_packed4##EXT, \
693 &op_write16_packed2##EXT, \
694 &op_write16_packed3##EXT, \
695 &op_write16_packed4##EXT, \
696 &op_pack_4440##EXT, \
697 &op_pack_5550##EXT, \
698 &op_pack_5650##EXT, \
699 &op_unpack_4440##EXT, \
700 &op_unpack_5550##EXT, \
701 &op_unpack_5650##EXT, \
702 &op_expand_bits16##EXT, \
703 REF_COMMON_PATTERNS(swap_bytes_U16##EXT), \
704 REF_COMMON_PATTERNS(convert_U8_U16##EXT), \
705 REF_COMMON_PATTERNS(convert_U16_U8##EXT), \
706 REF_COMMON_PATTERNS(expand_U8_U16##EXT), \
707 REF_COMMON_PATTERNS(lshift16##EXT), \
708 REF_COMMON_PATTERNS(rshift16##EXT), \
713 #define DECL_FUNCS_32(SIZE, EXT, FLAG) \
714 DECL_PACKED_RW(_m2##EXT, 32) \
715 DECL_PACK_UNPACK(_m2##EXT, U32, 10, 10, 10, 2) \
716 DECL_PACK_UNPACK(_m2##EXT, U32, 2, 10, 10, 10) \
717 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 0, 0, 0) \
718 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 0, 0, 1) \
719 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 1, 1, 0) \
720 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 1, 1, 1) \
721 DECL_CONVERT(EXT, U8, U32) \
722 DECL_CONVERT(EXT, U32, U8) \
723 DECL_CONVERT(EXT, U16, U32) \
724 DECL_CONVERT(EXT, U32, U16) \
725 DECL_CONVERT(EXT, U8, F32) \
726 DECL_CONVERT(EXT, F32, U8) \
727 DECL_CONVERT(EXT, U16, F32) \
728 DECL_CONVERT(EXT, F32, U16) \
729 DECL_EXPAND(EXT, U8, U32) \
733 DECL_DITHER(EXT, 1) \
734 DECL_DITHER(EXT, 2) \
735 DECL_DITHER(EXT, 3) \
736 DECL_DITHER(EXT, 4) \
737 DECL_DITHER(EXT, 5) \
738 DECL_DITHER(EXT, 6) \
739 DECL_DITHER(EXT, 7) \
740 DECL_DITHER(EXT, 8) \
741 DECL_LINEAR(EXT, luma, SWS_MASK_LUMA) \
742 DECL_LINEAR(EXT, alpha, SWS_MASK_ALPHA) \
743 DECL_LINEAR(EXT, lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) \
744 DECL_LINEAR(EXT, yalpha, SWS_MASK(1, 1)) \
745 DECL_LINEAR(EXT, dot3, 0x7) \
746 DECL_LINEAR(EXT, dot3a, 0x7 | SWS_MASK_ALPHA) \
747 DECL_LINEAR(EXT, row0, SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) \
748 DECL_LINEAR(EXT, diag3, SWS_MASK_DIAG3) \
749 DECL_LINEAR(EXT, diag4, SWS_MASK_DIAG4) \
750 DECL_LINEAR(EXT, diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3) \
751 DECL_LINEAR(EXT, affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3) \
752 DECL_LINEAR(EXT, affine3uv, \
753 SWS_MASK_MAT3 | SWS_MASK_OFF(1) | SWS_MASK_OFF(2)) \
754 DECL_LINEAR(EXT, affine3x, \
755 SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) \
756 DECL_LINEAR(EXT, affine3xa, \
757 SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3 | SWS_MASK_ALPHA) \
758 DECL_LINEAR(EXT, affine3xy, \
759 SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) \
760 DECL_LINEAR(EXT, affine3a, \
761 SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA) \
762 DECL_FILTERS_GENERIC(EXT, U8) \
763 DECL_FILTERS_GENERIC(EXT, U16) \
764 DECL_FILTERS_GENERIC(EXT, F32) \
766 static const SwsOpTable ops32##EXT = { \
767 .cpu_flags = AV_CPU_FLAG_##FLAG, \
768 .block_size = SIZE, \
770 &op_read32_packed2_m2##EXT, \
771 &op_read32_packed3_m2##EXT, \
772 &op_read32_packed4_m2##EXT, \
773 &op_write32_packed2_m2##EXT, \
774 &op_write32_packed3_m2##EXT, \
775 &op_write32_packed4_m2##EXT, \
776 &op_pack_1010102_m2##EXT, \
777 &op_pack_2101010_m2##EXT, \
778 &op_unpack_1010102_m2##EXT, \
779 &op_unpack_2101010_m2##EXT, \
780 REF_COMMON_PATTERNS(swap_bytes_U32_m2##EXT), \
781 REF_COMMON_PATTERNS(convert_U8_U32##EXT), \
782 REF_COMMON_PATTERNS(convert_U32_U8##EXT), \
783 REF_COMMON_PATTERNS(convert_U16_U32##EXT), \
784 REF_COMMON_PATTERNS(convert_U32_U16##EXT), \
785 REF_COMMON_PATTERNS(convert_U8_F32##EXT), \
786 REF_COMMON_PATTERNS(convert_F32_U8##EXT), \
787 REF_COMMON_PATTERNS(convert_U16_F32##EXT), \
788 REF_COMMON_PATTERNS(convert_F32_U16##EXT), \
789 REF_COMMON_PATTERNS(expand_U8_U32##EXT), \
790 REF_COMMON_PATTERNS(min##EXT), \
791 REF_COMMON_PATTERNS(max##EXT), \
792 REF_COMMON_PATTERNS(scale##EXT), \
793 REF_COMMON_PATTERNS(dither0##EXT), \
813 &op_affine3uv##EXT, \
815 &op_affine3xa##EXT, \
816 &op_affine3xy##EXT, \
818 REF_FILTERS(filter_fma_v, _U8##EXT), \
819 REF_FILTERS(filter_fma_v, _U16##EXT), \
820 REF_FILTERS(filter_fma_v, _F32##EXT), \
821 REF_FILTERS(filter_4x4_h, _U8##EXT), \
822 REF_FILTERS(filter_4x4_h, _U16##EXT), \
823 REF_FILTERS(filter_4x4_h, _F32##EXT), \
824 REF_FILTERS(filter_v, _U8##EXT), \
825 REF_FILTERS(filter_v, _U16##EXT), \
826 REF_FILTERS(filter_v, _F32##EXT), \
827 REF_FILTERS(filter_h, _U8##EXT), \
828 REF_FILTERS(filter_h, _U16##EXT), \
829 REF_FILTERS(filter_h, _F32##EXT), \
875 return !(
op->rw.elems > 1 &&
op->rw.packed) && !
op->rw.frac && !
op->rw.filter;
884 static int movsize(
const int bytes,
const int mmsize)
886 return bytes <= 4 ? 4 :
907 const int num_lanes = mmsize / 16;
915 .block_size = pixels * num_lanes,
916 .over_read =
movsize(in_total, mmsize) - in_total,
917 .over_write =
movsize(out_total, mmsize) - out_total,
926 #define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT) \
928 SWS_DECL_FUNC(ff_packed_shuffle##IN##_##OUT##_##EXT); \
929 if (in_total == IN && out_total == OUT) \
930 out->func = ff_packed_shuffle##IN##_##OUT##_##EXT; \
958 static_assert(
sizeof(uint32_t) ==
sizeof(
int),
"int size mismatch");
967 for (
int i = 0;
i < 4;
i++) {
971 case 1:
c.u32 = 0x1010101
U * res.
priv.
u8[
i];
break;
976 op->clear.value[
i].num =
c.i;
977 op->clear.value[
i].den = 1;
1008 int op_block_size =
out->block_size;
1019 op, op_block_size, chain);
1027 #define ASSIGN_PROCESS_FUNC(NAME) \
1029 SWS_DECL_FUNC(NAME); \
1035 const int read_planes =
read ? (
read->rw.packed ? 1 :
read->rw.elems) : 0;
1037 switch (
FFMAX(read_planes, write_planes)) {
Copyright (C) 2025 Niklas Haas.
static bool check_filter_fma(const SwsImplParams *params)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out)
#define ASSIGN_PROCESS_FUNC(NAME)
static av_const int get_mmsize(const int cpu_flags)
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
int ff_sws_op_list_max_size(const SwsOpList *ops)
Returns the size of the largest pixel type used in ops.
const SwsOpBackend backend_x86
int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], int num_tables, const SwsOp *op, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
static void normalize_clear(SwsOp *op)
Represents a computed filter kernel.
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
#define DECL_FUNCS_32(SIZE, EXT, FLAG)
void(* filter)(uint8_t *src, int stride, int qscale)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
int ff_sws_pixel_type_size(SwsPixelType type)
void * av_memdup(const void *p, size_t size)
Duplicate a buffer with av_malloc().
static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
#define SWS_COMP_TEST(mask, X)
bool ff_sws_pixel_type_is_int(SwsPixelType type)
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
#define AV_CPU_FLAG_AVX512
AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used.
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
#define FF_ARRAY_ELEMS(a)
SwsOpChain * ff_sws_op_chain_alloc(void)
int flags
Flags modifying the (de)muxer behaviour.
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
static AVFormatContext * ctx
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
SwsFilterWeights * kernel
Compiled "chain" of operations, which can be dispatched efficiently.
Rational number (pair of numerator and denominator).
static const SwsOpTable *const tables[]
static bool check_filter_4x4_h(const SwsImplParams *params)
static int setup_rw(const SwsImplParams *params, SwsImplResult *out)
static int solve_shuffle(const SwsOpList *ops, int mmsize, SwsCompiledOp *out)
static int setup_filter_4x4_h(const SwsImplParams *params, SwsImplResult *out)
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
#define AV_CPU_FLAG_AVX2
AVX2 functions: requires OS support even if YMM registers aren't used.
#define i(width, name, range_min, range_max)
static int movsize(const int bytes, const int mmsize)
static int setup_swap_bytes(const SwsImplParams *params, SwsImplResult *out)
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
void ff_sws_op_chain_free_cb(void *ptr)
static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
static void ff_sws_op_chain_free(SwsOpChain *chain)
static const int weights[]
static bool op_is_type_invariant(const SwsOp *op)
Returns true if the operation's implementation only depends on the block size, and not the underlying...
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
static int hscale_sizeof_weight(const SwsOp *op)
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
void * av_calloc(size_t nmemb, size_t size)
static void ff_op_priv_free(SwsOpPriv *priv)
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
static int setup_shift(const SwsImplParams *params, SwsImplResult *out)
#define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT)
int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
uint8_t elems
Examples: rgba = 4x u8 packed yuv444p = 3x u8 rgb565 = 1x u16 <- use SWS_OP_UNPACK to unpack monow = ...
static void scale(int *out, const int *in, const int w, const int h, const int shift)
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
#define DECL_FUNCS_16(SIZE, EXT, FLAG)
#define xi(width, name, var, range_min, range_max, subs,...)
Helper struct for representing a list of operations.
#define DECL_FUNCS_8(SIZE, EXT, FLAG)
Main external API structure.
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)