Go to the documentation of this file.
26 #include "../ops_chain.h"
28 #define DECL_ENTRY(TYPE, NAME, ...) \
29 static const SwsOpEntry op_##NAME = { \
30 .type = SWS_PIXEL_##TYPE, \
34 #define DECL_ASM(TYPE, NAME, ...) \
35 void ff_##NAME(void); \
36 DECL_ENTRY(TYPE, NAME, \
40 #define DECL_PATTERN(TYPE, NAME, X, Y, Z, W, ...) \
41 DECL_ASM(TYPE, p##X##Y##Z##W##_##NAME, \
42 .unused = { !X, !Y, !Z, !W }, \
/* Expands to the address of the op entry for component pattern X/Y/Z/W of
 * NAME, i.e. &op_p<X><Y><Z><W>_<NAME>. The identifier is built by token
 * pasting and matches the entry name that DECL_PATTERN passes down to
 * DECL_ASM / DECL_ENTRY (op_##NAME with NAME = p##X##Y##Z##W##_##NAME). */
46 #define REF_PATTERN(NAME, X, Y, Z, W) \
47 &op_p##X##Y##Z##W##_##NAME
/* Declares the op NAME of pixel type TYPE once for each of four component
 * patterns: 1000, 1001, 1110 and 1111. Any additional arguments are
 * forwarded unchanged (via __VA_ARGS__) to every DECL_PATTERN invocation.
 * The matching list of references is produced by REF_COMMON_PATTERNS. */
49 #define DECL_COMMON_PATTERNS(TYPE, NAME, ...) \
50 DECL_PATTERN(TYPE, NAME, 1, 0, 0, 0, __VA_ARGS__); \
51 DECL_PATTERN(TYPE, NAME, 1, 0, 0, 1, __VA_ARGS__); \
52 DECL_PATTERN(TYPE, NAME, 1, 1, 1, 0, __VA_ARGS__); \
53 DECL_PATTERN(TYPE, NAME, 1, 1, 1, 1, __VA_ARGS__) \
/* Expands to four comma-separated REF_PATTERN references for NAME, one per
 * pattern declared by DECL_COMMON_PATTERNS (1000, 1001, 1110, 1111), in
 * that order. There is no trailing comma after the last reference. */
55 #define REF_COMMON_PATTERNS(NAME) \
56 REF_PATTERN(NAME, 1, 0, 0, 0), \
57 REF_PATTERN(NAME, 1, 0, 0, 1), \
58 REF_PATTERN(NAME, 1, 1, 1, 0), \
59 REF_PATTERN(NAME, 1, 1, 1, 1)
66 if (
op->rw.packed &&
op->rw.elems == 3) {
76 #define DECL_RW(EXT, TYPE, NAME, OP, ELEMS, PACKED, FRAC) \
77 DECL_ASM(TYPE, NAME##ELEMS##EXT, \
79 .rw = { .elems = ELEMS, .packed = PACKED, .frac = FRAC }, \
/* Declares the packed read and write ops for a given bit DEPTH: three READ
 * entries (read<DEPTH>_packed) and three WRITE entries (write<DEPTH>_packed),
 * for 2, 3 and 4 elements each. All six use pixel type U<DEPTH>, with
 * PACKED = true and FRAC = 0. DECL_RW appends the element count and EXT to
 * the name (NAME##ELEMS##EXT). */
83 #define DECL_PACKED_RW(EXT, DEPTH) \
84 DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed, READ, 2, true, 0) \
85 DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed, READ, 3, true, 0) \
86 DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed, READ, 4, true, 0) \
87 DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 2, true, 0) \
88 DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 3, true, 0) \
89 DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 4, true, 0) \
91 #define DECL_PACK_UNPACK(EXT, TYPE, X, Y, Z, W) \
92 DECL_ASM(TYPE, pack_##X##Y##Z##W##EXT, \
94 .pack.pattern = {X, Y, Z, W}, \
97 DECL_ASM(TYPE, unpack_##X##Y##Z##W##EXT, \
98 .op = SWS_OP_UNPACK, \
99 .pack.pattern = {X, Y, Z, W}, \
105 for (
int i = 0;
i < 16;
i++)
110 #define DECL_SWAP_BYTES(EXT, TYPE, X, Y, Z, W) \
111 DECL_ENTRY(TYPE, p##X##Y##Z##W##_swap_bytes_##TYPE##EXT, \
112 .op = SWS_OP_SWAP_BYTES, \
113 .unused = { !X, !Y, !Z, !W }, \
114 .func = ff_p##X##Y##Z##W##_shuffle##EXT, \
115 .setup = setup_swap_bytes, \
118 #define DECL_CLEAR_ALPHA(EXT, IDX) \
119 DECL_ASM(U8, clear_alpha##IDX##EXT, \
120 .op = SWS_OP_CLEAR, \
122 .unused[IDX] = true, \
125 #define DECL_CLEAR_ZERO(EXT, IDX) \
126 DECL_ASM(U8, clear_zero##IDX##EXT, \
127 .op = SWS_OP_CLEAR, \
129 .unused[IDX] = true, \
135 for (
int i = 0;
i < 4;
i++)
136 out->priv.u32[
i] = (uint32_t)
op->clear.value[
i].num;
140 #define DECL_CLEAR(EXT, X, Y, Z, W) \
141 DECL_PATTERN(U8, clear##EXT, X, Y, Z, W, \
142 .op = SWS_OP_CLEAR, \
143 .setup = setup_clear, \
147 #define DECL_SWIZZLE(EXT, X, Y, Z, W) \
148 DECL_ASM(U8, swizzle_##X##Y##Z##W##EXT, \
149 .op = SWS_OP_SWIZZLE, \
150 .swizzle.in = {X, Y, Z, W}, \
153 #define DECL_CONVERT(EXT, FROM, TO) \
154 DECL_COMMON_PATTERNS(FROM, convert_##FROM##_##TO##EXT, \
155 .op = SWS_OP_CONVERT, \
156 .convert.to = SWS_PIXEL_##TO, \
159 #define DECL_EXPAND(EXT, FROM, TO) \
160 DECL_COMMON_PATTERNS(FROM, expand_##FROM##_##TO##EXT, \
161 .op = SWS_OP_CONVERT, \
162 .convert.to = SWS_PIXEL_##TO, \
163 .convert.expand = true, \
172 #define DECL_SHIFT16(EXT) \
173 DECL_COMMON_PATTERNS(U16, lshift16##EXT, \
174 .op = SWS_OP_LSHIFT, \
175 .setup = setup_shift, \
179 DECL_COMMON_PATTERNS(U16, rshift16##EXT, \
180 .op = SWS_OP_RSHIFT, \
181 .setup = setup_shift, \
185 #define DECL_MIN_MAX(EXT) \
186 DECL_COMMON_PATTERNS(F32, min##EXT, \
188 .setup = ff_sws_setup_clamp, \
192 DECL_COMMON_PATTERNS(F32, max##EXT, \
194 .setup = ff_sws_setup_clamp, \
198 #define DECL_SCALE(EXT) \
199 DECL_COMMON_PATTERNS(F32, scale##EXT, \
200 .op = SWS_OP_SCALE, \
201 .setup = ff_sws_setup_scale, \
205 #define DECL_EXPAND_BITS(EXT, BITS) \
206 DECL_ASM(U##BITS, expand_bits##BITS##EXT, \
207 .op = SWS_OP_SCALE, \
208 .scale = { .num = ((1 << (BITS)) - 1), .den = 1 }, \
215 if (!
op->dither.size_log2) {
221 const int size = 1 <<
op->dither.size_log2;
222 const int8_t *off =
op->dither.y_offset;
224 for (
int i = 0;
i < 4;
i++) {
226 max_offset =
FFMAX(max_offset, off[
i] & (
size - 1));
234 const int num_rows =
size + max_offset;
241 matrix[
i] = (
float)
op->dither.matrix[
i].num /
op->dither.matrix[
i].den;
246 static_assert(
sizeof(
out->priv.ptr) <=
sizeof(int16_t[4]),
247 ">8 byte pointers not supported");
248 assert(max_offset *
stride <= INT16_MAX);
249 int16_t *off_out = &
out->priv.i16[4];
250 for (
int i = 0;
i < 4;
i++)
251 off_out[
i] = off[
i] >= 0 ? (off[
i] & (
size - 1)) *
stride : -1;
256 #define DECL_DITHER(DECL_MACRO, EXT, SIZE) \
257 DECL_MACRO(F32, dither##SIZE##EXT, \
258 .op = SWS_OP_DITHER, \
259 .setup = setup_dither, \
260 .dither_size = SIZE, \
272 for (
int y = 0; y < 4; y++) {
273 for (
int x = 0; x < 5; x++)
274 matrix[y * 5 + x] = (
float)
op->lin.m[y][x].num /
op->lin.m[y][x].den;
280 #define DECL_LINEAR(EXT, NAME, MASK) \
281 DECL_ASM(F32, NAME##EXT, \
282 .op = SWS_OP_LINEAR, \
283 .setup = setup_linear, \
284 .linear_mask = (MASK), \
302 for (
int i = 0;
i <
op->rw.elems;
i++) {
315 static_assert(
sizeof(
out->priv.ptr) <=
sizeof(
int32_t[2]),
316 ">8 byte pointers not supported");
323 for (
int i = 0;
i <
filter->num_weights;
i++)
353 const int filter_size =
filter->filter_size;
355 const size_t aligned_size =
FFALIGN(filter_size, taps_align);
356 const size_t line_size =
FFALIGN(
filter->dst_size, block_size);
358 if (aligned_size > INT_MAX)
375 const int mmsize = block_size * 2;
376 const int gather_size = mmsize /
sizeof(
int32_t);
377 for (
size_t x = 0; x < line_size; x += block_size) {
378 const int elems =
FFMIN(block_size,
filter->dst_size - x);
379 for (
int j = 0; j < filter_size; j++) {
380 const int jb = j & ~(taps_align - 1);
381 const int ji = j - jb;
382 const size_t idx_base = x * aligned_size + jb * block_size + ji;
383 for (
int i = 0;
i < elems;
i++) {
384 const int w =
filter->weights[(x +
i) * filter_size + j];
385 size_t idx = idx_base;
397 const int gather_base =
i & ~(gather_size - 1);
398 const int gather_pos =
i - gather_base;
399 const int lane_idx = gather_pos >> 2;
400 const int pos_in_lane = gather_pos & 3;
401 idx += gather_base * 4
402 + (pos_in_lane >> 1) * (mmsize / 2)
404 + (pos_in_lane & 1) * 4;
406 idx +=
i * taps_align;
419 out->priv.uptr[1] = aligned_size;
454 const int taps_align = 16 / sizeof_weights;
455 const int pixels_align = 4;
456 const int filter_size =
filter->filter_size;
457 const size_t aligned_size =
FFALIGN(filter_size, taps_align);
483 for (
int x = 0; x <
filter->dst_size; x++) {
484 for (
int j = 0; j < filter_size; j++) {
485 const int xb = x & ~(pixels_align - 1);
486 const int jb = j & ~(taps_align - 1);
487 const int xi = x - xb, ji = j - jb;
488 const int w =
filter->weights[x * filter_size + j];
489 const int idx = xb * aligned_size + jb * pixels_align +
xi * taps_align + ji;
500 out->priv.uptr[1] = aligned_size * sizeof_weights;
505 #define DECL_FILTER(EXT, TYPE, DIR, NAME, ELEMS, ...) \
506 DECL_ASM(TYPE, NAME##ELEMS##_##TYPE##EXT, \
509 .rw.filter = SWS_OP_FILTER_##DIR, \
/* Declares one filter op per element count: invokes DECL_FILTER four times
 * with ELEMS = 1, 2, 3 and 4, forwarding EXT, TYPE, DIR, NAME and any extra
 * arguments unchanged. */
513 #define DECL_FILTERS(EXT, TYPE, DIR, NAME, ...) \
514 DECL_FILTER(EXT, TYPE, DIR, NAME, 1, __VA_ARGS__) \
515 DECL_FILTER(EXT, TYPE, DIR, NAME, 2, __VA_ARGS__) \
516 DECL_FILTER(EXT, TYPE, DIR, NAME, 3, __VA_ARGS__) \
517 DECL_FILTER(EXT, TYPE, DIR, NAME, 4, __VA_ARGS__)
/* Declares, for one pixel TYPE, four families of filter ops (each expanded
 * by DECL_FILTERS into 1..4 element variants):
 *   - filter_v:     DIR = V, .setup = setup_filter_v
 *   - filter_fma_v: DIR = V, .setup = setup_filter_v, .check = check_filter_fma
 *   - filter_h:     DIR = H, .setup = setup_filter_h
 *   - filter_4x4_h: DIR = H, .setup = setup_filter_4x4_h,
 *                   .check = check_filter_4x4_h
 */
519 #define DECL_FILTERS_GENERIC(EXT, TYPE) \
520 DECL_FILTERS(EXT, TYPE, V, filter_v, .setup = setup_filter_v) \
521 DECL_FILTERS(EXT, TYPE, V, filter_fma_v, .setup = setup_filter_v, \
522 .check = check_filter_fma) \
523 DECL_FILTERS(EXT, TYPE, H, filter_h, .setup = setup_filter_h) \
524 DECL_FILTERS(EXT, TYPE, H, filter_4x4_h, .setup = setup_filter_4x4_h, \
525 .check = check_filter_4x4_h)
/* Expands to four comma-separated addresses, &op_<NAME>1<SUFFIX> through
 * &op_<NAME>4<SUFFIX>, one per element count. With SUFFIX of the form
 * _<TYPE><EXT> this matches the entry names generated by DECL_FILTER
 * (NAME##ELEMS##_##TYPE##EXT). No trailing comma is emitted. */
527 #define REF_FILTERS(NAME, SUFFIX) \
528 &op_##NAME##1##SUFFIX, \
529 &op_##NAME##2##SUFFIX, \
530 &op_##NAME##3##SUFFIX, \
531 &op_##NAME##4##SUFFIX
533 #define DECL_FUNCS_8(SIZE, EXT, FLAG) \
534 DECL_RW(EXT, U8, read_planar, READ, 1, false, 0) \
535 DECL_RW(EXT, U8, read_planar, READ, 2, false, 0) \
536 DECL_RW(EXT, U8, read_planar, READ, 3, false, 0) \
537 DECL_RW(EXT, U8, read_planar, READ, 4, false, 0) \
538 DECL_RW(EXT, U8, write_planar, WRITE, 1, false, 0) \
539 DECL_RW(EXT, U8, write_planar, WRITE, 2, false, 0) \
540 DECL_RW(EXT, U8, write_planar, WRITE, 3, false, 0) \
541 DECL_RW(EXT, U8, write_planar, WRITE, 4, false, 0) \
542 DECL_RW(EXT, U8, read_nibbles, READ, 1, false, 1) \
543 DECL_RW(EXT, U8, read_bits, READ, 1, false, 3) \
544 DECL_RW(EXT, U8, write_bits, WRITE, 1, false, 3) \
545 DECL_EXPAND_BITS(EXT, 8) \
546 DECL_PACKED_RW(EXT, 8) \
547 DECL_PACK_UNPACK(EXT, U8, 1, 2, 1, 0) \
548 DECL_PACK_UNPACK(EXT, U8, 3, 3, 2, 0) \
549 DECL_PACK_UNPACK(EXT, U8, 2, 3, 3, 0) \
550 void ff_p1000_shuffle##EXT(void); \
551 void ff_p1001_shuffle##EXT(void); \
552 void ff_p1110_shuffle##EXT(void); \
553 void ff_p1111_shuffle##EXT(void); \
554 DECL_SWIZZLE(EXT, 3, 0, 1, 2) \
555 DECL_SWIZZLE(EXT, 3, 0, 2, 1) \
556 DECL_SWIZZLE(EXT, 2, 1, 0, 3) \
557 DECL_SWIZZLE(EXT, 3, 2, 1, 0) \
558 DECL_SWIZZLE(EXT, 3, 1, 0, 2) \
559 DECL_SWIZZLE(EXT, 3, 2, 0, 1) \
560 DECL_SWIZZLE(EXT, 1, 2, 0, 3) \
561 DECL_SWIZZLE(EXT, 1, 0, 2, 3) \
562 DECL_SWIZZLE(EXT, 2, 0, 1, 3) \
563 DECL_SWIZZLE(EXT, 2, 3, 1, 0) \
564 DECL_SWIZZLE(EXT, 2, 1, 3, 0) \
565 DECL_SWIZZLE(EXT, 1, 2, 3, 0) \
566 DECL_SWIZZLE(EXT, 1, 3, 2, 0) \
567 DECL_SWIZZLE(EXT, 0, 2, 1, 3) \
568 DECL_SWIZZLE(EXT, 0, 2, 3, 1) \
569 DECL_SWIZZLE(EXT, 0, 3, 1, 2) \
570 DECL_SWIZZLE(EXT, 3, 1, 2, 0) \
571 DECL_SWIZZLE(EXT, 0, 3, 2, 1) \
572 DECL_SWIZZLE(EXT, 0, 0, 0, 3) \
573 DECL_SWIZZLE(EXT, 3, 0, 0, 0) \
574 DECL_SWIZZLE(EXT, 0, 0, 0, 1) \
575 DECL_SWIZZLE(EXT, 1, 0, 0, 0) \
576 DECL_CLEAR_ALPHA(EXT, 0) \
577 DECL_CLEAR_ALPHA(EXT, 1) \
578 DECL_CLEAR_ALPHA(EXT, 3) \
579 DECL_CLEAR_ZERO(EXT, 0) \
580 DECL_CLEAR_ZERO(EXT, 1) \
581 DECL_CLEAR_ZERO(EXT, 3) \
582 DECL_CLEAR(EXT, 1, 1, 1, 0) \
583 DECL_CLEAR(EXT, 0, 1, 1, 1) \
584 DECL_CLEAR(EXT, 0, 0, 1, 1) \
585 DECL_CLEAR(EXT, 1, 0, 0, 1) \
586 DECL_CLEAR(EXT, 1, 1, 0, 0) \
587 DECL_CLEAR(EXT, 0, 1, 0, 1) \
588 DECL_CLEAR(EXT, 1, 0, 1, 0) \
589 DECL_CLEAR(EXT, 1, 0, 0, 0) \
590 DECL_CLEAR(EXT, 0, 1, 0, 0) \
591 DECL_CLEAR(EXT, 0, 0, 1, 0) \
593 static const SwsOpTable ops8##EXT = { \
594 .cpu_flags = AV_CPU_FLAG_##FLAG, \
595 .block_size = SIZE, \
597 &op_read_planar1##EXT, \
598 &op_read_planar2##EXT, \
599 &op_read_planar3##EXT, \
600 &op_read_planar4##EXT, \
601 &op_write_planar1##EXT, \
602 &op_write_planar2##EXT, \
603 &op_write_planar3##EXT, \
604 &op_write_planar4##EXT, \
605 &op_read8_packed2##EXT, \
606 &op_read8_packed3##EXT, \
607 &op_read8_packed4##EXT, \
608 &op_write8_packed2##EXT, \
609 &op_write8_packed3##EXT, \
610 &op_write8_packed4##EXT, \
611 &op_read_nibbles1##EXT, \
612 &op_read_bits1##EXT, \
613 &op_write_bits1##EXT, \
614 &op_expand_bits8##EXT, \
615 &op_pack_1210##EXT, \
616 &op_pack_3320##EXT, \
617 &op_pack_2330##EXT, \
618 &op_unpack_1210##EXT, \
619 &op_unpack_3320##EXT, \
620 &op_unpack_2330##EXT, \
621 &op_swizzle_3012##EXT, \
622 &op_swizzle_3021##EXT, \
623 &op_swizzle_2103##EXT, \
624 &op_swizzle_3210##EXT, \
625 &op_swizzle_3102##EXT, \
626 &op_swizzle_3201##EXT, \
627 &op_swizzle_1203##EXT, \
628 &op_swizzle_1023##EXT, \
629 &op_swizzle_2013##EXT, \
630 &op_swizzle_2310##EXT, \
631 &op_swizzle_2130##EXT, \
632 &op_swizzle_1230##EXT, \
633 &op_swizzle_1320##EXT, \
634 &op_swizzle_0213##EXT, \
635 &op_swizzle_0231##EXT, \
636 &op_swizzle_0312##EXT, \
637 &op_swizzle_3120##EXT, \
638 &op_swizzle_0321##EXT, \
639 &op_swizzle_0003##EXT, \
640 &op_swizzle_0001##EXT, \
641 &op_swizzle_3000##EXT, \
642 &op_swizzle_1000##EXT, \
643 &op_clear_alpha0##EXT, \
644 &op_clear_alpha1##EXT, \
645 &op_clear_alpha3##EXT, \
646 &op_clear_zero0##EXT, \
647 &op_clear_zero1##EXT, \
648 &op_clear_zero3##EXT, \
649 REF_PATTERN(clear##EXT, 1, 1, 1, 0), \
650 REF_PATTERN(clear##EXT, 0, 1, 1, 1), \
651 REF_PATTERN(clear##EXT, 0, 0, 1, 1), \
652 REF_PATTERN(clear##EXT, 1, 0, 0, 1), \
653 REF_PATTERN(clear##EXT, 1, 1, 0, 0), \
654 REF_PATTERN(clear##EXT, 0, 1, 0, 1), \
655 REF_PATTERN(clear##EXT, 1, 0, 1, 0), \
656 REF_PATTERN(clear##EXT, 1, 0, 0, 0), \
657 REF_PATTERN(clear##EXT, 0, 1, 0, 0), \
658 REF_PATTERN(clear##EXT, 0, 0, 1, 0), \
663 #define DECL_FUNCS_16(SIZE, EXT, FLAG) \
664 DECL_PACKED_RW(EXT, 16) \
665 DECL_EXPAND_BITS(EXT, 16) \
666 DECL_PACK_UNPACK(EXT, U16, 4, 4, 4, 0) \
667 DECL_PACK_UNPACK(EXT, U16, 5, 5, 5, 0) \
668 DECL_PACK_UNPACK(EXT, U16, 5, 6, 5, 0) \
669 DECL_SWAP_BYTES(EXT, U16, 1, 0, 0, 0) \
670 DECL_SWAP_BYTES(EXT, U16, 1, 0, 0, 1) \
671 DECL_SWAP_BYTES(EXT, U16, 1, 1, 1, 0) \
672 DECL_SWAP_BYTES(EXT, U16, 1, 1, 1, 1) \
674 DECL_CONVERT(EXT, U8, U16) \
675 DECL_CONVERT(EXT, U16, U8) \
676 DECL_EXPAND(EXT, U8, U16) \
678 static const SwsOpTable ops16##EXT = { \
679 .cpu_flags = AV_CPU_FLAG_##FLAG, \
680 .block_size = SIZE, \
682 &op_read16_packed2##EXT, \
683 &op_read16_packed3##EXT, \
684 &op_read16_packed4##EXT, \
685 &op_write16_packed2##EXT, \
686 &op_write16_packed3##EXT, \
687 &op_write16_packed4##EXT, \
688 &op_pack_4440##EXT, \
689 &op_pack_5550##EXT, \
690 &op_pack_5650##EXT, \
691 &op_unpack_4440##EXT, \
692 &op_unpack_5550##EXT, \
693 &op_unpack_5650##EXT, \
694 &op_expand_bits16##EXT, \
695 REF_COMMON_PATTERNS(swap_bytes_U16##EXT), \
696 REF_COMMON_PATTERNS(convert_U8_U16##EXT), \
697 REF_COMMON_PATTERNS(convert_U16_U8##EXT), \
698 REF_COMMON_PATTERNS(expand_U8_U16##EXT), \
699 REF_COMMON_PATTERNS(lshift16##EXT), \
700 REF_COMMON_PATTERNS(rshift16##EXT), \
705 #define DECL_FUNCS_32(SIZE, EXT, FLAG) \
706 DECL_PACKED_RW(_m2##EXT, 32) \
707 DECL_PACK_UNPACK(_m2##EXT, U32, 10, 10, 10, 2) \
708 DECL_PACK_UNPACK(_m2##EXT, U32, 2, 10, 10, 10) \
709 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 0, 0, 0) \
710 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 0, 0, 1) \
711 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 1, 1, 0) \
712 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 1, 1, 1) \
713 DECL_CONVERT(EXT, U8, U32) \
714 DECL_CONVERT(EXT, U32, U8) \
715 DECL_CONVERT(EXT, U16, U32) \
716 DECL_CONVERT(EXT, U32, U16) \
717 DECL_CONVERT(EXT, U8, F32) \
718 DECL_CONVERT(EXT, F32, U8) \
719 DECL_CONVERT(EXT, U16, F32) \
720 DECL_CONVERT(EXT, F32, U16) \
721 DECL_EXPAND(EXT, U8, U32) \
724 DECL_DITHER(DECL_COMMON_PATTERNS, EXT, 0) \
725 DECL_DITHER(DECL_ASM, EXT, 1) \
726 DECL_DITHER(DECL_ASM, EXT, 2) \
727 DECL_DITHER(DECL_ASM, EXT, 3) \
728 DECL_DITHER(DECL_ASM, EXT, 4) \
729 DECL_DITHER(DECL_ASM, EXT, 5) \
730 DECL_DITHER(DECL_ASM, EXT, 6) \
731 DECL_DITHER(DECL_ASM, EXT, 7) \
732 DECL_DITHER(DECL_ASM, EXT, 8) \
733 DECL_LINEAR(EXT, luma, SWS_MASK_LUMA) \
734 DECL_LINEAR(EXT, alpha, SWS_MASK_ALPHA) \
735 DECL_LINEAR(EXT, lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) \
736 DECL_LINEAR(EXT, dot3, 0x7) \
737 DECL_LINEAR(EXT, row0, SWS_MASK_ROW(0)) \
738 DECL_LINEAR(EXT, row0a, SWS_MASK_ROW(0) | SWS_MASK_ALPHA) \
739 DECL_LINEAR(EXT, diag3, SWS_MASK_DIAG3) \
740 DECL_LINEAR(EXT, diag4, SWS_MASK_DIAG4) \
741 DECL_LINEAR(EXT, diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3) \
742 DECL_LINEAR(EXT, matrix3, SWS_MASK_MAT3) \
743 DECL_LINEAR(EXT, affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3) \
744 DECL_LINEAR(EXT, affine3a, SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA) \
745 DECL_LINEAR(EXT, matrix4, SWS_MASK_MAT4) \
746 DECL_LINEAR(EXT, affine4, SWS_MASK_MAT4 | SWS_MASK_OFF4) \
747 DECL_FILTERS_GENERIC(EXT, U8) \
748 DECL_FILTERS_GENERIC(EXT, U16) \
749 DECL_FILTERS_GENERIC(EXT, F32) \
751 static const SwsOpTable ops32##EXT = { \
752 .cpu_flags = AV_CPU_FLAG_##FLAG, \
753 .block_size = SIZE, \
755 &op_read32_packed2_m2##EXT, \
756 &op_read32_packed3_m2##EXT, \
757 &op_read32_packed4_m2##EXT, \
758 &op_write32_packed2_m2##EXT, \
759 &op_write32_packed3_m2##EXT, \
760 &op_write32_packed4_m2##EXT, \
761 &op_pack_1010102_m2##EXT, \
762 &op_pack_2101010_m2##EXT, \
763 &op_unpack_1010102_m2##EXT, \
764 &op_unpack_2101010_m2##EXT, \
765 REF_COMMON_PATTERNS(swap_bytes_U32_m2##EXT), \
766 REF_COMMON_PATTERNS(convert_U8_U32##EXT), \
767 REF_COMMON_PATTERNS(convert_U32_U8##EXT), \
768 REF_COMMON_PATTERNS(convert_U16_U32##EXT), \
769 REF_COMMON_PATTERNS(convert_U32_U16##EXT), \
770 REF_COMMON_PATTERNS(convert_U8_F32##EXT), \
771 REF_COMMON_PATTERNS(convert_F32_U8##EXT), \
772 REF_COMMON_PATTERNS(convert_U16_F32##EXT), \
773 REF_COMMON_PATTERNS(convert_F32_U16##EXT), \
774 REF_COMMON_PATTERNS(expand_U8_U32##EXT), \
775 REF_COMMON_PATTERNS(min##EXT), \
776 REF_COMMON_PATTERNS(max##EXT), \
777 REF_COMMON_PATTERNS(scale##EXT), \
778 REF_COMMON_PATTERNS(dither0##EXT), \
801 REF_FILTERS(filter_fma_v, _U8##EXT), \
802 REF_FILTERS(filter_fma_v, _U16##EXT), \
803 REF_FILTERS(filter_fma_v, _F32##EXT), \
804 REF_FILTERS(filter_4x4_h, _U8##EXT), \
805 REF_FILTERS(filter_4x4_h, _U16##EXT), \
806 REF_FILTERS(filter_4x4_h, _F32##EXT), \
807 REF_FILTERS(filter_v, _U8##EXT), \
808 REF_FILTERS(filter_v, _U16##EXT), \
809 REF_FILTERS(filter_v, _F32##EXT), \
810 REF_FILTERS(filter_h, _U8##EXT), \
811 REF_FILTERS(filter_h, _U16##EXT), \
812 REF_FILTERS(filter_h, _F32##EXT), \
858 return !(
op->rw.elems > 1 &&
op->rw.packed) && !
op->rw.frac && !
op->rw.filter;
883 const int num_lanes = mmsize / 16;
886 const int read_size = in_total <= 4 ? 4 :
894 .block_size = pixels * num_lanes,
895 .over_read = read_size - in_total,
896 .over_write = mmsize - out_total,
905 #define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT) \
907 SWS_DECL_FUNC(ff_packed_shuffle##IN##_##OUT##_##EXT); \
908 if (in_total == IN && out_total == OUT) \
909 out->func = ff_packed_shuffle##IN##_##OUT##_##EXT; \
937 static_assert(
sizeof(uint32_t) ==
sizeof(
int),
"int size mismatch");
946 for (
int i = 0;
i < 4;
i++) {
947 if (!
op->clear.value[
i].den)
950 case 1:
c.u32 = 0x1010101
U * res.
priv.
u8[
i];
break;
955 op->clear.value[
i].num =
c.i;
956 op->clear.value[
i].den = 1;
987 int op_block_size =
out->block_size;
998 ops,
i, op_block_size, chain);
1006 #define ASSIGN_PROCESS_FUNC(NAME) \
1008 SWS_DECL_FUNC(NAME); \
1014 const int read_planes =
read ? (
read->rw.packed ? 1 :
read->rw.elems) : 0;
1016 switch (
FFMAX(read_planes, write_planes)) {
Copyright (C) 2025 Niklas Haas.
static bool check_filter_fma(const SwsImplParams *params)
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output link, the list of supported formats must be set. For video links, that means pixel format. For audio links, that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions.
int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out)
#define ASSIGN_PROCESS_FUNC(NAME)
static av_const int get_mmsize(const int cpu_flags)
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
int ff_sws_op_list_max_size(const SwsOpList *ops)
Returns the size of the largest pixel type used in ops.
const SwsOpBackend backend_x86
static void normalize_clear(SwsOp *op)
Represents a computed filter kernel.
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
#define DECL_FUNCS_32(SIZE, EXT, FLAG)
void(* filter)(uint8_t *src, int stride, int qscale)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
int ff_sws_pixel_type_size(SwsPixelType type)
void * av_memdup(const void *p, size_t size)
Duplicate a buffer with av_malloc().
static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
bool ff_sws_pixel_type_is_int(SwsPixelType type)
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
#define AV_CPU_FLAG_AVX512
AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used.
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
#define FF_ARRAY_ELEMS(a)
SwsOpChain * ff_sws_op_chain_alloc(void)
int flags
Flags modifying the (de)muxer behaviour.
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
static AVFormatContext * ctx
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
SwsFilterWeights * kernel
Compiled "chain" of operations, which can be dispatched efficiently.
Rational number (pair of numerator and denominator).
static const SwsOpTable *const tables[]
static bool check_filter_4x4_h(const SwsImplParams *params)
static int setup_rw(const SwsImplParams *params, SwsImplResult *out)
static int solve_shuffle(const SwsOpList *ops, int mmsize, SwsCompiledOp *out)
static int setup_filter_4x4_h(const SwsImplParams *params, SwsImplResult *out)
Undefined Behavior: in the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
#define AV_CPU_FLAG_AVX2
AVX2 functions: requires OS support even if YMM registers aren't used.
#define i(width, name, range_min, range_max)
static int setup_swap_bytes(const SwsImplParams *params, SwsImplResult *out)
int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, int ops_index, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
void ff_sws_op_chain_free_cb(void *ptr)
static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
static void ff_sws_op_chain_free(SwsOpChain *chain)
static const int weights[]
static bool op_is_type_invariant(const SwsOp *op)
Returns true if the operation's implementation only depends on the block size, and not the underlying...
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
static int hscale_sizeof_weight(const SwsOp *op)
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
void * av_calloc(size_t nmemb, size_t size)
static void ff_op_priv_free(SwsOpPriv *priv)
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
static int setup_shift(const SwsImplParams *params, SwsImplResult *out)
#define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT)
int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
uint8_t elems
Examples: rgba = 4x u8 packed yuv444p = 3x u8 rgb565 = 1x u16 <- use SWS_OP_UNPACK to unpack monow = ...
static void scale(int *out, const int *in, const int w, const int h, const int shift)
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
#define DECL_FUNCS_16(SIZE, EXT, FLAG)
#define xi(width, name, var, range_min, range_max, subs,...)
Helper struct for representing a list of operations.
#define DECL_FUNCS_8(SIZE, EXT, FLAG)
Main external API structure.
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)