FFmpeg
output_lsx.c
/*
 * Copyright (C) 2023 Loongson Technology Corporation Limited
 * Contributed by Lu Wang <wanglu@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "swscale_loongarch.h"
#include "libavutil/loongarch/loongson_intrinsics.h"

/* Copied from libswscale/output.c */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
              unsigned A1, unsigned A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum AVPixelFormat target, int hasAlpha)
{
    if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA ||
        target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) {
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

#if CONFIG_SMALL
        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
#else
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
        int sh = (target == AV_PIX_FMT_RGB32_1 ||
                  target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
        av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF);
#endif
        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
#endif
    } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

#define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
#define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)

        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] = g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] = g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
#undef r_b
#undef b_r
    } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
               target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
               target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) {
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
            dr1 = ff_dither_2x2_8[ y & 1     ][0];
            dg1 = ff_dither_2x2_4[ y & 1     ][0];
            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = ff_dither_2x2_8[ y & 1     ][1];
            dg2 = ff_dither_2x2_4[ y & 1     ][1];
            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
            dr1 = ff_dither_2x2_8[ y & 1     ][0];
            dg1 = ff_dither_2x2_8[ y & 1     ][1];
            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = ff_dither_2x2_8[ y & 1     ][1];
            dg2 = ff_dither_2x2_8[ y & 1     ][0];
            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
        } else {
            dr1 = ff_dither_4x4_16[ y & 3     ][0];
            dg1 = ff_dither_4x4_16[ y & 3     ][1];
            db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = ff_dither_4x4_16[ y & 3     ][1];
            dg2 = ff_dither_4x4_16[ y & 3     ][0];
            db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
        }

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else { /* 8/4 bits */
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
            const uint8_t * const d64 = ff_dither_8x8_73[y & 7];
            const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
        } else {
            const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
            const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 =        d64[(i * 2 + 1) & 7];
        }

        if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) {
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                      ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
        } else {
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
        }
    }
}

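/* Pull one pair of luma indices and one U/V pair out of the given LSX vector
 * lanes (t1..t4), look up the per-component contribution tables from the
 * SwsInternal context, and emit two packed pixels via yuv2rgb_write(). */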
#define WRITE_YUV2RGB_LSX(vec_y1, vec_y2, vec_u, vec_v, t1, t2, t3, t4) \
{                                                                       \
    Y1 = __lsx_vpickve2gr_w(vec_y1, t1);                                \
    Y2 = __lsx_vpickve2gr_w(vec_y2, t2);                                \
    U  = __lsx_vpickve2gr_w(vec_u, t3);                                 \
    V  = __lsx_vpickve2gr_w(vec_v, t4);                                 \
    r  = c->table_rV[V];                                                \
    g  = (c->table_gU[U] + c->table_gV[V]);                             \
    b  = c->table_bU[U];                                                \
    yuv2rgb_write(dest, count, Y1, Y2, 0, 0,                            \
                  r, g, b, y, target, 0);                               \
    count++;                                                            \
}

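/* Multi-tap vertical scaling plus packed-RGB output: each iteration of the
 * main loop filters and converts 32 luma / 16 chroma samples; 16-, 8-, 4-
 * and 2-sample tails and a final scalar loop handle the remainder. */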
static void
yuv2rgb_X_template_lsx(SwsInternal *c, const int16_t *lumFilter,
                       const int16_t **lumSrc, int lumFilterSize,
                       const int16_t *chrFilter, const int16_t **chrUSrc,
                       const int16_t **chrVSrc, int chrFilterSize,
                       const int16_t **alpSrc, uint8_t *dest, int dstW,
                       int y, enum AVPixelFormat target, int hasAlpha)
{
    int i, j;
    int count = 0;
    int t = 1 << 18;
    int len = dstW >> 5;
    int res = dstW & 31;
    int len_count = (dstW + 1) >> 1;
    const void *r, *g, *b;
    int head = YUVRGB_TABLE_HEADROOM;
    __m128i headroom = __lsx_vreplgr2vr_w(head);

    for (i = 0; i < len; i++) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, l_src2, l_src3, l_src4, u_src1, u_src2, v_src1, v_src2;
        __m128i yl_ev, yl_ev1, yl_ev2, yl_od1, yl_od2, yh_ev1, yh_ev2, yh_od1, yh_od2;
        __m128i u_ev1, u_ev2, u_od1, u_od2, v_ev1, v_ev2, v_od1, v_od2, temp;

        yl_ev  = __lsx_vldrepl_w(&t, 0);
        yl_ev1 = yl_ev;
        yl_od1 = yl_ev;
        yh_ev1 = yl_ev;
        yh_od1 = yl_ev;
        u_ev1  = yl_ev;
        v_ev1  = yl_ev;
        u_od1  = yl_ev;
        v_od1  = yl_ev;
        yl_ev2 = yl_ev;
        yl_od2 = yl_ev;
        yh_ev2 = yl_ev;
        yh_od2 = yl_ev;
        u_ev2  = yl_ev;
        v_ev2  = yl_ev;
        u_od2  = yl_ev;
        v_od2  = yl_ev;

        for (j = 0; j < lumFilterSize; j++) {
            temp = __lsx_vldrepl_h((lumFilter + j), 0);
            DUP2_ARG2(__lsx_vld, lumSrc[j] + count_lum, 0, lumSrc[j] + count_lum,
                      16, l_src1, l_src2);
            DUP2_ARG2(__lsx_vld, lumSrc[j] + count_lum, 32, lumSrc[j] + count_lum,
                      48, l_src3, l_src4);
            yl_ev1 = __lsx_vmaddwev_w_h(yl_ev1, temp, l_src1);
            yl_od1 = __lsx_vmaddwod_w_h(yl_od1, temp, l_src1);
            yh_ev1 = __lsx_vmaddwev_w_h(yh_ev1, temp, l_src3);
            yh_od1 = __lsx_vmaddwod_w_h(yh_od1, temp, l_src3);
            yl_ev2 = __lsx_vmaddwev_w_h(yl_ev2, temp, l_src2);
            yl_od2 = __lsx_vmaddwod_w_h(yl_od2, temp, l_src2);
            yh_ev2 = __lsx_vmaddwev_w_h(yh_ev2, temp, l_src4);
            yh_od2 = __lsx_vmaddwod_w_h(yh_od2, temp, l_src4);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src1, v_src1);
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 16, chrVSrc[j] + count, 16,
                      u_src2, v_src2);
            temp  = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev1 = __lsx_vmaddwev_w_h(u_ev1, temp, u_src1);
            u_od1 = __lsx_vmaddwod_w_h(u_od1, temp, u_src1);
            v_ev1 = __lsx_vmaddwev_w_h(v_ev1, temp, v_src1);
            v_od1 = __lsx_vmaddwod_w_h(v_od1, temp, v_src1);
            u_ev2 = __lsx_vmaddwev_w_h(u_ev2, temp, u_src2);
            u_od2 = __lsx_vmaddwod_w_h(u_od2, temp, u_src2);
            v_ev2 = __lsx_vmaddwev_w_h(v_ev2, temp, v_src2);
            v_od2 = __lsx_vmaddwod_w_h(v_od2, temp, v_src2);
        }
        yl_ev1 = __lsx_vsrai_w(yl_ev1, 19);
        yh_ev1 = __lsx_vsrai_w(yh_ev1, 19);
        yl_od1 = __lsx_vsrai_w(yl_od1, 19);
        yh_od1 = __lsx_vsrai_w(yh_od1, 19);
        u_ev1  = __lsx_vsrai_w(u_ev1, 19);
        v_ev1  = __lsx_vsrai_w(v_ev1, 19);
        u_od1  = __lsx_vsrai_w(u_od1, 19);
        v_od1  = __lsx_vsrai_w(v_od1, 19);
        yl_ev2 = __lsx_vsrai_w(yl_ev2, 19);
        yh_ev2 = __lsx_vsrai_w(yh_ev2, 19);
        yl_od2 = __lsx_vsrai_w(yl_od2, 19);
        yh_od2 = __lsx_vsrai_w(yh_od2, 19);
        u_ev2  = __lsx_vsrai_w(u_ev2, 19);
        v_ev2  = __lsx_vsrai_w(v_ev2, 19);
        u_od2  = __lsx_vsrai_w(u_od2, 19);
        v_od2  = __lsx_vsrai_w(v_od2, 19);
        u_ev1  = __lsx_vadd_w(u_ev1, headroom);
        v_ev1  = __lsx_vadd_w(v_ev1, headroom);
        u_od1  = __lsx_vadd_w(u_od1, headroom);
        v_od1  = __lsx_vadd_w(v_od1, headroom);
        u_ev2  = __lsx_vadd_w(u_ev2, headroom);
        v_ev2  = __lsx_vadd_w(v_ev2, headroom);
        u_od2  = __lsx_vadd_w(u_od2, headroom);
        v_od2  = __lsx_vadd_w(v_od2, headroom);

        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 3, 3, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 0, 0, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 1, 1, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 2, 2, 3, 3);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 3, 3, 3, 3);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_ev2, v_ev2, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_od2, v_od2, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_ev2, v_ev2, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_od2, v_od2, 3, 3, 1, 1);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_ev2, v_ev2, 0, 0, 2, 2);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_od2, v_od2, 1, 1, 2, 2);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_ev2, v_ev2, 2, 2, 3, 3);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_od2, v_od2, 3, 3, 3, 3);
    }

    if (res >= 16) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, l_src2, u_src1, v_src1;
        __m128i yl_ev, yl_ev1, yl_ev2, yl_od1, yl_od2;
        __m128i u_ev1, u_od1, v_ev1, v_od1, temp;

        yl_ev  = __lsx_vldrepl_w(&t, 0);
        yl_ev1 = yl_ev;
        yl_od1 = yl_ev;
        u_ev1  = yl_ev;
        v_ev1  = yl_ev;
        u_od1  = yl_ev;
        v_od1  = yl_ev;
        yl_ev2 = yl_ev;
        yl_od2 = yl_ev;

        for (j = 0; j < lumFilterSize; j++) {
            temp = __lsx_vldrepl_h((lumFilter + j), 0);
            DUP2_ARG2(__lsx_vld, lumSrc[j] + count_lum, 0, lumSrc[j] + count_lum,
                      16, l_src1, l_src2);
            yl_ev1 = __lsx_vmaddwev_w_h(yl_ev1, temp, l_src1);
            yl_od1 = __lsx_vmaddwod_w_h(yl_od1, temp, l_src1);
            yl_ev2 = __lsx_vmaddwev_w_h(yl_ev2, temp, l_src2);
            yl_od2 = __lsx_vmaddwod_w_h(yl_od2, temp, l_src2);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src1, v_src1);
            temp  = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev1 = __lsx_vmaddwev_w_h(u_ev1, temp, u_src1);
            u_od1 = __lsx_vmaddwod_w_h(u_od1, temp, u_src1);
            v_ev1 = __lsx_vmaddwev_w_h(v_ev1, temp, v_src1);
            v_od1 = __lsx_vmaddwod_w_h(v_od1, temp, v_src1);
        }
        yl_ev1 = __lsx_vsrai_w(yl_ev1, 19);
        yl_od1 = __lsx_vsrai_w(yl_od1, 19);
        u_ev1  = __lsx_vsrai_w(u_ev1, 19);
        v_ev1  = __lsx_vsrai_w(v_ev1, 19);
        u_od1  = __lsx_vsrai_w(u_od1, 19);
        v_od1  = __lsx_vsrai_w(v_od1, 19);
        yl_ev2 = __lsx_vsrai_w(yl_ev2, 19);
        yl_od2 = __lsx_vsrai_w(yl_od2, 19);
        u_ev1  = __lsx_vadd_w(u_ev1, headroom);
        v_ev1  = __lsx_vadd_w(v_ev1, headroom);
        u_od1  = __lsx_vadd_w(u_od1, headroom);
        v_od1  = __lsx_vadd_w(v_od1, headroom);

        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 3, 3, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 0, 0, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 1, 1, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 2, 2, 3, 3);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 3, 3, 3, 3);
        res -= 16;
    }

    if (res >= 8) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, u_src, v_src;
        __m128i yl_ev, yl_od;
        __m128i u_ev, u_od, v_ev, v_od, temp;

        yl_ev = __lsx_vldrepl_w(&t, 0);
        yl_od = yl_ev;
        u_ev  = yl_ev;
        v_ev  = yl_ev;
        u_od  = yl_ev;
        v_od  = yl_ev;
        for (j = 0; j < lumFilterSize; j++) {
            temp   = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src1 = __lsx_vld(lumSrc[j] + count_lum, 0);
            yl_ev  = __lsx_vmaddwev_w_h(yl_ev, temp, l_src1);
            yl_od  = __lsx_vmaddwod_w_h(yl_od, temp, l_src1);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src, v_src);
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev = __lsx_vmaddwev_w_h(u_ev, temp, u_src);
            u_od = __lsx_vmaddwod_w_h(u_od, temp, u_src);
            v_ev = __lsx_vmaddwev_w_h(v_ev, temp, v_src);
            v_od = __lsx_vmaddwod_w_h(v_od, temp, v_src);
        }
        yl_ev = __lsx_vsrai_w(yl_ev, 19);
        yl_od = __lsx_vsrai_w(yl_od, 19);
        u_ev  = __lsx_vsrai_w(u_ev, 19);
        v_ev  = __lsx_vsrai_w(v_ev, 19);
        u_od  = __lsx_vsrai_w(u_od, 19);
        v_od  = __lsx_vsrai_w(v_od, 19);
        u_ev  = __lsx_vadd_w(u_ev, headroom);
        v_ev  = __lsx_vadd_w(v_ev, headroom);
        u_od  = __lsx_vadd_w(u_od, headroom);
        v_od  = __lsx_vadd_w(v_od, headroom);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_od, v_od, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_od, v_od, 3, 3, 1, 1);
        res -= 8;
    }

    if (res >= 4) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, u_src, v_src;
        __m128i yl_ev, yl_od;
        __m128i u_ev, u_od, v_ev, v_od, temp;

        yl_ev = __lsx_vldrepl_w(&t, 0);
        yl_od = yl_ev;
        u_ev  = yl_ev;
        v_ev  = yl_ev;
        u_od  = yl_ev;
        v_od  = yl_ev;
        for (j = 0; j < lumFilterSize; j++) {
            temp   = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src1 = __lsx_vld(lumSrc[j] + count_lum, 0);
            yl_ev  = __lsx_vmaddwev_w_h(yl_ev, temp, l_src1);
            yl_od  = __lsx_vmaddwod_w_h(yl_od, temp, l_src1);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src, v_src);
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev = __lsx_vmaddwev_w_h(u_ev, temp, u_src);
            u_od = __lsx_vmaddwod_w_h(u_od, temp, u_src);
            v_ev = __lsx_vmaddwev_w_h(v_ev, temp, v_src);
            v_od = __lsx_vmaddwod_w_h(v_od, temp, v_src);
        }
        yl_ev = __lsx_vsrai_w(yl_ev, 19);
        yl_od = __lsx_vsrai_w(yl_od, 19);
        u_ev  = __lsx_vsrai_w(u_ev, 19);
        v_ev  = __lsx_vsrai_w(v_ev, 19);
        u_od  = __lsx_vsrai_w(u_od, 19);
        v_od  = __lsx_vsrai_w(v_od, 19);
        u_ev  = __lsx_vadd_w(u_ev, headroom);
        v_ev  = __lsx_vadd_w(v_ev, headroom);
        u_od  = __lsx_vadd_w(u_od, headroom);
        v_od  = __lsx_vadd_w(v_od, headroom);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_od, v_od, 1, 1, 0, 0);
        res -= 4;
    }

    if (res >= 2) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, u_src, v_src;
        __m128i yl_ev, yl_od;
        __m128i u_ev, u_od, v_ev, v_od, temp;

        yl_ev = __lsx_vldrepl_w(&t, 0);
        yl_od = yl_ev;
        u_ev  = yl_ev;
        v_ev  = yl_ev;
        u_od  = yl_ev;
        v_od  = yl_ev;
        for (j = 0; j < lumFilterSize; j++) {
            temp   = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src1 = __lsx_vld(lumSrc[j] + count_lum, 0);
            yl_ev  = __lsx_vmaddwev_w_h(yl_ev, temp, l_src1);
            yl_od  = __lsx_vmaddwod_w_h(yl_od, temp, l_src1);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src, v_src);
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev = __lsx_vmaddwev_w_h(u_ev, temp, u_src);
            u_od = __lsx_vmaddwod_w_h(u_od, temp, u_src);
            v_ev = __lsx_vmaddwev_w_h(v_ev, temp, v_src);
            v_od = __lsx_vmaddwod_w_h(v_od, temp, v_src);
        }
        yl_ev = __lsx_vsrai_w(yl_ev, 19);
        yl_od = __lsx_vsrai_w(yl_od, 19);
        u_ev  = __lsx_vsrai_w(u_ev, 19);
        v_ev  = __lsx_vsrai_w(v_ev, 19);
        u_od  = __lsx_vsrai_w(u_od, 19);
        v_od  = __lsx_vsrai_w(v_od, 19);
        u_ev  = __lsx_vadd_w(u_ev, headroom);
        v_ev  = __lsx_vadd_w(v_ev, headroom);
        u_od  = __lsx_vadd_w(u_od, headroom);
        v_od  = __lsx_vadd_w(v_od, headroom);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 0, 0, 0, 0);
        res -= 2;
    }

    for (; count < len_count; count++) {
        int Y1 = 1 << 18;
        int Y2 = Y1;
        int U  = Y1;
        int V  = Y1;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][count * 2]     * lumFilter[j];
            Y2 += lumSrc[j][count * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][count] * chrFilter[j];
            V += chrVSrc[j][count] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
        g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
             c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
        b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

        yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                      r, g, b, y, target, 0);
    }
}

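/* Two-row (bilinear) vertical scaling: buf[0]/buf[1] and the chroma rows are
 * blended with yalpha/uvalpha weights (each pair of weights sums to 4096),
 * eight luma samples per iteration. */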
static void
yuv2rgb_2_template_lsx(SwsInternal *c, const int16_t *buf[2],
                       const int16_t *ubuf[2], const int16_t *vbuf[2],
                       const int16_t *abuf[2], uint8_t *dest, int dstW,
                       int yalpha, int uvalpha, int y,
                       enum AVPixelFormat target, int hasAlpha)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i, count = 0;
    int len = dstW - 7;
    int len_count = (dstW + 1) >> 1;
    const void *r, *g, *b;
    int head = YUVRGB_TABLE_HEADROOM;
    __m128i v_yalpha1  = __lsx_vreplgr2vr_w(yalpha1);
    __m128i v_uvalpha1 = __lsx_vreplgr2vr_w(uvalpha1);
    __m128i v_yalpha   = __lsx_vreplgr2vr_w(yalpha);
    __m128i v_uvalpha  = __lsx_vreplgr2vr_w(uvalpha);
    __m128i headroom   = __lsx_vreplgr2vr_w(head);
    __m128i zero       = __lsx_vldi(0);

    for (i = 0; i < len; i += 8) {
        int Y1, Y2, U, V;
        int i_dex = i << 1;
        int c_dex = count << 1;
        __m128i y0_h, y0_l, y0, u0, v0;
        __m128i y1_h, y1_l, y1, u1, v1;
        __m128i y_l, y_h, u, v;

        DUP4_ARG2(__lsx_vldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex,
                  buf1, i_dex, y0, u0, v0, y1);
        DUP2_ARG2(__lsx_vldx, ubuf1, c_dex, vbuf1, c_dex, u1, v1);
        DUP2_ARG2(__lsx_vsllwil_w_h, y0, 0, y1, 0, y0_l, y1_l);
        DUP2_ARG1(__lsx_vexth_w_h, y0, y1, y0_h, y1_h);
        DUP4_ARG2(__lsx_vilvl_h, zero, u0, zero, u1, zero, v0, zero, v1,
                  u0, u1, v0, v1);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        y0_h = __lsx_vmul_w(y0_h, v_yalpha1);
        u0   = __lsx_vmul_w(u0, v_uvalpha1);
        v0   = __lsx_vmul_w(v0, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        y_h  = __lsx_vmadd_w(y0_h, v_yalpha, y1_h);
        u    = __lsx_vmadd_w(u0, v_uvalpha, u1);
        v    = __lsx_vmadd_w(v0, v_uvalpha, v1);
        y_l  = __lsx_vsrai_w(y_l, 19);
        y_h  = __lsx_vsrai_w(y_h, 19);
        u    = __lsx_vsrai_w(u, 19);
        v    = __lsx_vsrai_w(v, 19);
        u    = __lsx_vadd_w(u, headroom);
        v    = __lsx_vadd_w(v, headroom);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
        WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 0, 1, 2, 2);
        WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 2, 3, 3, 3);
    }
    if (dstW - i >= 4) {
        int Y1, Y2, U, V;
        int i_dex = i << 1;
        __m128i y0_l, y0, u0, v0;
        __m128i y1_l, y1, u1, v1;
        __m128i y_l, u, v;

        y0 = __lsx_vldx(buf0, i_dex);
        u0 = __lsx_vldrepl_d((ubuf0 + count), 0);
        v0 = __lsx_vldrepl_d((vbuf0 + count), 0);
        y1 = __lsx_vldx(buf1, i_dex);
        u1 = __lsx_vldrepl_d((ubuf1 + count), 0);
        v1 = __lsx_vldrepl_d((vbuf1 + count), 0);
        DUP2_ARG2(__lsx_vilvl_h, zero, y0, zero, y1, y0_l, y1_l);
        DUP4_ARG2(__lsx_vilvl_h, zero, u0, zero, u1, zero, v0, zero, v1,
                  u0, u1, v0, v1);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        u0   = __lsx_vmul_w(u0, v_uvalpha1);
        v0   = __lsx_vmul_w(v0, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        u    = __lsx_vmadd_w(u0, v_uvalpha, u1);
        v    = __lsx_vmadd_w(v0, v_uvalpha, v1);
        y_l  = __lsx_vsrai_w(y_l, 19);
        u    = __lsx_vsrai_w(u, 19);
        v    = __lsx_vsrai_w(v, 19);
        u    = __lsx_vadd_w(u, headroom);
        v    = __lsx_vadd_w(v, headroom);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
        i += 4;
    }
    for (; count < len_count; count++) {
        int Y1 = (buf0[count * 2]     * yalpha1 +
                  buf1[count * 2]     * yalpha) >> 19;
        int Y2 = (buf0[count * 2 + 1] * yalpha1 +
                  buf1[count * 2 + 1] * yalpha) >> 19;
        int U  = (ubuf0[count] * uvalpha1 + ubuf1[count] * uvalpha) >> 19;
        int V  = (vbuf0[count] * uvalpha1 + vbuf1[count] * uvalpha) >> 19;

        r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
        g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
             c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
        b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

        yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                      r, g, b, y, target, 0);
    }
}

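/* Single-row input: with uvalpha == 0 only the first chroma row is used,
 * otherwise the two chroma rows are blended; rounding matches the scalar
 * (x + 64) >> 7 path. */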
static void
yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0,
                       const int16_t *ubuf[2], const int16_t *vbuf[2],
                       const int16_t *abuf0, uint8_t *dest, int dstW,
                       int uvalpha, int y, enum AVPixelFormat target,
                       int hasAlpha)
{
    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;
    int len = (dstW - 7);
    int len_count = (dstW + 1) >> 1;
    const void *r, *g, *b;

    if (uvalpha == 0) {
        int count = 0;
        int head = YUVRGB_TABLE_HEADROOM;
        __m128i headroom = __lsx_vreplgr2vr_h(head);

        for (i = 0; i < len; i += 8) {
            int Y1, Y2, U, V;
            int i_dex = i << 1;
            int c_dex = count << 1;
            __m128i src_y, src_u, src_v;
            __m128i u, v, uv, y_l, y_h;

            src_y = __lsx_vldx(buf0, i_dex);
            DUP2_ARG2(__lsx_vldx, ubuf0, c_dex, vbuf0, c_dex, src_u, src_v);
            src_y = __lsx_vsrari_h(src_y, 7);
            src_u = __lsx_vsrari_h(src_u, 7);
            src_v = __lsx_vsrari_h(src_v, 7);
            y_l   = __lsx_vsllwil_w_h(src_y, 0);
            y_h   = __lsx_vexth_w_h(src_y);
            uv    = __lsx_vilvl_h(src_v, src_u);
            u     = __lsx_vaddwev_w_h(uv, headroom);
            v     = __lsx_vaddwod_w_h(uv, headroom);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
            WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 0, 1, 2, 2);
            WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 2, 3, 3, 3);
        }
        if (dstW - i >= 4) {
            int Y1, Y2, U, V;
            int i_dex = i << 1;
            __m128i src_y, src_u, src_v;
            __m128i y_l, u, v, uv;

            src_y = __lsx_vldx(buf0, i_dex);
            src_u = __lsx_vldrepl_d((ubuf0 + count), 0);
            src_v = __lsx_vldrepl_d((vbuf0 + count), 0);
            y_l   = __lsx_vsrari_h(src_y, 7);
            y_l   = __lsx_vsllwil_w_h(y_l, 0);
            uv    = __lsx_vilvl_h(src_v, src_u);
            uv    = __lsx_vsrari_h(uv, 7);
            u     = __lsx_vaddwev_w_h(uv, headroom);
            v     = __lsx_vaddwod_w_h(uv, headroom);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
            i += 4;
        }
        for (; count < len_count; count++) {
            int Y1 = (buf0[count * 2    ] + 64) >> 7;
            int Y2 = (buf0[count * 2 + 1] + 64) >> 7;
            int U  = (ubuf0[count] + 64) >> 7;
            int V  = (vbuf0[count] + 64) >> 7;

            r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
            g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
                 c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
            b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

            yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                          r, g, b, y, target, 0);
        }
    } else {
        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        int count = 0;
        int HEADROOM = YUVRGB_TABLE_HEADROOM;
        int uvalpha1 = 4096 - uvalpha;
        __m128i headroom     = __lsx_vreplgr2vr_w(HEADROOM);
        __m128i uvalpha_tmp1 = __lsx_vreplgr2vr_h(uvalpha1);
        __m128i uvalpha_tmp  = __lsx_vreplgr2vr_h(uvalpha);

        for (i = 0; i < len; i += 8) {
            int Y1, Y2, U, V;
            int i_dex = i << 1;
            int c_dex = count << 1;
            __m128i src_y, src_u0, src_v0, src_u1, src_v1;
            __m128i y_l, y_h, u1, u2, v1, v2, u_ev, v_od;

            DUP4_ARG2(__lsx_vldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex,
                      ubuf1, c_dex, src_y, src_u0, src_v0, src_u1);
            src_v1 = __lsx_vldx(vbuf1, c_dex);
            src_y  = __lsx_vsrari_h(src_y, 7);

            u_ev = __lsx_vmulwev_w_h(src_u0, uvalpha_tmp1);
            v_od = __lsx_vmulwod_w_h(src_u0, uvalpha_tmp1);
            u1   = __lsx_vmaddwev_w_h(u_ev, src_u1, uvalpha_tmp);
            v1   = __lsx_vmaddwod_w_h(v_od, src_u1, uvalpha_tmp);
            u_ev = __lsx_vmulwev_w_h(src_v0, uvalpha_tmp1);
            v_od = __lsx_vmulwod_w_h(src_v0, uvalpha_tmp1);
            u2   = __lsx_vmaddwev_w_h(u_ev, src_v1, uvalpha_tmp);
            v2   = __lsx_vmaddwod_w_h(v_od, src_v1, uvalpha_tmp);

            y_l = __lsx_vsllwil_w_h(src_y, 0);
            y_h = __lsx_vexth_w_h(src_y);
            u1  = __lsx_vsrari_w(u1, 19);
            v1  = __lsx_vsrari_w(v1, 19);
            u2  = __lsx_vsrari_w(u2, 19);
            v2  = __lsx_vsrari_w(v2, 19);
            u1  = __lsx_vadd_w(u1, headroom);
            v1  = __lsx_vadd_w(v1, headroom);
            u2  = __lsx_vadd_w(u2, headroom);
            v2  = __lsx_vadd_w(v2, headroom);
            WRITE_YUV2RGB_LSX(y_l, y_l, u1, u2, 0, 1, 0, 0);
            WRITE_YUV2RGB_LSX(y_l, y_l, v1, v2, 2, 3, 0, 0);
            WRITE_YUV2RGB_LSX(y_h, y_h, u1, u2, 0, 1, 1, 1);
            WRITE_YUV2RGB_LSX(y_h, y_h, v1, v2, 2, 3, 1, 1);
        }
        for (; count < len_count; count++) {
            int Y1 = (buf0[count * 2    ] + 64) >> 7;
            int Y2 = (buf0[count * 2 + 1] + 64) >> 7;
            int U  = (ubuf0[count] + ubuf1[count] + 128) >> 8;
            int V  = (vbuf0[count] + vbuf1[count] + 128) >> 8;

            r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
            g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
                 c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
            b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

            yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                          r, g, b, y, target, 0);
        }
    }
}

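/* YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) expands to the three entry
 * points (_X_lsx, _2_lsx and _1_lsx) that bind the templates above to one
 * fixed output pixel format. */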
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha)                       \
static void name ## ext ## _X_lsx(SwsInternal *c, const int16_t *lumFilter,  \
                                  const int16_t **lumSrc, int lumFilterSize, \
                                  const int16_t *chrFilter,                  \
                                  const int16_t **chrUSrc,                   \
                                  const int16_t **chrVSrc, int chrFilterSize,\
                                  const int16_t **alpSrc, uint8_t *dest,     \
                                  int dstW, int y)                           \
{                                                                            \
    name ## base ## _X_template_lsx(c, lumFilter, lumSrc, lumFilterSize,     \
                                    chrFilter, chrUSrc, chrVSrc,             \
                                    chrFilterSize,                           \
                                    alpSrc, dest, dstW, y, fmt, hasAlpha);   \
}

#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha)                      \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha)                               \
static void name ## ext ## _2_lsx(SwsInternal *c, const int16_t *buf[2],     \
                                  const int16_t *ubuf[2],                    \
                                  const int16_t *vbuf[2],                    \
                                  const int16_t *abuf[2], uint8_t *dest,     \
                                  int dstW, int yalpha, int uvalpha, int y)  \
{                                                                            \
    name ## base ## _2_template_lsx(c, buf, ubuf, vbuf, abuf, dest,          \
                                    dstW, yalpha, uvalpha, y, fmt, hasAlpha);\
}

#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha)                        \
YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha)                              \
static void name ## ext ## _1_lsx(SwsInternal *c, const int16_t *buf0,       \
                                  const int16_t *ubuf[2],                    \
                                  const int16_t *vbuf[2],                    \
                                  const int16_t *abuf0, uint8_t *dest,       \
                                  int dstW, int uvalpha, int y)              \
{                                                                            \
    name ## base ## _1_template_lsx(c, buf0, ubuf, vbuf, abuf0, dest,        \
                                    dstW, uvalpha, y, fmt, hasAlpha);        \
}

#if CONFIG_SMALL
#else
#if CONFIG_SWSCALE_ALPHA
#endif
#endif
YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24, 0)
YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24, 0)

// This function is copied from libswscale/output.c
static av_always_inline void yuv2rgb_write_full(SwsInternal *c,
    uint8_t *dest, int i, int R, int A, int G, int B,
    int y, enum AVPixelFormat target, int hasAlpha, int err[4])
{
    int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;

    if ((R | G | B) & 0xC0000000) {
        R = av_clip_uintp2(R, 30);
        G = av_clip_uintp2(G, 30);
        B = av_clip_uintp2(B, 30);
    }

    switch (target) {
    case AV_PIX_FMT_ARGB:
        dest[0] = hasAlpha ? A : 255;
        dest[1] = R >> 22;
        dest[2] = G >> 22;
        dest[3] = B >> 22;
        break;
    case AV_PIX_FMT_RGB24:
        dest[0] = R >> 22;
        dest[1] = G >> 22;
        dest[2] = B >> 22;
        break;
    case AV_PIX_FMT_RGBA:
        dest[0] = R >> 22;
        dest[1] = G >> 22;
        dest[2] = B >> 22;
        dest[3] = hasAlpha ? A : 255;
        break;
    case AV_PIX_FMT_ABGR:
        dest[0] = hasAlpha ? A : 255;
        dest[1] = B >> 22;
        dest[2] = G >> 22;
        dest[3] = R >> 22;
        break;
    case AV_PIX_FMT_BGR24:
        dest[0] = B >> 22;
        dest[1] = G >> 22;
        dest[2] = R >> 22;
        break;
    case AV_PIX_FMT_BGRA:
        dest[0] = B >> 22;
        dest[1] = G >> 22;
        dest[2] = R >> 22;
        dest[3] = hasAlpha ? A : 255;
        break;
    case AV_PIX_FMT_BGR4_BYTE:
    case AV_PIX_FMT_RGB4_BYTE:
    case AV_PIX_FMT_BGR8:
    case AV_PIX_FMT_RGB8:
    {
        int r, g, b;

        switch (c->opts.dither) {
        default:
        case SWS_DITHER_AUTO:
        case SWS_DITHER_ED:
            R >>= 22;
            G >>= 22;
            B >>= 22;
            R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
            G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4;
            B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4;
            c->dither_error[0][i] = err[0];
            c->dither_error[1][i] = err[1];
            c->dither_error[2][i] = err[2];
            r = R >> (isrgb8 ? 5 : 7);
            g = G >> (isrgb8 ? 5 : 6);
            b = B >> (isrgb8 ? 6 : 7);
            r = av_clip(r, 0, isrgb8 ? 7 : 1);
            g = av_clip(g, 0, isrgb8 ? 7 : 3);
            b = av_clip(b, 0, isrgb8 ? 3 : 1);
            err[0] = R - r*(isrgb8 ? 36 : 255);
            err[1] = G - g*(isrgb8 ? 36 : 85);
            err[2] = B - b*(isrgb8 ? 85 : 255);
            break;
        case SWS_DITHER_A_DITHER:
            if (isrgb8) {
                /* see http://pippin.gimp.org/a_dither/ for details/origin */
#define A_DITHER(u,v)   (((((u)+((v)*236))*119)&0xff))
                r = (((R >> 19) + A_DITHER(i, y)        - 96) >> 8);
                g = (((G >> 19) + A_DITHER(i + 17, y)   - 96) >> 8);
                b = (((B >> 20) + A_DITHER(i + 17*2, y) - 96) >> 8);
                r = av_clip_uintp2(r, 3);
                g = av_clip_uintp2(g, 3);
                b = av_clip_uintp2(b, 2);
            } else {
                r = (((R >> 21) + A_DITHER(i, y)        - 256) >> 8);
                g = (((G >> 19) + A_DITHER(i + 17, y)   - 256) >> 8);
                b = (((B >> 21) + A_DITHER(i + 17*2, y) - 256) >> 8);
                r = av_clip_uintp2(r, 1);
                g = av_clip_uintp2(g, 2);
                b = av_clip_uintp2(b, 1);
            }
            break;
        case SWS_DITHER_X_DITHER:
            if (isrgb8) {
                /* see http://pippin.gimp.org/a_dither/ for details/origin */
#define X_DITHER(u,v)   (((((u)^((v)*237))*181)&0x1ff)/2)
                r = (((R >> 19) + X_DITHER(i, y)        - 96) >> 8);
                g = (((G >> 19) + X_DITHER(i + 17, y)   - 96) >> 8);
                b = (((B >> 20) + X_DITHER(i + 17*2, y) - 96) >> 8);
                r = av_clip_uintp2(r, 3);
                g = av_clip_uintp2(g, 3);
                b = av_clip_uintp2(b, 2);
            } else {
                r = (((R >> 21) + X_DITHER(i, y)        - 256) >> 8);
                g = (((G >> 19) + X_DITHER(i + 17, y)   - 256) >> 8);
                b = (((B >> 21) + X_DITHER(i + 17*2, y) - 256) >> 8);
                r = av_clip_uintp2(r, 1);
                g = av_clip_uintp2(g, 2);
                b = av_clip_uintp2(b, 1);
            }
            break;
        }

        if (target == AV_PIX_FMT_BGR4_BYTE) {
            dest[0] = r + 2*g + 8*b;
        } else if (target == AV_PIX_FMT_RGB4_BYTE) {
            dest[0] = b + 2*g + 8*r;
        } else if (target == AV_PIX_FMT_BGR8) {
            dest[0] = r + 8*g + 64*b;
        } else if (target == AV_PIX_FMT_RGB8) {
            dest[0] = b + 4*g + 32*r;
        } else
            av_assert2(0);
        break;
    }
    }
}

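/* Load the per-context YUV->RGB coefficients into scalars and splat them
 * into LSX vectors; used by the full-chroma templates below. */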
#define YUVTORGB_SETUP_LSX                              \
    int y_offset = c->yuv2rgb_y_offset;                 \
    int y_coeff  = c->yuv2rgb_y_coeff;                  \
    int v2r_coe  = c->yuv2rgb_v2r_coeff;                \
    int v2g_coe  = c->yuv2rgb_v2g_coeff;                \
    int u2g_coe  = c->yuv2rgb_u2g_coeff;                \
    int u2b_coe  = c->yuv2rgb_u2b_coeff;                \
    __m128i offset = __lsx_vreplgr2vr_w(y_offset);      \
    __m128i coeff  = __lsx_vreplgr2vr_w(y_coeff);       \
    __m128i v2r    = __lsx_vreplgr2vr_w(v2r_coe);       \
    __m128i v2g    = __lsx_vreplgr2vr_w(v2g_coe);       \
    __m128i u2g    = __lsx_vreplgr2vr_w(u2g_coe);       \
    __m128i u2b    = __lsx_vreplgr2vr_w(u2b_coe);

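/* Vector form of the YUV->RGB matrix: the resulting R/G/B stay at the 22-bit
 * precision that yuv2rgb_write_full() expects (it shifts right by 22). */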
#define YUVTORGB_LSX(y, u, v, R, G, B, offset, coeff,   \
                     y_temp, v2r, v2g, u2g, u2b)        \
{                                                       \
    y = __lsx_vsub_w(y, offset);                        \
    y = __lsx_vmul_w(y, coeff);                         \
    y = __lsx_vadd_w(y, y_temp);                        \
    R = __lsx_vmadd_w(y, v, v2r);                       \
    v = __lsx_vmadd_w(y, v, v2g);                       \
    G = __lsx_vmadd_w(v, u, u2g);                       \
    B = __lsx_vmadd_w(y, u, u2b);                       \
}

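/* Extract lane t1 of the R/G/B (and alpha) vectors and hand one pixel at
 * offset s to yuv2rgb_write_full(), advancing dest by one pixel step. */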
#define WRITE_FULL_A_LSX(r, g, b, a, t1, s)                                  \
{                                                                            \
    R = __lsx_vpickve2gr_w(r, t1);                                           \
    G = __lsx_vpickve2gr_w(g, t1);                                           \
    B = __lsx_vpickve2gr_w(b, t1);                                           \
    A = __lsx_vpickve2gr_w(a, t1);                                           \
    if (A & 0x100)                                                           \
        A = av_clip_uint8(A);                                                \
    yuv2rgb_write_full(c, dest, i + s, R, A, G, B, y, target, hasAlpha, err);\
    dest += step;                                                            \
}

#define WRITE_FULL_LSX(r, g, b, t1, s)                                       \
{                                                                            \
    R = __lsx_vpickve2gr_w(r, t1);                                           \
    G = __lsx_vpickve2gr_w(g, t1);                                           \
    B = __lsx_vpickve2gr_w(b, t1);                                           \
    yuv2rgb_write_full(c, dest, i + s, R, 0, G, B, y, target, hasAlpha, err);\
    dest += step;                                                            \
}

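/* Full-chroma-interpolation output: one chroma sample per output pixel (no
 * 2x1 subsampling), with error-diffusion state carried in err[] for the
 * palettized targets. */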
static void
yuv2rgb_full_X_template_lsx(SwsInternal *c, const int16_t *lumFilter,
                            const int16_t **lumSrc, int lumFilterSize,
                            const int16_t *chrFilter, const int16_t **chrUSrc,
                            const int16_t **chrVSrc, int chrFilterSize,
                            const int16_t **alpSrc, uint8_t *dest,
                            int dstW, int y, enum AVPixelFormat target,
                            int hasAlpha)
{
    int i, j, B, G, R, A;
    int step = (target == AV_PIX_FMT_RGB24 ||
                target == AV_PIX_FMT_BGR24) ? 3 : 4;
    int err[4] = {0};
    int a_temp = 1 << 18;
    int templ  = 1 << 9;
    int tempc  = templ - (128 << 19);
    int ytemp  = 1 << 21;
    int len    = dstW - 7;
    __m128i y_temp = __lsx_vreplgr2vr_w(ytemp);
    YUVTORGB_SETUP_LSX

    if (target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE ||
        target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
        step = 1;

    for (i = 0; i < len; i += 8) {
        __m128i l_src, u_src, v_src;
        __m128i y_ev, y_od, u_ev, u_od, v_ev, v_od, temp;
        __m128i R_ev, R_od, G_ev, G_od, B_ev, B_od;
        int n = i << 1;

        y_ev = y_od = __lsx_vreplgr2vr_w(templ);
        u_ev = u_od = v_ev = v_od = __lsx_vreplgr2vr_w(tempc);
        for (j = 0; j < lumFilterSize; j++) {
            temp  = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src = __lsx_vldx(lumSrc[j], n);
            y_ev  = __lsx_vmaddwev_w_h(y_ev, l_src, temp);
            y_od  = __lsx_vmaddwod_w_h(y_od, l_src, temp);
        }
        for (j = 0; j < chrFilterSize; j++) {
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            DUP2_ARG2(__lsx_vldx, chrUSrc[j], n, chrVSrc[j], n,
                      u_src, v_src);
            DUP2_ARG3(__lsx_vmaddwev_w_h, u_ev, u_src, temp, v_ev,
                      v_src, temp, u_ev, v_ev);
            DUP2_ARG3(__lsx_vmaddwod_w_h, u_od, u_src, temp, v_od,
                      v_src, temp, u_od, v_od);
        }
        y_ev = __lsx_vsrai_w(y_ev, 10);
        y_od = __lsx_vsrai_w(y_od, 10);
        u_ev = __lsx_vsrai_w(u_ev, 10);
        u_od = __lsx_vsrai_w(u_od, 10);
        v_ev = __lsx_vsrai_w(v_ev, 10);
        v_od = __lsx_vsrai_w(v_od, 10);
        YUVTORGB_LSX(y_ev, u_ev, v_ev, R_ev, G_ev, B_ev, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);
        YUVTORGB_LSX(y_od, u_od, v_od, R_od, G_od, B_od, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a_src, a_ev, a_od;

            a_ev = a_od = __lsx_vreplgr2vr_w(a_temp);
            for (j = 0; j < lumFilterSize; j++) {
                temp  = __lsx_vldrepl_h(lumFilter + j, 0);
                a_src = __lsx_vldx(alpSrc[j], n);
                a_ev  = __lsx_vmaddwev_w_h(a_ev, a_src, temp);
                a_od  = __lsx_vmaddwod_w_h(a_od, a_src, temp);
            }
            a_ev = __lsx_vsrai_w(a_ev, 19);
            a_od = __lsx_vsrai_w(a_od, 19);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 0, 0);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 0, 1);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 1, 2);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 1, 3);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 2, 4);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 2, 5);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 3, 6);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 3, 7);
        } else {
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 0, 0);
            WRITE_FULL_LSX(R_od, G_od, B_od, 0, 1);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 1, 2);
            WRITE_FULL_LSX(R_od, G_od, B_od, 1, 3);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 2, 4);
            WRITE_FULL_LSX(R_od, G_od, B_od, 2, 5);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 3, 6);
            WRITE_FULL_LSX(R_od, G_od, B_od, 3, 7);
        }
    }
    if (dstW - i >= 4) {
        __m128i l_src, u_src, v_src;
        __m128i y_ev, u_ev, v_ev, uv, temp;
        __m128i R_ev, G_ev, B_ev;
        int n = i << 1;

        y_ev = __lsx_vreplgr2vr_w(templ);
        u_ev = v_ev = __lsx_vreplgr2vr_w(tempc);
        for (j = 0; j < lumFilterSize; j++) {
            temp  = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src = __lsx_vldx(lumSrc[j], n);
            l_src = __lsx_vilvl_h(l_src, l_src);
            y_ev  = __lsx_vmaddwev_w_h(y_ev, l_src, temp);
        }
        for (j = 0; j < chrFilterSize; j++) {
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            DUP2_ARG2(__lsx_vldx, chrUSrc[j], n, chrVSrc[j], n, u_src, v_src);
            uv   = __lsx_vilvl_h(v_src, u_src);
            u_ev = __lsx_vmaddwev_w_h(u_ev, uv, temp);
            v_ev = __lsx_vmaddwod_w_h(v_ev, uv, temp);
        }
        y_ev = __lsx_vsrai_w(y_ev, 10);
        u_ev = __lsx_vsrai_w(u_ev, 10);
        v_ev = __lsx_vsrai_w(v_ev, 10);
        YUVTORGB_LSX(y_ev, u_ev, v_ev, R_ev, G_ev, B_ev, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a_src, a_ev;

            a_ev = __lsx_vreplgr2vr_w(a_temp);
            for (j = 0; j < lumFilterSize; j++) {
                temp  = __lsx_vldrepl_h(lumFilter + j, 0);
                a_src = __lsx_vldx(alpSrc[j], n);
                a_src = __lsx_vilvl_h(a_src, a_src);
                a_ev  = __lsx_vmaddwev_w_h(a_ev, a_src, temp);
            }
            a_ev = __lsx_vsrai_w(a_ev, 19);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 0, 0);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 1, 1);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 2, 2);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 3, 3);
        } else {
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 0, 0);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 1, 1);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 2, 2);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 3, 3);
        }
        i += 4;
    }
    for (; i < dstW; i++) {
        int Y = templ;
        int V, U = V = tempc;

        A = 0;
        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y >>= 10;
        U >>= 10;
        V >>= 10;
        if (hasAlpha) {
            A = 1 << 18;
            for (j = 0; j < lumFilterSize; j++) {
                A += alpSrc[j][i] * lumFilter[j];
            }
            A >>= 19;
            if (A & 0x100)
                A = av_clip_uint8(A);
        }
        Y -= y_offset;
        Y *= y_coeff;
        Y += ytemp;
        R = (unsigned)Y + V * v2r_coe;
        G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
        B = (unsigned)Y + U * u2b_coe;
        yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
        dest += step;
    }
    c->dither_error[0][i] = err[0];
    c->dither_error[1][i] = err[1];
    c->dither_error[2][i] = err[2];
}

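/* Two-row (bilinear) variant of the full-chroma path. */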
static void
yuv2rgb_full_2_template_lsx(SwsInternal *c, const int16_t *buf[2],
                            const int16_t *ubuf[2], const int16_t *vbuf[2],
                            const int16_t *abuf[2], uint8_t *dest, int dstW,
                            int yalpha, int uvalpha, int y,
                            enum AVPixelFormat target, int hasAlpha)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int uvtemp   = 128 << 19;
    int atemp    = 1 << 18;
    int err[4]   = {0};
    int ytemp    = 1 << 21;
    int len      = dstW - 7;
    int i, R, G, B, A;
    int step = (target == AV_PIX_FMT_RGB24 ||
                target == AV_PIX_FMT_BGR24) ? 3 : 4;
    __m128i v_uvalpha1 = __lsx_vreplgr2vr_w(uvalpha1);
    __m128i v_yalpha1  = __lsx_vreplgr2vr_w(yalpha1);
    __m128i v_uvalpha  = __lsx_vreplgr2vr_w(uvalpha);
    __m128i v_yalpha   = __lsx_vreplgr2vr_w(yalpha);
    __m128i uv         = __lsx_vreplgr2vr_w(uvtemp);
    __m128i a_bias     = __lsx_vreplgr2vr_w(atemp);
    __m128i y_temp     = __lsx_vreplgr2vr_w(ytemp);
    YUVTORGB_SETUP_LSX

    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    if (target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE ||
        target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
        step = 1;

    for (i = 0; i < len; i += 8) {
        __m128i b0, b1, ub0, ub1, vb0, vb1;
        __m128i y0_l, y0_h, y1_l, y1_h, u0_l, u0_h;
        __m128i v0_l, v0_h, u1_l, u1_h, v1_l, v1_h;
        __m128i y_l, y_h, v_l, v_h, u_l, u_h;
        __m128i R_l, R_h, G_l, G_h, B_l, B_h;
        int n = i << 1;

        DUP4_ARG2(__lsx_vldx, buf0, n, buf1, n, ubuf0,
                  n, ubuf1, n, b0, b1, ub0, ub1);
        DUP2_ARG2(__lsx_vldx, vbuf0, n, vbuf1, n, vb0, vb1);
        DUP2_ARG2(__lsx_vsllwil_w_h, b0, 0, b1, 0, y0_l, y1_l);
        DUP4_ARG2(__lsx_vsllwil_w_h, ub0, 0, ub1, 0, vb0, 0, vb1, 0,
                  u0_l, u1_l, v0_l, v1_l);
        DUP2_ARG1(__lsx_vexth_w_h, b0, b1, y0_h, y1_h);
        DUP4_ARG1(__lsx_vexth_w_h, ub0, ub1, vb0, vb1,
                  u0_h, u1_h, v0_h, v1_h);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        y0_h = __lsx_vmul_w(y0_h, v_yalpha1);
        u0_l = __lsx_vmul_w(u0_l, v_uvalpha1);
        u0_h = __lsx_vmul_w(u0_h, v_uvalpha1);
        v0_l = __lsx_vmul_w(v0_l, v_uvalpha1);
        v0_h = __lsx_vmul_w(v0_h, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        y_h  = __lsx_vmadd_w(y0_h, v_yalpha, y1_h);
        u_l  = __lsx_vmadd_w(u0_l, v_uvalpha, u1_l);
        u_h  = __lsx_vmadd_w(u0_h, v_uvalpha, u1_h);
        v_l  = __lsx_vmadd_w(v0_l, v_uvalpha, v1_l);
        v_h  = __lsx_vmadd_w(v0_h, v_uvalpha, v1_h);
        u_l  = __lsx_vsub_w(u_l, uv);
        u_h  = __lsx_vsub_w(u_h, uv);
        v_l  = __lsx_vsub_w(v_l, uv);
        v_h  = __lsx_vsub_w(v_h, uv);
        y_l  = __lsx_vsrai_w(y_l, 10);
        y_h  = __lsx_vsrai_w(y_h, 10);
        u_l  = __lsx_vsrai_w(u_l, 10);
        u_h  = __lsx_vsrai_w(u_h, 10);
        v_l  = __lsx_vsrai_w(v_l, 10);
        v_h  = __lsx_vsrai_w(v_h, 10);
        YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);
        YUVTORGB_LSX(y_h, u_h, v_h, R_h, G_h, B_h, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a0, a1, a0_l, a0_h;
            __m128i a_l, a_h, a1_l, a1_h;

            DUP2_ARG2(__lsx_vldx, abuf0, n, abuf1, n, a0, a1);
            DUP2_ARG2(__lsx_vsllwil_w_h, a0, 0, a1, 0, a0_l, a1_l);
            DUP2_ARG1(__lsx_vexth_w_h, a0, a1, a0_h, a1_h);
            a_l = __lsx_vmadd_w(a_bias, a0_l, v_yalpha1);
            a_h = __lsx_vmadd_w(a_bias, a0_h, v_yalpha1);
            a_l = __lsx_vmadd_w(a_l, v_yalpha, a1_l);
            a_h = __lsx_vmadd_w(a_h, v_yalpha, a1_h);
            a_l = __lsx_vsrai_w(a_l, 19);
            a_h = __lsx_vsrai_w(a_h, 19);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 0, 4);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 1, 5);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 2, 6);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 3, 7);
        } else {
            WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
            WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
            WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
            WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
            WRITE_FULL_LSX(R_h, G_h, B_h, 0, 4);
            WRITE_FULL_LSX(R_h, G_h, B_h, 1, 5);
            WRITE_FULL_LSX(R_h, G_h, B_h, 2, 6);
            WRITE_FULL_LSX(R_h, G_h, B_h, 3, 7);
        }
    }
    if (dstW - i >= 4) {
        __m128i b0, b1, ub0, ub1, vb0, vb1;
        __m128i y0_l, y1_l, u0_l;
        __m128i v0_l, u1_l, v1_l;
        __m128i y_l, u_l, v_l;
        __m128i R_l, G_l, B_l;
        int n = i << 1;

        DUP4_ARG2(__lsx_vldx, buf0, n, buf1, n, ubuf0, n,
                  ubuf1, n, b0, b1, ub0, ub1);
        DUP2_ARG2(__lsx_vldx, vbuf0, n, vbuf1, n, vb0, vb1);
        DUP2_ARG2(__lsx_vsllwil_w_h, b0, 0, b1, 0, y0_l, y1_l);
        DUP4_ARG2(__lsx_vsllwil_w_h, ub0, 0, ub1, 0, vb0, 0, vb1, 0,
                  u0_l, u1_l, v0_l, v1_l);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        u0_l = __lsx_vmul_w(u0_l, v_uvalpha1);
        v0_l = __lsx_vmul_w(v0_l, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        u_l  = __lsx_vmadd_w(u0_l, v_uvalpha, u1_l);
        v_l  = __lsx_vmadd_w(v0_l, v_uvalpha, v1_l);
        u_l  = __lsx_vsub_w(u_l, uv);
        v_l  = __lsx_vsub_w(v_l, uv);
        y_l  = __lsx_vsrai_w(y_l, 10);
        u_l  = __lsx_vsrai_w(u_l, 10);
        v_l  = __lsx_vsrai_w(v_l, 10);
        YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a0, a1, a0_l;
            __m128i a_l, a1_l;

            DUP2_ARG2(__lsx_vldx, abuf0, n, abuf1, n, a0, a1);
            DUP2_ARG2(__lsx_vsllwil_w_h, a0, 0, a1, 0, a0_l, a1_l);
            a_l = __lsx_vmadd_w(a_bias, a0_l, v_yalpha1);
            a_l = __lsx_vmadd_w(a_l, v_yalpha, a1_l);
            a_l = __lsx_vsrai_w(a_l, 19);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
        } else {
            WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
            WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
            WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
            WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
        }
        i += 4;
    }
    for (; i < dstW; i++) {
        int Y = ( buf0[i] * yalpha1  +  buf1[i] * yalpha)           >> 10;
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha - uvtemp) >> 10;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha - uvtemp) >> 10;

        A = 0;
        if (hasAlpha) {
            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + atemp) >> 19;
            if (A & 0x100)
                A = av_clip_uint8(A);
        }

        Y -= y_offset;
        Y *= y_coeff;
        Y += ytemp;
        R = (unsigned)Y + V * v2r_coe;
        G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
        B = (unsigned)Y + U * u2b_coe;
        yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
        dest += step;
    }
    c->dither_error[0][i] = err[0];
    c->dither_error[1][i] = err[1];
    c->dither_error[2][i] = err[2];
}

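/* Single-row variant of the full-chroma path: uvalpha < 2048 uses the first
 * chroma row only, otherwise the two chroma rows are averaged. */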
static void
yuv2rgb_full_1_template_lsx(SwsInternal *c, const int16_t *buf0,
                            const int16_t *ubuf[2], const int16_t *vbuf[2],
                            const int16_t *abuf0, uint8_t *dest, int dstW,
                            int uvalpha, int y, enum AVPixelFormat target,
                            int hasAlpha)
{
    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i, B, G, R, A;
    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    int err[4]   = {0};
    int ytemp    = 1 << 21;
    int bias_int = 64;
    int len      = dstW - 7;
    __m128i y_temp = __lsx_vreplgr2vr_w(ytemp);
    YUVTORGB_SETUP_LSX

    if (target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE ||
        target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
        step = 1;
    if (uvalpha < 2048) {
        int uvtemp = 128 << 7;
        __m128i uv   = __lsx_vreplgr2vr_w(uvtemp);
        __m128i bias = __lsx_vreplgr2vr_w(bias_int);

        for (i = 0; i < len; i += 8) {
            __m128i b, ub, vb, ub_l, ub_h, vb_l, vb_h;
            __m128i y_l, y_h, u_l, u_h, v_l, v_h;
            __m128i R_l, R_h, G_l, G_h, B_l, B_h;
            int n = i << 1;

            DUP2_ARG2(__lsx_vldx, buf0, n, ubuf0, n, b, ub);
            vb  = __lsx_vldx(vbuf0, n);
            y_l = __lsx_vsllwil_w_h(b, 2);
            y_h = __lsx_vexth_w_h(b);
            DUP2_ARG2(__lsx_vsllwil_w_h, ub, 0, vb, 0, ub_l, vb_l);
            DUP2_ARG1(__lsx_vexth_w_h, ub, vb, ub_h, vb_h);
            y_h = __lsx_vslli_w(y_h, 2);
            u_l = __lsx_vsub_w(ub_l, uv);
            u_h = __lsx_vsub_w(ub_h, uv);
            v_l = __lsx_vsub_w(vb_l, uv);
            v_h = __lsx_vsub_w(vb_h, uv);
            u_l = __lsx_vslli_w(u_l, 2);
            u_h = __lsx_vslli_w(u_h, 2);
            v_l = __lsx_vslli_w(v_l, 2);
            v_h = __lsx_vslli_w(v_h, 2);
            YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
                         y_temp, v2r, v2g, u2g, u2b);
            YUVTORGB_LSX(y_h, u_h, v_h, R_h, G_h, B_h, offset, coeff,
                         y_temp, v2r, v2g, u2g, u2b);

            if (hasAlpha) {
                __m128i a_src;
                __m128i a_l, a_h;

                a_src = __lsx_vld(abuf0 + i, 0);
                a_l   = __lsx_vsllwil_w_h(a_src, 0);
                a_h   = __lsx_vexth_w_h(a_src);
                a_l   = __lsx_vadd_w(a_l, bias);
                a_h   = __lsx_vadd_w(a_h, bias);
                a_l   = __lsx_vsrai_w(a_l, 7);
                a_h   = __lsx_vsrai_w(a_h, 7);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
                WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 0, 4);
                WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 1, 5);
                WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 2, 6);
                WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 3, 7);
            } else {
                WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
                WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
                WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
                WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
                WRITE_FULL_LSX(R_h, G_h, B_h, 0, 4);
                WRITE_FULL_LSX(R_h, G_h, B_h, 1, 5);
                WRITE_FULL_LSX(R_h, G_h, B_h, 2, 6);
                WRITE_FULL_LSX(R_h, G_h, B_h, 3, 7);
            }
        }
        if (dstW - i >= 4) {
            __m128i b, ub, vb, ub_l, vb_l;
            __m128i y_l, u_l, v_l;
            __m128i R_l, G_l, B_l;
            int n = i << 1;

            DUP2_ARG2(__lsx_vldx, buf0, n, ubuf0, n, b, ub);
            vb  = __lsx_vldx(vbuf0, n);
            y_l = __lsx_vsllwil_w_h(b, 0);
            DUP2_ARG2(__lsx_vsllwil_w_h, ub, 0, vb, 0, ub_l, vb_l);
            y_l = __lsx_vslli_w(y_l, 2);
            u_l = __lsx_vsub_w(ub_l, uv);
            v_l = __lsx_vsub_w(vb_l, uv);
            u_l = __lsx_vslli_w(u_l, 2);
            v_l = __lsx_vslli_w(v_l, 2);
            YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
                         y_temp, v2r, v2g, u2g, u2b);

            if (hasAlpha) {
                __m128i a_src, a_l;

                a_src = __lsx_vldx(abuf0, n);
                a_src = __lsx_vsllwil_w_h(a_src, 0);
                a_l   = __lsx_vadd_w(bias, a_src);
                a_l   = __lsx_vsrai_w(a_l, 7);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
                WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
            } else {
                WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
                WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
                WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
                WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
            }
            i += 4;
        }
        for (; i < dstW; i++) {
            int Y = buf0[i] << 2;
            int U = (ubuf0[i] - uvtemp) << 2;
            int V = (vbuf0[i] - uvtemp) << 2;

            A = 0;
            if (hasAlpha) {
                A = (abuf0[i] + 64) >> 7;
                if (A & 0x100)
                    A = av_clip_uint8(A);
            }
            Y -= y_offset;
            Y *= y_coeff;
            Y += ytemp;
            R = (unsigned)Y + V * v2r_coe;
            G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
            B = (unsigned)Y + U * u2b_coe;
            yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
            dest += step;
        }
    } else {
        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        int uvtemp = 128 << 8;
        __m128i uv   = __lsx_vreplgr2vr_w(uvtemp);
        __m128i zero = __lsx_vldi(0);
        __m128i bias = __lsx_vreplgr2vr_h(bias_int);

        for (i = 0; i < len; i += 8) {
            __m128i b, ub0, ub1, vb0, vb1;
            __m128i y_ev, y_od, u_ev, u_od, v_ev, v_od;
            __m128i R_ev, R_od, G_ev, G_od, B_ev, B_od;
            int n = i << 1;

            DUP4_ARG2(__lsx_vldx, buf0, n, ubuf0, n, vbuf0, n,
                      ubuf1, n, b, ub0, vb0, ub1);
            vb1 = __lsx_vldx(vbuf1, n);
            y_ev = __lsx_vaddwev_w_h(b, zero);
            y_od = __lsx_vaddwod_w_h(b, zero);
            DUP2_ARG2(__lsx_vaddwev_w_h, ub0, vb0, ub1, vb1, u_ev, v_ev);
            DUP2_ARG2(__lsx_vaddwod_w_h, ub0, vb0, ub1, vb1, u_od, v_od);
            DUP2_ARG2(__lsx_vslli_w, y_ev, 2, y_od, 2, y_ev, y_od);
            DUP4_ARG2(__lsx_vsub_w, u_ev, uv, u_od, uv, v_ev, uv, v_od, uv,
                      u_ev, u_od, v_ev, v_od);
            DUP4_ARG2(__lsx_vslli_w, u_ev, 1, u_od, 1, v_ev, 1, v_od, 1,
                      u_ev, u_od, v_ev, v_od);
            YUVTORGB_LSX(y_ev, u_ev, v_ev, R_ev, G_ev, B_ev, offset, coeff,
                         y_temp, v2r, v2g, u2g, u2b);
            YUVTORGB_LSX(y_od, u_od, v_od, R_od, G_od, B_od, offset, coeff,
                         y_temp, v2r, v2g, u2g, u2b);

            if (hasAlpha) {
                __m128i a_src;
                __m128i a_ev, a_od;

                a_src = __lsx_vld(abuf0 + i, 0);
                a_ev  = __lsx_vaddwev_w_h(bias, a_src);
                a_od  = __lsx_vaddwod_w_h(bias, a_src);
                a_ev  = __lsx_vsrai_w(a_ev, 7);
                a_od  = __lsx_vsrai_w(a_od, 7);
                WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 0, 0);
                WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 0, 1);
                WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 1, 2);
                WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 1, 3);
                WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 2, 4);
                WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 2, 5);
                WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 3, 6);
                WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 3, 7);
            } else {
                WRITE_FULL_LSX(R_ev, G_ev, B_ev, 0, 0);
                WRITE_FULL_LSX(R_od, G_od, B_od, 0, 1);
                WRITE_FULL_LSX(R_ev, G_ev, B_ev, 1, 2);
                WRITE_FULL_LSX(R_od, G_od, B_od, 1, 3);
                WRITE_FULL_LSX(R_ev, G_ev, B_ev, 2, 4);
                WRITE_FULL_LSX(R_od, G_od, B_od, 2, 5);
                WRITE_FULL_LSX(R_ev, G_ev, B_ev, 3, 6);
                WRITE_FULL_LSX(R_od, G_od, B_od, 3, 7);
            }
        }
        if (dstW - i >= 4) {
            __m128i b, ub0, ub1, vb0, vb1;
            __m128i y_l, u_l, v_l;
            __m128i R_l, G_l, B_l;
            int n = i << 1;

            DUP4_ARG2(__lsx_vldx, buf0, n, ubuf0, n, vbuf0, n,
                      ubuf1, n, b, ub0, vb0, ub1);
            vb1 = __lsx_vldx(vbuf1, n);
            y_l = __lsx_vsllwil_w_h(b, 0);
            y_l = __lsx_vslli_w(y_l, 2);
            DUP4_ARG2(__lsx_vsllwil_w_h, ub0, 0, vb0, 0, ub1, 0, vb1, 0,
                      ub0, vb0, ub1, vb1);
            DUP2_ARG2(__lsx_vadd_w, ub0, ub1, vb0, vb1, u_l, v_l);
            u_l = __lsx_vsub_w(u_l, uv);
            v_l = __lsx_vsub_w(v_l, uv);
            u_l = __lsx_vslli_w(u_l, 1);
            v_l = __lsx_vslli_w(v_l, 1);
            YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
                         y_temp, v2r, v2g, u2g, u2b);

            if (hasAlpha) {
                __m128i a_src;
                __m128i a_l;

                a_src = __lsx_vld(abuf0 + i, 0);
                a_src = __lsx_vilvl_h(a_src, a_src);
                a_l = __lsx_vaddwev_w_h(bias, a_src);
1543  a_l = __lsx_vsrai_w(a_l, 7);
1544  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
1545  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
1546  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
1547  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
1548  } else {
1549  WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
1550  WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
1551  WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
1552  WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
1553  }
1554  i += 4;
1555  }
1556  for (; i < dstW; i++) {
1557  int Y = buf0[i] << 2;
1558  int U = (ubuf0[i] + ubuf1[i] - uvtemp) << 1;
1559  int V = (vbuf0[i] + vbuf1[i] - uvtemp) << 1;
1560 
1561  A = 0;
1562  if(hasAlpha) {
1563  A = (abuf0[i] + 64) >> 7;
1564  if (A & 0x100)
1565  A = av_clip_uint8(A);
1566  }
1567  Y -= y_offset;
1568  Y *= y_coeff;
1569  Y += ytemp;
1570  R = (unsigned)Y + V * v2r_coe;
1571  G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
1572  B = (unsigned)Y + U * u2b_coe;
1573  yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
1574  dest += step;
1575  }
1576  }
1577  c->dither_error[0][i] = err[0];
1578  c->dither_error[1][i] = err[1];
1579  c->dither_error[2][i] = err[2];
1580 }
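For reference, the scalar tail loop above is the clearest statement of the per-pixel math that the even/odd LSX vector paths compute in parallel. Below is a minimal standalone sketch of that fixed-point step for one pixel, assuming the chroma input has already been averaged and bias-adjusted as in the loop; the parameter names mirror the template's locals (y_offset, y_coeff, ytemp, *_coe) and are illustrative, not part of this file.

/* Sketch only: one pixel of the full-chroma fixed-point YUV->RGB step.
 * y15/u15/v15 are the 15-bit intermediates from the horizontal scaler;
 * the offset, gain and rounding constants correspond to the template's
 * y_offset, y_coeff, ytemp and v2r/v2g/u2g/u2b coefficients (assumed). */
static inline void yuv2rgb_full_pixel_ref(int y15, int u15, int v15,
                                          int y_offset, int y_coeff,
                                          int ytemp, int v2r_coe,
                                          int v2g_coe, int u2g_coe,
                                          int u2b_coe,
                                          int *R, int *G, int *B)
{
    int Y = (y15 << 2) - y_offset;       /* scale luma, remove offset      */
    Y = Y * y_coeff + ytemp;             /* fixed-point gain plus rounding */
    *R = (unsigned)Y + v15 * v2r_coe;                 /* red: V only       */
    *G = (unsigned)Y + v15 * v2g_coe + u15 * u2g_coe; /* green: U and V    */
    *B = (unsigned)Y + u15 * u2b_coe;                 /* blue: U only      */
}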
1581 
1582 #if CONFIG_SMALL
1583 YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,
1584  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1585 YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,
1586  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1587 YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,
1588  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1589 YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,
1590  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1591 #else
1592 #if CONFIG_SWSCALE_ALPHA
1593 YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1)
1594 YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1)
1595 YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1)
1596 YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1)
1597 #endif
1598 YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0)
1599 YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0)
1600 YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0)
1601 YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0)
1602 #endif
1603 YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0)
1604 YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0)
1605 
1606 YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0)
1607 YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0)
1608 YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0)
1609 YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0)
1610 
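Each YUV2RGBWRAPPER invocation above stamps out three entry points (the _X, _2 and _1 vertical-scaling variants) that bind one pixel format and alpha setting to the shared full-chroma templates. The macro body (defined earlier in this file, at line 744) is not shown here; as a hedged illustration of the pattern, following the generic libswscale/output.c wrapper style, the _1 variant for rgb24_full plausibly expands to something like:

/* Illustrative expansion only; the real YUV2RGBWRAPPER macro may differ
 * in detail. Matches the yuv2packed1_fn signature. */
static void yuv2rgb24_full_1_lsx(SwsInternal *c, const int16_t *buf0,
                                 const int16_t *ubuf[2],
                                 const int16_t *vbuf[2],
                                 const int16_t *abuf0, uint8_t *dest,
                                 int dstW, int uvalpha, int y)
{
    yuv2rgb_full_1_template_lsx(c, buf0, ubuf, vbuf, abuf0, dest, dstW,
                                uvalpha, y, AV_PIX_FMT_RGB24, 0);
}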
1611 
1612 av_cold void ff_sws_init_output_lsx(SwsInternal *c,
1613  yuv2planar1_fn *yuv2plane1,
1614  yuv2planarX_fn *yuv2planeX,
1615  yuv2interleavedX_fn *yuv2nv12cX,
1616  yuv2packed1_fn *yuv2packed1,
1617  yuv2packed2_fn *yuv2packed2,
1618  yuv2packedX_fn *yuv2packedX,
1619  yuv2anyX_fn *yuv2anyX)
1620 {
1621  enum AVPixelFormat dstFormat = c->opts.dst_format;
1622 
1623  /* Add initialization once optimized */
1624  if (isSemiPlanarYUV(dstFormat) && isDataInHighBits(dstFormat)) {
1625  } else if (is16BPS(dstFormat)) {
1626  } else if (isNBPS(dstFormat)) {
1627  } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
1628  } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
1629  } else {
1630  *yuv2plane1 = yuv2plane1_8_lsx;
1631  *yuv2planeX = yuv2planeX_8_lsx;
1632  }
1633 
1634  if (c->opts.flags & SWS_FULL_CHR_H_INT) {
1635  switch (c->opts.dst_format) {
1636  case AV_PIX_FMT_RGBA:
1637 #if CONFIG_SMALL
1638  c->yuv2packedX = yuv2rgba32_full_X_lsx;
1639  c->yuv2packed2 = yuv2rgba32_full_2_lsx;
1640  c->yuv2packed1 = yuv2rgba32_full_1_lsx;
1641 #else
1642 #if CONFIG_SWSCALE_ALPHA
1643  if (c->needAlpha) {
1644  c->yuv2packedX = yuv2rgba32_full_X_lsx;
1645  c->yuv2packed2 = yuv2rgba32_full_2_lsx;
1646  c->yuv2packed1 = yuv2rgba32_full_1_lsx;
1647  } else
1648 #endif /* CONFIG_SWSCALE_ALPHA */
1649  {
1650  c->yuv2packedX = yuv2rgbx32_full_X_lsx;
1651  c->yuv2packed2 = yuv2rgbx32_full_2_lsx;
1652  c->yuv2packed1 = yuv2rgbx32_full_1_lsx;
1653  }
1654 #endif /* !CONFIG_SMALL */
1655  break;
1656  case AV_PIX_FMT_ARGB:
1657 #if CONFIG_SMALL
1658  c->yuv2packedX = yuv2argb32_full_X_lsx;
1659  c->yuv2packed2 = yuv2argb32_full_2_lsx;
1660  c->yuv2packed1 = yuv2argb32_full_1_lsx;
1661 #else
1662 #if CONFIG_SWSCALE_ALPHA
1663  if (c->needAlpha) {
1664  c->yuv2packedX = yuv2argb32_full_X_lsx;
1665  c->yuv2packed2 = yuv2argb32_full_2_lsx;
1666  c->yuv2packed1 = yuv2argb32_full_1_lsx;
1667  } else
1668 #endif /* CONFIG_SWSCALE_ALPHA */
1669  {
1670  c->yuv2packedX = yuv2xrgb32_full_X_lsx;
1671  c->yuv2packed2 = yuv2xrgb32_full_2_lsx;
1672  c->yuv2packed1 = yuv2xrgb32_full_1_lsx;
1673  }
1674 #endif /* !CONFIG_SMALL */
1675  break;
1676  case AV_PIX_FMT_BGRA:
1677 #if CONFIG_SMALL
1678  c->yuv2packedX = yuv2bgra32_full_X_lsx;
1679  c->yuv2packed2 = yuv2bgra32_full_2_lsx;
1680  c->yuv2packed1 = yuv2bgra32_full_1_lsx;
1681 #else
1682 #if CONFIG_SWSCALE_ALPHA
1683  if (c->needAlpha) {
1684  c->yuv2packedX = yuv2bgra32_full_X_lsx;
1685  c->yuv2packed2 = yuv2bgra32_full_2_lsx;
1686  c->yuv2packed1 = yuv2bgra32_full_1_lsx;
1687  } else
1688 #endif /* CONFIG_SWSCALE_ALPHA */
1689  {
1690  c->yuv2packedX = yuv2bgrx32_full_X_lsx;
1691  c->yuv2packed2 = yuv2bgrx32_full_2_lsx;
1692  c->yuv2packed1 = yuv2bgrx32_full_1_lsx;
1693  }
1694 #endif /* !CONFIG_SMALL */
1695  break;
1696  case AV_PIX_FMT_ABGR:
1697 #if CONFIG_SMALL
1698  c->yuv2packedX = yuv2abgr32_full_X_lsx;
1699  c->yuv2packed2 = yuv2abgr32_full_2_lsx;
1700  c->yuv2packed1 = yuv2abgr32_full_1_lsx;
1701 #else
1702 #if CONFIG_SWSCALE_ALPHA
1703  if (c->needAlpha) {
1704  c->yuv2packedX = yuv2abgr32_full_X_lsx;
1705  c->yuv2packed2 = yuv2abgr32_full_2_lsx;
1706  c->yuv2packed1 = yuv2abgr32_full_1_lsx;
1707  } else
1708 #endif /* CONFIG_SWSCALE_ALPHA */
1709  {
1710  c->yuv2packedX = yuv2xbgr32_full_X_lsx;
1711  c->yuv2packed2 = yuv2xbgr32_full_2_lsx;
1712  c->yuv2packed1 = yuv2xbgr32_full_1_lsx;
1713  }
1714 #endif /* !CONFIG_SMALL */
1715  break;
1716  case AV_PIX_FMT_RGB24:
1717  c->yuv2packedX = yuv2rgb24_full_X_lsx;
1718  c->yuv2packed2 = yuv2rgb24_full_2_lsx;
1719  c->yuv2packed1 = yuv2rgb24_full_1_lsx;
1720  break;
1721  case AV_PIX_FMT_BGR24:
1722  c->yuv2packedX = yuv2bgr24_full_X_lsx;
1723  c->yuv2packed2 = yuv2bgr24_full_2_lsx;
1724  c->yuv2packed1 = yuv2bgr24_full_1_lsx;
1725  break;
1726  case AV_PIX_FMT_BGR4_BYTE:
1727  c->yuv2packedX = yuv2bgr4_byte_full_X_lsx;
1728  c->yuv2packed2 = yuv2bgr4_byte_full_2_lsx;
1729  c->yuv2packed1 = yuv2bgr4_byte_full_1_lsx;
1730  break;
1731  case AV_PIX_FMT_RGB4_BYTE:
1732  c->yuv2packedX = yuv2rgb4_byte_full_X_lsx;
1733  c->yuv2packed2 = yuv2rgb4_byte_full_2_lsx;
1734  c->yuv2packed1 = yuv2rgb4_byte_full_1_lsx;
1735  break;
1736  case AV_PIX_FMT_BGR8:
1737  c->yuv2packedX = yuv2bgr8_full_X_lsx;
1738  c->yuv2packed2 = yuv2bgr8_full_2_lsx;
1739  c->yuv2packed1 = yuv2bgr8_full_1_lsx;
1740  break;
1741  case AV_PIX_FMT_RGB8:
1742  c->yuv2packedX = yuv2rgb8_full_X_lsx;
1743  c->yuv2packed2 = yuv2rgb8_full_2_lsx;
1744  c->yuv2packed1 = yuv2rgb8_full_1_lsx;
1745  break;
1746  }
1747  } else {
1748  switch (c->opts.dst_format) {
1749  case AV_PIX_FMT_RGB32:
1750  case AV_PIX_FMT_BGR32:
1751 #if CONFIG_SMALL
1752 #else
1753 #if CONFIG_SWSCALE_ALPHA
1754  if (c->needAlpha) {
1755  } else
1756 #endif /* CONFIG_SWSCALE_ALPHA */
1757  {
1758  c->yuv2packed1 = yuv2rgbx32_1_lsx;
1759  c->yuv2packed2 = yuv2rgbx32_2_lsx;
1760  c->yuv2packedX = yuv2rgbx32_X_lsx;
1761  }
1762 #endif /* !CONFIG_SMALL */
1763  break;
1764  case AV_PIX_FMT_RGB32_1:
1765  case AV_PIX_FMT_BGR32_1:
1766 #if CONFIG_SMALL
1767 #else
1768 #if CONFIG_SWSCALE_ALPHA
1769  if (c->needAlpha) {
1770  } else
1771 #endif /* CONFIG_SWSCALE_ALPHA */
1772  {
1773  c->yuv2packed1 = yuv2rgbx32_1_1_lsx;
1774  c->yuv2packed2 = yuv2rgbx32_1_2_lsx;
1775  c->yuv2packedX = yuv2rgbx32_1_X_lsx;
1776  }
1777 #endif /* !CONFIG_SMALL */
1778  break;
1779  case AV_PIX_FMT_RGB24:
1780  c->yuv2packed1 = yuv2rgb24_1_lsx;
1781  c->yuv2packed2 = yuv2rgb24_2_lsx;
1782  c->yuv2packedX = yuv2rgb24_X_lsx;
1783  break;
1784  case AV_PIX_FMT_BGR24:
1785  c->yuv2packed1 = yuv2bgr24_1_lsx;
1786  c->yuv2packed2 = yuv2bgr24_2_lsx;
1787  c->yuv2packedX = yuv2bgr24_X_lsx;
1788  break;
1789  case AV_PIX_FMT_RGB565LE:
1790  case AV_PIX_FMT_RGB565BE:
1791  case AV_PIX_FMT_BGR565LE:
1792  case AV_PIX_FMT_BGR565BE:
1793  c->yuv2packed1 = yuv2rgb16_1_lsx;
1794  c->yuv2packed2 = yuv2rgb16_2_lsx;
1795  c->yuv2packedX = yuv2rgb16_X_lsx;
1796  break;
1797  case AV_PIX_FMT_RGB555LE:
1798  case AV_PIX_FMT_RGB555BE:
1799  case AV_PIX_FMT_BGR555LE:
1800  case AV_PIX_FMT_BGR555BE:
1801  c->yuv2packed1 = yuv2rgb15_1_lsx;
1802  c->yuv2packed2 = yuv2rgb15_2_lsx;
1803  c->yuv2packedX = yuv2rgb15_X_lsx;
1804  break;
1805  case AV_PIX_FMT_RGB444LE:
1806  case AV_PIX_FMT_RGB444BE:
1807  case AV_PIX_FMT_BGR444LE:
1808  case AV_PIX_FMT_BGR444BE:
1809  c->yuv2packed1 = yuv2rgb12_1_lsx;
1810  c->yuv2packed2 = yuv2rgb12_2_lsx;
1811  c->yuv2packedX = yuv2rgb12_X_lsx;
1812  break;
1813  case AV_PIX_FMT_RGB8:
1814  case AV_PIX_FMT_BGR8:
1815  c->yuv2packed1 = yuv2rgb8_1_lsx;
1816  c->yuv2packed2 = yuv2rgb8_2_lsx;
1817  c->yuv2packedX = yuv2rgb8_X_lsx;
1818  break;
1819  case AV_PIX_FMT_RGB4:
1820  case AV_PIX_FMT_BGR4:
1821  c->yuv2packed1 = yuv2rgb4_1_lsx;
1822  c->yuv2packed2 = yuv2rgb4_2_lsx;
1823  c->yuv2packedX = yuv2rgb4_X_lsx;
1824  break;
1825  case AV_PIX_FMT_RGB4_BYTE:
1826  case AV_PIX_FMT_BGR4_BYTE:
1827  c->yuv2packed1 = yuv2rgb4b_1_lsx;
1828  c->yuv2packed2 = yuv2rgb4b_2_lsx;
1829  c->yuv2packedX = yuv2rgb4b_X_lsx;
1830  break;
1831  }
1832  }
1833 }
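Callers receive the selected planar routines through the out-parameters, while the packed-pixel routines are written directly into the context in the switch statements above. A hedged usage sketch follows; the call site is not part of this file, and the SwsInternal member names are assumed to match the out-parameters, as in the generic swscale init path.

/* Illustrative only: wire the LSX output routines into an SwsInternal.
 * Assumes c->yuv2plane1 etc. exist as fields on the context. */
ff_sws_init_output_lsx(c, &c->yuv2plane1, &c->yuv2planeX, &c->yuv2nv12cX,
                       &c->yuv2packed1, &c->yuv2packed2, &c->yuv2packedX,
                       &c->yuv2anyX);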