FFmpeg
qpeldsp_init.c
1 /*
2  * quarterpel DSP functions
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include <stddef.h>
24 #include <stdint.h>
25 
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libavutil/x86/cpu.h"
30 #include "libavcodec/pixels.h"
31 #include "libavcodec/qpeldsp.h"
32 #include "fpel.h"
33 
34 void ff_put_pixels8_l2_mmxext(uint8_t *dst,
35  const uint8_t *src1, const uint8_t *src2,
36  int dstStride, int src1Stride, int h);
37 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
38  const uint8_t *src1, const uint8_t *src2,
39  int dstStride, int src1Stride, int h);
40 void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
41  const uint8_t *src1, const uint8_t *src2,
42  int dstStride, int src1Stride, int h);
43 void ff_put_pixels16_l2_mmxext(uint8_t *dst,
44  const uint8_t *src1, const uint8_t *src2,
45  int dstStride, int src1Stride, int h);
46 void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
47  const uint8_t *src1, const uint8_t *src2,
48  int dstStride, int src1Stride, int h);
49 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
50  const uint8_t *src1, const uint8_t *src2,
51  int dstStride, int src1Stride, int h);
52 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
53  int dstStride, int srcStride, int h);
54 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
55  int dstStride, int srcStride, int h);
56 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
57  const uint8_t *src,
58  int dstStride, int srcStride,
59  int h);
60 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
61  int dstStride, int srcStride, int h);
62 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
63  int dstStride, int srcStride, int h);
64 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
65  const uint8_t *src,
66  int dstStride, int srcStride,
67  int h);
68 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
69  int dstStride, int srcStride);
70 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
71  int dstStride, int srcStride);
72 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
73  const uint8_t *src,
74  int dstStride, int srcStride);
75 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
76  int dstStride, int srcStride);
77 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
78  int dstStride, int srcStride);
79 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
80  const uint8_t *src,
81  int dstStride, int srcStride);
82 
83 #if HAVE_X86ASM
84 
85 #define QPEL_OP(OPNAME, RND, MMX) \
86 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
87  const uint8_t *src, \
88  ptrdiff_t stride) \
89 { \
90  uint64_t temp[8]; \
91  uint8_t *const half = (uint8_t *) temp; \
92  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
93  stride, 8); \
94  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
95  stride, stride, 8); \
96 } \
97  \
98 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
99  const uint8_t *src, \
100  ptrdiff_t stride) \
101 { \
102  ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
103  stride, 8); \
104 } \
105  \
106 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
107  const uint8_t *src, \
108  ptrdiff_t stride) \
109 { \
110  uint64_t temp[8]; \
111  uint8_t *const half = (uint8_t *) temp; \
112  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
113  stride, 8); \
114  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
115  stride, 8); \
116 } \
117  \
118 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
119  const uint8_t *src, \
120  ptrdiff_t stride) \
121 { \
122  uint64_t temp[8]; \
123  uint8_t *const half = (uint8_t *) temp; \
124  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
125  8, stride); \
126  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
127  stride, stride, 8); \
128 } \
129  \
130 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
131  const uint8_t *src, \
132  ptrdiff_t stride) \
133 { \
134  ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
135  stride, stride); \
136 } \
137  \
138 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
139  const uint8_t *src, \
140  ptrdiff_t stride) \
141 { \
142  uint64_t temp[8]; \
143  uint8_t *const half = (uint8_t *) temp; \
144  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
145  8, stride); \
146  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
147  stride, 8); \
148 } \
149  \
150 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
151  const uint8_t *src, \
152  ptrdiff_t stride) \
153 { \
154  uint64_t half[8 + 9]; \
155  uint8_t *const halfH = (uint8_t *) half + 64; \
156  uint8_t *const halfHV = (uint8_t *) half; \
157  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
158  stride, 9); \
159  ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
160  stride, 9); \
161  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
162  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
163  stride, 8, 8); \
164 } \
165  \
166 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
167  const uint8_t *src, \
168  ptrdiff_t stride) \
169 { \
170  uint64_t half[8 + 9]; \
171  uint8_t *const halfH = (uint8_t *) half + 64; \
172  uint8_t *const halfHV = (uint8_t *) half; \
173  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
174  stride, 9); \
175  ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
176  stride, 9); \
177  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
178  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
179  stride, 8, 8); \
180 } \
181  \
182 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
183  const uint8_t *src, \
184  ptrdiff_t stride) \
185 { \
186  uint64_t half[8 + 9]; \
187  uint8_t *const halfH = (uint8_t *) half + 64; \
188  uint8_t *const halfHV = (uint8_t *) half; \
189  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
190  stride, 9); \
191  ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
192  stride, 9); \
193  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
194  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
195  stride, 8, 8); \
196 } \
197  \
198 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
199  const uint8_t *src, \
200  ptrdiff_t stride) \
201 { \
202  uint64_t half[8 + 9]; \
203  uint8_t *const halfH = (uint8_t *) half + 64; \
204  uint8_t *const halfHV = (uint8_t *) half; \
205  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
206  stride, 9); \
207  ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
208  stride, 9); \
209  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
210  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
211  stride, 8, 8); \
212 } \
213  \
214 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
215  const uint8_t *src, \
216  ptrdiff_t stride) \
217 { \
218  uint64_t half[8 + 9]; \
219  uint8_t *const halfH = (uint8_t *) half + 64; \
220  uint8_t *const halfHV = (uint8_t *) half; \
221  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
222  stride, 9); \
223  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
224  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
225  stride, 8, 8); \
226 } \
227  \
228 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
229  const uint8_t *src, \
230  ptrdiff_t stride) \
231 { \
232  uint64_t half[8 + 9]; \
233  uint8_t *const halfH = (uint8_t *) half + 64; \
234  uint8_t *const halfHV = (uint8_t *) half; \
235  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
236  stride, 9); \
237  ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
238  ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
239  stride, 8, 8); \
240 } \
241  \
242 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
243  const uint8_t *src, \
244  ptrdiff_t stride) \
245 { \
246  uint64_t half[8 + 9]; \
247  uint8_t *const halfH = (uint8_t *) half; \
248  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
249  stride, 9); \
250  ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
251  8, stride, 9); \
252  ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
253  stride, 8); \
254 } \
255  \
256 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
257  const uint8_t *src, \
258  ptrdiff_t stride) \
259 { \
260  uint64_t half[8 + 9]; \
261  uint8_t *const halfH = (uint8_t *) half; \
262  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
263  stride, 9); \
264  ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
265  stride, 9); \
266  ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
267  stride, 8); \
268 } \
269  \
270 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
271  const uint8_t *src, \
272  ptrdiff_t stride) \
273 { \
274  uint64_t half[9]; \
275  uint8_t *const halfH = (uint8_t *) half; \
276  ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
277  stride, 9); \
278  ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
279  stride, 8); \
280 } \
281  \
282 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
283  const uint8_t *src, \
284  ptrdiff_t stride) \
285 { \
286  uint64_t temp[32]; \
287  uint8_t *const half = (uint8_t *) temp; \
288  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
289  stride, 16); \
290  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
291  stride, 16); \
292 } \
293  \
294 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
295  const uint8_t *src, \
296  ptrdiff_t stride) \
297 { \
298  ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
299  stride, stride, 16);\
300 } \
301  \
302 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
303  const uint8_t *src, \
304  ptrdiff_t stride) \
305 { \
306  uint64_t temp[32]; \
307  uint8_t *const half = (uint8_t*) temp; \
308  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
309  stride, 16); \
310  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
311  stride, stride, 16); \
312 } \
313  \
314 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
315  const uint8_t *src, \
316  ptrdiff_t stride) \
317 { \
318  uint64_t temp[32]; \
319  uint8_t *const half = (uint8_t *) temp; \
320  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
321  stride); \
322  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
323  stride, 16); \
324 } \
325  \
326 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
327  const uint8_t *src, \
328  ptrdiff_t stride) \
329 { \
330  ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
331  stride, stride); \
332 } \
333  \
334 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
335  const uint8_t *src, \
336  ptrdiff_t stride) \
337 { \
338  uint64_t temp[32]; \
339  uint8_t *const half = (uint8_t *) temp; \
340  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
341  stride); \
342  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
343  stride, stride, 16); \
344 } \
345  \
346 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
347  const uint8_t *src, \
348  ptrdiff_t stride) \
349 { \
350  uint64_t half[16 * 2 + 17 * 2]; \
351  uint8_t *const halfH = (uint8_t *) half + 256; \
352  uint8_t *const halfHV = (uint8_t *) half; \
353  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
354  stride, 17); \
355  ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
356  stride, 17); \
357  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
358  16, 16); \
359  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
360  stride, 16, 16); \
361 } \
362  \
363 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
364  const uint8_t *src, \
365  ptrdiff_t stride) \
366 { \
367  uint64_t half[16 * 2 + 17 * 2]; \
368  uint8_t *const halfH = (uint8_t *) half + 256; \
369  uint8_t *const halfHV = (uint8_t *) half; \
370  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
371  stride, 17); \
372  ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
373  stride, 17); \
374  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
375  16, 16); \
376  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
377  stride, 16, 16); \
378 } \
379  \
380 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
381  const uint8_t *src, \
382  ptrdiff_t stride) \
383 { \
384  uint64_t half[16 * 2 + 17 * 2]; \
385  uint8_t *const halfH = (uint8_t *) half + 256; \
386  uint8_t *const halfHV = (uint8_t *) half; \
387  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
388  stride, 17); \
389  ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
390  stride, 17); \
391  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
392  16, 16); \
393  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
394  stride, 16, 16); \
395 } \
396  \
397 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
398  const uint8_t *src, \
399  ptrdiff_t stride) \
400 { \
401  uint64_t half[16 * 2 + 17 * 2]; \
402  uint8_t *const halfH = (uint8_t *) half + 256; \
403  uint8_t *const halfHV = (uint8_t *) half; \
404  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
405  stride, 17); \
406  ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
407  stride, 17); \
408  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
409  16, 16); \
410  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
411  stride, 16, 16); \
412 } \
413  \
414 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
415  const uint8_t *src, \
416  ptrdiff_t stride) \
417 { \
418  uint64_t half[16 * 2 + 17 * 2]; \
419  uint8_t *const halfH = (uint8_t *) half + 256; \
420  uint8_t *const halfHV = (uint8_t *) half; \
421  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
422  stride, 17); \
423  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
424  16, 16); \
425  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
426  stride, 16, 16); \
427 } \
428  \
429 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
430  const uint8_t *src, \
431  ptrdiff_t stride) \
432 { \
433  uint64_t half[16 * 2 + 17 * 2]; \
434  uint8_t *const halfH = (uint8_t *) half + 256; \
435  uint8_t *const halfHV = (uint8_t *) half; \
436  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
437  stride, 17); \
438  ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
439  16, 16); \
440  ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
441  stride, 16, 16); \
442 } \
443  \
444 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
445  const uint8_t *src, \
446  ptrdiff_t stride) \
447 { \
448  uint64_t half[17 * 2]; \
449  uint8_t *const halfH = (uint8_t *) half; \
450  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
451  stride, 17); \
452  ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
453  stride, 17); \
454  ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
455  stride, 16); \
456 } \
457  \
458 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
459  const uint8_t *src, \
460  ptrdiff_t stride) \
461 { \
462  uint64_t half[17 * 2]; \
463  uint8_t *const halfH = (uint8_t *) half; \
464  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
465  stride, 17); \
466  ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
467  stride, 17); \
468  ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
469  stride, 16); \
470 } \
471  \
472 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
473  const uint8_t *src, \
474  ptrdiff_t stride) \
475 { \
476  uint64_t half[17 * 2]; \
477  uint8_t *const halfH = (uint8_t *) half; \
478  ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
479  stride, 17); \
480  ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
481  stride, 16); \
482 }
483 
484 QPEL_OP(put_, _, mmxext)
485 QPEL_OP(avg_, _, mmxext)
486 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
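
Each QPEL_OP instantiation above expands into 15 static motion-compensation functions per block size (every quarter-pel position except mc00), built from the assembly lowpass and l2-averaging primitives declared at the top of the file. As an illustrative sketch (not literal file contents), the 8x8 mc10 case of QPEL_OP(put_, _, mmxext) expands to roughly:

    static void put_qpel8_mc10_mmxext(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
    {
        uint64_t temp[8];
        uint8_t *const half = (uint8_t *) temp;
        /* horizontal lowpass filter into the stack buffer ... */
        ff_put_mpeg4_qpel8_h_lowpass_mmxext(half, src, 8, stride, 8);
        /* ... then average with the unfiltered source to land on the
           quarter-pel position between the two */
        ff_put_pixels8_l2_mmxext(dst, src, half, stride, stride, 8);
    }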
487 
488 #define MC00(OPNAME, SIZE, EXT) \
489 static void OPNAME ## _qpel ## SIZE ## _mc00_ ## EXT(uint8_t *dst, \
490  const uint8_t *src,\
491  ptrdiff_t stride) \
492 { \
493  ff_ ## OPNAME ## _pixels ## SIZE ##_ ## EXT(dst, src, stride, SIZE);\
494 }
495 
496 MC00(put, 8, mmx)
497 MC00(avg, 8, mmxext)
498 MC00(put, 16, sse2)
499 MC00(avg, 16, sse2)
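
The mc00 case (no sub-pel offset) is just a straight block copy or average, so MC00 forwards it to the full-pel helpers declared in fpel.h; for example, MC00(put, 8, mmx) expands to roughly:

    static void put_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                   ptrdiff_t stride)
    {
        ff_put_pixels8_mmx(dst, src, stride, 8);  /* full-pel copy from fpel.h */
    }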
500 
501 #endif /* HAVE_X86ASM */
502 
503 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
504 do { \
505  c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
506  c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
507  c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
508  c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
509  c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
510  c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
511  c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
512  c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
513  c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
514  c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
515  c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
516  c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
517  c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
518  c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
519  c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
520 } while (0)
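
SET_QPEL_FUNCS fills table slots 1 through 15 with the generated functions for one block size; slot 0 (the mc00 full-pel case) is assigned separately in the init function below. The layout implied by the macro is index = (dy << 2) | dx for a quarter-pel offset (dx, dy), so SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ) produces assignments along the lines of:

    c->put_qpel_pixels_tab[1][1] = put_qpel8_mc10_mmxext;  /* dx = 1, dy = 0 */
    c->put_qpel_pixels_tab[1][4] = put_qpel8_mc01_mmxext;  /* dx = 0, dy = 1 */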
521 
522 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
523 {
524  int cpu_flags = av_get_cpu_flags();
525 
526  if (X86_MMXEXT(cpu_flags)) {
527 #if HAVE_MMXEXT_EXTERNAL
528  SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
529  c->avg_qpel_pixels_tab[1][0] = avg_qpel8_mc00_mmxext;
530  SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
531 
532  SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
533  c->put_no_rnd_qpel_pixels_tab[1][0] =
534  c->put_qpel_pixels_tab[1][0] = put_qpel8_mc00_mmx;
535  SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
536  SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
537  SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
538 #endif /* HAVE_MMXEXT_EXTERNAL */
539  }
540 #if HAVE_SSE2_EXTERNAL
541  if (EXTERNAL_SSE2(cpu_flags)) {
542  c->put_no_rnd_qpel_pixels_tab[0][0] =
543  c->put_qpel_pixels_tab[0][0] = put_qpel16_mc00_sse2;
544  c->avg_qpel_pixels_tab[0][0] = avg_qpel16_mc00_sse2;
545  }
546 #endif
547 }
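
For context, a hypothetical usage sketch (not part of this file): the generic ff_qpeldsp_init() installs the C implementations and then calls ff_qpeldsp_init_x86() on x86, after which a decoder dispatches through the table, roughly:

    /* 8x8 block with quarter-pel motion vector (mx, my); the index is
       assumed to be ((my & 3) << 2) | (mx & 3), matching the layout above */
    int dxy = ((my & 3) << 2) | (mx & 3);
    c->put_qpel_pixels_tab[1][dxy](dst,
                                   src + (my >> 2) * stride + (mx >> 2),
                                   stride);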