#if HAVE_SSE2_EXTERNAL

void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in);

static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    /* run the transform into an aligned temporary, then add the residual */
    LOCAL_ALIGNED(16, int16_t, b2, [64]);
    ff_cavs_idct8_sse2(b2, block);
    ff_add_pixels_clamped_sse2(b2, dst, stride);
}

#endif /* HAVE_SSE2_EXTERNAL */
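/*
 * Illustration (not part of the original file): scalar equivalent of the
 * wrapper above, assuming a hypothetical scalar_idct8() in place of
 * ff_cavs_idct8_sse2. The transform is run into a temporary block and the
 * residual is added to dst with saturation to [0,255], which is what
 * ff_add_pixels_clamped_sse2 does.
 */
#if 0
static void cavs_idct8_add_ref(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    int16_t tmp[64];
    scalar_idct8(tmp, block);              /* hypothetical scalar IDCT */
    for (int i = 0; i < 8; i++)
        for (int j = 0; j < 8; j++)
            dst[i * stride + j] =
                av_clip_uint8(dst[i * stride + j] + tmp[8 * i + j]);
}
#endif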
#if HAVE_MMXEXT_INLINE

/* file-local multiplier constants used by the vertical filters below */
DECLARE_ASM_CONST(8, uint64_t, pw_42) = 0x002A002A002A002AULL;
DECLARE_ASM_CONST(8, uint64_t, pw_96) = 0x0060006000600060ULL;
/* vertical filter [-1 -2 96 42 -7  0], rounding 64, shift 7 */
#define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
    "movd (%0), "#F"            \n\t"\
    "movq "#C", %%mm6           \n\t"\
    "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
    "movq "#D", %%mm7           \n\t"\
    "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
    "psllw $3, "#E"             \n\t"\
    "psubw "#E", %%mm6          \n\t"\
    "psraw $3, "#E"             \n\t"\
    "paddw %%mm7, %%mm6         \n\t"\
    "paddw "#E", %%mm6          \n\t"\
    "paddw "#B", "#B"           \n\t"\
    "pxor %%mm7, %%mm7          \n\t"\
    "add %2, %0                 \n\t"\
    "punpcklbw %%mm7, "#F"      \n\t"\
    "psubw "#B", %%mm6          \n\t"\
    "psraw $1, "#B"             \n\t"\
    "psubw "#A", %%mm6          \n\t"\
    "paddw "MANGLE(ADD)", %%mm6 \n\t"\
    "psraw $7, %%mm6            \n\t"\
    "packuswb %%mm6, %%mm6      \n\t"\
    OP(%%mm6, (%1), A, d)            \
    "add %3, %1                 \n\t"
/* vertical filter [ 0 -1  5  5 -1  0], rounding 4, shift 3 */
#define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
    "movd (%0), "#F"            \n\t"\
    "movq "#C", %%mm6           \n\t"\
    "paddw "#D", %%mm6          \n\t"\
    "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
    "add %2, %0                 \n\t"\
    "punpcklbw %%mm7, "#F"      \n\t"\
    "psubw "#B", %%mm6          \n\t"\
    "psubw "#E", %%mm6          \n\t"\
    "paddw "MANGLE(ADD)", %%mm6 \n\t"\
    "psraw $3, %%mm6            \n\t"\
    "packuswb %%mm6, %%mm6      \n\t"\
    OP(%%mm6, (%1), A, d)            \
    "add %3, %1                 \n\t"
/* vertical filter [ 0 -7 42 96 -2 -1], rounding 64, shift 7 */
#define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
    "movd (%0), "#F"            \n\t"\
    "movq "#C", %%mm6           \n\t"\
    "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
    "movq "#D", %%mm7           \n\t"\
    "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
    "psllw $3, "#B"             \n\t"\
    "psubw "#B", %%mm6          \n\t"\
    "psraw $3, "#B"             \n\t"\
    "paddw %%mm7, %%mm6         \n\t"\
    "paddw "#B", %%mm6          \n\t"\
    "paddw "#E", "#E"           \n\t"\
    "pxor %%mm7, %%mm7          \n\t"\
    "add %2, %0                 \n\t"\
    "punpcklbw %%mm7, "#F"      \n\t"\
    "psubw "#E", %%mm6          \n\t"\
    "psraw $1, "#E"             \n\t"\
    "psubw "#F", %%mm6          \n\t"\
    "paddw "MANGLE(ADD)", %%mm6 \n\t"\
    "psraw $7, %%mm6            \n\t"\
    "packuswb %%mm6, %%mm6      \n\t"\
    OP(%%mm6, (%1), A, d)            \
    "add %3, %1                 \n\t"
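/*
 * Illustration (not part of the original file): what one step of the three
 * vertical filters above computes for a single output pixel. A..F are six
 * vertically consecutive source pixels; in QPEL_CAVSV1 the F load only
 * primes the next row, and packuswb provides the same saturation as
 * av_clip_uint8 here. cavs_v_ref is a hypothetical name.
 */
#if 0
static uint8_t cavs_v_ref(int A, int B, int C, int D, int E, int F, int phase)
{
    int v = 0;
    switch (phase) {
    case 1: v = (-A - 2*B + 96*C + 42*D - 7*E     + 64) >> 7; break; /* QPEL_CAVSV1 */
    case 2: v = (    -B  +  5*C +  5*D -   E      +  4) >> 3; break; /* QPEL_CAVSV2 */
    case 3: v = (  -7*B  + 42*C + 96*D - 2*E - F  + 64) >> 7; break; /* QPEL_CAVSV3 */
    }
    return av_clip_uint8(v);
}
#endif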
#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    int w = 2;\
    src -= 2*srcStride;\
    /* two passes, one per 4-pixel-wide column */\
    while (w--) {\
    __asm__ volatile(\
        "pxor %%mm7, %%mm7          \n\t"\
        "movd (%0), %%mm0           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm1           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm2           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm3           \n\t"\
        "add %2, %0                 \n\t"\
        "movd (%0), %%mm4           \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpcklbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpcklbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
        : "+a"(src), "+c"(dst)\
        : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
          NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
        : "memory"\
    );\
    if (h == 16) {\
        __asm__ volatile(\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
            : "+a"(src), "+c"(dst)\
            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
              NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
            : "memory"\
        );\
    }\
    src += 4-(h+5)*srcStride;\
    dst += 4-h*dstStride;\
    }
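/*
 * Illustration (not part of the original file): the eight VOP invocations
 * above form a software-pipelined rolling window. Each VOP consumes rows
 * A..E, loads the next row into F, and the following invocation receives
 * the registers rotated by one position, so no data ever has to be moved.
 * A scalar sketch of the same idea, reusing the hypothetical cavs_v_ref()
 * from the sketch above:
 */
#if 0
static void cavs_v_column_ref(uint8_t *dst, const uint8_t *src,
                              ptrdiff_t dstStride, ptrdiff_t srcStride,
                              int h, int phase)
{
    int win[6]; /* plays the role of mm0..mm5 */
    for (int i = 0; i < 5; i++)
        win[i] = src[(i - 2) * srcStride];
    for (int y = 0; y < h; y++) {
        win[5] = src[(y + 3) * srcStride];         /* the "movd (%0)" load */
        dst[y * dstStride] = cavs_v_ref(win[0], win[1], win[2],
                                        win[3], win[4], win[5], phase);
        memmove(win, win + 1, 5 * sizeof(win[0])); /* rotation; free in registers */
    }
}
#endif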
#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    int h = 8;\
    __asm__ volatile(\
        "pxor %%mm7, %%mm7          \n\t"\
        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
        "1:                         \n\t"\
        "movq  (%0), %%mm0          \n\t"\
        "movq 1(%0), %%mm2          \n\t"\
        "movq %%mm0, %%mm1          \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpckhbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "paddw %%mm2, %%mm0         \n\t"\
        "paddw %%mm3, %%mm1         \n\t"\
        "pmullw %%mm6, %%mm0        \n\t"\
        "pmullw %%mm6, %%mm1        \n\t"\
        "movq -1(%0), %%mm2         \n\t"\
        "movq  2(%0), %%mm4         \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "movq %%mm4, %%mm5          \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        "punpckhbw %%mm7, %%mm5     \n\t"\
        "paddw %%mm4, %%mm2         \n\t"\
        "paddw %%mm3, %%mm5         \n\t"\
        "psubw %%mm2, %%mm0         \n\t"\
        "psubw %%mm5, %%mm1         \n\t"\
        "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
        "paddw %%mm5, %%mm0         \n\t"\
        "paddw %%mm5, %%mm1         \n\t"\
        "psraw $3, %%mm0            \n\t"\
        "psraw $3, %%mm1            \n\t"\
        "packuswb %%mm1, %%mm0      \n\t"\
        OP(%%mm0, (%1),%%mm5, q)         \
        "add %3, %0                 \n\t"\
        "add %4, %1                 \n\t"\
        "decl %2                    \n\t"\
        "jnz 1b                     \n\t"\
        : "+a"(src), "+c"(dst), "+m"(h)\
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
          NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
        : "memory"\
    );\
}\
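/*
 * Illustration (not part of the original file): scalar equivalent of the
 * loop above -- the CAVS horizontal half-pel filter [-1 5 5 -1] with
 * rounding 4 and shift 3, applied to each pixel of an 8x8 block.
 */
#if 0
static void cavs_qpel8_h_ref(uint8_t *dst, const uint8_t *src,
                             ptrdiff_t dstStride, ptrdiff_t srcStride)
{
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++)
            dst[x] = av_clip_uint8((-src[x - 1] + 5 * src[x] +
                                     5 * src[x + 1] - src[x + 2] + 4) >> 3);
        src += srcStride;
        dst += dstStride;
    }
}
#endif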
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{\
    QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,pw_96,pw_42)            \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{\
    QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,pw_42)           \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{\
    QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,pw_96,pw_42)            \
}\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst, src, dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst, src, dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst, src, dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    /* tile the 16x16 block as four 8x8 calls */\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\
static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\
static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\
static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}\
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"

#define AVG_MMXEXT_OP(a, b, temp, size) \
    "mov" #size " " #b ", " #temp " \n\t"\
    "pavgb " #temp ", " #a "        \n\t"\
    "mov" #size " " #a ", " #b "    \n\t"
#if HAVE_MMX_EXTERNAL
static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
{
    ff_put_pixels8_mmx(dst, src, stride, 8);
}

static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_avg_pixels8_mmxext(dst, src, stride, 8);
}

static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_put_pixels16_sse2(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_avg_pixels16_sse2(dst, src, stride, 16);
}
#endif /* HAVE_MMX_EXTERNAL */
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c)
{
#if HAVE_MMX_EXTERNAL
    c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
#endif /* HAVE_MMX_EXTERNAL */
}
#define DSPFUNC(PFX, IDX, NUM, EXT) \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT;
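/*
 * Illustration (not part of the original file): the pixels_tab slot for a
 * quarter-pel position (x, y), with x and y in 0..3, is x + 4*y. That is
 * why DSPFUNC above wires mc20 to slot 2, mc01 to 4, mc02 to 8 and mc03 to
 * 12; slot 0 (mc00) is the fullpel copy installed separately below.
 */
#if 0
static int cavs_qpel_tab_index(int x, int y) { return x + 4 * y; }
#endif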
#if HAVE_MMXEXT_INLINE
QPEL_CAVS(put_, PUT_OP,        mmxext)
QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)

CAVS_MC(put_,  8, mmxext)
CAVS_MC(put_, 16, mmxext)
CAVS_MC(avg_,  8, mmxext)
CAVS_MC(avg_, 16, mmxext)
#endif /* HAVE_MMXEXT_INLINE */

av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c)
{
    av_unused int cpu_flags = av_get_cpu_flags();

    cavsdsp_init_mmx(c);

#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags)) {
        DSPFUNC(put, 0, 16, mmxext);
        DSPFUNC(put, 1,  8, mmxext);
        DSPFUNC(avg, 0, 16, mmxext);
        DSPFUNC(avg, 1,  8, mmxext);
    }
#endif
#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMXEXT(cpu_flags))
        c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
#endif
#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;

        c->cavs_idct8_add = cavs_idct8_add_sse2;
    }
#endif
}