00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/* Per-CPU-variant instruction selection. This template file is compiled
 * several times with different COMPILE_TEMPLATE_* flags; undefine any
 * macros left over from a previous instantiation before redefining them. */
00021 #undef REAL_MOVNTQ
00022 #undef MOVNTQ
00023 #undef PAVGB
00024 #undef PREFETCH
00025
/* PREFETCH: cache-prefetch mnemonic for the target CPU; degrades to a
 * commented-out nop when neither 3DNow! nor MMX2 is available. */
00026 #if COMPILE_TEMPLATE_AMD3DNOW
00027 #define PREFETCH "prefetch"
00028 #elif COMPILE_TEMPLATE_MMX2
00029 #define PREFETCH "prefetchnta"
00030 #else
00031 #define PREFETCH " # nop"
00032 #endif
00033
/* PAVGB: packed byte average — "pavgb" on MMX2, the 3DNow! equivalent
 * "pavgusb" otherwise; intentionally left undefined for plain MMX. */
00034 #if COMPILE_TEMPLATE_MMX2
00035 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00036 #elif COMPILE_TEMPLATE_AMD3DNOW
00037 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00038 #endif
00039
/* MOVNTQ: 64-bit store of a result row. MMX2 uses the non-temporal
 * "movntq" (bypasses the cache — dest rows are written once and not
 * re-read); plain MMX falls back to an ordinary "movq". The extra
 * REAL_ indirection makes sure macro arguments are expanded before
 * stringification. */
00040 #if COMPILE_TEMPLATE_MMX2
00041 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00042 #else
00043 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00044 #endif
00045 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00046
/* AltiVec builds pull in the PPC implementation instead of the x86 asm. */
00047 #if COMPILE_TEMPLATE_ALTIVEC
00048 #include "ppc/swscale_altivec_template.c"
00049 #endif
00050
/* YSCALEYUV2YV12X(x, offset, dest, width)
 * Vertical scaling of one planar (YV12) output row, fast/inexact variant.
 * %0 = &c->redDither (base for all SwsContext offsets), %1 = dest, %2 = width.
 * 'offset' selects the MMX filter list inside the context (lum/chr/alp);
 * each 16-byte filter entry holds a source-line pointer followed by a
 * coefficient quad. Inner loop (label 1): pmulhw source samples (at byte
 * offset "x") by the coefficient and accumulate in 16 bits into mm3/mm4,
 * seeded with the rounder from VROUNDER_OFFSET; a NULL source pointer
 * terminates the list (test/jnz). Then >>3 to 8-bit range, pack and
 * MOVNTQ 8 output pixels; outer loop advances REG_a until width.
 * NOTE: 16-bit accumulation can wrap with extreme filters — the
 * _ACCURATE variant below avoids that. */
00051 #define YSCALEYUV2YV12X(x, offset, dest, width) \
00052 __asm__ volatile(\
00053 "xor %%"REG_a", %%"REG_a" \n\t"\
00054 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00055 "movq %%mm3, %%mm4 \n\t"\
00056 "lea " offset "(%0), %%"REG_d" \n\t"\
00057 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00058 ASMALIGN(4) \
00059 "1: \n\t"\
00060 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00061 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00062 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" \
00063 "add $16, %%"REG_d" \n\t"\
00064 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00065 "test %%"REG_S", %%"REG_S" \n\t"\
00066 "pmulhw %%mm0, %%mm2 \n\t"\
00067 "pmulhw %%mm0, %%mm5 \n\t"\
00068 "paddw %%mm2, %%mm3 \n\t"\
00069 "paddw %%mm5, %%mm4 \n\t"\
00070 " jnz 1b \n\t"\
00071 "psraw $3, %%mm3 \n\t"\
00072 "psraw $3, %%mm4 \n\t"\
00073 "packuswb %%mm4, %%mm3 \n\t"\
00074 MOVNTQ(%%mm3, (%1, %%REGa))\
00075 "add $8, %%"REG_a" \n\t"\
00076 "cmp %2, %%"REG_a" \n\t"\
00077 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00078 "movq %%mm3, %%mm4 \n\t"\
00079 "lea " offset "(%0), %%"REG_d" \n\t"\
00080 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00081 "jb 1b \n\t"\
00082 :: "r" (&c->redDither),\
00083 "r" (dest), "g" (width)\
00084 : "%"REG_a, "%"REG_d, "%"REG_S\
00085 );
00086
/* YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 * Precise variant of YSCALEYUV2YV12X: processes the filter two source
 * lines per iteration (APCK_* packed layout: ptr1, ptr2 at APCK_PTR2,
 * interleaved coefficient pair at APCK_COEF, entry size APCK_SIZE) and
 * accumulates pmaddwd products in four 32-bit accumulators mm4-mm7,
 * so intermediate sums cannot wrap. After the loop: >>16, pack to
 * 16 bits, add the rounder, >>3, pack to bytes and store 8 pixels. */
00087 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
00088 __asm__ volatile(\
00089 "lea " offset "(%0), %%"REG_d" \n\t"\
00090 "xor %%"REG_a", %%"REG_a" \n\t"\
00091 "pxor %%mm4, %%mm4 \n\t"\
00092 "pxor %%mm5, %%mm5 \n\t"\
00093 "pxor %%mm6, %%mm6 \n\t"\
00094 "pxor %%mm7, %%mm7 \n\t"\
00095 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00096 ASMALIGN(4) \
00097 "1: \n\t"\
00098 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00099 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00100 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00101 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" \
00102 "movq %%mm0, %%mm3 \n\t"\
00103 "punpcklwd %%mm1, %%mm0 \n\t"\
00104 "punpckhwd %%mm1, %%mm3 \n\t"\
00105 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" \
00106 "pmaddwd %%mm1, %%mm0 \n\t"\
00107 "pmaddwd %%mm1, %%mm3 \n\t"\
00108 "paddd %%mm0, %%mm4 \n\t"\
00109 "paddd %%mm3, %%mm5 \n\t"\
00110 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00111 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00112 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00113 "test %%"REG_S", %%"REG_S" \n\t"\
00114 "movq %%mm2, %%mm0 \n\t"\
00115 "punpcklwd %%mm3, %%mm2 \n\t"\
00116 "punpckhwd %%mm3, %%mm0 \n\t"\
00117 "pmaddwd %%mm1, %%mm2 \n\t"\
00118 "pmaddwd %%mm1, %%mm0 \n\t"\
00119 "paddd %%mm2, %%mm6 \n\t"\
00120 "paddd %%mm0, %%mm7 \n\t"\
00121 " jnz 1b \n\t"\
00122 "psrad $16, %%mm4 \n\t"\
00123 "psrad $16, %%mm5 \n\t"\
00124 "psrad $16, %%mm6 \n\t"\
00125 "psrad $16, %%mm7 \n\t"\
00126 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00127 "packssdw %%mm5, %%mm4 \n\t"\
00128 "packssdw %%mm7, %%mm6 \n\t"\
00129 "paddw %%mm0, %%mm4 \n\t"\
00130 "paddw %%mm0, %%mm6 \n\t"\
00131 "psraw $3, %%mm4 \n\t"\
00132 "psraw $3, %%mm6 \n\t"\
00133 "packuswb %%mm6, %%mm4 \n\t"\
00134 MOVNTQ(%%mm4, (%1, %%REGa))\
00135 "add $8, %%"REG_a" \n\t"\
00136 "cmp %2, %%"REG_a" \n\t"\
00137 "lea " offset "(%0), %%"REG_d" \n\t"\
00138 "pxor %%mm4, %%mm4 \n\t"\
00139 "pxor %%mm5, %%mm5 \n\t"\
00140 "pxor %%mm6, %%mm6 \n\t"\
00141 "pxor %%mm7, %%mm7 \n\t"\
00142 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00143 "jb 1b \n\t"\
00144 :: "r" (&c->redDither),\
00145 "r" (dest), "g" (width)\
00146 : "%"REG_a, "%"REG_d, "%"REG_S\
00147 );
00148
/* YSCALEYUV2YV121: unscaled 1:1 vertical path — convert one 16-bit
 * intermediate row straight to 8 bits (>>7) and store. Uses a negative
 * index in REG_a counting up toward zero ("jnc" loops while the add has
 * not carried past 0); %0 = src end, %1 = dest end, %2 = -width. */
00149 #define YSCALEYUV2YV121 \
00150 "mov %2, %%"REG_a" \n\t"\
00151 ASMALIGN(4) \
00152 "1: \n\t"\
00153 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
00154 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
00155 "psraw $7, %%mm0 \n\t"\
00156 "psraw $7, %%mm1 \n\t"\
00157 "packuswb %%mm1, %%mm0 \n\t"\
00158 MOVNTQ(%%mm0, (%1, %%REGa))\
00159 "add $8, %%"REG_a" \n\t"\
00160 "jnc 1b \n\t"
00161
/* YSCALEYUV2YV121_ACCURATE: same as above but adds a rounding bias of 64
 * (constructed in mm7 via pcmpeqw/psrlw/psllw: 0xFFFF -> 1 -> 64) with
 * saturation before the >>7, i.e. round-to-nearest instead of truncate. */
00162 #define YSCALEYUV2YV121_ACCURATE \
00163 "mov %2, %%"REG_a" \n\t"\
00164 "pcmpeqw %%mm7, %%mm7 \n\t"\
00165 "psrlw $15, %%mm7 \n\t"\
00166 "psllw $6, %%mm7 \n\t"\
00167 ASMALIGN(4) \
00168 "1: \n\t"\
00169 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
00170 "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
00171 "paddsw %%mm7, %%mm0 \n\t"\
00172 "paddsw %%mm7, %%mm1 \n\t"\
00173 "psraw $7, %%mm0 \n\t"\
00174 "psraw $7, %%mm1 \n\t"\
00175 "packuswb %%mm1, %%mm0 \n\t"\
00176 MOVNTQ(%%mm0, (%1, %%REGa))\
00177 "add $8, %%"REG_a" \n\t"\
00178 "jnc 1b \n\t"
00179
00180
00181
00182
00183
00184
00185
00186
/* YSCALEYUV2PACKEDX_UV: opening of the packed-output vertical scaler.
 * Outer loop over output pixels (label 1, REG_a index); inner loop
 * (label 2) multiply-accumulates the chroma filter list at
 * CHR_MMX_FILTER_OFFSET into mm3 (U) and mm4 (V, at byte offset VOF),
 * seeded with the rounder. List is NULL-terminated. Leaves the asm
 * statement open — must be paired with a _YA part and
 * YSCALEYUV2PACKEDX_END. */
00187 #define YSCALEYUV2PACKEDX_UV \
00188 __asm__ volatile(\
00189 "xor %%"REG_a", %%"REG_a" \n\t"\
00190 ASMALIGN(4)\
00191 "nop \n\t"\
00192 "1: \n\t"\
00193 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00194 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00195 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00196 "movq %%mm3, %%mm4 \n\t"\
00197 ASMALIGN(4)\
00198 "2: \n\t"\
00199 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00200 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00201 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00202 "add $16, %%"REG_d" \n\t"\
00203 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00204 "pmulhw %%mm0, %%mm2 \n\t"\
00205 "pmulhw %%mm0, %%mm5 \n\t"\
00206 "paddw %%mm2, %%mm3 \n\t"\
00207 "paddw %%mm5, %%mm4 \n\t"\
00208 "test %%"REG_S", %%"REG_S" \n\t"\
00209 " jnz 2b \n\t"\
00210
/* YSCALEYUV2PACKEDX_YA: same accumulation pattern for a luma (or alpha)
 * filter list at 'offset'; the register operands are parameterized so the
 * macro can be reused for both Y (dst1/dst2 = mm1/mm7) and alpha. */
00211 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
00212 "lea "offset"(%0), %%"REG_d" \n\t"\
00213 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00214 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
00215 "movq "#dst1", "#dst2" \n\t"\
00216 ASMALIGN(4)\
00217 "2: \n\t"\
00218 "movq 8(%%"REG_d"), "#coeff" \n\t" \
00219 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" \
00220 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" \
00221 "add $16, %%"REG_d" \n\t"\
00222 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00223 "pmulhw "#coeff", "#src1" \n\t"\
00224 "pmulhw "#coeff", "#src2" \n\t"\
00225 "paddw "#src1", "#dst1" \n\t"\
00226 "paddw "#src2", "#dst2" \n\t"\
00227 "test %%"REG_S", %%"REG_S" \n\t"\
00228 " jnz 2b \n\t"\
00229
/* YSCALEYUV2PACKEDX: standard combination — chroma pass, then luma into
 * mm1 (low) / mm7 (high) using mm0 as the coefficient scratch. */
00230 #define YSCALEYUV2PACKEDX \
00231 YSCALEYUV2PACKEDX_UV \
00232 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
00233
/* YSCALEYUV2PACKEDX_END: closes the asm statement opened by *_UV.
 * Operands: %0 context base, %1-%3 dummies, %4 dest, %5 dstW. */
00234 #define YSCALEYUV2PACKEDX_END \
00235 :: "r" (&c->redDither), \
00236 "m" (dummy), "m" (dummy), "m" (dummy),\
00237 "r" (dest), "m" (dstW) \
00238 : "%"REG_a, "%"REG_d, "%"REG_S \
00239 );
00240
/* YSCALEYUV2PACKEDX_ACCURATE_UV: precise chroma pass of the packed
 * scaler — two source lines per iteration with pmaddwd into 32-bit
 * accumulators mm4-mm7 (see YSCALEYUV2YV12X_ACCURATE for the APCK_*
 * layout). Results (rounded, still 16-bit headroom) are parked in the
 * context at U_TEMP/V_TEMP because the following luma pass needs all
 * eight MMX registers. Leaves the asm statement open. */
00241 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
00242 __asm__ volatile(\
00243 "xor %%"REG_a", %%"REG_a" \n\t"\
00244 ASMALIGN(4)\
00245 "nop \n\t"\
00246 "1: \n\t"\
00247 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00248 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00249 "pxor %%mm4, %%mm4 \n\t"\
00250 "pxor %%mm5, %%mm5 \n\t"\
00251 "pxor %%mm6, %%mm6 \n\t"\
00252 "pxor %%mm7, %%mm7 \n\t"\
00253 ASMALIGN(4)\
00254 "2: \n\t"\
00255 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00256 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00257 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00258 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00259 "movq %%mm0, %%mm3 \n\t"\
00260 "punpcklwd %%mm1, %%mm0 \n\t"\
00261 "punpckhwd %%mm1, %%mm3 \n\t"\
00262 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" \
00263 "pmaddwd %%mm1, %%mm0 \n\t"\
00264 "pmaddwd %%mm1, %%mm3 \n\t"\
00265 "paddd %%mm0, %%mm4 \n\t"\
00266 "paddd %%mm3, %%mm5 \n\t"\
00267 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00268 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00269 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00270 "test %%"REG_S", %%"REG_S" \n\t"\
00271 "movq %%mm2, %%mm0 \n\t"\
00272 "punpcklwd %%mm3, %%mm2 \n\t"\
00273 "punpckhwd %%mm3, %%mm0 \n\t"\
00274 "pmaddwd %%mm1, %%mm2 \n\t"\
00275 "pmaddwd %%mm1, %%mm0 \n\t"\
00276 "paddd %%mm2, %%mm6 \n\t"\
00277 "paddd %%mm0, %%mm7 \n\t"\
00278 " jnz 2b \n\t"\
00279 "psrad $16, %%mm4 \n\t"\
00280 "psrad $16, %%mm5 \n\t"\
00281 "psrad $16, %%mm6 \n\t"\
00282 "psrad $16, %%mm7 \n\t"\
00283 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00284 "packssdw %%mm5, %%mm4 \n\t"\
00285 "packssdw %%mm7, %%mm6 \n\t"\
00286 "paddw %%mm0, %%mm4 \n\t"\
00287 "paddw %%mm0, %%mm6 \n\t"\
00288 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00289 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00290
/* YSCALEYUV2PACKEDX_ACCURATE_YA(offset): precise luma/alpha pass; same
 * two-lines-per-iteration pmaddwd scheme, accumulating into mm1/mm5 and
 * mm7/mm6, then reloads the saved U/V temporaries into mm3/mm4 so the
 * register layout matches what YSCALEYUV2RGBX expects. */
00291 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
00292 "lea "offset"(%0), %%"REG_d" \n\t"\
00293 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00294 "pxor %%mm1, %%mm1 \n\t"\
00295 "pxor %%mm5, %%mm5 \n\t"\
00296 "pxor %%mm7, %%mm7 \n\t"\
00297 "pxor %%mm6, %%mm6 \n\t"\
00298 ASMALIGN(4)\
00299 "2: \n\t"\
00300 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00301 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00302 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00303 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00304 "movq %%mm0, %%mm3 \n\t"\
00305 "punpcklwd %%mm4, %%mm0 \n\t"\
00306 "punpckhwd %%mm4, %%mm3 \n\t"\
00307 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" \
00308 "pmaddwd %%mm4, %%mm0 \n\t"\
00309 "pmaddwd %%mm4, %%mm3 \n\t"\
00310 "paddd %%mm0, %%mm1 \n\t"\
00311 "paddd %%mm3, %%mm5 \n\t"\
00312 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00313 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00314 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00315 "test %%"REG_S", %%"REG_S" \n\t"\
00316 "movq %%mm2, %%mm0 \n\t"\
00317 "punpcklwd %%mm3, %%mm2 \n\t"\
00318 "punpckhwd %%mm3, %%mm0 \n\t"\
00319 "pmaddwd %%mm4, %%mm2 \n\t"\
00320 "pmaddwd %%mm4, %%mm0 \n\t"\
00321 "paddd %%mm2, %%mm7 \n\t"\
00322 "paddd %%mm0, %%mm6 \n\t"\
00323 " jnz 2b \n\t"\
00324 "psrad $16, %%mm1 \n\t"\
00325 "psrad $16, %%mm5 \n\t"\
00326 "psrad $16, %%mm7 \n\t"\
00327 "psrad $16, %%mm6 \n\t"\
00328 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00329 "packssdw %%mm5, %%mm1 \n\t"\
00330 "packssdw %%mm6, %%mm7 \n\t"\
00331 "paddw %%mm0, %%mm1 \n\t"\
00332 "paddw %%mm0, %%mm7 \n\t"\
00333 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00334 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00335
/* Standard accurate combination: chroma pass then luma pass. */
00336 #define YSCALEYUV2PACKEDX_ACCURATE \
00337 YSCALEYUV2PACKEDX_ACCURATE_UV \
00338 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
00339
/* YSCALEYUV2RGBX: YUV -> RGB matrix for the filtered path.
 * Input: Y low/high halves in mm1/mm7, U in mm3, V in mm4 (all 16-bit).
 * Subtracts the per-context U/V/Y offsets, applies the fixed-point
 * colorspace coefficients with pmulhw, adds the green contributions
 * (U*ug + V*vg), interleaves low/high pixel halves and packs to bytes.
 * Output (as consumed by the WRITE* macros): B in mm2, G in mm4, R in mm5. */
00340 #define YSCALEYUV2RGBX \
00341 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00342 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00343 "movq %%mm3, %%mm2 \n\t" \
00344 "movq %%mm4, %%mm5 \n\t" \
00345 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00346 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00347 \
00348 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00349 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00350 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00351 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00352 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00353 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00354 \
00355 "paddw %%mm3, %%mm4 \n\t"\
00356 "movq %%mm2, %%mm0 \n\t"\
00357 "movq %%mm5, %%mm6 \n\t"\
00358 "movq %%mm4, %%mm3 \n\t"\
00359 "punpcklwd %%mm2, %%mm2 \n\t"\
00360 "punpcklwd %%mm5, %%mm5 \n\t"\
00361 "punpcklwd %%mm4, %%mm4 \n\t"\
00362 "paddw %%mm1, %%mm2 \n\t"\
00363 "paddw %%mm1, %%mm5 \n\t"\
00364 "paddw %%mm1, %%mm4 \n\t"\
00365 "punpckhwd %%mm0, %%mm0 \n\t"\
00366 "punpckhwd %%mm6, %%mm6 \n\t"\
00367 "punpckhwd %%mm3, %%mm3 \n\t"\
00368 "paddw %%mm7, %%mm0 \n\t"\
00369 "paddw %%mm7, %%mm6 \n\t"\
00370 "paddw %%mm7, %%mm3 \n\t"\
00371 \
00372 "packuswb %%mm0, %%mm2 \n\t"\
00373 "packuswb %%mm6, %%mm5 \n\t"\
00374 "packuswb %%mm3, %%mm4 \n\t"\
00375
/* REAL_YSCALEYUV2PACKED(index, c): 2-tap vertical bilinear interpolation
 * for packed (e.g. YUY2) output. Operands: %0/%1 = luma line pair,
 * %2/%3 = chroma line pair. First pre-shifts the stored per-line filter
 * fractions (at *_MMX_FILTER_OFFSET+8) right by 3 IN PLACE — note this
 * mutates the context copy. Per iteration computes
 * dst = src1 + ((src0-src1)*frac >> 16) for chroma (mm3=U, mm4=V, >>7
 * to byte range) and luma (mm1 low, mm7 high). No colorspace math here. */
00376 #define REAL_YSCALEYUV2PACKED(index, c) \
00377 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00378 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
00379 "psraw $3, %%mm0 \n\t"\
00380 "psraw $3, %%mm1 \n\t"\
00381 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00382 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00383 "xor "#index", "#index" \n\t"\
00384 ASMALIGN(4)\
00385 "1: \n\t"\
00386 "movq (%2, "#index"), %%mm2 \n\t" \
00387 "movq (%3, "#index"), %%mm3 \n\t" \
00388 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00389 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00390 "psubw %%mm3, %%mm2 \n\t" \
00391 "psubw %%mm4, %%mm5 \n\t" \
00392 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00393 "pmulhw %%mm0, %%mm2 \n\t" \
00394 "pmulhw %%mm0, %%mm5 \n\t" \
00395 "psraw $7, %%mm3 \n\t" \
00396 "psraw $7, %%mm4 \n\t" \
00397 "paddw %%mm2, %%mm3 \n\t" \
00398 "paddw %%mm5, %%mm4 \n\t" \
00399 "movq (%0, "#index", 2), %%mm0 \n\t" \
00400 "movq (%1, "#index", 2), %%mm1 \n\t" \
00401 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
00402 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
00403 "psubw %%mm1, %%mm0 \n\t" \
00404 "psubw %%mm7, %%mm6 \n\t" \
00405 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00406 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00407 "psraw $7, %%mm1 \n\t" \
00408 "psraw $7, %%mm7 \n\t" \
00409 "paddw %%mm0, %%mm1 \n\t" \
00410 "paddw %%mm6, %%mm7 \n\t" \
00411
/* Indirection so macro arguments are expanded before substitution. */
00412 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
00413
/* REAL_YSCALEYUV2RGB_UV(index, c): chroma half of the 2-tap bilinear
 * RGB path. Interpolates U (mm3) and V (mm4) between the two chroma
 * lines %2/%3 (V plane at byte offset VOF), keeps 4 fractional bits
 * (>>4 rather than >>7), removes the U/V offsets and starts the green
 * products; B/R inputs are saved in mm2/mm5 for _COEFF. */
00414 #define REAL_YSCALEYUV2RGB_UV(index, c) \
00415 "xor "#index", "#index" \n\t"\
00416 ASMALIGN(4)\
00417 "1: \n\t"\
00418 "movq (%2, "#index"), %%mm2 \n\t" \
00419 "movq (%3, "#index"), %%mm3 \n\t" \
00420 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00421 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00422 "psubw %%mm3, %%mm2 \n\t" \
00423 "psubw %%mm4, %%mm5 \n\t" \
00424 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00425 "pmulhw %%mm0, %%mm2 \n\t" \
00426 "pmulhw %%mm0, %%mm5 \n\t" \
00427 "psraw $4, %%mm3 \n\t" \
00428 "psraw $4, %%mm4 \n\t" \
00429 "paddw %%mm2, %%mm3 \n\t" \
00430 "paddw %%mm5, %%mm4 \n\t" \
00431 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00432 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00433 "movq %%mm3, %%mm2 \n\t" \
00434 "movq %%mm4, %%mm5 \n\t" \
00435 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00436 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00437 \
00438
/* REAL_YSCALEYUV2RGB_YA(index, c, b1, b2): luma half — interpolates the
 * two luma lines b1/b2 into mm1 (low 4 pixels) and mm7 (high 4),
 * also with 4 fractional bits. */
00439 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
00440 "movq ("#b1", "#index", 2), %%mm0 \n\t" \
00441 "movq ("#b2", "#index", 2), %%mm1 \n\t" \
00442 "movq 8("#b1", "#index", 2), %%mm6 \n\t" \
00443 "movq 8("#b2", "#index", 2), %%mm7 \n\t" \
00444 "psubw %%mm1, %%mm0 \n\t" \
00445 "psubw %%mm7, %%mm6 \n\t" \
00446 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00447 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00448 "psraw $4, %%mm1 \n\t" \
00449 "psraw $4, %%mm7 \n\t" \
00450 "paddw %%mm0, %%mm1 \n\t" \
00451 "paddw %%mm6, %%mm7 \n\t" \
00452
/* REAL_YSCALEYUV2RGB_COEFF(c): shared tail — applies the colorspace
 * coefficients and packs to B=mm2, G=mm4, R=mm5 (same layout as
 * YSCALEYUV2RGBX above). */
00453 #define REAL_YSCALEYUV2RGB_COEFF(c) \
00454 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00455 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00456 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00457 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00458 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00459 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00460 \
00461 "paddw %%mm3, %%mm4 \n\t"\
00462 "movq %%mm2, %%mm0 \n\t"\
00463 "movq %%mm5, %%mm6 \n\t"\
00464 "movq %%mm4, %%mm3 \n\t"\
00465 "punpcklwd %%mm2, %%mm2 \n\t"\
00466 "punpcklwd %%mm5, %%mm5 \n\t"\
00467 "punpcklwd %%mm4, %%mm4 \n\t"\
00468 "paddw %%mm1, %%mm2 \n\t"\
00469 "paddw %%mm1, %%mm5 \n\t"\
00470 "paddw %%mm1, %%mm4 \n\t"\
00471 "punpckhwd %%mm0, %%mm0 \n\t"\
00472 "punpckhwd %%mm6, %%mm6 \n\t"\
00473 "punpckhwd %%mm3, %%mm3 \n\t"\
00474 "paddw %%mm7, %%mm0 \n\t"\
00475 "paddw %%mm7, %%mm6 \n\t"\
00476 "paddw %%mm7, %%mm3 \n\t"\
00477 \
00478 "packuswb %%mm0, %%mm2 \n\t"\
00479 "packuswb %%mm6, %%mm5 \n\t"\
00480 "packuswb %%mm3, %%mm4 \n\t"\
00481
00482 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
00483
/* Full 2-tap bilinear YUV->RGB pipeline: chroma, luma (%0/%1), coeffs. */
00484 #define YSCALEYUV2RGB(index, c) \
00485 REAL_YSCALEYUV2RGB_UV(index, c) \
00486 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
00487 REAL_YSCALEYUV2RGB_COEFF(c)
00488
/* REAL_YSCALEYUV2PACKED1(index, c): single-source-line ("1") packed
 * path — no vertical interpolation, just >>7 of luma (%0) and of one
 * chroma line (%2, V at offset VOF) into the usual registers. */
00489 #define REAL_YSCALEYUV2PACKED1(index, c) \
00490 "xor "#index", "#index" \n\t"\
00491 ASMALIGN(4)\
00492 "1: \n\t"\
00493 "movq (%2, "#index"), %%mm3 \n\t" \
00494 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" \
00495 "psraw $7, %%mm3 \n\t" \
00496 "psraw $7, %%mm4 \n\t" \
00497 "movq (%0, "#index", 2), %%mm1 \n\t" \
00498 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00499 "psraw $7, %%mm1 \n\t" \
00500 "psraw $7, %%mm7 \n\t" \
00501
00502 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
00503
/* REAL_YSCALEYUV2RGB1(index, c): single-line YUV->RGB — loads one luma
 * and one chroma line with 4 fractional bits (>>4) and applies the same
 * coefficient/pack sequence as REAL_YSCALEYUV2RGB_COEFF.
 * Output: B=mm2, G=mm4, R=mm5. */
00504 #define REAL_YSCALEYUV2RGB1(index, c) \
00505 "xor "#index", "#index" \n\t"\
00506 ASMALIGN(4)\
00507 "1: \n\t"\
00508 "movq (%2, "#index"), %%mm3 \n\t" \
00509 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" \
00510 "psraw $4, %%mm3 \n\t" \
00511 "psraw $4, %%mm4 \n\t" \
00512 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00513 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00514 "movq %%mm3, %%mm2 \n\t" \
00515 "movq %%mm4, %%mm5 \n\t" \
00516 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00517 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00518 \
00519 "movq (%0, "#index", 2), %%mm1 \n\t" \
00520 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00521 "psraw $4, %%mm1 \n\t" \
00522 "psraw $4, %%mm7 \n\t" \
00523 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00524 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00525 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00526 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00527 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00528 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00529 \
00530 "paddw %%mm3, %%mm4 \n\t"\
00531 "movq %%mm2, %%mm0 \n\t"\
00532 "movq %%mm5, %%mm6 \n\t"\
00533 "movq %%mm4, %%mm3 \n\t"\
00534 "punpcklwd %%mm2, %%mm2 \n\t"\
00535 "punpcklwd %%mm5, %%mm5 \n\t"\
00536 "punpcklwd %%mm4, %%mm4 \n\t"\
00537 "paddw %%mm1, %%mm2 \n\t"\
00538 "paddw %%mm1, %%mm5 \n\t"\
00539 "paddw %%mm1, %%mm4 \n\t"\
00540 "punpckhwd %%mm0, %%mm0 \n\t"\
00541 "punpckhwd %%mm6, %%mm6 \n\t"\
00542 "punpckhwd %%mm3, %%mm3 \n\t"\
00543 "paddw %%mm7, %%mm0 \n\t"\
00544 "paddw %%mm7, %%mm6 \n\t"\
00545 "paddw %%mm7, %%mm3 \n\t"\
00546 \
00547 "packuswb %%mm0, %%mm2 \n\t"\
00548 "packuswb %%mm6, %%mm5 \n\t"\
00549 "packuswb %%mm3, %%mm4 \n\t"\
00550
00551 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
00552
/* REAL_YSCALEYUV2PACKED1b(index, c): "1b" = single luma line but chroma
 * taken as the average of two stored chroma lines (%2 and %3):
 * (c0 + c1) then >>8 ≈ the >>7 of the mean. Used when the output row
 * sits vertically between two chroma samples. */
00553 #define REAL_YSCALEYUV2PACKED1b(index, c) \
00554 "xor "#index", "#index" \n\t"\
00555 ASMALIGN(4)\
00556 "1: \n\t"\
00557 "movq (%2, "#index"), %%mm2 \n\t" \
00558 "movq (%3, "#index"), %%mm3 \n\t" \
00559 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00560 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00561 "paddw %%mm2, %%mm3 \n\t" \
00562 "paddw %%mm5, %%mm4 \n\t" \
00563 "psrlw $8, %%mm3 \n\t" \
00564 "psrlw $8, %%mm4 \n\t" \
00565 "movq (%0, "#index", 2), %%mm1 \n\t" \
00566 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00567 "psraw $7, %%mm1 \n\t" \
00568 "psraw $7, %%mm7 \n\t"
00569 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
00570
00571
/* REAL_YSCALEYUV2RGB1b(index, c): RGB version of the "1b" path —
 * chroma = averaged line pair (>>5 keeps 4 fractional bits of the sum),
 * then the standard coefficient/pack tail. Output: B=mm2, G=mm4, R=mm5. */
00572 #define REAL_YSCALEYUV2RGB1b(index, c) \
00573 "xor "#index", "#index" \n\t"\
00574 ASMALIGN(4)\
00575 "1: \n\t"\
00576 "movq (%2, "#index"), %%mm2 \n\t" \
00577 "movq (%3, "#index"), %%mm3 \n\t" \
00578 "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" \
00579 "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" \
00580 "paddw %%mm2, %%mm3 \n\t" \
00581 "paddw %%mm5, %%mm4 \n\t" \
00582 "psrlw $5, %%mm3 \n\t" \
00583 "psrlw $5, %%mm4 \n\t" \
00584 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00585 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00586 "movq %%mm3, %%mm2 \n\t" \
00587 "movq %%mm4, %%mm5 \n\t" \
00588 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00589 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00590 \
00591 "movq (%0, "#index", 2), %%mm1 \n\t" \
00592 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00593 "psraw $4, %%mm1 \n\t" \
00594 "psraw $4, %%mm7 \n\t" \
00595 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00596 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00597 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00598 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00599 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00600 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00601 \
00602 "paddw %%mm3, %%mm4 \n\t"\
00603 "movq %%mm2, %%mm0 \n\t"\
00604 "movq %%mm5, %%mm6 \n\t"\
00605 "movq %%mm4, %%mm3 \n\t"\
00606 "punpcklwd %%mm2, %%mm2 \n\t"\
00607 "punpcklwd %%mm5, %%mm5 \n\t"\
00608 "punpcklwd %%mm4, %%mm4 \n\t"\
00609 "paddw %%mm1, %%mm2 \n\t"\
00610 "paddw %%mm1, %%mm5 \n\t"\
00611 "paddw %%mm1, %%mm4 \n\t"\
00612 "punpckhwd %%mm0, %%mm0 \n\t"\
00613 "punpckhwd %%mm6, %%mm6 \n\t"\
00614 "punpckhwd %%mm3, %%mm3 \n\t"\
00615 "paddw %%mm7, %%mm0 \n\t"\
00616 "paddw %%mm7, %%mm6 \n\t"\
00617 "paddw %%mm7, %%mm3 \n\t"\
00618 \
00619 "packuswb %%mm0, %%mm2 \n\t"\
00620 "packuswb %%mm6, %%mm5 \n\t"\
00621 "packuswb %%mm3, %%mm4 \n\t"\
00622
00623 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
00624
/* REAL_YSCALEYUV2RGB1_ALPHA(index): loads one alpha line from %1,
 * converts to bytes (>>7) and packs all 8 alpha values into mm7.
 * Clobbers mm1/mm7 — must run after the RGB registers are final. */
00625 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
00626 "movq (%1, "#index", 2), %%mm7 \n\t" \
00627 "movq 8(%1, "#index", 2), %%mm1 \n\t" \
00628 "psraw $7, %%mm7 \n\t" \
00629 "psraw $7, %%mm1 \n\t" \
00630 "packuswb %%mm1, %%mm7 \n\t"
00631 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
00632
/* REAL_WRITEBGR32: interleave packed b/g/r/a byte registers into four
 * 32-bpp quadwords (byte order b,g,r,a per pixel) via two rounds of
 * punpck, store 8 pixels, then advance 'index' and close the pixel loop
 * (jumps back to the label "1:" opened by a YSCALEYUV2* macro). */
00633 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
00634 "movq "#b", "#q2" \n\t" \
00635 "movq "#r", "#t" \n\t" \
00636 "punpcklbw "#g", "#b" \n\t" \
00637 "punpcklbw "#a", "#r" \n\t" \
00638 "punpckhbw "#g", "#q2" \n\t" \
00639 "punpckhbw "#a", "#t" \n\t" \
00640 "movq "#b", "#q0" \n\t" \
00641 "movq "#q2", "#q3" \n\t" \
00642 "punpcklwd "#r", "#q0" \n\t" \
00643 "punpckhwd "#r", "#b" \n\t" \
00644 "punpcklwd "#t", "#q2" \n\t" \
00645 "punpckhwd "#t", "#q3" \n\t" \
00646 \
00647 MOVNTQ( q0, (dst, index, 4))\
00648 MOVNTQ( b, 8(dst, index, 4))\
00649 MOVNTQ( q2, 16(dst, index, 4))\
00650 MOVNTQ( q3, 24(dst, index, 4))\
00651 \
00652 "add $8, "#index" \n\t"\
00653 "cmp "#dstw", "#index" \n\t"\
00654 " jb 1b \n\t"
00655 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
00656
/* REAL_WRITERGB16: pack B(mm2)/G(mm4)/R(mm5) bytes into RGB565 —
 * mask to 5/6/5 significant bits (bF8/bFC tables), shift into place,
 * OR together and store 8 pixels as 16 bytes; then loop. mm7 must be 0. */
00657 #define REAL_WRITERGB16(dst, dstw, index) \
00658 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00659 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00660 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00661 "psrlq $3, %%mm2 \n\t"\
00662 \
00663 "movq %%mm2, %%mm1 \n\t"\
00664 "movq %%mm4, %%mm3 \n\t"\
00665 \
00666 "punpcklbw %%mm7, %%mm3 \n\t"\
00667 "punpcklbw %%mm5, %%mm2 \n\t"\
00668 "punpckhbw %%mm7, %%mm4 \n\t"\
00669 "punpckhbw %%mm5, %%mm1 \n\t"\
00670 \
00671 "psllq $3, %%mm3 \n\t"\
00672 "psllq $3, %%mm4 \n\t"\
00673 \
00674 "por %%mm3, %%mm2 \n\t"\
00675 "por %%mm4, %%mm1 \n\t"\
00676 \
00677 MOVNTQ(%%mm2, (dst, index, 2))\
00678 MOVNTQ(%%mm1, 8(dst, index, 2))\
00679 \
00680 "add $8, "#index" \n\t"\
00681 "cmp "#dstw", "#index" \n\t"\
00682 " jb 1b \n\t"
00683 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
00684
/* REAL_WRITERGB15: same as WRITERGB16 but RGB555 — all three channels
 * masked to 5 bits (bF8) and green shifted by 2 instead of 3. */
00685 #define REAL_WRITERGB15(dst, dstw, index) \
00686 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00687 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00688 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00689 "psrlq $3, %%mm2 \n\t"\
00690 "psrlq $1, %%mm5 \n\t"\
00691 \
00692 "movq %%mm2, %%mm1 \n\t"\
00693 "movq %%mm4, %%mm3 \n\t"\
00694 \
00695 "punpcklbw %%mm7, %%mm3 \n\t"\
00696 "punpcklbw %%mm5, %%mm2 \n\t"\
00697 "punpckhbw %%mm7, %%mm4 \n\t"\
00698 "punpckhbw %%mm5, %%mm1 \n\t"\
00699 \
00700 "psllq $2, %%mm3 \n\t"\
00701 "psllq $2, %%mm4 \n\t"\
00702 \
00703 "por %%mm3, %%mm2 \n\t"\
00704 "por %%mm4, %%mm1 \n\t"\
00705 \
00706 MOVNTQ(%%mm2, (dst, index, 2))\
00707 MOVNTQ(%%mm1, 8(dst, index, 2))\
00708 \
00709 "add $8, "#index" \n\t"\
00710 "cmp "#dstw", "#index" \n\t"\
00711 " jb 1b \n\t"
00712 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
00713
/* WRITEBGR24OLD: legacy 24-bpp writer — interleaves b/g/r into BGRA-like
 * quads, then shuffles/masks (bm* bitmask tables) the 32 RGBA-style
 * bytes down to 24 packed bytes and stores three quadwords per 8 pixels.
 * Kept for reference; superseded by WRITEBGR24MMX/MMX2 below.
 * Note: advances 'dst' by 24 each iteration (dst is a register here). */
00714 #define WRITEBGR24OLD(dst, dstw, index) \
00715 \
00716 "movq %%mm2, %%mm1 \n\t" \
00717 "movq %%mm5, %%mm6 \n\t" \
00718 "punpcklbw %%mm4, %%mm2 \n\t" \
00719 "punpcklbw %%mm7, %%mm5 \n\t" \
00720 "punpckhbw %%mm4, %%mm1 \n\t" \
00721 "punpckhbw %%mm7, %%mm6 \n\t" \
00722 "movq %%mm2, %%mm0 \n\t" \
00723 "movq %%mm1, %%mm3 \n\t" \
00724 "punpcklwd %%mm5, %%mm0 \n\t" \
00725 "punpckhwd %%mm5, %%mm2 \n\t" \
00726 "punpcklwd %%mm6, %%mm1 \n\t" \
00727 "punpckhwd %%mm6, %%mm3 \n\t" \
00728 \
00729 "movq %%mm0, %%mm4 \n\t" \
00730 "psrlq $8, %%mm0 \n\t" \
00731 "pand "MANGLE(bm00000111)", %%mm4 \n\t" \
00732 "pand "MANGLE(bm11111000)", %%mm0 \n\t" \
00733 "por %%mm4, %%mm0 \n\t" \
00734 "movq %%mm2, %%mm4 \n\t" \
00735 "psllq $48, %%mm2 \n\t" \
00736 "por %%mm2, %%mm0 \n\t" \
00737 \
00738 "movq %%mm4, %%mm2 \n\t" \
00739 "psrld $16, %%mm4 \n\t" \
00740 "psrlq $24, %%mm2 \n\t" \
00741 "por %%mm4, %%mm2 \n\t" \
00742 "pand "MANGLE(bm00001111)", %%mm2 \n\t" \
00743 "movq %%mm1, %%mm4 \n\t" \
00744 "psrlq $8, %%mm1 \n\t" \
00745 "pand "MANGLE(bm00000111)", %%mm4 \n\t" \
00746 "pand "MANGLE(bm11111000)", %%mm1 \n\t" \
00747 "por %%mm4, %%mm1 \n\t" \
00748 "movq %%mm1, %%mm4 \n\t" \
00749 "psllq $32, %%mm1 \n\t" \
00750 "por %%mm1, %%mm2 \n\t" \
00751 \
00752 "psrlq $32, %%mm4 \n\t" \
00753 "movq %%mm3, %%mm5 \n\t" \
00754 "psrlq $8, %%mm3 \n\t" \
00755 "pand "MANGLE(bm00000111)", %%mm5 \n\t" \
00756 "pand "MANGLE(bm11111000)", %%mm3 \n\t" \
00757 "por %%mm5, %%mm3 \n\t" \
00758 "psllq $16, %%mm3 \n\t" \
00759 "por %%mm4, %%mm3 \n\t" \
00760 \
00761 MOVNTQ(%%mm0, (dst))\
00762 MOVNTQ(%%mm2, 8(dst))\
00763 MOVNTQ(%%mm3, 16(dst))\
00764 "add $24, "#dst" \n\t"\
00765 \
00766 "add $8, "#index" \n\t"\
00767 "cmp "#dstw", "#index" \n\t"\
00768 " jb 1b \n\t"
00769
/* WRITEBGR24MMX: plain-MMX 24-bpp writer — builds four 32-bit pixels,
 * then uses psllq/punpckhdq/psrlq shifting to squeeze them into three
 * 24-bit-packed quadwords; 24 output bytes per 8 pixels per iteration. */
00770 #define WRITEBGR24MMX(dst, dstw, index) \
00771 \
00772 "movq %%mm2, %%mm1 \n\t" \
00773 "movq %%mm5, %%mm6 \n\t" \
00774 "punpcklbw %%mm4, %%mm2 \n\t" \
00775 "punpcklbw %%mm7, %%mm5 \n\t" \
00776 "punpckhbw %%mm4, %%mm1 \n\t" \
00777 "punpckhbw %%mm7, %%mm6 \n\t" \
00778 "movq %%mm2, %%mm0 \n\t" \
00779 "movq %%mm1, %%mm3 \n\t" \
00780 "punpcklwd %%mm5, %%mm0 \n\t" \
00781 "punpckhwd %%mm5, %%mm2 \n\t" \
00782 "punpcklwd %%mm6, %%mm1 \n\t" \
00783 "punpckhwd %%mm6, %%mm3 \n\t" \
00784 \
00785 "movq %%mm0, %%mm4 \n\t" \
00786 "movq %%mm2, %%mm6 \n\t" \
00787 "movq %%mm1, %%mm5 \n\t" \
00788 "movq %%mm3, %%mm7 \n\t" \
00789 \
00790 "psllq $40, %%mm0 \n\t" \
00791 "psllq $40, %%mm2 \n\t" \
00792 "psllq $40, %%mm1 \n\t" \
00793 "psllq $40, %%mm3 \n\t" \
00794 \
00795 "punpckhdq %%mm4, %%mm0 \n\t" \
00796 "punpckhdq %%mm6, %%mm2 \n\t" \
00797 "punpckhdq %%mm5, %%mm1 \n\t" \
00798 "punpckhdq %%mm7, %%mm3 \n\t" \
00799 \
00800 "psrlq $8, %%mm0 \n\t" \
00801 "movq %%mm2, %%mm6 \n\t" \
00802 "psllq $40, %%mm2 \n\t" \
00803 "por %%mm2, %%mm0 \n\t" \
00804 MOVNTQ(%%mm0, (dst))\
00805 \
00806 "psrlq $24, %%mm6 \n\t" \
00807 "movq %%mm1, %%mm5 \n\t" \
00808 "psllq $24, %%mm1 \n\t" \
00809 "por %%mm1, %%mm6 \n\t" \
00810 MOVNTQ(%%mm6, 8(dst))\
00811 \
00812 "psrlq $40, %%mm5 \n\t" \
00813 "psllq $8, %%mm3 \n\t" \
00814 "por %%mm3, %%mm5 \n\t" \
00815 MOVNTQ(%%mm5, 16(dst))\
00816 \
00817 "add $24, "#dst" \n\t"\
00818 \
00819 "add $8, "#index" \n\t"\
00820 "cmp "#dstw", "#index" \n\t"\
00821 " jb 1b \n\t"
00822
/* WRITEBGR24MMX2: MMX2 24-bpp writer — uses pshufw plus the ff_M24A/B/C
 * byte-mask tables to scatter B/G/R bytes directly into the three
 * packed output quadwords; fewer shift/or chains than the MMX version. */
00823 #define WRITEBGR24MMX2(dst, dstw, index) \
00824 \
00825 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00826 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00827 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00828 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00829 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00830 \
00831 "pand %%mm0, %%mm1 \n\t" \
00832 "pand %%mm0, %%mm3 \n\t" \
00833 "pand %%mm7, %%mm6 \n\t" \
00834 \
00835 "psllq $8, %%mm3 \n\t" \
00836 "por %%mm1, %%mm6 \n\t"\
00837 "por %%mm3, %%mm6 \n\t"\
00838 MOVNTQ(%%mm6, (dst))\
00839 \
00840 "psrlq $8, %%mm4 \n\t" \
00841 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00842 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00843 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00844 \
00845 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00846 "pand %%mm7, %%mm3 \n\t" \
00847 "pand %%mm0, %%mm6 \n\t" \
00848 \
00849 "por %%mm1, %%mm3 \n\t" \
00850 "por %%mm3, %%mm6 \n\t"\
00851 MOVNTQ(%%mm6, 8(dst))\
00852 \
00853 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00854 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00855 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00856 \
00857 "pand %%mm7, %%mm1 \n\t" \
00858 "pand %%mm0, %%mm3 \n\t" \
00859 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00860 \
00861 "por %%mm1, %%mm3 \n\t"\
00862 "por %%mm3, %%mm6 \n\t"\
00863 MOVNTQ(%%mm6, 16(dst))\
00864 \
00865 "add $24, "#dst" \n\t"\
00866 \
00867 "add $8, "#index" \n\t"\
00868 "cmp "#dstw", "#index" \n\t"\
00869 " jb 1b \n\t"
00870
/* Select the 24-bpp writer matching this template's CPU variant. */
00871 #if COMPILE_TEMPLATE_MMX2
00872 #undef WRITEBGR24
00873 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00874 #else
00875 #undef WRITEBGR24
00876 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00877 #endif
00878
/* REAL_WRITEYUY2: pack Y (mm1/mm7), U (mm3), V (mm4) into interleaved
 * YUYV (YUY2) — U/V packed to bytes and interleaved as UV pairs, then
 * byte-interleaved with luma; stores 8 output pixels (16 bytes). */
00879 #define REAL_WRITEYUY2(dst, dstw, index) \
00880 "packuswb %%mm3, %%mm3 \n\t"\
00881 "packuswb %%mm4, %%mm4 \n\t"\
00882 "packuswb %%mm7, %%mm1 \n\t"\
00883 "punpcklbw %%mm4, %%mm3 \n\t"\
00884 "movq %%mm1, %%mm7 \n\t"\
00885 "punpcklbw %%mm3, %%mm1 \n\t"\
00886 "punpckhbw %%mm3, %%mm7 \n\t"\
00887 \
00888 MOVNTQ(%%mm1, (dst, index, 2))\
00889 MOVNTQ(%%mm7, 8(dst, index, 2))\
00890 \
00891 "add $8, "#index" \n\t"\
00892 "cmp "#dstw", "#index" \n\t"\
00893 " jb 1b \n\t"
00894 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00895
00896
/* Vertically scale + convert intermediate 16-bit rows to one planar
 * YUV (YV12-style) output row, plus optional alpha plane.
 * Dispatch: MMX asm (accurate or fast variant depending on
 * SWS_ACCURATE_RND) unless SWS_BITEXACT forces the C path; otherwise
 * AltiVec or the generic C implementation. uDest/vDest/aDest may be
 * NULL to skip chroma/alpha. */
00897 static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
00898 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
00899 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
00900 {
00901 #if COMPILE_TEMPLATE_MMX
00902 if(!(c->flags & SWS_BITEXACT)) {
00903 if (c->flags & SWS_ACCURATE_RND) {
00904 if (uDest) {
/* U plane at offset 0, V plane at byte offset VOF in the same buffers. */
00905 YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
00906 YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
00907 }
00908 if (CONFIG_SWSCALE_ALPHA && aDest) {
00909 YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
00910 }
00911
00912 YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
00913 } else {
00914 if (uDest) {
00915 YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
00916 YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
00917 }
00918 if (CONFIG_SWSCALE_ALPHA && aDest) {
00919 YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
00920 }
00921
00922 YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
00923 }
00924 return;
00925 }
00926 #endif
00927 #if COMPILE_TEMPLATE_ALTIVEC
/* NOTE: the AltiVec path has no alpha support — alpSrc/aDest unused. */
00928 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
00929 chrFilter, chrSrc, chrFilterSize,
00930 dest, uDest, vDest, dstW, chrDstW);
00931 #else //COMPILE_TEMPLATE_ALTIVEC
00932 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
00933 chrFilter, chrSrc, chrFilterSize,
00934 alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
00935 #endif
00936 }
00937
/* NV12/NV21 output row: no SIMD variant in this template — always
 * delegates to the generic C implementation (interleaved-chroma layout
 * selected by dstFormat). The SwsContext parameter is unused here but
 * kept so all yuv2* entry points share the same shape. */
00938 static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
00939 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
00940 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
00941 {
00942 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
00943 chrFilter, chrSrc, chrFilterSize,
00944 dest, uDest, dstW, chrDstW, dstFormat);
00945 }
00946
/* 1:1 (unfiltered) vertical pass: convert single 16-bit intermediate
 * rows to 8-bit planar output. MMX path runs YSCALEYUV2YV121(_ACCURATE)
 * over up to four planes (alpha, luma, U, V — V is chrSrc + VOFW);
 * the asm indexes from the END of each row with a negative counter,
 * hence the dst[p] + counter[p] / -counter[p] operands. Falls back to a
 * C loop when SWS_BITEXACT is set or MMX is not compiled in. */
00947 static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
00948 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
00949 {
00950 int i;
00951 #if COMPILE_TEMPLATE_MMX
00952 if(!(c->flags & SWS_BITEXACT)) {
00953 long p= 4;
00954 const uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
00955 uint8_t *dst[4]= {aDest, dest, uDest, vDest};
00956 x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
00957
00958 if (c->flags & SWS_ACCURATE_RND) {
00959 while(p--) {
/* NULL plane pointer (e.g. no alpha) skips that plane. */
00960 if (dst[p]) {
00961 __asm__ volatile(
00962 YSCALEYUV2YV121_ACCURATE
00963 :: "r" (src[p]), "r" (dst[p] + counter[p]),
00964 "g" (-counter[p])
00965 : "%"REG_a
00966 );
00967 }
00968 }
00969 } else {
00970 while(p--) {
00971 if (dst[p]) {
00972 __asm__ volatile(
00973 YSCALEYUV2YV121
00974 :: "r" (src[p]), "r" (dst[p] + counter[p]),
00975 "g" (-counter[p])
00976 : "%"REG_a
00977 );
00978 }
00979 }
00980 }
00981 return;
00982 }
00983 #endif
/* C fallback: round (+64) and shift 14-bit intermediates down to 8 bits.
 * The (val&256) test is a fast out-of-range check: valid inputs map val
 * into [-256, 256], where bit 8 is set exactly when clipping is needed.
 * NOTE(review): av_clip_uint8() would be clearer and is already used for
 * alpha below — presumably kept this way for speed; confirm before changing. */
00984 for (i=0; i<dstW; i++) {
00985 int val= (lumSrc[i]+64)>>7;
00986
00987 if (val&256) {
00988 if (val<0) val=0;
00989 else val=255;
00990 }
00991
00992 dest[i]= val;
00993 }
00994
00995 if (uDest)
00996 for (i=0; i<chrDstW; i++) {
00997 int u=(chrSrc[i ]+64)>>7;
00998 int v=(chrSrc[i + VOFW]+64)>>7;
00999
01000 if ((u|v)&256) {
01001 if (u<0) u=0;
01002 else if (u>255) u=255;
01003 if (v<0) v=0;
01004 else if (v>255) v=255;
01005 }
01006
01007 uDest[i]= u;
01008 vDest[i]= v;
01009 }
01010
/* Alpha plane uses the straightforward clip helper. */
01011 if (CONFIG_SWSCALE_ALPHA && aDest)
01012 for (i=0; i<dstW; i++) {
01013 int val= (alpSrc[i]+64)>>7;
01014 aDest[i]= av_clip_uint8(val);
01015 }
01016 }
01017
01018
/**
 * Vertically filter several source lines and convert/pack to a packed pixel
 * format (RGB32/BGR24/RGB555/RGB565/YUYV...).
 *
 * MMX path: one hand-written asm kernel per destination format, in two
 * flavors — SWS_ACCURATE_RND uses the *_ACCURATE macros (higher precision
 * accumulation), the default path uses the plain macros. Formats without a
 * kernel fall through to the AltiVec or C implementation below.
 */
static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
{
#if COMPILE_TEMPLATE_MMX
    x86_reg dummy=0;  /* fills unused asm operand slots %1..%3 */
    if(!(c->flags & SWS_BITEXACT)) {
        if (c->flags & SWS_ACCURATE_RND) {
            switch(c->dstFormat) {
            case PIX_FMT_RGB32:
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                    /* With alpha: RGB result is spilled to the context's
                       U_TEMP/V_TEMP/Y_TEMP scratch slots while the alpha
                       channel is filtered, then reloaded for the store. */
                    YSCALEYUV2PACKEDX_ACCURATE
                    YSCALEYUV2RGBX
                    "movq %%mm2, "U_TEMP"(%0) \n\t"
                    "movq %%mm4, "V_TEMP"(%0) \n\t"
                    "movq %%mm5, "Y_TEMP"(%0) \n\t"
                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
                    "movq "Y_TEMP"(%0), %%mm5 \n\t"
                    "psraw $3, %%mm1 \n\t"
                    "psraw $3, %%mm7 \n\t"
                    "packuswb %%mm7, %%mm1 \n\t"
                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)

                    YSCALEYUV2PACKEDX_END
                } else {
                    YSCALEYUV2PACKEDX_ACCURATE
                    YSCALEYUV2RGBX
                    "pcmpeqd %%mm7, %%mm7 \n\t"  /* opaque alpha = all ones */
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)

                    YSCALEYUV2PACKEDX_END
                }
                return;
            case PIX_FMT_BGR24:
                YSCALEYUV2PACKEDX_ACCURATE
                YSCALEYUV2RGBX
                "pxor %%mm7, %%mm7 \n\t"
                /* REG_c = dest + 3*x : 24bpp needs a separately scaled pointer */
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
                "add %4, %%"REG_c" \n\t"
                WRITEBGR24(%%REGc, %5, %%REGa)


                :: "r" (&c->redDither),
                "m" (dummy), "m" (dummy), "m" (dummy),
                "r" (dest), "m" (dstW)
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                );
                return;
            case PIX_FMT_RGB555:
                YSCALEYUV2PACKEDX_ACCURATE
                YSCALEYUV2RGBX
                "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                /* per-channel ordered dither before 5/5/5 truncation */
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif

                WRITERGB15(%4, %5, %%REGa)
                YSCALEYUV2PACKEDX_END
                return;
            case PIX_FMT_RGB565:
                YSCALEYUV2PACKEDX_ACCURATE
                YSCALEYUV2RGBX
                "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif

                WRITERGB16(%4, %5, %%REGa)
                YSCALEYUV2PACKEDX_END
                return;
            case PIX_FMT_YUYV422:
                /* No RGB conversion needed — just scale back to 8 bits and
                   interleave Y/U/V. */
                YSCALEYUV2PACKEDX_ACCURATE


                "psraw $3, %%mm3 \n\t"
                "psraw $3, %%mm4 \n\t"
                "psraw $3, %%mm1 \n\t"
                "psraw $3, %%mm7 \n\t"
                WRITEYUY2(%4, %5, %%REGa)
                YSCALEYUV2PACKEDX_END
                return;
            }
        } else {
            /* Same kernels, default (faster, less accurate) rounding. */
            switch(c->dstFormat) {
            case PIX_FMT_RGB32:
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                    YSCALEYUV2PACKEDX
                    YSCALEYUV2RGBX
                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
                    "psraw $3, %%mm1 \n\t"
                    "psraw $3, %%mm7 \n\t"
                    "packuswb %%mm7, %%mm1 \n\t"
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
                    YSCALEYUV2PACKEDX_END
                } else {
                    YSCALEYUV2PACKEDX
                    YSCALEYUV2RGBX
                    "pcmpeqd %%mm7, %%mm7 \n\t"
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                    YSCALEYUV2PACKEDX_END
                }
                return;
            case PIX_FMT_BGR24:
                YSCALEYUV2PACKEDX
                YSCALEYUV2RGBX
                "pxor %%mm7, %%mm7 \n\t"
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
                "add %4, %%"REG_c" \n\t"
                WRITEBGR24(%%REGc, %5, %%REGa)

                :: "r" (&c->redDither),
                "m" (dummy), "m" (dummy), "m" (dummy),
                "r" (dest), "m" (dstW)
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
                );
                return;
            case PIX_FMT_RGB555:
                YSCALEYUV2PACKEDX
                YSCALEYUV2RGBX
                "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
                "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
                "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif

                WRITERGB15(%4, %5, %%REGa)
                YSCALEYUV2PACKEDX_END
                return;
            case PIX_FMT_RGB565:
                YSCALEYUV2PACKEDX
                YSCALEYUV2RGBX
                "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
                "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
                "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif

                WRITERGB16(%4, %5, %%REGa)
                YSCALEYUV2PACKEDX_END
                return;
            case PIX_FMT_YUYV422:
                YSCALEYUV2PACKEDX


                "psraw $3, %%mm3 \n\t"
                "psraw $3, %%mm4 \n\t"
                "psraw $3, %%mm1 \n\t"
                "psraw $3, %%mm7 \n\t"
                WRITEYUY2(%4, %5, %%REGa)
                YSCALEYUV2PACKEDX_END
                return;
            }
        }
    }
#endif
#if COMPILE_TEMPLATE_ALTIVEC
    /* The AltiVec kernel covers the listed RGB formats only and does not
       handle alpha — everything else goes to the C fallback. */
    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
        (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
        c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
        c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
        ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
                               chrFilter, chrSrc, chrFilterSize,
                               dest, dstW, dstY);
    else
#endif
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                       chrFilter, chrSrc, chrFilterSize,
                       alpSrc, dest, dstW, dstY);
}
01203
/**
 * Bilinear (2-tap) vertical scale + packed-format conversion: blends buf0/buf1
 * with weight yalpha and uvbuf0/uvbuf1 with weight uvalpha, then packs to the
 * destination pixel format.
 *
 * The x86-32 kernels need an extra register, so they save REG_b into the
 * context (ESP_OFFSET slot) and temporarily repurpose EBP — hence the
 * push/pop REG_BP bracketing around each kernel.
 */
static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                                       const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
{
    int yalpha1=4095- yalpha;   /* complementary blend weights (12-bit) */
    int uvalpha1=4095-uvalpha;
    int i;

#if COMPILE_TEMPLATE_MMX
    if(!(c->flags & SWS_BITEXACT)) {
        switch(c->dstFormat) {
        //Note, this cannot be used for bgr24 (see the MMX kernels below)
        case PIX_FMT_RGB32:
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
#if ARCH_X86_64
                /* 64-bit: enough registers to pass the alpha buffers directly;
                   r8 serves as the loop counter. */
                __asm__ volatile(
                    YSCALEYUV2RGB(%%r8, %5)
                    YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
                    "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
                    "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
                    "packuswb %%mm7, %%mm1 \n\t"
                    WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
                    "a" (&c->redDither)
                    ,"r" (abuf0), "r" (abuf1)
                    : "%r8"
                );
#else
                /* 32-bit: out of registers — stash the alpha buffer pointers
                   in the context's u_temp/v_temp scratch fields and reload
                   them inside the asm via U_TEMP/V_TEMP. */
                *(const uint16_t **)(&c->u_temp)=abuf0;
                *(const uint16_t **)(&c->v_temp)=abuf1;
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB(%%REGBP, %5)
                    "push %0 \n\t"
                    "push %1 \n\t"
                    "mov "U_TEMP"(%5), %0 \n\t"
                    "mov "V_TEMP"(%5), %1 \n\t"
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
                    "psraw $3, %%mm1 \n\t"
                    "psraw $3, %%mm7 \n\t"
                    "packuswb %%mm7, %%mm1 \n\t"
                    "pop %1 \n\t"
                    "pop %0 \n\t"
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
#endif
            } else {
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB(%%REGBP, %5)
                    "pcmpeqd %%mm7, %%mm7 \n\t"  /* opaque alpha */
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
            }
            return;
        case PIX_FMT_BGR24:
            __asm__ volatile(
                "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                "mov %4, %%"REG_b" \n\t"
                "push %%"REG_BP" \n\t"
                YSCALEYUV2RGB(%%REGBP, %5)
                "pxor %%mm7, %%mm7 \n\t"
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
                "pop %%"REG_BP" \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                "a" (&c->redDither)
            );
            return;
        case PIX_FMT_RGB555:
            __asm__ volatile(
                "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                "mov %4, %%"REG_b" \n\t"
                "push %%"REG_BP" \n\t"
                YSCALEYUV2RGB(%%REGBP, %5)
                "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                /* ordered dither before 5/5/5 truncation */
                "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
                "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
                "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif

                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
                "pop %%"REG_BP" \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                "a" (&c->redDither)
            );
            return;
        case PIX_FMT_RGB565:
            __asm__ volatile(
                "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                "mov %4, %%"REG_b" \n\t"
                "push %%"REG_BP" \n\t"
                YSCALEYUV2RGB(%%REGBP, %5)
                "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
                "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
                "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif

                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
                "pop %%"REG_BP" \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                "a" (&c->redDither)
            );
            return;
        case PIX_FMT_YUYV422:
            __asm__ volatile(
                "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                "mov %4, %%"REG_b" \n\t"
                "push %%"REG_BP" \n\t"
                YSCALEYUV2PACKED(%%REGBP, %5)
                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
                "pop %%"REG_BP" \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                "a" (&c->redDither)
            );
            return;
        default: break;
        }
    }
#endif //COMPILE_TEMPLATE_MMX
    /* Generic C fallback covering all remaining formats. */
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
}
01352
/**
 * 1-tap (point) vertical scale + packed-format conversion: luma comes from a
 * single line buf0; chroma uses uvbuf0 alone when uvalpha < 2048 (the *1
 * kernels) or the average of uvbuf0/uvbuf1 otherwise (the *1b kernels).
 *
 * SWS_FULL_CHR_H_INT is delegated to yuv2packed2() with a zero luma blend,
 * reusing the 2-tap path.
 */
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
                                       const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
{
    const int yalpha1=0;
    int i;

    /* Used only by the C fallback macros at the bottom. */
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
    const int yalpha= 4096; //FIXME pass duplicate buffer to the 2-tap path instead

    if (flags&SWS_FULL_CHR_H_INT) {
        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
        return;
    }

#if COMPILE_TEMPLATE_MMX
    if(!(flags & SWS_BITEXACT)) {
        if (uvalpha < 2048) {
            /* Chroma taken from uvbuf0 only (YSCALEYUV2*1 kernels). As in
               yuv2packed2(), REG_b is saved in the context and EBP is
               borrowed for the loop counter on x86-32. */
            switch(dstFormat) {
            case PIX_FMT_RGB32:
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                    __asm__ volatile(
                        "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                        "mov %4, %%"REG_b" \n\t"
                        "push %%"REG_BP" \n\t"
                        YSCALEYUV2RGB1(%%REGBP, %5)
                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                        "pop %%"REG_BP" \n\t"
                        "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                        /* alpha variant passes abuf0 in "d" instead of buf1 */
                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                        "a" (&c->redDither)
                    );
                } else {
                    __asm__ volatile(
                        "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                        "mov %4, %%"REG_b" \n\t"
                        "push %%"REG_BP" \n\t"
                        YSCALEYUV2RGB1(%%REGBP, %5)
                        "pcmpeqd %%mm7, %%mm7 \n\t"  /* opaque alpha */
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                        "pop %%"REG_BP" \n\t"
                        "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                        "a" (&c->redDither)
                    );
                }
                return;
            case PIX_FMT_BGR24:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB1(%%REGBP, %5)
                    "pxor %%mm7, %%mm7 \n\t"
                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            case PIX_FMT_RGB555:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB1(%%REGBP, %5)
                    "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                    /* ordered dither before 5/5/5 truncation */
                    "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
                    "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
                    "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            case PIX_FMT_RGB565:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB1(%%REGBP, %5)
                    "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                    "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
                    "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
                    "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif

                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            case PIX_FMT_YUYV422:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2PACKED1(%%REGBP, %5)
                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            }
        } else {
            /* uvalpha ~0.5 : chroma is the average of uvbuf0 and uvbuf1
               (YSCALEYUV2*1b kernels). */
            switch(dstFormat) {
            case PIX_FMT_RGB32:
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
                    __asm__ volatile(
                        "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                        "mov %4, %%"REG_b" \n\t"
                        "push %%"REG_BP" \n\t"
                        YSCALEYUV2RGB1b(%%REGBP, %5)
                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                        "pop %%"REG_BP" \n\t"
                        "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                        "a" (&c->redDither)
                    );
                } else {
                    __asm__ volatile(
                        "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                        "mov %4, %%"REG_b" \n\t"
                        "push %%"REG_BP" \n\t"
                        YSCALEYUV2RGB1b(%%REGBP, %5)
                        "pcmpeqd %%mm7, %%mm7 \n\t"
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                        "pop %%"REG_BP" \n\t"
                        "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                        "a" (&c->redDither)
                    );
                }
                return;
            case PIX_FMT_BGR24:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB1b(%%REGBP, %5)
                    "pxor %%mm7, %%mm7 \n\t"
                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            case PIX_FMT_RGB555:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB1b(%%REGBP, %5)
                    "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                    "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
                    "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
                    "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            case PIX_FMT_RGB565:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2RGB1b(%%REGBP, %5)
                    "pxor %%mm7, %%mm7 \n\t"

#ifdef DITHER1XBPP
                    "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
                    "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
                    "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif

                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            case PIX_FMT_YUYV422:
                __asm__ volatile(
                    "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
                    "mov %4, %%"REG_b" \n\t"
                    "push %%"REG_BP" \n\t"
                    YSCALEYUV2PACKED1b(%%REGBP, %5)
                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
                    "pop %%"REG_BP" \n\t"
                    "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"

                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
                    "a" (&c->redDither)
                );
                return;
            }
        }
    }
#endif
    /* C fallbacks, split on the same chroma-blend condition as above. */
    if (uvalpha < 2048) {
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
    } else {
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
    }
}
01593
01594
01595
/**
 * Extract the luma (Y) bytes from a YUYV 4:2:2 line: Y sits at even byte
 * positions, so mask with bm01010101 and pack.
 */
static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    /* Negative index counts up to 0; src/dst passed pre-advanced to the end. */
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm2 \n\t"
        "mov %0, %%"REG_a" \n\t"
        "1: \n\t"
        "movq (%1, %%"REG_a",2), %%mm0 \n\t"
        "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
        "pand %%mm2, %%mm0 \n\t"  /* keep low byte of each word = Y */
        "pand %%mm2, %%mm1 \n\t"
        "packuswb %%mm1, %%mm0 \n\t"
        "movq %%mm0, (%2, %%"REG_a") \n\t"
        "add $8, %%"REG_a" \n\t"
        " js 1b \n\t"
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
        : "%"REG_a
    );
#else
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i];
#endif
}
01620
/**
 * Extract U and V from a YUYV 4:2:2 line into separate planes.
 * Layout is Y0 U Y1 V, so chroma sits at odd byte positions (hence the
 * initial psrlw $8); the second shift/mask pair then splits U from V.
 * src2 is unused — the API passes the same line twice (asserted below).
 */
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4 \n\t"
        "mov %0, %%"REG_a" \n\t"
        "1: \n\t"
        "movq (%1, %%"REG_a",4), %%mm0 \n\t"
        "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
        "psrlw $8, %%mm0 \n\t"  /* drop Y, keep U/V bytes */
        "psrlw $8, %%mm1 \n\t"
        "packuswb %%mm1, %%mm0 \n\t"  /* mm0 = U V U V ... */
        "movq %%mm0, %%mm1 \n\t"
        "psrlw $8, %%mm0 \n\t"  /* mm0 = V components */
        "pand %%mm4, %%mm1 \n\t"  /* mm1 = U components */
        "packuswb %%mm0, %%mm0 \n\t"
        "packuswb %%mm1, %%mm1 \n\t"
        "movd %%mm0, (%3, %%"REG_a") \n\t"
        "movd %%mm1, (%2, %%"REG_a") \n\t"
        "add $4, %%"REG_a" \n\t"
        " js 1b \n\t"
        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
        : "%"REG_a
    );
#else
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 1];
        dstV[i]= src1[4*i + 3];
    }
#endif
    assert(src1 == src2);
}
01654
/**
 * Extract the high byte of each little-endian 16-bit sample from two separate
 * source lines into dstU/dstV (used for 16-bit gray/chroma input readers).
 * Unlike the 4:2:2 readers, src1 and src2 are genuinely independent here.
 */
static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    __asm__ volatile(
        "mov %0, %%"REG_a" \n\t"
        "1: \n\t"
        "movq (%1, %%"REG_a",2), %%mm0 \n\t"
        "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
        "movq (%2, %%"REG_a",2), %%mm2 \n\t"
        "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
        "psrlw $8, %%mm0 \n\t"  /* keep high (odd-address) byte of each word */
        "psrlw $8, %%mm1 \n\t"
        "psrlw $8, %%mm2 \n\t"
        "psrlw $8, %%mm3 \n\t"
        "packuswb %%mm1, %%mm0 \n\t"
        "packuswb %%mm3, %%mm2 \n\t"
        "movq %%mm0, (%3, %%"REG_a") \n\t"
        "movq %%mm2, (%4, %%"REG_a") \n\t"
        "add $8, %%"REG_a" \n\t"
        " js 1b \n\t"
        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
        : "%"REG_a
    );
#else
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i + 1];
        dstV[i]= src2[2*i + 1];
    }
#endif
}
01686
01687
01688
/**
 * Extract the luma (Y) bytes from a UYVY 4:2:2 line: Y sits at odd byte
 * positions, so shift each 16-bit word right by 8 and pack.
 */
static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    __asm__ volatile(
        "mov %0, %%"REG_a" \n\t"
        "1: \n\t"
        "movq (%1, %%"REG_a",2), %%mm0 \n\t"
        "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
        "psrlw $8, %%mm0 \n\t"  /* keep high byte of each word = Y */
        "psrlw $8, %%mm1 \n\t"
        "packuswb %%mm1, %%mm0 \n\t"
        "movq %%mm0, (%2, %%"REG_a") \n\t"
        "add $8, %%"REG_a" \n\t"
        " js 1b \n\t"
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
        : "%"REG_a
    );
#else
    int i;
    for (i=0; i<width; i++)
        dst[i]= src[2*i+1];
#endif
}
01712
/**
 * Extract U and V from a UYVY 4:2:2 line into separate planes.
 * Layout is U Y0 V Y1, so chroma sits at even byte positions (masked with
 * bm01010101 instead of shifted). src2 is unused — same line passed twice.
 */
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4 \n\t"
        "mov %0, %%"REG_a" \n\t"
        "1: \n\t"
        "movq (%1, %%"REG_a",4), %%mm0 \n\t"
        "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
        "pand %%mm4, %%mm0 \n\t"  /* drop Y, keep U/V bytes */
        "pand %%mm4, %%mm1 \n\t"
        "packuswb %%mm1, %%mm0 \n\t"  /* mm0 = U V U V ... */
        "movq %%mm0, %%mm1 \n\t"
        "psrlw $8, %%mm0 \n\t"  /* mm0 = V components */
        "pand %%mm4, %%mm1 \n\t"  /* mm1 = U components */
        "packuswb %%mm0, %%mm0 \n\t"
        "packuswb %%mm1, %%mm1 \n\t"
        "movd %%mm0, (%3, %%"REG_a") \n\t"
        "movd %%mm1, (%2, %%"REG_a") \n\t"
        "add $4, %%"REG_a" \n\t"
        " js 1b \n\t"
        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
        : "%"REG_a
    );
#else
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[4*i + 0];
        dstV[i]= src1[4*i + 2];
    }
#endif
    assert(src1 == src2);
}
01746
/**
 * Extract the low byte of each big-endian 16-bit sample from two separate
 * source lines into dstU/dstV (big-endian counterpart of LEToUV: the wanted
 * byte is at even addresses, so mask instead of shift).
 */
static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4 \n\t"
        "mov %0, %%"REG_a" \n\t"
        "1: \n\t"
        "movq (%1, %%"REG_a",2), %%mm0 \n\t"
        "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
        "movq (%2, %%"REG_a",2), %%mm2 \n\t"
        "movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
        "pand %%mm4, %%mm0 \n\t"  /* keep even-address byte of each word */
        "pand %%mm4, %%mm1 \n\t"
        "pand %%mm4, %%mm2 \n\t"
        "pand %%mm4, %%mm3 \n\t"
        "packuswb %%mm1, %%mm0 \n\t"
        "packuswb %%mm3, %%mm2 \n\t"
        "movq %%mm0, (%3, %%"REG_a") \n\t"
        "movq %%mm2, (%4, %%"REG_a") \n\t"
        "add $8, %%"REG_a" \n\t"
        " js 1b \n\t"
        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
        : "%"REG_a
    );
#else
    int i;
    for (i=0; i<width; i++) {
        dstU[i]= src1[2*i];
        dstV[i]= src2[2*i];
    }
#endif
}
01779
/**
 * De-interleave one line of packed 2-byte chroma pairs (as in NV12/NV21):
 * even bytes go to dst1, odd bytes to dst2. The nv12/nv21 wrappers below
 * choose which destination receives U and which receives V.
 */
static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
                                    const uint8_t *src, long width)
{
#if COMPILE_TEMPLATE_MMX
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4 \n\t"
        "mov %0, %%"REG_a" \n\t"
        "1: \n\t"
        "movq (%1, %%"REG_a",2), %%mm0 \n\t"
        "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
        "movq %%mm0, %%mm2 \n\t"
        "movq %%mm1, %%mm3 \n\t"
        "pand %%mm4, %%mm0 \n\t"  /* even bytes -> dst1 */
        "pand %%mm4, %%mm1 \n\t"
        "psrlw $8, %%mm2 \n\t"  /* odd bytes -> dst2 */
        "psrlw $8, %%mm3 \n\t"
        "packuswb %%mm1, %%mm0 \n\t"
        "packuswb %%mm3, %%mm2 \n\t"
        "movq %%mm0, (%2, %%"REG_a") \n\t"
        "movq %%mm2, (%3, %%"REG_a") \n\t"
        "add $8, %%"REG_a" \n\t"
        " js 1b \n\t"
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
        : "%"REG_a
    );
#else
    int i;
    for (i = 0; i < width; i++) {
        dst1[i] = src[2*i+0];
        dst2[i] = src[2*i+1];
    }
#endif
}
01813
01814 static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
01815 const uint8_t *src1, const uint8_t *src2,
01816 long width, uint32_t *unused)
01817 {
01818 RENAME(nvXXtoUV)(dstU, dstV, src1, width);
01819 }
01820
01821 static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
01822 const uint8_t *src1, const uint8_t *src2,
01823 long width, uint32_t *unused)
01824 {
01825 RENAME(nvXXtoUV)(dstV, dstU, src1, width);
01826 }
01827
01828 #if COMPILE_TEMPLATE_MMX
/**
 * MMX kernel converting 24-bit packed RGB/BGR to 8-bit luma.
 * Coefficient pairs are selected by srcFormat (BGR24 vs RGB24 tables), kept
 * resident in mm5/mm6 across the loop; processes 4 pixels (12 bytes) per
 * iteration via two pmaddwd dot products per pixel pair.
 */
static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
{

    /* Load the channel-order-specific coefficient pairs once. */
    if(srcFormat == PIX_FMT_BGR24) {
        __asm__ volatile(
            "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
            "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
            :
        );
    } else {
        __asm__ volatile(
            "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
            "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
            :
        );
    }

    __asm__ volatile(
        "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"  /* rounding + Y offset */
        "mov %2, %%"REG_a" \n\t"  /* negative index counts up to 0 */
        "pxor %%mm7, %%mm7 \n\t"
        "1: \n\t"
        PREFETCH" 64(%0) \n\t"
        /* Overlapping 4-byte loads cover two 3-byte pixels each. */
        "movd (%0), %%mm0 \n\t"
        "movd 2(%0), %%mm1 \n\t"
        "movd 6(%0), %%mm2 \n\t"
        "movd 8(%0), %%mm3 \n\t"
        "add $12, %0 \n\t"
        "punpcklbw %%mm7, %%mm0 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm2 \n\t"
        "punpcklbw %%mm7, %%mm3 \n\t"
        "pmaddwd %%mm5, %%mm0 \n\t"
        "pmaddwd %%mm6, %%mm1 \n\t"
        "pmaddwd %%mm5, %%mm2 \n\t"
        "pmaddwd %%mm6, %%mm3 \n\t"
        "paddd %%mm1, %%mm0 \n\t"
        "paddd %%mm3, %%mm2 \n\t"
        "paddd %%mm4, %%mm0 \n\t"
        "paddd %%mm4, %%mm2 \n\t"
        "psrad $15, %%mm0 \n\t"  /* scale back from the 15-bit fixed point */
        "psrad $15, %%mm2 \n\t"
        "packssdw %%mm2, %%mm0 \n\t"
        "packuswb %%mm0, %%mm0 \n\t"
        "movd %%mm0, (%1, %%"REG_a") \n\t"
        "add $4, %%"REG_a" \n\t"
        " js 1b \n\t"
        : "+r" (src)
        : "r" (dst+width), "g" ((x86_reg)-width)
        : "%"REG_a
    );
}
01881
/**
 * MMX kernel converting 24-bit packed RGB/BGR to 8-bit U and V planes.
 * %4 points to the coefficient table ff_bgr24toUV[...] selected by srcFormat;
 * entries at 0/8 feed U, at 16/24 feed V (24(%4) is cached in mm6).
 * Processes 4 pixels per iteration, writing 4 U and 4 V bytes.
 */
static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
{
    __asm__ volatile(
        "movq 24(%4), %%mm6 \n\t"  /* keep the 4th coefficient pair resident */
        "mov %3, %%"REG_a" \n\t"
        "pxor %%mm7, %%mm7 \n\t"
        "1: \n\t"
        PREFETCH" 64(%0) \n\t"
        /* First pixel pair: overlapping loads, widen, dot-product for U (mm0)
           and V (mm2). */
        "movd (%0), %%mm0 \n\t"
        "movd 2(%0), %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm0 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "movq %%mm0, %%mm2 \n\t"
        "movq %%mm1, %%mm3 \n\t"
        "pmaddwd (%4), %%mm0 \n\t"
        "pmaddwd 8(%4), %%mm1 \n\t"
        "pmaddwd 16(%4), %%mm2 \n\t"
        "pmaddwd %%mm6, %%mm3 \n\t"
        "paddd %%mm1, %%mm0 \n\t"
        "paddd %%mm3, %%mm2 \n\t"

        /* Second pixel pair: same pattern into mm1 (U) and mm4 (V). */
        "movd 6(%0), %%mm1 \n\t"
        "movd 8(%0), %%mm3 \n\t"
        "add $12, %0 \n\t"
        "punpcklbw %%mm7, %%mm1 \n\t"
        "punpcklbw %%mm7, %%mm3 \n\t"
        "movq %%mm1, %%mm4 \n\t"
        "movq %%mm3, %%mm5 \n\t"
        "pmaddwd (%4), %%mm1 \n\t"
        "pmaddwd 8(%4), %%mm3 \n\t"
        "pmaddwd 16(%4), %%mm4 \n\t"
        "pmaddwd %%mm6, %%mm5 \n\t"
        "paddd %%mm3, %%mm1 \n\t"
        "paddd %%mm5, %%mm4 \n\t"

        /* Add the shared rounding/bias offset, scale down, pack and store. */
        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
        "paddd %%mm3, %%mm0 \n\t"
        "paddd %%mm3, %%mm2 \n\t"
        "paddd %%mm3, %%mm1 \n\t"
        "paddd %%mm3, %%mm4 \n\t"
        "psrad $15, %%mm0 \n\t"
        "psrad $15, %%mm2 \n\t"
        "psrad $15, %%mm1 \n\t"
        "psrad $15, %%mm4 \n\t"
        "packssdw %%mm1, %%mm0 \n\t"
        "packssdw %%mm4, %%mm2 \n\t"
        "packuswb %%mm0, %%mm0 \n\t"
        "packuswb %%mm2, %%mm2 \n\t"
        "movd %%mm0, (%1, %%"REG_a") \n\t"
        "movd %%mm2, (%2, %%"REG_a") \n\t"
        "add $4, %%"REG_a" \n\t"
        " js 1b \n\t"
        : "+r" (src)
        : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
        : "%"REG_a
    );
}
01939 #endif
01940
/**
 * BGR24 -> luma line reader. MMX builds use the shared asm kernel; the C
 * fallback computes the fixed-point dot product directly. The additive term
 * (33<<(RGB2YUV_SHIFT-1)) equals 16.5<<RGB2YUV_SHIFT — presumably the +16
 * limited-range luma offset plus 0.5 for rounding (TODO confirm against the
 * RY/GY/BY coefficient scaling).
 */
static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
#else
    int i;
    for (i=0; i<width; i++) {
        int b= src[i*3+0];
        int g= src[i*3+1];
        int r= src[i*3+2];

        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
    }
#endif
}
01956
/**
 * BGR24 -> chroma line reader (full horizontal resolution). The additive term
 * (257<<(RGB2YUV_SHIFT-1)) equals 128.5<<RGB2YUV_SHIFT — the +128 chroma bias
 * plus 0.5 rounding. src2 is unused; the same line is passed twice (asserted).
 */
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
#else
    int i;
    for (i=0; i<width; i++) {
        int b= src1[3*i + 0];
        int g= src1[3*i + 1];
        int r= src1[3*i + 2];

        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
    }
#endif
    assert(src1 == src2);
}
01974
01975 static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
01976 {
01977 int i;
01978 for (i=0; i<width; i++) {
01979 int b= src1[6*i + 0] + src1[6*i + 3];
01980 int g= src1[6*i + 1] + src1[6*i + 4];
01981 int r= src1[6*i + 2] + src1[6*i + 5];
01982
01983 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01984 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01985 }
01986 assert(src1 == src2);
01987 }
01988
/**
 * RGB24 -> luma line reader: same as bgr24ToY but with the r/b channel order
 * swapped (the MMX kernel selects the RGB coefficient tables via the
 * PIX_FMT_RGB24 argument).
 */
static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
#if COMPILE_TEMPLATE_MMX
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
#else
    int i;
    for (i=0; i<width; i++) {
        int r= src[i*3+0];
        int g= src[i*3+1];
        int b= src[i*3+2];

        /* (33<<(SHIFT-1)) = 16.5<<SHIFT : luma offset + rounding */
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
    }
#endif
}
02004
02005 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
02006 {
02007 #if COMPILE_TEMPLATE_MMX
02008 assert(src1==src2);
02009 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
02010 #else
02011 int i;
02012 assert(src1==src2);
02013 for (i=0; i<width; i++) {
02014 int r= src1[3*i + 0];
02015 int g= src1[3*i + 1];
02016 int b= src1[3*i + 2];
02017
02018 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
02019 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
02020 }
02021 #endif
02022 }
02023
02024 static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
02025 {
02026 int i;
02027 assert(src1==src2);
02028 for (i=0; i<width; i++) {
02029 int r= src1[6*i + 0] + src1[6*i + 3];
02030 int g= src1[6*i + 1] + src1[6*i + 4];
02031 int b= src1[6*i + 2] + src1[6*i + 5];
02032
02033 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
02034 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
02035 }
02036 }
02037
02038
02039
/* Horizontal FIR scaling: for each output sample i,
 *   dst[i] = sum_j src[filterPos[i]+j] * filter[filterSize*i + j], scaled
 * down by >>7 (C path clamps to 15-bit via FFMIN).
 * The MMX paths require filterSize to be a positive multiple of 4 and have
 * specialized loops for filterSize 4 and 8 plus a generic nested loop;
 * they produce two output samples per iteration. */
static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
{
#if COMPILE_TEMPLATE_MMX
    assert(filterSize % 4 == 0 && filterSize>0);
    if (filterSize==4) {
        /* Counter runs from -2*dstW up to 0 so loop exit is a carry check
         * (jnc); pointers are pre-biased to compensate for the negative
         * index. */
        x86_reg counter= -2*dstW;
        filter-= counter*2;
        filterPos-= counter/2;
        dst-= counter/2;
        __asm__ volatile(
#if defined(PIC)
            /* ebx is the PIC base register: save/restore it manually */
            "push %%"REG_b" \n\t"
#endif
            "pxor %%mm7, %%mm7 \n\t"
            "push %%"REG_BP" \n\t"
            "mov %%"REG_a", %%"REG_BP" \n\t"
            ASMALIGN(4)
            "1: \n\t"
            /* load two filterPos entries, two 4-tap filters, two 4-byte
             * source groups; multiply-accumulate with pmaddwd */
            "movzwl (%2, %%"REG_BP"), %%eax \n\t"
            "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
            "movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
            "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
            "movd (%3, %%"REG_a"), %%mm0 \n\t"
            "movd (%3, %%"REG_b"), %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "pmaddwd %%mm1, %%mm0 \n\t"
            "pmaddwd %%mm2, %%mm3 \n\t"
            /* horizontal add of the dword partial sums, >>7, pack, store */
            "movq %%mm0, %%mm4 \n\t"
            "punpckldq %%mm3, %%mm0 \n\t"
            "punpckhdq %%mm3, %%mm4 \n\t"
            "paddd %%mm4, %%mm0 \n\t"
            "psrad $7, %%mm0 \n\t"
            "packssdw %%mm0, %%mm0 \n\t"
            "movd %%mm0, (%4, %%"REG_BP") \n\t"
            "add $4, %%"REG_BP" \n\t"
            " jnc 1b \n\t"

            "pop %%"REG_BP" \n\t"
#if defined(PIC)
            "pop %%"REG_b" \n\t"
#endif
            : "+a" (counter)
            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
#if !defined(PIC)
            : "%"REG_b
#endif
        );
    } else if (filterSize==8) {
        x86_reg counter= -2*dstW;
        filter-= counter*4;
        filterPos-= counter/2;
        dst-= counter/2;
        __asm__ volatile(
#if defined(PIC)
            "push %%"REG_b" \n\t"
#endif
            "pxor %%mm7, %%mm7 \n\t"
            "push %%"REG_BP" \n\t"
            "mov %%"REG_a", %%"REG_BP" \n\t"
            ASMALIGN(4)
            "1: \n\t"
            /* first 4 taps of each of the two output samples */
            "movzwl (%2, %%"REG_BP"), %%eax \n\t"
            "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
            "movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
            "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
            "movd (%3, %%"REG_a"), %%mm0 \n\t"
            "movd (%3, %%"REG_b"), %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "pmaddwd %%mm1, %%mm0 \n\t"
            "pmaddwd %%mm2, %%mm3 \n\t"

            /* remaining 4 taps, accumulated into mm0/mm3 */
            "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
            "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
            "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
            "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm4 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "pmaddwd %%mm1, %%mm4 \n\t"
            "pmaddwd %%mm2, %%mm5 \n\t"
            "paddd %%mm4, %%mm0 \n\t"
            "paddd %%mm5, %%mm3 \n\t"
            /* horizontal add, >>7, pack, store two samples */
            "movq %%mm0, %%mm4 \n\t"
            "punpckldq %%mm3, %%mm0 \n\t"
            "punpckhdq %%mm3, %%mm4 \n\t"
            "paddd %%mm4, %%mm0 \n\t"
            "psrad $7, %%mm0 \n\t"
            "packssdw %%mm0, %%mm0 \n\t"
            "movd %%mm0, (%4, %%"REG_BP") \n\t"
            "add $4, %%"REG_BP" \n\t"
            " jnc 1b \n\t"

            "pop %%"REG_BP" \n\t"
#if defined(PIC)
            "pop %%"REG_b" \n\t"
#endif
            : "+a" (counter)
            : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
#if !defined(PIC)
            : "%"REG_b
#endif
        );
    } else {
        /* Generic filter size: inner loop "2:" walks the taps 4 at a time
         * (two output samples in parallel), outer loop "1:" walks outputs. */
        const uint8_t *offset = src+filterSize;
        x86_reg counter= -2*dstW;
        /* "filter-= counter*filterSize/2" is done by the add in the asm */
        filterPos-= counter/2;
        dst-= counter/2;
        __asm__ volatile(
            "pxor %%mm7, %%mm7 \n\t"
            ASMALIGN(4)
            "1: \n\t"
            "mov %2, %%"REG_c" \n\t"
            "movzwl (%%"REG_c", %0), %%eax \n\t"
            "movzwl 2(%%"REG_c", %0), %%edx \n\t"
            "mov %5, %%"REG_c" \n\t"
            "pxor %%mm4, %%mm4 \n\t"
            "pxor %%mm5, %%mm5 \n\t"
            "2: \n\t"
            "movq (%1), %%mm1 \n\t"
            "movq (%1, %6), %%mm3 \n\t"
            "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
            "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "pmaddwd %%mm1, %%mm0 \n\t"
            "pmaddwd %%mm2, %%mm3 \n\t"
            "paddd %%mm3, %%mm5 \n\t"
            "paddd %%mm0, %%mm4 \n\t"
            "add $8, %1 \n\t"
            "add $4, %%"REG_c" \n\t"
            "cmp %4, %%"REG_c" \n\t"
            " jb 2b \n\t"
            /* skip the second sample's filter row (%6 = filterSize*2 bytes) */
            "add %6, %1 \n\t"
            "movq %%mm4, %%mm0 \n\t"
            "punpckldq %%mm5, %%mm4 \n\t"
            "punpckhdq %%mm5, %%mm0 \n\t"
            "paddd %%mm0, %%mm4 \n\t"
            "psrad $7, %%mm4 \n\t"
            "packssdw %%mm4, %%mm4 \n\t"
            "mov %3, %%"REG_a" \n\t"
            "movd %%mm4, (%%"REG_a", %0) \n\t"
            "add $4, %0 \n\t"
            " jnc 1b \n\t"

            : "+r" (counter), "+r" (filter)
            : "m" (filterPos), "m" (dst), "m"(offset),
              "m" (src), "r" ((x86_reg)filterSize*2)
            : "%"REG_a, "%"REG_c, "%"REG_d
        );
    }
#else
#if COMPILE_TEMPLATE_ALTIVEC
    hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
#else
    /* Plain C reference implementation. */
    int i;
    for (i=0; i<dstW; i++) {
        int j;
        int srcPos= filterPos[i];
        int val=0;

        for (j=0; j<filterSize; j++) {

            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
        }

        /* clamp to 15 bits after dropping 7 fractional bits */
        dst[i] = FFMIN(val>>7, (1<<15)-1);

    }
#endif
#endif
}
02214
02215
02216
02217 static void RENAME(chrRangeToJpeg)(uint16_t *dst, int width)
02218 {
02219 int i;
02220 for (i = 0; i < width; i++) {
02221 dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12;
02222 dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12;
02223 }
02224 }
02225 static void RENAME(chrRangeFromJpeg)(uint16_t *dst, int width)
02226 {
02227 int i;
02228 for (i = 0; i < width; i++) {
02229 dst[i ] = (dst[i ]*1799 + 4081085)>>11;
02230 dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11;
02231 }
02232 }
02233 static void RENAME(lumRangeToJpeg)(uint16_t *dst, int width)
02234 {
02235 int i;
02236 for (i = 0; i < width; i++)
02237 dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
02238 }
02239 static void RENAME(lumRangeFromJpeg)(uint16_t *dst, int width)
02240 {
02241 int i;
02242 for (i = 0; i < width; i++)
02243 dst[i] = (dst[i]*14071 + 33561947)>>14;
02244 }
02245
/* Inner interpolation step shared by the fast bilinear scalers below.
 * On entry %%edi/%%esi hold the two neighbouring source pixels and
 * %%ecx holds the running x position (fraction used by imull);
 * it also loads the output pointer (%1) into REG_D.
 * The >>9 leaves the blended value with 7 fractional bits.
 * NOTE(review): register roles inferred from the two call sites in this
 * file (hyscale_fast/hcscale_fast) — confirm before reusing elsewhere. */
#define FAST_BILINEAR_X86 \
    "subl %%edi, %%esi \n\t" \
    "imull %%ecx, %%esi \n\t" \
    "shll $16, %%edi \n\t" \
    "addl %%edi, %%esi \n\t" \
    "mov %1, %%"REG_D"\n\t" \
    "shrl $9, %%esi \n\t" \
02253
/* Fast bilinear horizontal scaling of one luma line.
 * On x86 with MMX2 it runs the per-context generated filter code
 * (c->lumMmx2FilterCode) when canMMX2BeUsed, otherwise a generic
 * fixed-point bilinear asm loop; on other architectures a C loop.
 * Output samples carry 7 fractional bits (src<<7 scale). */
static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                        long dstWidth, const uint8_t *src, int srcW,
                                        int xInc)
{
#if ARCH_X86
#if COMPILE_TEMPLATE_MMX2
    int32_t *filterPos = c->hLumFilterPos;
    int16_t *filter = c->hLumFilter;
    int canMMX2BeUsed = c->canMMX2BeUsed;
    void *mmx2FilterCode= c->lumMmx2FilterCode;
    int i;
#if defined(PIC)
    /* ebx is the PIC base register; it is spilled to this slot */
    DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
    if (canMMX2BeUsed) {
        __asm__ volatile(
#if defined(PIC)
            "mov %%"REG_b", %5 \n\t"
#endif
            "pxor %%mm7, %%mm7 \n\t"
            "mov %0, %%"REG_c" \n\t"
            "mov %1, %%"REG_D" \n\t"
            "mov %2, %%"REG_d" \n\t"
            "mov %3, %%"REG_b" \n\t"
            "xor %%"REG_a", %%"REG_a" \n\t"
            PREFETCH" (%%"REG_c") \n\t"
            PREFETCH" 32(%%"REG_c") \n\t"
            PREFETCH" 64(%%"REG_c") \n\t"

#if ARCH_X86_64

/* Call one chunk of the generated filter code, then advance the source
 * (REG_c) and destination (REG_D) pointers from the filterPos table. */
#define CALL_MMX2_FILTER_CODE \
            "movl (%%"REG_b"), %%esi \n\t"\
            "call *%4 \n\t"\
            "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
            "add %%"REG_S", %%"REG_c" \n\t"\
            "add %%"REG_a", %%"REG_D" \n\t"\
            "xor %%"REG_a", %%"REG_a" \n\t"\

#else

#define CALL_MMX2_FILTER_CODE \
            "movl (%%"REG_b"), %%esi \n\t"\
            "call *%4 \n\t"\
            "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
            "add %%"REG_a", %%"REG_D" \n\t"\
            "xor %%"REG_a", %%"REG_a" \n\t"\

#endif

            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE

#if defined(PIC)
            "mov %5, %%"REG_b" \n\t"
#endif
            :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
               "m" (mmx2FilterCode)
#if defined(PIC)
              ,"m" (ebxsave)
#endif
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
#if !defined(PIC)
             ,"%"REG_b
#endif
        );
        /* pad the tail the generated code did not fill with the last pixel */
        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
    } else {
#endif
    /* generic fixed-point bilinear fallback: 16.16 position, two pixels
     * per iteration */
    x86_reg xInc_shr16 = xInc >> 16;
    uint16_t xInc_mask = xInc & 0xffff;

    __asm__ volatile(
        "xor %%"REG_a", %%"REG_a" \n\t" // i
        "xor %%"REG_d", %%"REG_d" \n\t" // xx
        "xorl %%ecx, %%ecx \n\t" // xalpha
        ASMALIGN(4)
        "1: \n\t"
        "movzbl (%0, %%"REG_d"), %%edi \n\t"
        "movzbl 1(%0, %%"REG_d"), %%esi \n\t"
        FAST_BILINEAR_X86
        "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
        /* advance position: fraction in %%cx, integer carry into REG_d */
        "addw %4, %%cx \n\t"
        "adc %3, %%"REG_d" \n\t"

        "movzbl (%0, %%"REG_d"), %%edi \n\t"
        "movzbl 1(%0, %%"REG_d"), %%esi \n\t"
        FAST_BILINEAR_X86
        "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
        "addw %4, %%cx \n\t"
        "adc %3, %%"REG_d" \n\t"


        "add $2, %%"REG_a" \n\t"
        "cmp %2, %%"REG_a" \n\t"
        " jb 1b \n\t"


        :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
        : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
    );
#if COMPILE_TEMPLATE_MMX2
    }
#endif
#else
    /* portable C fallback: 16.16 fixed-point bilinear, 7-bit fraction */
    int i;
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
        xpos+=xInc;
    }
#endif
}
02375
02376
02377 static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
02378 const int16_t *hLumFilter,
02379 const int16_t *hLumFilterPos, int hLumFilterSize,
02380 uint8_t *formatConvBuffer,
02381 uint32_t *pal, int isAlpha)
02382 {
02383 void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
02384 void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
02385
02386 src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
02387
02388 if (toYV12) {
02389 toYV12(formatConvBuffer, src, srcW, pal);
02390 src= formatConvBuffer;
02391 }
02392
02393 if (!c->hyscale_fast) {
02394 c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
02395 } else {
02396 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
02397 }
02398
02399 if (convertRange)
02400 convertRange(dst, dstWidth);
02401 }
02402
/* Fast bilinear horizontal scaling of one chroma line pair.
 * U comes from src1 into dst[0..], V from src2 into dst[VOFW..].
 * Same structure as hyscale_fast: generated MMX2 filter code when
 * canMMX2BeUsed (run once per plane), generic bilinear asm otherwise,
 * plain C elsewhere. */
static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
                                        long dstWidth, const uint8_t *src1,
                                        const uint8_t *src2, int srcW, int xInc)
{
#if ARCH_X86
#if COMPILE_TEMPLATE_MMX2
    int32_t *filterPos = c->hChrFilterPos;
    int16_t *filter = c->hChrFilter;
    int canMMX2BeUsed = c->canMMX2BeUsed;
    void *mmx2FilterCode= c->chrMmx2FilterCode;
    int i;
#if defined(PIC)
    /* ebx is the PIC base register; it is spilled to this slot */
    DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
    if (canMMX2BeUsed) {
        __asm__ volatile(
#if defined(PIC)
            "mov %%"REG_b", %6 \n\t"
#endif
            "pxor %%mm7, %%mm7 \n\t"
            /* first pass: U plane from src1 into dst */
            "mov %0, %%"REG_c" \n\t"
            "mov %1, %%"REG_D" \n\t"
            "mov %2, %%"REG_d" \n\t"
            "mov %3, %%"REG_b" \n\t"
            "xor %%"REG_a", %%"REG_a" \n\t"
            PREFETCH" (%%"REG_c") \n\t"
            PREFETCH" 32(%%"REG_c") \n\t"
            PREFETCH" 64(%%"REG_c") \n\t"

            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            /* second pass: V plane from src2 into dst+VOF */
            "xor %%"REG_a", %%"REG_a" \n\t"
            "mov %5, %%"REG_c" \n\t"
            "mov %1, %%"REG_D" \n\t"
            "add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t"
            PREFETCH" (%%"REG_c") \n\t"
            PREFETCH" 32(%%"REG_c") \n\t"
            PREFETCH" 64(%%"REG_c") \n\t"

            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE
            CALL_MMX2_FILTER_CODE

#if defined(PIC)
            "mov %6, %%"REG_b" \n\t"
#endif
            :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos),
               "m" (mmx2FilterCode), "m" (src2)
#if defined(PIC)
              ,"m" (ebxsave)
#endif
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
#if !defined(PIC)
             ,"%"REG_b
#endif
        );
        /* pad the tail the generated code did not fill with the last pixel */
        for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {

            dst[i] = src1[srcW-1]*128;
            dst[i+VOFW] = src2[srcW-1]*128;
        }
    } else {
#endif
        /* generic fixed-point bilinear fallback: both planes per iteration */
        x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
        uint16_t xInc_mask = xInc & 0xffff;
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t" // i
            "xor %%"REG_d", %%"REG_d" \n\t" // xx
            "xorl %%ecx, %%ecx \n\t" // xalpha
            ASMALIGN(4)
            "1: \n\t"
            "mov %0, %%"REG_S" \n\t"
            "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t"
            "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t"
            FAST_BILINEAR_X86
            "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"

            "movzbl (%5, %%"REG_d"), %%edi \n\t"
            "movzbl 1(%5, %%"REG_d"), %%esi \n\t"
            FAST_BILINEAR_X86
            "movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"

            /* advance position: fraction in %%cx, integer carry into REG_d */
            "addw %4, %%cx \n\t"
            "adc %3, %%"REG_d" \n\t"
            "add $1, %%"REG_a" \n\t"
            "cmp %2, %%"REG_a" \n\t"
            " jb 1b \n\t"



#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4)
            :: "m" (src1), "m" (dst), "g" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
#else
            :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
#endif
               "r" (src2)
            : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
        );
#if COMPILE_TEMPLATE_MMX2
    }
#endif
#else
    /* portable C fallback: 16.16 fixed-point bilinear, 7-bit fraction */
    int i;
    unsigned int xpos=0;
    for (i=0;i<dstWidth;i++) {
        register unsigned int xx=xpos>>16;
        register unsigned int xalpha=(xpos&0xFFFF)>>9;
        dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
        dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);




        xpos+=xInc;
    }
#endif
}
02523
02524 inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
02525 int srcW, int xInc, const int16_t *hChrFilter,
02526 const int16_t *hChrFilterPos, int hChrFilterSize,
02527 uint8_t *formatConvBuffer,
02528 uint32_t *pal)
02529 {
02530
02531 src1 += c->chrSrcOffset;
02532 src2 += c->chrSrcOffset;
02533
02534 if (c->chrToYV12) {
02535 c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
02536 src1= formatConvBuffer;
02537 src2= formatConvBuffer+VOFW;
02538 }
02539
02540 if (!c->hcscale_fast) {
02541 c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02542 c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02543 } else {
02544 c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
02545 }
02546
02547 if (c->chrConvertRange)
02548 c->chrConvertRange(dst, dstWidth);
02549 }
02550
/* Compile-time switch for verbose buffer tracing in swScale(); the macro
 * expands to a dead 'if (0)' when disabled, so the av_log arguments are
 * still type-checked. Requires a SwsContext 'c' in scope at the call site. */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
02553
/* Main scaling entry point for this template: scales one horizontal slice
 * of the source into the destination planes. Horizontal scaling results are
 * cached line-by-line in lumPixBuf/chrPixBuf/alpPixBuf ring buffers; the
 * vertical scaler then combines the buffered lines per output row.
 * State (dstY, buffer indices, last buffered lines) is carried in 'c'
 * across calls so slices may arrive incrementally.
 * Returns the number of output lines produced for this slice. */
static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                           int srcSliceH, uint8_t* dst[], int dstStride[])
{
    /* local copies of the context to avoid repeated pointer chasing */
    const int srcW= c->srcW;
    const int dstW= c->dstW;
    const int dstH= c->dstH;
    const int chrDstW= c->chrDstW;
    const int chrSrcW= c->chrSrcW;
    const int lumXInc= c->lumXInc;
    const int chrXInc= c->chrXInc;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int flags= c->flags;
    int16_t *vLumFilterPos= c->vLumFilterPos;
    int16_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *hLumFilterPos= c->hLumFilterPos;
    int16_t *hChrFilterPos= c->hChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int16_t *hLumFilter= c->hLumFilter;
    int16_t *hChrFilter= c->hChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int hLumFilterSize= c->hLumFilterSize;
    const int hChrFilterSize= c->hChrFilterSize;
    int16_t **lumPixBuf= c->lumPixBuf;
    int16_t **chrPixBuf= c->chrPixBuf;
    int16_t **alpPixBuf= c->alpPixBuf;
    const int vLumBufSize= c->vLumBufSize;
    const int vChrBufSize= c->vChrBufSize;
    uint8_t *formatConvBuffer= c->formatConvBuffer;
    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
    /* -((-x)>>s) is ceil(x / 2^s) for non-negative x */
    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    int lastDstY;
    uint32_t *pal=c->pal_yuv;

    /* vertical scaler state carried across slices */
    int dstY= c->dstY;
    int lumBufIndex= c->lumBufIndex;
    int chrBufIndex= c->chrBufIndex;
    int lastInLumBuf= c->lastInLumBuf;
    int lastInChrBuf= c->lastInChrBuf;

    /* packed input: only plane 0 is meaningful; alias the others onto it */
    if (isPacked(c->srcFormat)) {
        src[0]=
        src[1]=
        src[2]=
        src[3]= src[0];
        srcStride[0]=
        srcStride[1]=
        srcStride[2]=
        srcStride[3]= srcStride[0];
    }
    srcStride[1]<<= c->vChrDrop;
    srcStride[2]<<= c->vChrDrop;

    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
                  srcSliceY, srcSliceH, dstY, dstH);
    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);

    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
        static int warnedAlready=0;
        if (flags & SWS_PRINT_INFO && !warnedAlready) {
            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
                   " ->cannot do aligned memory accesses anymore\n");
            warnedAlready=1;
        }
    }

    /* first slice of a new frame: reset the ring-buffer state */
    if (srcSliceY ==0) {
        lumBufIndex=-1;
        chrBufIndex=-1;
        dstY=0;
        lastInLumBuf= -1;
        lastInChrBuf= -1;
    }

    lastDstY= dstY;

    for (;dstY < dstH; dstY++) {
        unsigned char *dest =dst[0]+dstStride[0]*dstY;
        const int chrDstY= dstY>>c->chrDstVSubSample;
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;

        /* source line ranges needed by the vertical filter for this row */
        const int firstLumSrcY= vLumFilterPos[dstY];
        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
        const int firstChrSrcY= vChrFilterPos[chrDstY];
        int lastLumSrcY= firstLumSrcY + vLumFilterSize -1;
        int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1;
        int lastChrSrcY= firstChrSrcY + vChrFilterSize -1;
        int enough_lines;

        /* skip over lines the filter no longer needs */
        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);

        DEBUG_BUFFERS("dstY: %d\n", dstY);
        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
                      firstLumSrcY, lastLumSrcY, lastInLumBuf);
        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
                      firstChrSrcY, lastChrSrcY, lastInChrBuf);

        /* does this slice contain all the input lines this row needs? */
        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);

        if (!enough_lines) {
            /* buffer whatever the slice does provide, then break below */
            lastLumSrcY = srcSliceY + srcSliceH - 1;
            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
                          lastLumSrcY, lastChrSrcY);
        }

        /* horizontally scale the needed luma (and alpha) lines into the
         * ring buffer */
        while(lastInLumBuf < lastLumSrcY) {
            const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
            lumBufIndex++;
            assert(lumBufIndex < 2*vLumBufSize);
            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
            assert(lastInLumBuf + 1 - srcSliceY >= 0);
            RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
                            hLumFilter, hLumFilterPos, hLumFilterSize,
                            formatConvBuffer,
                            pal, 0);
            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
                RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
                                hLumFilter, hLumFilterPos, hLumFilterSize,
                                formatConvBuffer,
                                pal, 1);
            lastInLumBuf++;
            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                          lumBufIndex, lastInLumBuf);
        }
        /* horizontally scale the needed chroma lines into the ring buffer */
        while(lastInChrBuf < lastChrSrcY) {
            const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
            const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
            chrBufIndex++;
            assert(chrBufIndex < 2*vChrBufSize);
            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);

            /* formats with no real chroma (e.g. gray) skip the conversion */
            if (c->needs_hcscale)
                RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
                                hChrFilter, hChrFilterPos, hChrFilterSize,
                                formatConvBuffer,
                                pal);
            lastInChrBuf++;
            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                          chrBufIndex, lastInChrBuf);
        }
        /* wrap the ring-buffer indices */
        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
        if (!enough_lines)
            break; /* we can't output a dstY line so let's try with the next slice */

#if COMPILE_TEMPLATE_MMX
        /* per-row dither tables used by the packed output writers */
        c->blueDither= ff_dither8[dstY&1];
        if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
            c->greenDither= ff_dither8[dstY&1];
        else
            c->greenDither= ff_dither4[dstY&1];
        c->redDither= ff_dither8[(dstY+1)&1];
#endif
        if (dstY < dstH-2) {
            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
#if COMPILE_TEMPLATE_MMX
            /* pack line pointers and coefficients into the layout the MMX
             * vertical scalers expect */
            int i;
            if (flags & SWS_ACCURATE_RND) {
                int s= APCK_SIZE / 8;
                for (i=0; i<vLumFilterSize; i+=2) {
                    *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
                    *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
                    lumMmxFilter[s*i+APCK_COEF/4 ]=
                    lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
                        + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
                        *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
                        *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
                        alpMmxFilter[s*i+APCK_COEF/4 ]=
                        alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
                    }
                }
                for (i=0; i<vChrFilterSize; i+=2) {
                    *(const void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
                    *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
                    chrMmxFilter[s*i+APCK_COEF/4 ]=
                    chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
                        + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
                }
            } else {
                for (i=0; i<vLumFilterSize; i++) {
                    /* pointer split into two 32-bit halves for the asm */
                    lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
                    lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
                    lumMmxFilter[4*i+2]=
                    lumMmxFilter[4*i+3]=
                        ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
                    if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
                        alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
                        alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
                        alpMmxFilter[4*i+2]=
                        alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                    }
                }
                for (i=0; i<vChrFilterSize; i++) {
                    chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
                    chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
                    chrMmxFilter[4*i+2]=
                    chrMmxFilter[4*i+3]=
                        ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
                }
            }
#endif
            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if (dstY&chrSkipMask) uDest= NULL; /* FIXME split functions in lumi / chromi */
                c->yuv2nv12X(c,
                             vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                             vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                             dest, uDest, dstW, chrDstW, dstFormat);
            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { /* YV12 like */
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; /* skip this chroma line */
                if (is16BPS(dstFormat)) {
                    yuv2yuvX16inC(
                        vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                        vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                        alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
                        dstFormat);
                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { /* unscaled YV12 */
                    const int16_t *lumBuf = lumSrcPtr[0];
                    const int16_t *chrBuf= chrSrcPtr[0];
                    const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
                    c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
                } else { /* general YV12 */
                    c->yuv2yuvX(c,
                                vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                                vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                                alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                }
            } else {
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
                if (vLumFilterSize == 1 && vChrFilterSize == 2) { /* unscaled RGB */
                    int chrAlpha= vChrFilter[2*dstY+1];
                    if(flags & SWS_FULL_CHR_H_INT) {
                        yuv2rgbXinC_full(c,
                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                                         alpSrcPtr, dest, dstW, dstY);
                    } else {
                        c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
                                       alpPixBuf ? *alpSrcPtr : NULL,
                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
                    }
                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { /* bilinear upscale RGB */
                    int lumAlpha= vLumFilter[2*dstY+1];
                    int chrAlpha= vChrFilter[2*dstY+1];
                    lumMmxFilter[2]=
                    lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
                    chrMmxFilter[2]=
                    chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
                    if(flags & SWS_FULL_CHR_H_INT) {
                        yuv2rgbXinC_full(c,
                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                                         alpSrcPtr, dest, dstW, dstY);
                    } else {
                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                       dest, dstW, lumAlpha, chrAlpha, dstY);
                    }
                } else { /* general RGB */
                    if(flags & SWS_FULL_CHR_H_INT) {
                        yuv2rgbXinC_full(c,
                                         vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                         vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                                         alpSrcPtr, dest, dstW, dstY);
                    } else {
                        c->yuv2packedX(c,
                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                       vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                                       alpSrcPtr, dest, dstW, dstY);
                    }
                }
            }
        } else { /* near-last lines: use the plain C writers */
            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
            const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if (dstY&chrSkipMask) uDest= NULL;
                yuv2nv12XinC(
                    vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                    dest, uDest, dstW, chrDstW, dstFormat);
            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { /* YV12 */
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL;
                if (is16BPS(dstFormat)) {
                    yuv2yuvX16inC(
                        vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                        vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                        alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
                        dstFormat);
                } else {
                    yuv2yuvXinC(
                        vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
                        vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                        alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
                }
            } else {
                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
                assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
                if(flags & SWS_FULL_CHR_H_INT) {
                    yuv2rgbXinC_full(c,
                                     vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                     vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                                     alpSrcPtr, dest, dstW, dstY);
                } else {
                    yuv2packedXinC(c,
                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                   vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
                                   alpSrcPtr, dest, dstW, dstY);
                }
            }
        }
    }

    /* destination wants an alpha plane but the source has none: fill opaque */
    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);

#if COMPILE_TEMPLATE_MMX
    /* drain non-temporal stores, then leave MMX state */
    if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory");
    /* on K6 femms is faster than emms. On K7/K8 it doesn't hurt either */
    if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory");
    else __asm__ volatile("emms" :::"memory");
#endif
    /* store changed local vars back in the context */
    c->dstY= dstY;
    c->lumBufIndex= lumBufIndex;
    c->chrBufIndex= chrBufIndex;
    c->lastInLumBuf= lastInLumBuf;
    c->lastInChrBuf= lastInChrBuf;

    return dstY - lastDstY;
}
02919
02920 static void RENAME(sws_init_swScale)(SwsContext *c)
02921 {
02922 enum PixelFormat srcFormat = c->srcFormat;
02923
02924 c->yuv2nv12X = RENAME(yuv2nv12X );
02925 c->yuv2yuv1 = RENAME(yuv2yuv1 );
02926 c->yuv2yuvX = RENAME(yuv2yuvX );
02927 c->yuv2packed1 = RENAME(yuv2packed1 );
02928 c->yuv2packed2 = RENAME(yuv2packed2 );
02929 c->yuv2packedX = RENAME(yuv2packedX );
02930
02931 c->hScale = RENAME(hScale );
02932
02933 #if COMPILE_TEMPLATE_MMX
02934
02935 if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
02936 #else
02937 if (c->flags & SWS_FAST_BILINEAR)
02938 #endif
02939 {
02940 c->hyscale_fast = RENAME(hyscale_fast);
02941 c->hcscale_fast = RENAME(hcscale_fast);
02942 }
02943
02944 c->chrToYV12 = NULL;
02945 switch(srcFormat) {
02946 case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break;
02947 case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
02948 case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
02949 case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
02950 case PIX_FMT_RGB8 :
02951 case PIX_FMT_BGR8 :
02952 case PIX_FMT_PAL8 :
02953 case PIX_FMT_BGR4_BYTE:
02954 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
02955 case PIX_FMT_YUV420P16BE:
02956 case PIX_FMT_YUV422P16BE:
02957 case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break;
02958 case PIX_FMT_YUV420P16LE:
02959 case PIX_FMT_YUV422P16LE:
02960 case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
02961 }
02962 if (c->chrSrcHSubSample) {
02963 switch(srcFormat) {
02964 case PIX_FMT_RGB48BE:
02965 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
02966 case PIX_FMT_RGB32 :
02967 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr32ToUV_half; break;
02968 case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
02969 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
02970 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
02971 case PIX_FMT_BGR32 :
02972 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb32ToUV_half; break;
02973 case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
02974 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
02975 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
02976 }
02977 } else {
02978 switch(srcFormat) {
02979 case PIX_FMT_RGB48BE:
02980 case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
02981 case PIX_FMT_RGB32 :
02982 case PIX_FMT_RGB32_1: c->chrToYV12 = bgr32ToUV; break;
02983 case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
02984 case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
02985 case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
02986 case PIX_FMT_BGR32 :
02987 case PIX_FMT_BGR32_1: c->chrToYV12 = rgb32ToUV; break;
02988 case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break;
02989 case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
02990 case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
02991 }
02992 }
02993
02994 c->lumToYV12 = NULL;
02995 c->alpToYV12 = NULL;
02996 switch (srcFormat) {
02997 case PIX_FMT_YUYV422 :
02998 case PIX_FMT_YUV420P16BE:
02999 case PIX_FMT_YUV422P16BE:
03000 case PIX_FMT_YUV444P16BE:
03001 case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break;
03002 case PIX_FMT_UYVY422 :
03003 case PIX_FMT_YUV420P16LE:
03004 case PIX_FMT_YUV422P16LE:
03005 case PIX_FMT_YUV444P16LE:
03006 case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break;
03007 case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
03008 case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break;
03009 case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break;
03010 case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
03011 case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break;
03012 case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break;
03013 case PIX_FMT_RGB8 :
03014 case PIX_FMT_BGR8 :
03015 case PIX_FMT_PAL8 :
03016 case PIX_FMT_BGR4_BYTE:
03017 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
03018 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
03019 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
03020 case PIX_FMT_RGB32 :
03021 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr32ToY; break;
03022 case PIX_FMT_BGR32 :
03023 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb32ToY; break;
03024 case PIX_FMT_RGB48BE:
03025 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
03026 }
03027 if (c->alpPixBuf) {
03028 switch (srcFormat) {
03029 case PIX_FMT_RGB32 :
03030 case PIX_FMT_RGB32_1:
03031 case PIX_FMT_BGR32 :
03032 case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
03033 }
03034 }
03035
03036 switch (srcFormat) {
03037 case PIX_FMT_RGB32 :
03038 case PIX_FMT_BGR32 :
03039 c->alpSrcOffset = 3;
03040 break;
03041 case PIX_FMT_RGB32_1:
03042 case PIX_FMT_BGR32_1:
03043 c->lumSrcOffset = ALT32_CORR;
03044 c->chrSrcOffset = ALT32_CORR;
03045 break;
03046 case PIX_FMT_RGB48LE:
03047 c->lumSrcOffset = 1;
03048 c->chrSrcOffset = 1;
03049 c->alpSrcOffset = 1;
03050 break;
03051 }
03052
03053 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
03054 if (c->srcRange) {
03055 c->lumConvertRange = RENAME(lumRangeFromJpeg);
03056 c->chrConvertRange = RENAME(chrRangeFromJpeg);
03057 } else {
03058 c->lumConvertRange = RENAME(lumRangeToJpeg);
03059 c->chrConvertRange = RENAME(chrRangeToJpeg);
03060 }
03061 }
03062
03063 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
03064 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
03065 c->needs_hcscale = 1;
03066 }