21 #include "../swscale_internal.h" 
   27 #define RET 0xC3 // near return opcode for x86 
   28 #define PREFETCH "prefetchnta" 
   60         "movq    (%%"FF_REG_d
", %%"FF_REG_a
"), %%mm3    \n\t" 
   61         "movd    (%%"FF_REG_c
", %%"FF_REG_S
"), %%mm0    \n\t" 
   62         "movd   1(%%"FF_REG_c
", %%"FF_REG_S
"), %%mm1    \n\t" 
   63         "punpcklbw                %%mm7, %%mm1          \n\t" 
   64         "punpcklbw                %%mm7, %%mm0          \n\t" 
   65         "pshufw                   $0xFF, %%mm1, %%mm1   \n\t" 
   67         "pshufw                   $0xFF, %%mm0, %%mm0   \n\t" 
   69         "psubw                    %%mm1, %%mm0          \n\t" 
   70         "movl   8(%%"FF_REG_b
", %%"FF_REG_a
"), %%esi    \n\t" 
   71         "pmullw                   %%mm3, %%mm0          \n\t" 
   72         "psllw                       $7, %%mm1          \n\t" 
   73         "paddw                    %%mm1, %%mm0          \n\t" 
   75         "movq                     %%mm0, (%%"FF_REG_D
", %%"FF_REG_a
") \n\t" 
   77         "add                         $8, %%"FF_REG_a
"   \n\t" 
   91         : 
"=r" (fragmentA), 
"=r" (imm8OfPShufW1A), 
"=r" (imm8OfPShufW2A),
 
   92           "=r" (fragmentLengthA)
 
   99         "movq    (%%"FF_REG_d
", %%"FF_REG_a
"), %%mm3    \n\t" 
  100         "movd    (%%"FF_REG_c
", %%"FF_REG_S
"), %%mm0    \n\t" 
  101         "punpcklbw                %%mm7, %%mm0          \n\t" 
  102         "pshufw                   $0xFF, %%mm0, %%mm1   \n\t" 
  104         "pshufw                   $0xFF, %%mm0, %%mm0   \n\t" 
  106         "psubw                    %%mm1, %%mm0          \n\t" 
  107         "movl   8(%%"FF_REG_b
", %%"FF_REG_a
"), %%esi    \n\t" 
  108         "pmullw                   %%mm3, %%mm0          \n\t" 
  109         "psllw                       $7, %%mm1          \n\t" 
  110         "paddw                    %%mm1, %%mm0          \n\t" 
  112         "movq                     %%mm0, (%%"FF_REG_D
", %%"FF_REG_a
") \n\t" 
  114         "add                         $8, %%"FF_REG_a
"   \n\t" 
  128         : 
"=r" (fragmentB), 
"=r" (imm8OfPShufW1B), 
"=r" (imm8OfPShufW2B),
 
  129           "=r" (fragmentLengthB)
 
  135     for (
i = 0; 
i < dstW / numSplits; 
i++) {
 
  140             int b                  = ((xpos + xInc) >> 16) - xx;
 
  141             int c                  = ((xpos + xInc * 2) >> 16) - xx;
 
  142             int d                  = ((xpos + xInc * 3) >> 16) - xx;
 
  143             int inc                = (
d + 1 < 4);
 
  144             uint8_t *
fragment      = inc ? fragmentB : fragmentA;
 
  145             x86_reg imm8OfPShufW1  = inc ? imm8OfPShufW1B : imm8OfPShufW1A;
 
  146             x86_reg imm8OfPShufW2  = inc ? imm8OfPShufW2B : imm8OfPShufW2A;
 
  147             x86_reg fragmentLength = inc ? fragmentLengthB : fragmentLengthA;
 
  148             int maxShift           = 3 - (
d + inc);
 
  152                 filter[
i]        = ((xpos              & 0xFFFF) ^ 0xFFFF) >> 9;
 
  153                 filter[
i + 1]    = (((xpos + xInc)     & 0xFFFF) ^ 0xFFFF) >> 9;
 
  154                 filter[
i + 2]    = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
 
  155                 filter[
i + 3]    = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
 
  156                 filterPos[
i / 2] = xx;
 
  158                 memcpy(filterCode + fragmentPos, 
fragment, fragmentLength);
 
  160                 filterCode[fragmentPos + imm8OfPShufW1] =  (
a + inc)       |
 
  164                 filterCode[fragmentPos + imm8OfPShufW2] =  
a | (
b << 2) |
 
  168                 if (
i + 4 - inc >= dstW)
 
  170                 else if ((filterPos[
i / 2] & 3) <= maxShift)
 
  171                     shift = filterPos[
i / 2] & 3;   
 
  174                     filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * 
shift;
 
  175                     filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * 
shift;
 
  176                     filterPos[
i / 2]                        -= 
shift;
 
  180             fragmentPos += fragmentLength;
 
  183                 filterCode[fragmentPos] = 
RET;
 
  188         filterPos[((
i / 2) + 1) & (~1)] = xpos >> 16;  
 
  190     return fragmentPos + 1;
 
  194                                  int dstWidth, 
const uint8_t *
src,
 
  197     int32_t *filterPos = 
c->hLumFilterPos;
 
  198     int16_t *
filter    = 
c->hLumFilter;
 
  199     void    *mmxextFilterCode = 
c->lumMmxextFilterCode;
 
  204 #if !HAVE_EBX_AVAILABLE 
  211         "mov               -8(%%rsp), %%"FF_REG_a
"    \n\t" 
  212         "mov            %%"FF_REG_a
", %5              \n\t"   
  214 #if !HAVE_EBX_AVAILABLE 
  215         "mov            %%"FF_REG_b
", %5              \n\t"   
  218         "pxor                  %%mm7, %%mm7           \n\t" 
  219         "mov                      %0, %%"FF_REG_c
"    \n\t" 
  220         "mov                      %1, %%"FF_REG_D
"    \n\t" 
  221         "mov                      %2, %%"FF_REG_d
"    \n\t" 
  222         "mov                      %3, %%"FF_REG_b
"    \n\t" 
  223         "xor            %%"FF_REG_a
", %%"FF_REG_a
"    \n\t"  
  229 #define CALL_MMXEXT_FILTER_CODE \ 
  230         "movl               (%%"FF_REG_b"), %%esi        \n\t"\ 
  232         "movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi        \n\t"\ 
  233         "add                  %%"FF_REG_S", %%"FF_REG_c" \n\t"\ 
  234         "add                  %%"FF_REG_a", %%"FF_REG_D" \n\t"\ 
  235         "xor                  %%"FF_REG_a", %%"FF_REG_a" \n\t"\ 
  238 #define CALL_MMXEXT_FILTER_CODE \ 
  239         "movl               (%%"FF_REG_b"), %%esi        \n\t"\ 
  241         "addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\ 
  242         "add                  %%"FF_REG_a", %%"FF_REG_D" \n\t"\ 
  243         "xor                  %%"FF_REG_a", %%"FF_REG_a" \n\t"\ 
  247         CALL_MMXEXT_FILTER_CODE
 
  248         CALL_MMXEXT_FILTER_CODE
 
  249         CALL_MMXEXT_FILTER_CODE
 
  250         CALL_MMXEXT_FILTER_CODE
 
  251         CALL_MMXEXT_FILTER_CODE
 
  252         CALL_MMXEXT_FILTER_CODE
 
  253         CALL_MMXEXT_FILTER_CODE
 
  254         CALL_MMXEXT_FILTER_CODE
 
  257         "mov                      %5, %%"FF_REG_a
" \n\t" 
  258         "mov            %%"FF_REG_a
", -8(%%rsp)    \n\t" 
  260 #if !HAVE_EBX_AVAILABLE 
  261         "mov                      %5, %%"FF_REG_b
" \n\t" 
  264         :: 
"m" (
src), 
"m" (dst), 
"m" (
filter), 
"m" (filterPos),
 
  265            "m" (mmxextFilterCode)
 
  269 #if !HAVE_EBX_AVAILABLE 
  273         : 
"%"FF_REG_a, 
"%"FF_REG_c, 
"%"FF_REG_d, 
"%"FF_REG_S, 
"%"FF_REG_D
 
  274 #
if ARCH_X86_64 || HAVE_EBX_AVAILABLE
 
  279     for (
i=dstWidth-1; (
i*xInc)>>16 >=srcW-1; 
i--)
 
  280         dst[
i] = 
src[srcW-1]*128;
 
  284                                  int dstWidth, 
const uint8_t *
src1,
 
  285                                  const uint8_t *
src2, 
int srcW, 
int xInc)
 
  287     int32_t *filterPos = 
c->hChrFilterPos;
 
  288     int16_t *
filter    = 
c->hChrFilter;
 
  289     void    *mmxextFilterCode = 
c->chrMmxextFilterCode;
 
  294 #if !HAVE_EBX_AVAILABLE 
  300         "mov          -8(%%rsp), %%"FF_REG_a
"    \n\t" 
  301         "mov       %%"FF_REG_a
", %7              \n\t"   
  303 #if !HAVE_EBX_AVAILABLE 
  304         "mov       %%"FF_REG_b
", %7              \n\t"   
  307         "pxor             %%mm7, %%mm7           \n\t" 
  308         "mov                 %0, %%"FF_REG_c
"    \n\t" 
  309         "mov                 %1, %%"FF_REG_D
"    \n\t" 
  310         "mov                 %2, %%"FF_REG_d
"    \n\t" 
  311         "mov                 %3, %%"FF_REG_b
"    \n\t" 
  312         "xor          %%"FF_REG_a
", %%"FF_REG_a
" \n\t"  
  317         CALL_MMXEXT_FILTER_CODE
 
  318         CALL_MMXEXT_FILTER_CODE
 
  319         CALL_MMXEXT_FILTER_CODE
 
  320         CALL_MMXEXT_FILTER_CODE
 
  321         "xor          %%"FF_REG_a
", %%"FF_REG_a
" \n\t"  
  322         "mov                    %5, %%"FF_REG_c
" \n\t"  
  323         "mov                    %6, %%"FF_REG_D
" \n\t"  
  328         CALL_MMXEXT_FILTER_CODE
 
  329         CALL_MMXEXT_FILTER_CODE
 
  330         CALL_MMXEXT_FILTER_CODE
 
  331         CALL_MMXEXT_FILTER_CODE
 
  334         "mov                    %7, %%"FF_REG_a
" \n\t" 
  335         "mov          %%"FF_REG_a
", -8(%%rsp)    \n\t" 
  337 #if !HAVE_EBX_AVAILABLE 
  338         "mov %7, %%"FF_REG_b
"    \n\t" 
  341         :: 
"m" (
src1), 
"m" (dst1), 
"m" (
filter), 
"m" (filterPos),
 
  342            "m" (mmxextFilterCode), 
"m" (
src2), 
"m"(dst2)
 
  346 #if !HAVE_EBX_AVAILABLE 
  350         : 
"%"FF_REG_a, 
"%"FF_REG_c, 
"%"FF_REG_d, 
"%"FF_REG_S, 
"%"FF_REG_D
 
  351 #
if ARCH_X86_64 || HAVE_EBX_AVAILABLE
 
  356     for (
i=dstWidth-1; (
i*xInc)>>16 >=srcW-1; 
i--) {
 
  357         dst1[
i] = 
src1[srcW-1]*128;
 
  358         dst2[
i] = 
src2[srcW-1]*128;
 
  361 #endif //HAVE_INLINE_ASM