00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00026 #include "libavutil/x86_cpu.h"
00027 /*
       * Assembly-text helper macros for unsigned per-byte MMX operations.
       * Each one expands to adjacent string literals that are pasted into an
       * __asm__ template. Operands are in AT&T order, so the result ends up in
       * the second register argument.
       * The #undefs come first so the macros can be redefined below;
       * NOTE(review): presumably this file is included more than once with
       * different HAVE_* settings (see RENAME()) -- confirm.
       */
00028 #undef REAL_PAVGB
00029 #undef PAVGB
00030 #undef PMINUB
00031 #undef PMAXUB
00032 // PAVGB(a,b): b = rounded per-byte average of a and b (pavgb, or pavgusb on 3DNow!).
      // REAL_PAVGB stays undefined when neither HAVE_MMX2 nor HAVE_AMD3DNOW is set.
00033 #if   HAVE_MMX2
00034 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00035 #elif HAVE_AMD3DNOW
00036 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00037 #endif
      // The extra macro level lets arguments such as REGa be macro-expanded
      // before REAL_PAVGB stringifies them with '#'.
00038 #define PAVGB(a,b)  REAL_PAVGB(a,b)
00039 // PMINUB(x,y,t): y = per-byte unsigned min(x,y). t is a scratch register that
      // is only used (and clobbered) by the plain-MMX fallback.
00040 #if   HAVE_MMX2
00041 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
00042 #elif HAVE_MMX
      // Fallback: t = max(a - b, 0) using saturating subtract, then a -= t.
      // The parameter names are swapped (b,a,t) so that the result still lands
      // in the second argument, as in the pminub variant.
00043 #define PMINUB(b,a,t) \
00044     "movq " #a ", " #t " \n\t"\
00045     "psubusb " #b ", " #t " \n\t"\
00046     "psubb " #t ", " #a " \n\t"
00047 #endif
00048 // PMAXUB(a,b): b = per-byte unsigned max(a,b).
00049 #if   HAVE_MMX2
00050 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
00051 #elif HAVE_MMX
      // Fallback: b = max(b - a, 0) using saturating subtract, then b += a.
00052 #define PMAXUB(a,b) \
00053     "psubusb " #a ", " #b " \n\t"\
00054     "paddb " #a ", " #b " \n\t"
00055 #endif
00056 
00057 
00058 #if HAVE_MMX
00059 /**
       * Classify the 8 rows (8 bytes each) that start 4 rows below src.
       *
       * For each of the 7 vertically adjacent row pairs, and for each of the
       * 8 byte columns, the test is (upper - lower + dcOffset) > dcThreshold,
       * using wrapping byte arithmetic and a signed byte compare. The number
       * of positions that pass is counted in numEq. The per-column minimum and
       * maximum over the 8 rows are tracked at the same time.
       *
       * @param src    pixel pointer; rows src+4*stride .. src+11*stride are read
       * @param stride distance in bytes between consecutive rows
       * @param c      context; reads mmxDcOffset[nonBQP], mmxDcThreshold[nonBQP],
       *               pQPb and ppMode.flatnessThreshold
       * @return 2 if numEq does not exceed ppMode.flatnessThreshold. Otherwise
       *         0 if some column has (max - min) greater than twice the
       *         corresponding byte of c->pQPb (saturating), and 1 if none does.
       */
00062 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
00063     int numEq= 0, dcOk;
00064     src+= stride*4; // skip 4 rows; the 8 rows examined below start here
      // Load the constants used by the second asm statement:
      // mm7 = mmxDcOffset[nonBQP], mm6 = mmxDcThreshold[nonBQP].
      // NOTE(review): this relies on mm6/mm7 surviving between two separate
      // asm statements, which the compiler is not told about.
00065     __asm__ volatile(
00066         "movq %0, %%mm7                         \n\t"
00067         "movq %1, %%mm6                         \n\t"
00068         : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
00069         );
00070 
00071     __asm__ volatile(
00072         "lea (%2, %3), %%"REG_a"                \n\t" // REG_a = src + stride (row 1)
00073         // Row addresses: row0=(%2) row1=(REG_a) row2=(REG_a,%3) row3=(REG_a,%3,2)
00074         // row4=(%2,%3,4); after REG_a is advanced by 4 rows below:
00075         // row5=(REG_a) row6=(REG_a,%3) row7=(REG_a,%3,2)
00076         "movq (%2), %%mm0                       \n\t" // row 0
00077         "movq (%%"REG_a"), %%mm1                \n\t" // row 1
00078         "movq %%mm0, %%mm3                      \n\t" // mm3 = running per-byte minimum
00079         "movq %%mm0, %%mm4                      \n\t" // mm4 = running per-byte maximum
00080         PMAXUB(%%mm1, %%mm4)
00081         PMINUB(%%mm1, %%mm3, %%mm5)
00082         "psubb %%mm1, %%mm0                     \n\t" // row0 - row1 (wrapping)
00083         "paddb %%mm7, %%mm0                     \n\t" // + dcOffset
00084         "pcmpgtb %%mm6, %%mm0                   \n\t" // 0xFF (-1) per byte where > dcThreshold; mm0 becomes the accumulator
00085 
00086         "movq (%%"REG_a",%3), %%mm2             \n\t" // row 2
00087         PMAXUB(%%mm2, %%mm4)
00088         PMINUB(%%mm2, %%mm3, %%mm5)
00089         "psubb %%mm2, %%mm1                     \n\t" // row1 - row2
00090         "paddb %%mm7, %%mm1                     \n\t"
00091         "pcmpgtb %%mm6, %%mm1                   \n\t"
00092         "paddb %%mm1, %%mm0                     \n\t" // accumulate the -1 results per column
00093 
00094         "movq (%%"REG_a", %3, 2), %%mm1         \n\t" // row 3
00095         PMAXUB(%%mm1, %%mm4)
00096         PMINUB(%%mm1, %%mm3, %%mm5)
00097         "psubb %%mm1, %%mm2                     \n\t" // row2 - row3
00098         "paddb %%mm7, %%mm2                     \n\t"
00099         "pcmpgtb %%mm6, %%mm2                   \n\t"
00100         "paddb %%mm2, %%mm0                     \n\t"
00101 
00102         "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t" // REG_a = src + 5*stride (row 5)
00103 
00104         "movq (%2, %3, 4), %%mm2                \n\t" // row 4
00105         PMAXUB(%%mm2, %%mm4)
00106         PMINUB(%%mm2, %%mm3, %%mm5)
00107         "psubb %%mm2, %%mm1                     \n\t" // row3 - row4
00108         "paddb %%mm7, %%mm1                     \n\t"
00109         "pcmpgtb %%mm6, %%mm1                   \n\t"
00110         "paddb %%mm1, %%mm0                     \n\t"
00111 
00112         "movq (%%"REG_a"), %%mm1                \n\t" // row 5
00113         PMAXUB(%%mm1, %%mm4)
00114         PMINUB(%%mm1, %%mm3, %%mm5)
00115         "psubb %%mm1, %%mm2                     \n\t" // row4 - row5
00116         "paddb %%mm7, %%mm2                     \n\t"
00117         "pcmpgtb %%mm6, %%mm2                   \n\t"
00118         "paddb %%mm2, %%mm0                     \n\t"
00119 
00120         "movq (%%"REG_a", %3), %%mm2            \n\t" // row 6
00121         PMAXUB(%%mm2, %%mm4)
00122         PMINUB(%%mm2, %%mm3, %%mm5)
00123         "psubb %%mm2, %%mm1                     \n\t" // row5 - row6
00124         "paddb %%mm7, %%mm1                     \n\t"
00125         "pcmpgtb %%mm6, %%mm1                   \n\t"
00126         "paddb %%mm1, %%mm0                     \n\t"
00127 
00128         "movq (%%"REG_a", %3, 2), %%mm1         \n\t" // row 7
00129         PMAXUB(%%mm1, %%mm4)
00130         PMINUB(%%mm1, %%mm3, %%mm5)
00131         "psubb %%mm1, %%mm2                     \n\t" // row6 - row7
00132         "paddb %%mm7, %%mm2                     \n\t"
00133         "pcmpgtb %%mm6, %%mm2                   \n\t"
00134         "paddb %%mm2, %%mm0                     \n\t"
00135         "psubusb %%mm3, %%mm4                   \n\t" // mm4 = per-column (max - min)
00136 
00137         "                                       \n\t"
      // Horizontal sum of the 8 bytes of mm0 (only the low byte of the result is used).
00138 #if HAVE_MMX2
00139         "pxor %%mm7, %%mm7                      \n\t"
00140         "psadbw %%mm7, %%mm0                    \n\t" // sum of |byte - 0| over all 8 bytes
00141 #else
00142         "movq %%mm0, %%mm1                      \n\t"
00143         "psrlw $8, %%mm0                        \n\t"
00144         "paddb %%mm1, %%mm0                     \n\t" // add neighbouring bytes
00145         "movq %%mm0, %%mm1                      \n\t"
00146         "psrlq $16, %%mm0                       \n\t"
00147         "paddb %%mm1, %%mm0                     \n\t" // add neighbouring byte pairs
00148         "movq %%mm0, %%mm1                      \n\t"
00149         "psrlq $32, %%mm0                       \n\t"
00150         "paddb %%mm1, %%mm0                     \n\t" // low byte = sum of all 8 bytes (mod 256)
00151 #endif
00152         "movq %4, %%mm7                         \n\t" // c->pQPb (presumably QP replicated per byte -- confirm)
00153         "paddusb %%mm7, %%mm7                   \n\t" // 2 * pQPb, saturating
00154         "psubusb %%mm7, %%mm4                   \n\t" // nonzero bytes where (max - min) > 2 * pQPb
00155         "packssdw %%mm4, %%mm4                  \n\t" // squeeze both dwords into the low 32 bits; nonzero stays nonzero
00156         "movd %%mm0, %0                         \n\t" // numEq: raw (negative) count
00157         "movd %%mm4, %1                         \n\t" // dcOk: nonzero if any column exceeded the limit
00158 
00159         : "=r" (numEq), "=r" (dcOk)
00160         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00161         : "%"REG_a
00162         );
00163 
      // Each hit added -1, so negate and keep the low byte to get the count.
00164     numEq= (-numEq) &0xFF;
00165     if(numEq > c->ppMode.flatnessThreshold){
      // Note the sense: a nonzero dcOk (range limit exceeded) returns 0.
00166         if(dcOk) return 0;
00167         else     return 1;
00168     }else{
00169         return 2;
00170     }
00171 }
00172 #endif //HAVE_MMX
00173 /**
       * Vertical low-pass filter, applied in place.
       *
       * Rows are counted from src + 3*stride. Rows 0..9 are read and rows 1..8
       * are rewritten. The MMX2/3DNow! path handles 8 byte columns; the C
       * fallback handles BLOCK_SIZE columns.
       *
       * At the top edge the filter is padded with "first", which is row 0 if
       * it is close to row 1 and row 1 otherwise. At the bottom edge it is
       * padded with "last", which is row 9 if it is close to row 8 and row 8
       * otherwise.
       * NOTE(review): the asm path tests |diff| <= pQPb per byte, while the C
       * path tests |diff| < c->QP. Confirm that this difference is intended.
       *
       * @param src    pixel pointer (the function itself advances it by 3 rows)
       * @param stride distance in bytes between consecutive rows
       * @param c      context; the asm path reads pQPb, the C path reads QP
       */
00178 #if !HAVE_ALTIVEC
00179 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
00180 {
00181 #if HAVE_MMX2 || HAVE_AMD3DNOW
00182     src+= stride*3;
      // NOTE(review): %0 (src) is declared as an input-only operand but is
      // changed by "add %1, %0" and restored by "sub %1, %0" at the end. No
      // "memory" clobber is listed although rows are stored -- confirm against
      // the compiler's extended-asm rules.
00183     __asm__ volatile(        
00184         "movq %2, %%mm0                         \n\t"  // mm0 = c->pQPb
00185         "pxor %%mm4, %%mm4                      \n\t" // mm4 = 0
00186 
00187         "movq (%0), %%mm6                       \n\t" // row 0
00188         "movq (%0, %1), %%mm5                   \n\t" // row 1
00189         "movq %%mm5, %%mm1                      \n\t"
00190         "movq %%mm6, %%mm2                      \n\t"
00191         "psubusb %%mm6, %%mm5                   \n\t"
00192         "psubusb %%mm1, %%mm2                   \n\t"
00193         "por %%mm5, %%mm2                       \n\t" // |row0 - row1|
00194         "psubusb %%mm0, %%mm2                   \n\t" // nonzero where the difference exceeds pQPb
00195         "pcmpeqb %%mm4, %%mm2                   \n\t" // mask: 0xFF where |row0 - row1| <= pQPb
00196 
00197         "pand %%mm2, %%mm6                      \n\t"
00198         "pandn %%mm1, %%mm2                     \n\t"
00199         "por %%mm2, %%mm6                       \n\t" // mm6 = "first": row0 where mask is set, else row1
00200 
00201         "movq (%0, %1, 8), %%mm5                \n\t" // row 8
00202         "lea (%0, %1, 4), %%"REG_a"             \n\t" // REG_a = row 4
00203         "lea (%0, %1, 8), %%"REG_c"             \n\t"
00204         "sub %1, %%"REG_c"                      \n\t" // REG_c = row 7
00205         "add %1, %0                             \n\t" // %0 now points at row 1
00206         "movq (%0, %1, 8), %%mm7                \n\t" // row 9
00207         "movq %%mm5, %%mm1                      \n\t"
00208         "movq %%mm7, %%mm2                      \n\t"
00209         "psubusb %%mm7, %%mm5                   \n\t"
00210         "psubusb %%mm1, %%mm2                   \n\t"
00211         "por %%mm5, %%mm2                       \n\t" // |row8 - row9|
00212         "psubusb %%mm0, %%mm2                   \n\t" 
00213         "pcmpeqb %%mm4, %%mm2                   \n\t" // mask: 0xFF where |row8 - row9| <= pQPb
00214 
00215         "pand %%mm2, %%mm7                      \n\t"
00216         "pandn %%mm1, %%mm2                     \n\t"
00217         "por %%mm2, %%mm7                       \n\t" // mm7 = "last": row9 where mask is set, else row8
00218 
00219 
00220         // From here on, with %0 pointing at row 1, the row addresses are:
00221         // row1=(%0) row2=(%0,%1) row3=(%0,%1,2) row4=(REG_a) row5=(%0,%1,4)
00222         // row6=(REG_a,%1,2) row7=(REG_c) row8=(REG_a,%1,4)
00223         // The filter is built from chained rounded byte averages (PAVGB).
00224         // NOTE(review): presumably this approximates the weights of the C fallback below; not verified here.
00225 
00226         "movq (%0, %1), %%mm0                   \n\t" 
00227         "movq %%mm0, %%mm1                      \n\t" 
00228         PAVGB(%%mm6, %%mm0)                           
00229         PAVGB(%%mm6, %%mm0)                           
00230 
00231         "movq (%0, %1, 4), %%mm2                \n\t" 
00232         "movq %%mm2, %%mm5                      \n\t" 
00233         PAVGB((%%REGa), %%mm2)                        
00234         PAVGB((%0, %1, 2), %%mm2)                     
00235         "movq %%mm2, %%mm3                      \n\t" 
00236         "movq (%0), %%mm4                       \n\t" 
00237         PAVGB(%%mm4, %%mm3)                           
00238         PAVGB(%%mm0, %%mm3)                           
00239         "movq %%mm3, (%0)                       \n\t" // store row 1
00240         
00241         "movq %%mm1, %%mm0                      \n\t" 
00242         PAVGB(%%mm6, %%mm0)                           
00243         "movq %%mm4, %%mm3                      \n\t" 
00244         PAVGB((%0,%1,2), %%mm3)                       
00245         PAVGB((%%REGa,%1,2), %%mm5)                   
00246         PAVGB((%%REGa), %%mm5)                        
00247         PAVGB(%%mm5, %%mm3)                           
00248         PAVGB(%%mm0, %%mm3)                           
00249         "movq %%mm3, (%0,%1)                    \n\t" // store row 2
00250         
00251         PAVGB(%%mm4, %%mm6)                                   
00252         "movq (%%"REG_c"), %%mm0                \n\t" 
00253         PAVGB((%%REGa, %1, 2), %%mm0)                 
00254         "movq %%mm0, %%mm3                      \n\t" 
00255         PAVGB(%%mm1, %%mm0)                           
00256         PAVGB(%%mm6, %%mm0)                           
00257         PAVGB(%%mm2, %%mm0)                           
00258         "movq (%0, %1, 2), %%mm2                \n\t" // keep the unfiltered row 3 before overwriting it
00259         "movq %%mm0, (%0, %1, 2)                \n\t" // store row 3
00260         
00261         "movq (%%"REG_a", %1, 4), %%mm0         \n\t" 
00262         PAVGB((%%REGc), %%mm0)                        
00263         PAVGB(%%mm0, %%mm6)                           
00264         PAVGB(%%mm1, %%mm4)                           
00265         PAVGB(%%mm2, %%mm1)                           
00266         PAVGB(%%mm1, %%mm6)                           
00267         PAVGB(%%mm5, %%mm6)                           
00268         "movq (%%"REG_a"), %%mm5                \n\t" // keep the unfiltered row 4 before overwriting it
00269         "movq %%mm6, (%%"REG_a")                \n\t" // store row 4
00270         
00271         "movq (%%"REG_a", %1, 4), %%mm6         \n\t" 
00272         PAVGB(%%mm7, %%mm6)                           
00273         PAVGB(%%mm4, %%mm6)                           
00274         PAVGB(%%mm3, %%mm6)                           
00275         PAVGB(%%mm5, %%mm2)                           
00276         "movq (%0, %1, 4), %%mm4                \n\t" // keep the unfiltered row 5 before overwriting it
00277         PAVGB(%%mm4, %%mm2)                           
00278         PAVGB(%%mm2, %%mm6)                           
00279         "movq %%mm6, (%0, %1, 4)                \n\t" // store row 5
00280         
00281         PAVGB(%%mm7, %%mm1)                           
00282         PAVGB(%%mm4, %%mm5)                           
00283         PAVGB(%%mm5, %%mm0)                           
00284         "movq (%%"REG_a", %1, 2), %%mm6         \n\t" // keep the unfiltered row 6 before overwriting it
00285         PAVGB(%%mm6, %%mm1)                           
00286         PAVGB(%%mm0, %%mm1)                           
00287         "movq %%mm1, (%%"REG_a", %1, 2)         \n\t" // store row 6
00288         
00289         PAVGB((%%REGc), %%mm2)                        
00290         "movq (%%"REG_a", %1, 4), %%mm0         \n\t" 
00291         PAVGB(%%mm0, %%mm6)                           
00292         PAVGB(%%mm7, %%mm6)                           
00293         PAVGB(%%mm2, %%mm6)                           
00294         "movq %%mm6, (%%"REG_c")                \n\t" // store row 7
00295         
00296         PAVGB(%%mm7, %%mm5)                           
00297         PAVGB(%%mm7, %%mm5)                           
00298 
00299         PAVGB(%%mm3, %%mm0)                           
00300         PAVGB(%%mm0, %%mm5)                           
00301         "movq %%mm5, (%%"REG_a", %1, 4)         \n\t" // store row 8
00302         "sub %1, %0                             \n\t" // restore %0 to its value on entry
00303 
00304         :
00305         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00306         : "%"REG_a, "%"REG_c
00307     );
00308 #else //HAVE_MMX2 || HAVE_AMD3DNOW
      // Byte offsets of rows 1..9 relative to src (after src is advanced below).
00309     const int l1= stride;
00310     const int l2= stride + l1;
00311     const int l3= stride + l2;
00312     const int l4= stride + l3;
00313     const int l5= stride + l4;
00314     const int l6= stride + l5;
00315     const int l7= stride + l6;
00316     const int l8= stride + l7;
00317     const int l9= stride + l8;
00318     int x;
00319     src+= stride*3;
      // One column per iteration.
00320     for(x=0; x<BLOCK_SIZE; x++){
      // Edge padding values; see the function comment.
00321         const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
00322         const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
00323 
      // sums[k] is the sum of 7 consecutive samples of the padded column
      // (first,first,first,first,l1..l8,last,last,last,last), plus 4 for
      // rounding. Each entry is derived from the previous one by dropping the
      // oldest sample and adding the next one.
00324         int sums[10];
00325         sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
00326         sums[1] = sums[0] - first  + src[l4];
00327         sums[2] = sums[1] - first  + src[l5];
00328         sums[3] = sums[2] - first  + src[l6];
00329         sums[4] = sums[3] - first  + src[l7];
00330         sums[5] = sums[4] - src[l1] + src[l8];
00331         sums[6] = sums[5] - src[l2] + last;
00332         sums[7] = sums[6] - src[l3] + last;
00333         sums[8] = sums[7] - src[l4] + last;
00334         sums[9] = sums[8] - src[l5] + last;
00335 
      // Output = (two window sums + 2 * centre sample) / 16. The total weight
      // is 7 + 7 + 2 = 16 and the two +4 biases give round-to-nearest. All
      // sums were computed before any row is overwritten.
00336         src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
00337         src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
00338         src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
00339         src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
00340         src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
00341         src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
00342         src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
00343         src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
00344 
00345         src++;
00346     }
00347 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
00348 }
00349 #endif //HAVE_ALTIVEC
00350 /**
       * Vertical edge-smoothing filter, applied in place.
       *
       * Rows are counted from src + 3*stride; the edge lies between rows 4
       * and 5. The step across the edge is reduced by how much the two
       * neighbouring gradients already account for:
       *   d = max(|row4 - row5| - (|row3 - row4| + |row5 - row6|) / 2, 0)
       * If d is small enough relative to 2*QP, rows 4/5 move toward each other
       * by about 3d/8, rows 3/6 by about d/4 and rows 2/7 by about d/8.
       * NOTE(review): the asm path tests d <= 2*pQPb per byte and subtracts
       * b01 from d before scaling, while the C path tests d < 2*QP. Confirm
       * that the small difference is intended.
       *
       * @param src    pixel pointer (the function itself advances it by 3 rows)
       * @param stride distance in bytes between consecutive rows
       * @param co     context; the asm path reads pQPb, the C path reads QP
       */
00358 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
00359 {
00360 #if HAVE_MMX2 || HAVE_AMD3DNOW
00361     src+= stride*3;
00362 
      // NOTE(review): rows are stored through the register operands but no
      // "memory" clobber is listed -- confirm against extended-asm rules.
00363     __asm__ volatile(
00364         "pxor %%mm7, %%mm7                      \n\t" // mm7 = 0
00365         "lea (%0, %1), %%"REG_a"                \n\t" // REG_a = row 1
00366         "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t" // REG_c = row 5
00367         // Row addresses: row2=(REG_a,%1) row3=(REG_a,%1,2) row4=(%0,%1,4)
00368         // row5=(REG_c) row6=(REG_c,%1) row7=(REG_c,%1,2)
00369         "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // row 3
00370         "movq (%0, %1, 4), %%mm1                \n\t" // row 4
00371         "movq %%mm1, %%mm2                      \n\t" // row 4
00372         "psubusb %%mm0, %%mm1                   \n\t"
00373         "psubusb %%mm2, %%mm0                   \n\t"
00374         "por %%mm1, %%mm0                       \n\t" // |row3 - row4|
00375         "movq (%%"REG_c"), %%mm3                \n\t" // row 5
00376         "movq (%%"REG_c", %1), %%mm4            \n\t" // row 6
00377         "movq %%mm3, %%mm5                      \n\t" // row 5
00378         "psubusb %%mm4, %%mm3                   \n\t"
00379         "psubusb %%mm5, %%mm4                   \n\t"
00380         "por %%mm4, %%mm3                       \n\t" // |row5 - row6|
00381         PAVGB(%%mm3, %%mm0)                           // rounded average of the two gradients
00382         "movq %%mm2, %%mm1                      \n\t" // row 4
00383         "psubusb %%mm5, %%mm2                   \n\t"
00384         "movq %%mm2, %%mm4                      \n\t"
00385         "pcmpeqb %%mm7, %%mm2                   \n\t" // mm2 = direction mask: 0xFF where row4 <= row5
00386         "psubusb %%mm1, %%mm5                   \n\t"
00387         "por %%mm5, %%mm4                       \n\t" // |row4 - row5|
00388         "psubusb %%mm0, %%mm4                   \n\t" // d, clamped at 0 by the saturating subtract
00389         "movq %%mm4, %%mm3                      \n\t" // d
00390         "movq %2, %%mm0                         \n\t" // co->pQPb
00391         "paddusb %%mm0, %%mm0                   \n\t" // 2 * pQPb, saturating
00392         "psubusb %%mm0, %%mm4                   \n\t"
00393         "pcmpeqb %%mm7, %%mm4                   \n\t" // enable mask: 0xFF where d <= 2 * pQPb
00394         "psubusb "MANGLE(b01)", %%mm3           \n\t" // d minus the b01 constant (presumably 0x01 per byte -- confirm)
00395         "pand %%mm4, %%mm3                      \n\t" // zero the correction where the mask is clear
00396 
00397         PAVGB(%%mm7, %%mm3)                           // ~d/2 (average with 0)
00398         "movq %%mm3, %%mm1                      \n\t" // ~d/2
00399         PAVGB(%%mm7, %%mm3)                           // ~d/4
00400         PAVGB(%%mm1, %%mm3)                           // ~3d/8 (average of d/4 and d/2)
00401 
      // XOR with the direction mask complements the byte, so the saturating
      // subtract/add acts in the opposite direction where row4 <= row5.
00402         "movq (%0, %1, 4), %%mm0                \n\t" // row 4
00403         "pxor %%mm2, %%mm0                      \n\t" 
00404         "psubusb %%mm3, %%mm0                   \n\t"
00405         "pxor %%mm2, %%mm0                      \n\t"
00406         "movq %%mm0, (%0, %1, 4)                \n\t" // row 4 moved toward row 5 by ~3d/8
00407 
00408         "movq (%%"REG_c"), %%mm0                \n\t" // row 5
00409         "pxor %%mm2, %%mm0                      \n\t" 
00410         "paddusb %%mm3, %%mm0                   \n\t"
00411         "pxor %%mm2, %%mm0                      \n\t"
00412         "movq %%mm0, (%%"REG_c")                \n\t" // row 5 moved toward row 4 by ~3d/8
00413 
00414         PAVGB(%%mm7, %%mm1)                           // ~d/4
00415 
00416         "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // row 3
00417         "pxor %%mm2, %%mm0                      \n\t" 
00418         "psubusb %%mm1, %%mm0                   \n\t"
00419         "pxor %%mm2, %%mm0                      \n\t"
00420         "movq %%mm0, (%%"REG_a", %1, 2)         \n\t" // row 3 adjusted by ~d/4
00421 
00422         "movq (%%"REG_c", %1), %%mm0            \n\t" // row 6
00423         "pxor %%mm2, %%mm0                      \n\t" 
00424         "paddusb %%mm1, %%mm0                   \n\t"
00425         "pxor %%mm2, %%mm0                      \n\t"
00426         "movq %%mm0, (%%"REG_c", %1)            \n\t" // row 6 adjusted by ~d/4
00427 
00428         PAVGB(%%mm7, %%mm1)                           // ~d/8
00429 
00430         "movq (%%"REG_a", %1), %%mm0            \n\t" // row 2
00431         "pxor %%mm2, %%mm0                      \n\t" 
00432         "psubusb %%mm1, %%mm0                   \n\t"
00433         "pxor %%mm2, %%mm0                      \n\t"
00434         "movq %%mm0, (%%"REG_a", %1)            \n\t" // row 2 adjusted by ~d/8
00435 
00436         "movq (%%"REG_c", %1, 2), %%mm0         \n\t" // row 7
00437         "pxor %%mm2, %%mm0                      \n\t" 
00438         "paddusb %%mm1, %%mm0                   \n\t"
00439         "pxor %%mm2, %%mm0                      \n\t"
00440         "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // row 7 adjusted by ~d/8
00441 
00442         :
00443         : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
00444         : "%"REG_a, "%"REG_c
00445     );
00446 #else //HAVE_MMX2 || HAVE_AMD3DNOW
00447 
      // Byte offsets of rows 1..7 relative to src (after src is advanced below).
      // l1 is only used to derive the others.
00448     const int l1= stride;
00449     const int l2= stride + l1;
00450     const int l3= stride + l2;
00451     const int l4= stride + l3;
00452     const int l5= stride + l4;
00453     const int l6= stride + l5;
00454     const int l7= stride + l6;
00455 
00456 
00457     int x;
00458 
00459     src+= stride*3;
      // One column per iteration.
00460     for(x=0; x<BLOCK_SIZE; x++){
      // Vertical gradients above the edge (a), across it (b) and below it (c).
00461         int a= src[l3] - src[l4];
00462         int b= src[l4] - src[l5];
00463         int c= src[l5] - src[l6];
00464 
      // Part of the edge step not explained by the neighbouring gradients, never negative.
00465         int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
00466         d= FFMAX(d, 0);
00467 
00468         if(d < co->QP*2){
      // Signed correction pointing against the step b (FFSIGN presumably yields +/-1 -- confirm).
00469             int v = d * FFSIGN(-b);
00470 
      // Largest correction next to the edge, tapering off away from it.
00471             src[l2] +=v>>3;
00472             src[l3] +=v>>2;
00473             src[l4] +=(3*v)>>3;
00474             src[l5] -=(3*v)>>3;
00475             src[l6] -=v>>2;
00476             src[l7] -=v>>3;
00477         }
00478         src++;
00479     }
00480 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
00481 }
00482 
00483 #if !HAVE_ALTIVEC
00484 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
00485 {
00486 #if HAVE_MMX2 || HAVE_AMD3DNOW
00487 
00488 
00489 
00490 
00491 
00492 
00493 
00494 
00495 
00496 
00497 
00498 
00499 
00500 
00501     src+= stride*4;
00502     __asm__ volatile(
00503 
00504 #if 0 //slightly more accurate and slightly slower
00505         "pxor %%mm7, %%mm7                      \n\t" 
00506         "lea (%0, %1), %%"REG_a"                \n\t"
00507         "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
00508 
00509 
00510 
00511 
00512 
00513         "movq (%0, %1, 2), %%mm0                \n\t" 
00514         "movq (%0), %%mm1                       \n\t" 
00515         "movq %%mm0, %%mm2                      \n\t" 
00516         PAVGB(%%mm7, %%mm0)                           
00517         PAVGB(%%mm1, %%mm0)                           
00518         PAVGB(%%mm2, %%mm0)                           
00519 
00520         "movq (%%"REG_a"), %%mm1                \n\t" 
00521         "movq (%%"REG_a", %1, 2), %%mm3         \n\t" 
00522         "movq %%mm1, %%mm4                      \n\t" 
00523         PAVGB(%%mm7, %%mm1)                           
00524         PAVGB(%%mm3, %%mm1)                           
00525         PAVGB(%%mm4, %%mm1)                           
00526 
00527         "movq %%mm0, %%mm4                      \n\t" 
00528         "psubusb %%mm1, %%mm0                   \n\t"
00529         "psubusb %%mm4, %%mm1                   \n\t"
00530         "por %%mm0, %%mm1                       \n\t" 
00531 
00532 
00533         "movq (%0, %1, 4), %%mm0                \n\t" 
00534         "movq %%mm0, %%mm4                      \n\t" 
00535         PAVGB(%%mm7, %%mm0)                           
00536         PAVGB(%%mm2, %%mm0)                           
00537         PAVGB(%%mm4, %%mm0)                           
00538 
00539         "movq (%%"REG_c"), %%mm2                \n\t" 
00540         "movq %%mm3, %%mm5                      \n\t" 
00541         PAVGB(%%mm7, %%mm3)                           
00542         PAVGB(%%mm2, %%mm3)                           
00543         PAVGB(%%mm5, %%mm3)                           
00544 
00545         "movq %%mm0, %%mm6                      \n\t" 
00546         "psubusb %%mm3, %%mm0                   \n\t"
00547         "psubusb %%mm6, %%mm3                   \n\t"
00548         "por %%mm0, %%mm3                       \n\t" 
00549         "pcmpeqb %%mm7, %%mm0                   \n\t" 
00550 
00551 
00552         "movq (%%"REG_c", %1), %%mm6            \n\t" 
00553         "movq %%mm6, %%mm5                      \n\t" 
00554         PAVGB(%%mm7, %%mm6)                           
00555         PAVGB(%%mm4, %%mm6)                           
00556         PAVGB(%%mm5, %%mm6)                           
00557 
00558         "movq (%%"REG_c", %1, 2), %%mm5         \n\t" 
00559         "movq %%mm2, %%mm4                      \n\t" 
00560         PAVGB(%%mm7, %%mm2)                           
00561         PAVGB(%%mm5, %%mm2)                           
00562         PAVGB(%%mm4, %%mm2)                           
00563 
00564         "movq %%mm6, %%mm4                      \n\t" 
00565         "psubusb %%mm2, %%mm6                   \n\t"
00566         "psubusb %%mm4, %%mm2                   \n\t"
00567         "por %%mm6, %%mm2                       \n\t" 
00568 
00569 
00570 
00571         PMINUB(%%mm2, %%mm1, %%mm4)                   
00572         "movq %2, %%mm4                         \n\t" 
00573         "paddusb "MANGLE(b01)", %%mm4           \n\t"
00574         "pcmpgtb %%mm3, %%mm4                   \n\t" 
00575         "psubusb %%mm1, %%mm3                   \n\t" 
00576         "pand %%mm4, %%mm3                      \n\t"
00577 
00578         "movq %%mm3, %%mm1                      \n\t"
00579 
00580         PAVGB(%%mm7, %%mm3)
00581         PAVGB(%%mm7, %%mm3)
00582         "paddusb %%mm1, %%mm3                   \n\t"
00583 
00584 
00585         "movq (%%"REG_a", %1, 2), %%mm6         \n\t" 
00586         "movq (%0, %1, 4), %%mm5                \n\t" 
00587         "movq (%0, %1, 4), %%mm4                \n\t" 
00588         "psubusb %%mm6, %%mm5                   \n\t"
00589         "psubusb %%mm4, %%mm6                   \n\t"
00590         "por %%mm6, %%mm5                       \n\t" 
00591         "pcmpeqb %%mm7, %%mm6                   \n\t" 
00592         "pxor %%mm6, %%mm0                      \n\t"
00593         "pand %%mm0, %%mm3                      \n\t"
00594         PMINUB(%%mm5, %%mm3, %%mm0)
00595 
00596         "psubusb "MANGLE(b01)", %%mm3           \n\t"
00597         PAVGB(%%mm7, %%mm3)
00598 
00599         "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
00600         "movq (%0, %1, 4), %%mm2                \n\t"
00601         "pxor %%mm6, %%mm0                      \n\t"
00602         "pxor %%mm6, %%mm2                      \n\t"
00603         "psubb %%mm3, %%mm0                     \n\t"
00604         "paddb %%mm3, %%mm2                     \n\t"
00605         "pxor %%mm6, %%mm0                      \n\t"
00606         "pxor %%mm6, %%mm2                      \n\t"
00607         "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
00608         "movq %%mm2, (%0, %1, 4)                \n\t"
00609 #endif //0
00610 
00611         "lea (%0, %1), %%"REG_a"                \n\t"
00612         "pcmpeqb %%mm6, %%mm6                   \n\t" 
00613 
00614 
00615 
00616 
00617 
00618         "movq (%%"REG_a", %1, 2), %%mm1         \n\t" 
00619         "movq (%0, %1, 4), %%mm0                \n\t" 
00620         "pxor %%mm6, %%mm1                      \n\t" 
00621         PAVGB(%%mm1, %%mm0)                           
00622 
00623 
00624         "movq (%%"REG_a", %1, 4), %%mm2         \n\t" 
00625         "movq (%%"REG_a", %1), %%mm3            \n\t" 
00626         "pxor %%mm6, %%mm2                      \n\t" 
00627         "movq %%mm2, %%mm5                      \n\t" 
00628         "movq "MANGLE(b80)", %%mm4              \n\t" 
00629         "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
00630         PAVGB(%%mm3, %%mm2)                           
00631         PAVGB(%%mm0, %%mm4)                           
00632         PAVGB(%%mm2, %%mm4)                           
00633         PAVGB(%%mm0, %%mm4)                           
00634 
00635 
00636         "movq (%%"REG_a"), %%mm2                \n\t" 
00637         "pxor %%mm6, %%mm2                      \n\t" 
00638         PAVGB(%%mm3, %%mm2)                           
00639         PAVGB((%0), %%mm1)                            
00640         "movq "MANGLE(b80)", %%mm3              \n\t" 
00641         PAVGB(%%mm2, %%mm3)                           
00642         PAVGB(%%mm1, %%mm3)                           
00643         PAVGB(%%mm2, %%mm3)                           
00644 
00645 
00646         PAVGB((%%REGc, %1), %%mm5)                    
00647         "movq (%%"REG_c", %1, 2), %%mm1         \n\t" 
00648         "pxor %%mm6, %%mm1                      \n\t" 
00649         PAVGB((%0, %1, 4), %%mm1)                     
00650         "movq "MANGLE(b80)", %%mm2              \n\t" 
00651         PAVGB(%%mm5, %%mm2)                           
00652         PAVGB(%%mm1, %%mm2)                           
00653         PAVGB(%%mm5, %%mm2)                           
00654 
00655 
00656         "movq "MANGLE(b00)", %%mm1              \n\t" 
00657         "movq "MANGLE(b00)", %%mm5              \n\t" 
00658         "psubb %%mm2, %%mm1                     \n\t" 
00659         "psubb %%mm3, %%mm5                     \n\t" 
00660         PMAXUB(%%mm1, %%mm2)                          
00661         PMAXUB(%%mm5, %%mm3)                          
00662         PMINUB(%%mm2, %%mm3, %%mm1)                   
00663 
00664 
00665 
00666         "movq "MANGLE(b00)", %%mm7              \n\t" 
00667         "movq %2, %%mm2                         \n\t" 
00668         PAVGB(%%mm6, %%mm2)                           
00669         "psubb %%mm6, %%mm2                     \n\t"
00670 
00671         "movq %%mm4, %%mm1                      \n\t"
00672         "pcmpgtb %%mm7, %%mm1                   \n\t" 
00673         "pxor %%mm1, %%mm4                      \n\t"
00674         "psubb %%mm1, %%mm4                     \n\t" 
00675         "pcmpgtb %%mm4, %%mm2                   \n\t" 
00676         "psubusb %%mm3, %%mm4                   \n\t" 
00677 
00678 
00679         "movq %%mm4, %%mm3                      \n\t" 
00680         "psubusb "MANGLE(b01)", %%mm4           \n\t"
00681         PAVGB(%%mm7, %%mm4)                           
00682         PAVGB(%%mm7, %%mm4)                           
00683         "paddb %%mm3, %%mm4                     \n\t" 
00684         "pand %%mm2, %%mm4                      \n\t"
00685 
00686         "movq "MANGLE(b80)", %%mm5              \n\t" 
00687         "psubb %%mm0, %%mm5                     \n\t" 
00688         "paddsb %%mm6, %%mm5                    \n\t" 
00689         "pcmpgtb %%mm5, %%mm7                   \n\t" 
00690         "pxor %%mm7, %%mm5                      \n\t"
00691 
00692         PMINUB(%%mm5, %%mm4, %%mm3)                   
00693         "pxor %%mm1, %%mm7                      \n\t" 
00694 
00695         "pand %%mm7, %%mm4                      \n\t"
00696         "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
00697         "movq (%0, %1, 4), %%mm2                \n\t"
00698         "pxor %%mm1, %%mm0                      \n\t"
00699         "pxor %%mm1, %%mm2                      \n\t"
00700         "paddb %%mm4, %%mm0                     \n\t"
00701         "psubb %%mm4, %%mm2                     \n\t"
00702         "pxor %%mm1, %%mm0                      \n\t"
00703         "pxor %%mm1, %%mm2                      \n\t"
00704         "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
00705         "movq %%mm2, (%0, %1, 4)                \n\t"
00706 
00707         :
00708         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00709         : "%"REG_a, "%"REG_c
00710     );
00711 
00712 
00713 
00714 
00715 
00716 
00717 
00718 
00719 
00720 
00721 
00722 
00723 
00724 
00725 
00726 
00727 
00728 
00729 
00730 
00731 
00732 
00733 
00734 
00735 
00736 
00737 
00738 
00739 
00740 
00741 
00742 
00743 
00744 
00745 
00746 
00747 
00748 
00749 
00750 
00751 
00752 
00753 
00754 
00755 
00756 
00757 
00758 
00759 
00760 
00761 
00762 
00763 
00764 
00765 
00766 
00767 #elif HAVE_MMX
00768     DECLARE_ALIGNED(8, uint64_t, tmp)[4]; 
00769     src+= stride*4;
00770     __asm__ volatile(
00771         "pxor %%mm7, %%mm7                      \n\t"
00772 
00773 
00774 
00775 
00776         "movq (%0), %%mm0                       \n\t"
00777         "movq %%mm0, %%mm1                      \n\t"
00778         "punpcklbw %%mm7, %%mm0                 \n\t" 
00779         "punpckhbw %%mm7, %%mm1                 \n\t" 
00780 
00781         "movq (%0, %1), %%mm2                   \n\t"
00782         "lea (%0, %1, 2), %%"REG_a"             \n\t"
00783         "movq %%mm2, %%mm3                      \n\t"
00784         "punpcklbw %%mm7, %%mm2                 \n\t" 
00785         "punpckhbw %%mm7, %%mm3                 \n\t" 
00786 
00787         "movq (%%"REG_a"), %%mm4                \n\t"
00788         "movq %%mm4, %%mm5                      \n\t"
00789         "punpcklbw %%mm7, %%mm4                 \n\t" 
00790         "punpckhbw %%mm7, %%mm5                 \n\t" 
00791 
00792         "paddw %%mm0, %%mm0                     \n\t" 
00793         "paddw %%mm1, %%mm1                     \n\t" 
00794         "psubw %%mm4, %%mm2                     \n\t" 
00795         "psubw %%mm5, %%mm3                     \n\t" 
00796         "psubw %%mm2, %%mm0                     \n\t" 
00797         "psubw %%mm3, %%mm1                     \n\t" 
00798 
00799         "psllw $2, %%mm2                        \n\t" 
00800         "psllw $2, %%mm3                        \n\t" 
00801         "psubw %%mm2, %%mm0                     \n\t" 
00802         "psubw %%mm3, %%mm1                     \n\t" 
00803 
00804         "movq (%%"REG_a", %1), %%mm2            \n\t"
00805         "movq %%mm2, %%mm3                      \n\t"
00806         "punpcklbw %%mm7, %%mm2                 \n\t" 
00807         "punpckhbw %%mm7, %%mm3                 \n\t" 
00808 
00809         "psubw %%mm2, %%mm0                     \n\t" 
00810         "psubw %%mm3, %%mm1                     \n\t" 
00811         "psubw %%mm2, %%mm0                     \n\t" 
00812         "psubw %%mm3, %%mm1                     \n\t" 
00813         "movq %%mm0, (%3)                       \n\t" 
00814         "movq %%mm1, 8(%3)                      \n\t" 
00815 
00816         "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
00817         "movq %%mm0, %%mm1                      \n\t"
00818         "punpcklbw %%mm7, %%mm0                 \n\t" 
00819         "punpckhbw %%mm7, %%mm1                 \n\t" 
00820 
00821         "psubw %%mm0, %%mm2                     \n\t" 
00822         "psubw %%mm1, %%mm3                     \n\t" 
00823         "movq %%mm2, 16(%3)                     \n\t" 
00824         "movq %%mm3, 24(%3)                     \n\t" 
00825         "paddw %%mm4, %%mm4                     \n\t" 
00826         "paddw %%mm5, %%mm5                     \n\t" 
00827         "psubw %%mm2, %%mm4                     \n\t" 
00828         "psubw %%mm3, %%mm5                     \n\t" 
00829 
00830         "lea (%%"REG_a", %1), %0                \n\t"
00831         "psllw $2, %%mm2                        \n\t" 
00832         "psllw $2, %%mm3                        \n\t" 
00833         "psubw %%mm2, %%mm4                     \n\t" 
00834         "psubw %%mm3, %%mm5                     \n\t" 
00835 
00836         "movq (%0, %1, 2), %%mm2                \n\t"
00837         "movq %%mm2, %%mm3                      \n\t"
00838         "punpcklbw %%mm7, %%mm2                 \n\t" 
00839         "punpckhbw %%mm7, %%mm3                 \n\t" 
00840         "psubw %%mm2, %%mm4                     \n\t" 
00841         "psubw %%mm3, %%mm5                     \n\t" 
00842         "psubw %%mm2, %%mm4                     \n\t" 
00843         "psubw %%mm3, %%mm5                     \n\t" 
00844 
00845         "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
00846         "punpcklbw %%mm7, %%mm6                 \n\t" 
00847         "psubw %%mm6, %%mm2                     \n\t" 
00848         "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
00849         "punpckhbw %%mm7, %%mm6                 \n\t" 
00850         "psubw %%mm6, %%mm3                     \n\t" 
00851 
00852         "paddw %%mm0, %%mm0                     \n\t" 
00853         "paddw %%mm1, %%mm1                     \n\t" 
00854         "psubw %%mm2, %%mm0                     \n\t" 
00855         "psubw %%mm3, %%mm1                     \n\t" 
00856 
00857         "psllw $2, %%mm2                        \n\t" 
00858         "psllw $2, %%mm3                        \n\t" 
00859         "psubw %%mm2, %%mm0                     \n\t" 
00860         "psubw %%mm3, %%mm1                     \n\t" 
00861 
00862         "movq (%0, %1, 4), %%mm2                \n\t"
00863         "movq %%mm2, %%mm3                      \n\t"
00864         "punpcklbw %%mm7, %%mm2                 \n\t" 
00865         "punpckhbw %%mm7, %%mm3                 \n\t" 
00866 
00867         "paddw %%mm2, %%mm2                     \n\t" 
00868         "paddw %%mm3, %%mm3                     \n\t" 
00869         "psubw %%mm2, %%mm0                     \n\t" 
00870         "psubw %%mm3, %%mm1                     \n\t" 
00871 
00872         "movq (%3), %%mm2                       \n\t" 
00873         "movq 8(%3), %%mm3                      \n\t" 
00874 
00875 #if HAVE_MMX2
00876         "movq %%mm7, %%mm6                      \n\t" 
00877         "psubw %%mm0, %%mm6                     \n\t"
00878         "pmaxsw %%mm6, %%mm0                    \n\t" 
00879         "movq %%mm7, %%mm6                      \n\t" 
00880         "psubw %%mm1, %%mm6                     \n\t"
00881         "pmaxsw %%mm6, %%mm1                    \n\t" 
00882         "movq %%mm7, %%mm6                      \n\t" 
00883         "psubw %%mm2, %%mm6                     \n\t"
00884         "pmaxsw %%mm6, %%mm2                    \n\t" 
00885         "movq %%mm7, %%mm6                      \n\t" 
00886         "psubw %%mm3, %%mm6                     \n\t"
00887         "pmaxsw %%mm6, %%mm3                    \n\t" 
00888 #else
00889         "movq %%mm7, %%mm6                      \n\t" 
00890         "pcmpgtw %%mm0, %%mm6                   \n\t"
00891         "pxor %%mm6, %%mm0                      \n\t"
00892         "psubw %%mm6, %%mm0                     \n\t" 
00893         "movq %%mm7, %%mm6                      \n\t" 
00894         "pcmpgtw %%mm1, %%mm6                   \n\t"
00895         "pxor %%mm6, %%mm1                      \n\t"
00896         "psubw %%mm6, %%mm1                     \n\t" 
00897         "movq %%mm7, %%mm6                      \n\t" 
00898         "pcmpgtw %%mm2, %%mm6                   \n\t"
00899         "pxor %%mm6, %%mm2                      \n\t"
00900         "psubw %%mm6, %%mm2                     \n\t" 
00901         "movq %%mm7, %%mm6                      \n\t" 
00902         "pcmpgtw %%mm3, %%mm6                   \n\t"
00903         "pxor %%mm6, %%mm3                      \n\t"
00904         "psubw %%mm6, %%mm3                     \n\t" 
00905 #endif
00906 
00907 #if HAVE_MMX2
00908         "pminsw %%mm2, %%mm0                    \n\t"
00909         "pminsw %%mm3, %%mm1                    \n\t"
00910 #else
00911         "movq %%mm0, %%mm6                      \n\t"
00912         "psubusw %%mm2, %%mm6                   \n\t"
00913         "psubw %%mm6, %%mm0                     \n\t"
00914         "movq %%mm1, %%mm6                      \n\t"
00915         "psubusw %%mm3, %%mm6                   \n\t"
00916         "psubw %%mm6, %%mm1                     \n\t"
00917 #endif
00918 
00919         "movd %2, %%mm2                         \n\t" 
00920         "punpcklbw %%mm7, %%mm2                 \n\t"
00921 
00922         "movq %%mm7, %%mm6                      \n\t" 
00923         "pcmpgtw %%mm4, %%mm6                   \n\t" 
00924         "pxor %%mm6, %%mm4                      \n\t"
00925         "psubw %%mm6, %%mm4                     \n\t" 
00926         "pcmpgtw %%mm5, %%mm7                   \n\t" 
00927         "pxor %%mm7, %%mm5                      \n\t"
00928         "psubw %%mm7, %%mm5                     \n\t" 
00929 
00930         "psllw $3, %%mm2                        \n\t" 
00931         "movq %%mm2, %%mm3                      \n\t" 
00932         "pcmpgtw %%mm4, %%mm2                   \n\t"
00933         "pcmpgtw %%mm5, %%mm3                   \n\t"
00934         "pand %%mm2, %%mm4                      \n\t"
00935         "pand %%mm3, %%mm5                      \n\t"
00936 
00937 
00938         "psubusw %%mm0, %%mm4                   \n\t" 
00939         "psubusw %%mm1, %%mm5                   \n\t" 
00940 
00941 
00942         "movq "MANGLE(w05)", %%mm2              \n\t" 
00943         "pmullw %%mm2, %%mm4                    \n\t"
00944         "pmullw %%mm2, %%mm5                    \n\t"
00945         "movq "MANGLE(w20)", %%mm2              \n\t" 
00946         "paddw %%mm2, %%mm4                     \n\t"
00947         "paddw %%mm2, %%mm5                     \n\t"
00948         "psrlw $6, %%mm4                        \n\t"
00949         "psrlw $6, %%mm5                        \n\t"
00950 
00951         "movq 16(%3), %%mm0                     \n\t" 
00952         "movq 24(%3), %%mm1                     \n\t" 
00953 
00954         "pxor %%mm2, %%mm2                      \n\t"
00955         "pxor %%mm3, %%mm3                      \n\t"
00956 
00957         "pcmpgtw %%mm0, %%mm2                   \n\t" 
00958         "pcmpgtw %%mm1, %%mm3                   \n\t" 
00959         "pxor %%mm2, %%mm0                      \n\t"
00960         "pxor %%mm3, %%mm1                      \n\t"
00961         "psubw %%mm2, %%mm0                     \n\t" 
00962         "psubw %%mm3, %%mm1                     \n\t" 
00963         "psrlw $1, %%mm0                        \n\t" 
00964         "psrlw $1, %%mm1                        \n\t" 
00965 
00966         "pxor %%mm6, %%mm2                      \n\t"
00967         "pxor %%mm7, %%mm3                      \n\t"
00968         "pand %%mm2, %%mm4                      \n\t"
00969         "pand %%mm3, %%mm5                      \n\t"
00970 
00971 #if HAVE_MMX2
00972         "pminsw %%mm0, %%mm4                    \n\t"
00973         "pminsw %%mm1, %%mm5                    \n\t"
00974 #else
00975         "movq %%mm4, %%mm2                      \n\t"
00976         "psubusw %%mm0, %%mm2                   \n\t"
00977         "psubw %%mm2, %%mm4                     \n\t"
00978         "movq %%mm5, %%mm2                      \n\t"
00979         "psubusw %%mm1, %%mm2                   \n\t"
00980         "psubw %%mm2, %%mm5                     \n\t"
00981 #endif
00982         "pxor %%mm6, %%mm4                      \n\t"
00983         "pxor %%mm7, %%mm5                      \n\t"
00984         "psubw %%mm6, %%mm4                     \n\t"
00985         "psubw %%mm7, %%mm5                     \n\t"
00986         "packsswb %%mm5, %%mm4                  \n\t"
00987         "movq (%0), %%mm0                       \n\t"
00988         "paddb   %%mm4, %%mm0                   \n\t"
00989         "movq %%mm0, (%0)                       \n\t"
00990         "movq (%0, %1), %%mm0                   \n\t"
00991         "psubb %%mm4, %%mm0                     \n\t"
00992         "movq %%mm0, (%0, %1)                   \n\t"
00993 
00994         : "+r" (src)
00995         : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
00996         : "%"REG_a
00997     );
00998 #else //HAVE_MMX2 || HAVE_AMD3DNOW
00999     const int l1= stride;
01000     const int l2= stride + l1;
01001     const int l3= stride + l2;
01002     const int l4= stride + l3;
01003     const int l5= stride + l4;
01004     const int l6= stride + l5;
01005     const int l7= stride + l6;
01006     const int l8= stride + l7;
01007 
01008     int x;
01009     src+= stride*3;
01010     for(x=0; x<BLOCK_SIZE; x++){
01011         const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
01012         if(FFABS(middleEnergy) < 8*c->QP){
01013             const int q=(src[l4] - src[l5])/2;
01014             const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
01015             const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
01016 
01017             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
01018             d= FFMAX(d, 0);
01019 
01020             d= (5*d + 32) >> 6;
01021             d*= FFSIGN(-middleEnergy);
01022 
01023             if(q>0){
01024                 d= d<0 ? 0 : d;
01025                 d= d>q ? q : d;
01026             }else{
01027                 d= d>0 ? 0 : d;
01028                 d= d<q ? q : d;
01029             }
01030 
01031             src[l4]-= d;
01032             src[l5]+= d;
01033         }
01034         src++;
01035     }
01036 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01037 }
01038 #endif //HAVE_ALTIVEC
01039 
01040 #if !HAVE_ALTIVEC
01041 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
01042 {
01043 #if HAVE_MMX2 || HAVE_AMD3DNOW
01044     DECLARE_ALIGNED(8, uint64_t, tmp)[3];
01045     __asm__ volatile(
01046         "pxor %%mm6, %%mm6                      \n\t"
01047         "pcmpeqb %%mm7, %%mm7                   \n\t"
01048         "movq %2, %%mm0                         \n\t"
01049         "punpcklbw %%mm6, %%mm0                 \n\t"
01050         "psrlw $1, %%mm0                        \n\t"
01051         "psubw %%mm7, %%mm0                     \n\t"
01052         "packuswb %%mm0, %%mm0                  \n\t"
01053         "movq %%mm0, %3                         \n\t"
01054 
01055         "lea (%0, %1), %%"REG_a"                \n\t"
01056         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
01057 
01058 
01059 
01060 
01061 #undef REAL_FIND_MIN_MAX
01062 #undef FIND_MIN_MAX
01063 #if HAVE_MMX2
01064 #define REAL_FIND_MIN_MAX(addr)\
01065         "movq " #addr ", %%mm0                  \n\t"\
01066         "pminub %%mm0, %%mm7                    \n\t"\
01067         "pmaxub %%mm0, %%mm6                    \n\t"
01068 #else
01069 #define REAL_FIND_MIN_MAX(addr)\
01070         "movq " #addr ", %%mm0                  \n\t"\
01071         "movq %%mm7, %%mm1                      \n\t"\
01072         "psubusb %%mm0, %%mm6                   \n\t"\
01073         "paddb %%mm0, %%mm6                     \n\t"\
01074         "psubusb %%mm0, %%mm1                   \n\t"\
01075         "psubb %%mm1, %%mm7                     \n\t"
01076 #endif
01077 #define FIND_MIN_MAX(addr)  REAL_FIND_MIN_MAX(addr)
01078 
01079 FIND_MIN_MAX((%%REGa))
01080 FIND_MIN_MAX((%%REGa, %1))
01081 FIND_MIN_MAX((%%REGa, %1, 2))
01082 FIND_MIN_MAX((%0, %1, 4))
01083 FIND_MIN_MAX((%%REGd))
01084 FIND_MIN_MAX((%%REGd, %1))
01085 FIND_MIN_MAX((%%REGd, %1, 2))
01086 FIND_MIN_MAX((%0, %1, 8))
01087 
01088         "movq %%mm7, %%mm4                      \n\t"
01089         "psrlq $8, %%mm7                        \n\t"
01090 #if HAVE_MMX2
01091         "pminub %%mm4, %%mm7                    \n\t" 
01092         "pshufw $0xF9, %%mm7, %%mm4             \n\t"
01093         "pminub %%mm4, %%mm7                    \n\t" 
01094         "pshufw $0xFE, %%mm7, %%mm4             \n\t"
01095         "pminub %%mm4, %%mm7                    \n\t"
01096 #else
01097         "movq %%mm7, %%mm1                      \n\t"
01098         "psubusb %%mm4, %%mm1                   \n\t"
01099         "psubb %%mm1, %%mm7                     \n\t"
01100         "movq %%mm7, %%mm4                      \n\t"
01101         "psrlq $16, %%mm7                       \n\t"
01102         "movq %%mm7, %%mm1                      \n\t"
01103         "psubusb %%mm4, %%mm1                   \n\t"
01104         "psubb %%mm1, %%mm7                     \n\t"
01105         "movq %%mm7, %%mm4                      \n\t"
01106         "psrlq $32, %%mm7                       \n\t"
01107         "movq %%mm7, %%mm1                      \n\t"
01108         "psubusb %%mm4, %%mm1                   \n\t"
01109         "psubb %%mm1, %%mm7                     \n\t"
01110 #endif
01111 
01112 
01113         "movq %%mm6, %%mm4                      \n\t"
01114         "psrlq $8, %%mm6                        \n\t"
01115 #if HAVE_MMX2
01116         "pmaxub %%mm4, %%mm6                    \n\t" 
01117         "pshufw $0xF9, %%mm6, %%mm4             \n\t"
01118         "pmaxub %%mm4, %%mm6                    \n\t"
01119         "pshufw $0xFE, %%mm6, %%mm4             \n\t"
01120         "pmaxub %%mm4, %%mm6                    \n\t"
01121 #else
01122         "psubusb %%mm4, %%mm6                   \n\t"
01123         "paddb %%mm4, %%mm6                     \n\t"
01124         "movq %%mm6, %%mm4                      \n\t"
01125         "psrlq $16, %%mm6                       \n\t"
01126         "psubusb %%mm4, %%mm6                   \n\t"
01127         "paddb %%mm4, %%mm6                     \n\t"
01128         "movq %%mm6, %%mm4                      \n\t"
01129         "psrlq $32, %%mm6                       \n\t"
01130         "psubusb %%mm4, %%mm6                   \n\t"
01131         "paddb %%mm4, %%mm6                     \n\t"
01132 #endif
01133         "movq %%mm6, %%mm0                      \n\t" 
01134         "psubb %%mm7, %%mm6                     \n\t" 
01135         "push %4                              \n\t"
01136         "movd %%mm6, %k4                        \n\t"
01137         "cmpb "MANGLE(deringThreshold)", %b4    \n\t"
01138         "pop %4                               \n\t"
01139         " jb 1f                                 \n\t"
01140         PAVGB(%%mm0, %%mm7)                           
01141         "punpcklbw %%mm7, %%mm7                 \n\t"
01142         "punpcklbw %%mm7, %%mm7                 \n\t"
01143         "punpcklbw %%mm7, %%mm7                 \n\t"
01144         "movq %%mm7, (%4)                       \n\t"
01145 
01146         "movq (%0), %%mm0                       \n\t" 
01147         "movq %%mm0, %%mm1                      \n\t" 
01148         "movq %%mm0, %%mm2                      \n\t" 
01149         "psllq $8, %%mm1                        \n\t"
01150         "psrlq $8, %%mm2                        \n\t"
01151         "movd -4(%0), %%mm3                     \n\t"
01152         "movd 8(%0), %%mm4                      \n\t"
01153         "psrlq $24, %%mm3                       \n\t"
01154         "psllq $56, %%mm4                       \n\t"
01155         "por %%mm3, %%mm1                       \n\t" 
01156         "por %%mm4, %%mm2                       \n\t" 
01157         "movq %%mm1, %%mm3                      \n\t" 
01158         PAVGB(%%mm2, %%mm1)                           
01159         PAVGB(%%mm0, %%mm1)                           
01160         "psubusb %%mm7, %%mm0                   \n\t"
01161         "psubusb %%mm7, %%mm2                   \n\t"
01162         "psubusb %%mm7, %%mm3                   \n\t"
01163         "pcmpeqb "MANGLE(b00)", %%mm0           \n\t" 
01164         "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" 
01165         "pcmpeqb "MANGLE(b00)", %%mm3           \n\t" 
01166         "paddb %%mm2, %%mm0                     \n\t"
01167         "paddb %%mm3, %%mm0                     \n\t"
01168 
01169         "movq (%%"REG_a"), %%mm2                \n\t" 
01170         "movq %%mm2, %%mm3                      \n\t" 
01171         "movq %%mm2, %%mm4                      \n\t" 
01172         "psllq $8, %%mm3                        \n\t"
01173         "psrlq $8, %%mm4                        \n\t"
01174         "movd -4(%%"REG_a"), %%mm5              \n\t"
01175         "movd 8(%%"REG_a"), %%mm6               \n\t"
01176         "psrlq $24, %%mm5                       \n\t"
01177         "psllq $56, %%mm6                       \n\t"
01178         "por %%mm5, %%mm3                       \n\t" 
01179         "por %%mm6, %%mm4                       \n\t" 
01180         "movq %%mm3, %%mm5                      \n\t" 
01181         PAVGB(%%mm4, %%mm3)                           
01182         PAVGB(%%mm2, %%mm3)                           
01183         "psubusb %%mm7, %%mm2                   \n\t"
01184         "psubusb %%mm7, %%mm4                   \n\t"
01185         "psubusb %%mm7, %%mm5                   \n\t"
01186         "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" 
01187         "pcmpeqb "MANGLE(b00)", %%mm4           \n\t" 
01188         "pcmpeqb "MANGLE(b00)", %%mm5           \n\t" 
01189         "paddb %%mm4, %%mm2                     \n\t"
01190         "paddb %%mm5, %%mm2                     \n\t"
01191 
01192 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01193         "movq " #src ", " #sx "                 \n\t" \
01194         "movq " #sx ", " #lx "                  \n\t" \
01195         "movq " #sx ", " #t0 "                  \n\t" \
01196         "psllq $8, " #lx "                      \n\t"\
01197         "psrlq $8, " #t0 "                      \n\t"\
01198         "movd -4" #src ", " #t1 "               \n\t"\
01199         "psrlq $24, " #t1 "                     \n\t"\
01200         "por " #t1 ", " #lx "                   \n\t" \
01201         "movd 8" #src ", " #t1 "                \n\t"\
01202         "psllq $56, " #t1 "                     \n\t"\
01203         "por " #t1 ", " #t0 "                   \n\t" \
01204         "movq " #lx ", " #t1 "                  \n\t" \
01205         PAVGB(t0, lx)                                 \
01206         PAVGB(sx, lx)                                 \
01207         PAVGB(lx, pplx)                                     \
01208         "movq " #lx ", 8(%4)                    \n\t"\
01209         "movq (%4), " #lx "                     \n\t"\
01210         "psubusb " #lx ", " #t1 "               \n\t"\
01211         "psubusb " #lx ", " #t0 "               \n\t"\
01212         "psubusb " #lx ", " #sx "               \n\t"\
01213         "movq "MANGLE(b00)", " #lx "            \n\t"\
01214         "pcmpeqb " #lx ", " #t1 "               \n\t" \
01215         "pcmpeqb " #lx ", " #t0 "               \n\t" \
01216         "pcmpeqb " #lx ", " #sx "               \n\t" \
01217         "paddb " #t1 ", " #t0 "                 \n\t"\
01218         "paddb " #t0 ", " #sx "                 \n\t"\
01219 \
01220         PAVGB(plx, pplx)                              \
01221         "movq " #dst ", " #t0 "                 \n\t" \
01222         "movq " #t0 ", " #t1 "                  \n\t" \
01223         "psubusb %3, " #t0 "                    \n\t"\
01224         "paddusb %3, " #t1 "                    \n\t"\
01225         PMAXUB(t0, pplx)\
01226         PMINUB(t1, pplx, t0)\
01227         "paddb " #sx ", " #ppsx "               \n\t"\
01228         "paddb " #psx ", " #ppsx "              \n\t"\
01229         "#paddb "MANGLE(b02)", " #ppsx "        \n\t"\
01230         "pand "MANGLE(b08)", " #ppsx "          \n\t"\
01231         "pcmpeqb " #lx ", " #ppsx "             \n\t"\
01232         "pand " #ppsx ", " #pplx "              \n\t"\
01233         "pandn " #dst ", " #ppsx "              \n\t"\
01234         "por " #pplx ", " #ppsx "               \n\t"\
01235         "movq " #ppsx ", " #dst "               \n\t"\
01236         "movq 8(%4), " #lx "                    \n\t"
01237 
01238 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01239    REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
01240 
01241 
01242 
01243 
01244 
01245 
01246 
01247 
01248 
01249 
01250 
01251 
01252 
01253 
01254 
01255 
01256 DERING_CORE((%%REGa)       ,(%%REGa, %1)   ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01257 DERING_CORE((%%REGa, %1)   ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01258 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4)    ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01259 DERING_CORE((%0, %1, 4)    ,(%%REGd)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01260 DERING_CORE((%%REGd)       ,(%%REGd, %1)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01261 DERING_CORE((%%REGd, %1)   ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01262 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8)    ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01263 DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01264 
01265         "1:                        \n\t"
01266         : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
01267         : "%"REG_a, "%"REG_d
01268     );
01269 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01270     int y;
01271     int min=255;
01272     int max=0;
01273     int avg;
01274     uint8_t *p;
01275     int s[10];
01276     const int QP2= c->QP/2 + 1;
01277 
01278     for(y=1; y<9; y++){
01279         int x;
01280         p= src + stride*y;
01281         for(x=1; x<9; x++){
01282             p++;
01283             if(*p > max) max= *p;
01284             if(*p < min) min= *p;
01285         }
01286     }
01287     avg= (min + max + 1)>>1;
01288 
01289     if(max - min <deringThreshold) return;
01290 
01291     for(y=0; y<10; y++){
01292         int t = 0;
01293 
01294         if(src[stride*y + 0] > avg) t+= 1;
01295         if(src[stride*y + 1] > avg) t+= 2;
01296         if(src[stride*y + 2] > avg) t+= 4;
01297         if(src[stride*y + 3] > avg) t+= 8;
01298         if(src[stride*y + 4] > avg) t+= 16;
01299         if(src[stride*y + 5] > avg) t+= 32;
01300         if(src[stride*y + 6] > avg) t+= 64;
01301         if(src[stride*y + 7] > avg) t+= 128;
01302         if(src[stride*y + 8] > avg) t+= 256;
01303         if(src[stride*y + 9] > avg) t+= 512;
01304 
01305         t |= (~t)<<16;
01306         t &= (t<<1) & (t>>1);
01307         s[y] = t;
01308     }
01309 
01310     for(y=1; y<9; y++){
01311         int t = s[y-1] & s[y] & s[y+1];
01312         t|= t>>16;
01313         s[y-1]= t;
01314     }
01315 
01316     for(y=1; y<9; y++){
01317         int x;
01318         int t = s[y-1];
01319 
01320         p= src + stride*y;
01321         for(x=1; x<9; x++){
01322             p++;
01323             if(t & (1<<x)){
01324                 int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
01325                       +2*(*(p     -1)) + 4*(*p         ) + 2*(*(p     +1))
01326                       +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
01327                 f= (f + 8)>>4;
01328 
01329 #ifdef DEBUG_DERING_THRESHOLD
01330                     __asm__ volatile("emms\n\t":);
01331                     {
01332                     static long long numPixels=0;
01333                     if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
01334 
01335 
01336 
01337                     if(max-min < 20){
01338                         static int numSkipped=0;
01339                         static int errorSum=0;
01340                         static int worstQP=0;
01341                         static int worstRange=0;
01342                         static int worstDiff=0;
01343                         int diff= (f - *p);
01344                         int absDiff= FFABS(diff);
01345                         int error= diff*diff;
01346 
01347                         if(x==1 || x==8 || y==1 || y==8) continue;
01348 
01349                         numSkipped++;
01350                         if(absDiff > worstDiff){
01351                             worstDiff= absDiff;
01352                             worstQP= QP;
01353                             worstRange= max-min;
01354                         }
01355                         errorSum+= error;
01356 
01357                         if(1024LL*1024LL*1024LL % numSkipped == 0){
01358                             av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, "
01359                                    "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
01360                                    (float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
01361                                    worstDiff, (float)numSkipped/numPixels);
01362                         }
01363                     }
01364                     }
01365 #endif
01366                     if     (*p + QP2 < f) *p= *p + QP2;
01367                     else if(*p - QP2 > f) *p= *p - QP2;
01368                     else *p=f;
01369             }
01370         }
01371     }
01372 #ifdef DEBUG_DERING_THRESHOLD
01373     if(max-min < 20){
01374         for(y=1; y<9; y++){
01375             int x;
01376             int t = 0;
01377             p= src + stride*y;
01378             for(x=1; x<9; x++){
01379                 p++;
01380                 *p = FFMIN(*p + 20, 255);
01381             }
01382         }
01383 
01384     }
01385 #endif
01386 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01387 }
01388 #endif //HAVE_ALTIVEC
01389 
01396 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
01397 {
01398 #if HAVE_MMX2 || HAVE_AMD3DNOW
01399     src+= 4*stride;
01400     __asm__ volatile(
01401         "lea (%0, %1), %%"REG_a"                \n\t"
01402         "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
01403 
01404 
01405 
01406         "movq (%0), %%mm0                       \n\t"
01407         "movq (%%"REG_a", %1), %%mm1            \n\t"
01408         PAVGB(%%mm1, %%mm0)
01409         "movq %%mm0, (%%"REG_a")                \n\t"
01410         "movq (%0, %1, 4), %%mm0                \n\t"
01411         PAVGB(%%mm0, %%mm1)
01412         "movq %%mm1, (%%"REG_a", %1, 2)         \n\t"
01413         "movq (%%"REG_c", %1), %%mm1            \n\t"
01414         PAVGB(%%mm1, %%mm0)
01415         "movq %%mm0, (%%"REG_c")                \n\t"
01416         "movq (%0, %1, 8), %%mm0                \n\t"
01417         PAVGB(%%mm0, %%mm1)
01418         "movq %%mm1, (%%"REG_c", %1, 2)         \n\t"
01419 
01420         : : "r" (src), "r" ((x86_reg)stride)
01421         : "%"REG_a, "%"REG_c
01422     );
01423 #else
01424     int a, b, x;
01425     src+= 4*stride;
01426 
01427     for(x=0; x<2; x++){
01428         a= *(uint32_t*)&src[stride*0];
01429         b= *(uint32_t*)&src[stride*2];
01430         *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01431         a= *(uint32_t*)&src[stride*4];
01432         *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01433         b= *(uint32_t*)&src[stride*6];
01434         *(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01435         a= *(uint32_t*)&src[stride*8];
01436         *(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01437         src += 4;
01438     }
01439 #endif
01440 }
01441 
01449 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
01450 {
01451 #if HAVE_MMX2 || HAVE_AMD3DNOW
01452     src+= stride*3;
01453     __asm__ volatile(
01454         "lea (%0, %1), %%"REG_a"                \n\t"
01455         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
01456         "lea (%%"REG_d", %1, 4), %%"REG_c"      \n\t"
01457         "add %1, %%"REG_c"                      \n\t"
01458         "pxor %%mm7, %%mm7                      \n\t"
01459 
01460 
01461 
01462 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
01463         "movq " #a ", %%mm0                     \n\t"\
01464         "movq " #b ", %%mm1                     \n\t"\
01465         "movq " #d ", %%mm2                     \n\t"\
01466         "movq " #e ", %%mm3                     \n\t"\
01467         PAVGB(%%mm2, %%mm1)                             \
01468         PAVGB(%%mm3, %%mm0)                             \
01469         "movq %%mm0, %%mm2                      \n\t"\
01470         "punpcklbw %%mm7, %%mm0                 \n\t"\
01471         "punpckhbw %%mm7, %%mm2                 \n\t"\
01472         "movq %%mm1, %%mm3                      \n\t"\
01473         "punpcklbw %%mm7, %%mm1                 \n\t"\
01474         "punpckhbw %%mm7, %%mm3                 \n\t"\
01475         "psubw %%mm1, %%mm0                     \n\t"   \
01476         "psubw %%mm3, %%mm2                     \n\t"   \
01477         "psraw $3, %%mm0                        \n\t"   \
01478         "psraw $3, %%mm2                        \n\t"   \
01479         "psubw %%mm0, %%mm1                     \n\t"   \
01480         "psubw %%mm2, %%mm3                     \n\t"   \
01481         "packuswb %%mm3, %%mm1                  \n\t"\
01482         "movq %%mm1, " #c "                     \n\t"
01483 #define DEINT_CUBIC(a,b,c,d,e)  REAL_DEINT_CUBIC(a,b,c,d,e)
01484 
01485 DEINT_CUBIC((%0)        , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
01486 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%0, %1, 8))
01487 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
01488 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc, %1, 2))
01489 
01490         : : "r" (src), "r" ((x86_reg)stride)
01491         : "%"REG_a, "%"REG_d, "%"REG_c
01492     );
01493 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01494     int x;
01495     src+= stride*3;
01496     for(x=0; x<8; x++){
01497         src[stride*3] = CLIP((-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
01498         src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
01499         src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
01500         src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
01501         src++;
01502     }
01503 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01504 }
01505 
01513 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
01514 {
01515 #if HAVE_MMX2 || HAVE_AMD3DNOW
01516     src+= stride*4;
01517     __asm__ volatile(
01518         "lea (%0, %1), %%"REG_a"                \n\t"
01519         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
01520         "pxor %%mm7, %%mm7                      \n\t"
01521         "movq (%2), %%mm0                       \n\t"
01522 
01523 
01524 
01525 #define REAL_DEINT_FF(a,b,c,d)\
01526         "movq " #a ", %%mm1                     \n\t"\
01527         "movq " #b ", %%mm2                     \n\t"\
01528         "movq " #c ", %%mm3                     \n\t"\
01529         "movq " #d ", %%mm4                     \n\t"\
01530         PAVGB(%%mm3, %%mm1)                          \
01531         PAVGB(%%mm4, %%mm0)                          \
01532         "movq %%mm0, %%mm3                      \n\t"\
01533         "punpcklbw %%mm7, %%mm0                 \n\t"\
01534         "punpckhbw %%mm7, %%mm3                 \n\t"\
01535         "movq %%mm1, %%mm4                      \n\t"\
01536         "punpcklbw %%mm7, %%mm1                 \n\t"\
01537         "punpckhbw %%mm7, %%mm4                 \n\t"\
01538         "psllw $2, %%mm1                        \n\t"\
01539         "psllw $2, %%mm4                        \n\t"\
01540         "psubw %%mm0, %%mm1                     \n\t"\
01541         "psubw %%mm3, %%mm4                     \n\t"\
01542         "movq %%mm2, %%mm5                      \n\t"\
01543         "movq %%mm2, %%mm0                      \n\t"\
01544         "punpcklbw %%mm7, %%mm2                 \n\t"\
01545         "punpckhbw %%mm7, %%mm5                 \n\t"\
01546         "paddw %%mm2, %%mm1                     \n\t"\
01547         "paddw %%mm5, %%mm4                     \n\t"\
01548         "psraw $2, %%mm1                        \n\t"\
01549         "psraw $2, %%mm4                        \n\t"\
01550         "packuswb %%mm4, %%mm1                  \n\t"\
01551         "movq %%mm1, " #b "                     \n\t"\
01552 
01553 #define DEINT_FF(a,b,c,d)  REAL_DEINT_FF(a,b,c,d)
01554 
01555 DEINT_FF((%0)        , (%%REGa)       , (%%REGa, %1), (%%REGa, %1, 2))
01556 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd)       )
01557 DEINT_FF((%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%%REGd, %1, 2))
01558 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01559 
01560         "movq %%mm0, (%2)                       \n\t"
01561         : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
01562         : "%"REG_a, "%"REG_d
01563     );
01564 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01565     int x;
01566     src+= stride*4;
01567     for(x=0; x<8; x++){
01568         int t1= tmp[x];
01569         int t2= src[stride*1];
01570 
01571         src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
01572         t1= src[stride*4];
01573         src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
01574         t2= src[stride*6];
01575         src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
01576         t1= src[stride*8];
01577         src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
01578         tmp[x]= t1;
01579 
01580         src++;
01581     }
01582 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01583 }
01584 
01592 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
01593 {
01594 #if HAVE_MMX2 || HAVE_AMD3DNOW
01595     src+= stride*4;
01596     __asm__ volatile(
01597         "lea (%0, %1), %%"REG_a"                \n\t"
01598         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
01599         "pxor %%mm7, %%mm7                      \n\t"
01600         "movq (%2), %%mm0                       \n\t"
01601         "movq (%3), %%mm1                       \n\t"
01602 
01603 
01604 
01605 #define REAL_DEINT_L5(t1,t2,a,b,c)\
01606         "movq " #a ", %%mm2                     \n\t"\
01607         "movq " #b ", %%mm3                     \n\t"\
01608         "movq " #c ", %%mm4                     \n\t"\
01609         PAVGB(t2, %%mm3)                             \
01610         PAVGB(t1, %%mm4)                             \
01611         "movq %%mm2, %%mm5                      \n\t"\
01612         "movq %%mm2, " #t1 "                    \n\t"\
01613         "punpcklbw %%mm7, %%mm2                 \n\t"\
01614         "punpckhbw %%mm7, %%mm5                 \n\t"\
01615         "movq %%mm2, %%mm6                      \n\t"\
01616         "paddw %%mm2, %%mm2                     \n\t"\
01617         "paddw %%mm6, %%mm2                     \n\t"\
01618         "movq %%mm5, %%mm6                      \n\t"\
01619         "paddw %%mm5, %%mm5                     \n\t"\
01620         "paddw %%mm6, %%mm5                     \n\t"\
01621         "movq %%mm3, %%mm6                      \n\t"\
01622         "punpcklbw %%mm7, %%mm3                 \n\t"\
01623         "punpckhbw %%mm7, %%mm6                 \n\t"\
01624         "paddw %%mm3, %%mm3                     \n\t"\
01625         "paddw %%mm6, %%mm6                     \n\t"\
01626         "paddw %%mm3, %%mm2                     \n\t"\
01627         "paddw %%mm6, %%mm5                     \n\t"\
01628         "movq %%mm4, %%mm6                      \n\t"\
01629         "punpcklbw %%mm7, %%mm4                 \n\t"\
01630         "punpckhbw %%mm7, %%mm6                 \n\t"\
01631         "psubw %%mm4, %%mm2                     \n\t"\
01632         "psubw %%mm6, %%mm5                     \n\t"\
01633         "psraw $2, %%mm2                        \n\t"\
01634         "psraw $2, %%mm5                        \n\t"\
01635         "packuswb %%mm5, %%mm2                  \n\t"\
01636         "movq %%mm2, " #a "                     \n\t"\
01637 
01638 #define DEINT_L5(t1,t2,a,b,c)  REAL_DEINT_L5(t1,t2,a,b,c)
01639 
01640 DEINT_L5(%%mm0, %%mm1, (%0)           , (%%REGa)       , (%%REGa, %1)   )
01641 DEINT_L5(%%mm1, %%mm0, (%%REGa)       , (%%REGa, %1)   , (%%REGa, %1, 2))
01642 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1)   , (%%REGa, %1, 2), (%0, %1, 4)   )
01643 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4)    , (%%REGd)       )
01644 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)    , (%%REGd)       , (%%REGd, %1)   )
01645 DEINT_L5(%%mm1, %%mm0, (%%REGd)       , (%%REGd, %1)   , (%%REGd, %1, 2))
01646 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1)   , (%%REGd, %1, 2), (%0, %1, 8)   )
01647 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
01648 
01649         "movq %%mm0, (%2)                       \n\t"
01650         "movq %%mm1, (%3)                       \n\t"
01651         : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
01652         : "%"REG_a, "%"REG_d
01653     );
01654 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01655     int x;
01656     src+= stride*4;
01657     for(x=0; x<8; x++){
01658         int t1= tmp[x];
01659         int t2= tmp2[x];
01660         int t3= src[0];
01661 
01662         src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
01663         t1= src[stride*1];
01664         src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
01665         t2= src[stride*2];
01666         src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
01667         t3= src[stride*3];
01668         src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
01669         t1= src[stride*4];
01670         src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
01671         t2= src[stride*5];
01672         src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
01673         t3= src[stride*6];
01674         src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
01675         t1= src[stride*7];
01676         src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
01677 
01678         tmp[x]= t3;
01679         tmp2[x]= t1;
01680 
01681         src++;
01682     }
01683 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01684 }
01685 
01693 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
01694 {
01695 #if HAVE_MMX2 || HAVE_AMD3DNOW
01696     src+= 4*stride;
01697     __asm__ volatile(
01698         "lea (%0, %1), %%"REG_a"                \n\t"
01699         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
01700 
01701 
01702 
01703         "movq (%2), %%mm0                       \n\t" 
01704         "movq (%%"REG_a"), %%mm1                \n\t" 
01705         PAVGB(%%mm1, %%mm0)                           
01706         "movq (%0), %%mm2                       \n\t" 
01707         PAVGB(%%mm2, %%mm0)
01708         "movq %%mm0, (%0)                       \n\t"
01709         "movq (%%"REG_a", %1), %%mm0            \n\t" 
01710         PAVGB(%%mm0, %%mm2)                           
01711         PAVGB(%%mm1, %%mm2)                           
01712         "movq %%mm2, (%%"REG_a")                \n\t"
01713         "movq (%%"REG_a", %1, 2), %%mm2         \n\t" 
01714         PAVGB(%%mm2, %%mm1)                           
01715         PAVGB(%%mm0, %%mm1)                           
01716         "movq %%mm1, (%%"REG_a", %1)            \n\t"
01717         "movq (%0, %1, 4), %%mm1                \n\t" 
01718         PAVGB(%%mm1, %%mm0)                           
01719         PAVGB(%%mm2, %%mm0)                           
01720         "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
01721         "movq (%%"REG_d"), %%mm0                \n\t" 
01722         PAVGB(%%mm0, %%mm2)                           
01723         PAVGB(%%mm1, %%mm2)                           
01724         "movq %%mm2, (%0, %1, 4)                \n\t"
01725         "movq (%%"REG_d", %1), %%mm2            \n\t" 
01726         PAVGB(%%mm2, %%mm1)                           
01727         PAVGB(%%mm0, %%mm1)                           
01728         "movq %%mm1, (%%"REG_d")                \n\t"
01729         "movq (%%"REG_d", %1, 2), %%mm1         \n\t" 
01730         PAVGB(%%mm1, %%mm0)                           
01731         PAVGB(%%mm2, %%mm0)                           
01732         "movq %%mm0, (%%"REG_d", %1)            \n\t"
01733         "movq (%0, %1, 8), %%mm0                \n\t" 
01734         PAVGB(%%mm0, %%mm2)                           
01735         PAVGB(%%mm1, %%mm2)                           
01736         "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
01737         "movq %%mm1, (%2)                       \n\t"
01738 
01739         : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
01740         : "%"REG_a, "%"REG_d
01741     );
01742 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01743     int a, b, c, x;
01744     src+= 4*stride;
01745 
01746     for(x=0; x<2; x++){
01747         a= *(uint32_t*)&tmp[stride*0];
01748         b= *(uint32_t*)&src[stride*0];
01749         c= *(uint32_t*)&src[stride*1];
01750         a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01751         *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01752 
01753         a= *(uint32_t*)&src[stride*2];
01754         b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01755         *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01756 
01757         b= *(uint32_t*)&src[stride*3];
01758         c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01759         *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01760 
01761         c= *(uint32_t*)&src[stride*4];
01762         a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01763         *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01764 
01765         a= *(uint32_t*)&src[stride*5];
01766         b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01767         *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01768 
01769         b= *(uint32_t*)&src[stride*6];
01770         c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01771         *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01772 
01773         c= *(uint32_t*)&src[stride*7];
01774         a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01775         *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01776 
01777         a= *(uint32_t*)&src[stride*8];
01778         b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01779         *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01780 
01781         *(uint32_t*)&tmp[stride*0]= c;
01782         src += 4;
01783         tmp += 4;
01784     }
01785 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01786 }
01787 
01794 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
01795 {
01796 #if HAVE_MMX
01797     src+= 4*stride;
01798 #if HAVE_MMX2
01799     __asm__ volatile(
01800         "lea (%0, %1), %%"REG_a"                \n\t"
01801         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
01802 
01803 
01804 
01805         "movq (%0), %%mm0                       \n\t" 
01806         "movq (%%"REG_a", %1), %%mm2            \n\t" 
01807         "movq (%%"REG_a"), %%mm1                \n\t" 
01808         "movq %%mm0, %%mm3                      \n\t"
01809         "pmaxub %%mm1, %%mm0                    \n\t" 
01810         "pminub %%mm3, %%mm1                    \n\t" 
01811         "pmaxub %%mm2, %%mm1                    \n\t" 
01812         "pminub %%mm1, %%mm0                    \n\t"
01813         "movq %%mm0, (%%"REG_a")                \n\t"
01814 
01815         "movq (%0, %1, 4), %%mm0                \n\t" 
01816         "movq (%%"REG_a", %1, 2), %%mm1         \n\t" 
01817         "movq %%mm2, %%mm3                      \n\t"
01818         "pmaxub %%mm1, %%mm2                    \n\t" 
01819         "pminub %%mm3, %%mm1                    \n\t" 
01820         "pmaxub %%mm0, %%mm1                    \n\t" 
01821         "pminub %%mm1, %%mm2                    \n\t"
01822         "movq %%mm2, (%%"REG_a", %1, 2)         \n\t"
01823 
01824         "movq (%%"REG_d"), %%mm2                \n\t" 
01825         "movq (%%"REG_d", %1), %%mm1            \n\t" 
01826         "movq %%mm2, %%mm3                      \n\t"
01827         "pmaxub %%mm0, %%mm2                    \n\t" 
01828         "pminub %%mm3, %%mm0                    \n\t" 
01829         "pmaxub %%mm1, %%mm0                    \n\t" 
01830         "pminub %%mm0, %%mm2                    \n\t"
01831         "movq %%mm2, (%%"REG_d")                \n\t"
01832 
01833         "movq (%%"REG_d", %1, 2), %%mm2         \n\t" 
01834         "movq (%0, %1, 8), %%mm0                \n\t" 
01835         "movq %%mm2, %%mm3                      \n\t"
01836         "pmaxub %%mm0, %%mm2                    \n\t" 
01837         "pminub %%mm3, %%mm0                    \n\t" 
01838         "pmaxub %%mm1, %%mm0                    \n\t" 
01839         "pminub %%mm0, %%mm2                    \n\t"
01840         "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
01841 
01842 
01843         : : "r" (src), "r" ((x86_reg)stride)
01844         : "%"REG_a, "%"REG_d
01845     );
01846 
01847 #else // MMX without MMX2
01848     __asm__ volatile(
01849         "lea (%0, %1), %%"REG_a"                \n\t"
01850         "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
01851 
01852 
01853         "pxor %%mm7, %%mm7                      \n\t"
01854 
01855 #define REAL_MEDIAN(a,b,c)\
01856         "movq " #a ", %%mm0                     \n\t"\
01857         "movq " #b ", %%mm2                     \n\t"\
01858         "movq " #c ", %%mm1                     \n\t"\
01859         "movq %%mm0, %%mm3                      \n\t"\
01860         "movq %%mm1, %%mm4                      \n\t"\
01861         "movq %%mm2, %%mm5                      \n\t"\
01862         "psubusb %%mm1, %%mm3                   \n\t"\
01863         "psubusb %%mm2, %%mm4                   \n\t"\
01864         "psubusb %%mm0, %%mm5                   \n\t"\
01865         "pcmpeqb %%mm7, %%mm3                   \n\t"\
01866         "pcmpeqb %%mm7, %%mm4                   \n\t"\
01867         "pcmpeqb %%mm7, %%mm5                   \n\t"\
01868         "movq %%mm3, %%mm6                      \n\t"\
01869         "pxor %%mm4, %%mm3                      \n\t"\
01870         "pxor %%mm5, %%mm4                      \n\t"\
01871         "pxor %%mm6, %%mm5                      \n\t"\
01872         "por %%mm3, %%mm1                       \n\t"\
01873         "por %%mm4, %%mm2                       \n\t"\
01874         "por %%mm5, %%mm0                       \n\t"\
01875         "pand %%mm2, %%mm0                      \n\t"\
01876         "pand %%mm1, %%mm0                      \n\t"\
01877         "movq %%mm0, " #b "                     \n\t"
01878 #define MEDIAN(a,b,c)  REAL_MEDIAN(a,b,c)
01879 
01880 MEDIAN((%0)        , (%%REGa)       , (%%REGa, %1))
01881 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
01882 MEDIAN((%0, %1, 4) , (%%REGd)       , (%%REGd, %1))
01883 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
01884 
01885         : : "r" (src), "r" ((x86_reg)stride)
01886         : "%"REG_a, "%"REG_d
01887     );
01888 #endif //HAVE_MMX2
01889 #else //HAVE_MMX
01890     int x, y;
01891     src+= 4*stride;
01892     
01893     for(x=0; x<8; x++){
01894         uint8_t *colsrc = src;
01895         for (y=0; y<4; y++){
01896             int a, b, c, d, e, f;
01897             a = colsrc[0       ];
01898             b = colsrc[stride  ];
01899             c = colsrc[stride*2];
01900             d = (a-b)>>31;
01901             e = (b-c)>>31;
01902             f = (c-a)>>31;
01903             colsrc[stride  ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
01904             colsrc += stride*2;
01905         }
01906         src++;
01907     }
01908 #endif //HAVE_MMX
01909 }
01910 
01911 #if HAVE_MMX
01912 
01915 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
01916 {
01917     __asm__(
01918         "lea (%0, %1), %%"REG_a"                \n\t"
01919 
01920 
01921         "movq (%0), %%mm0                       \n\t" 
01922         "movq (%%"REG_a"), %%mm1                \n\t" 
01923         "movq %%mm0, %%mm2                      \n\t" 
01924         "punpcklbw %%mm1, %%mm0                 \n\t" 
01925         "punpckhbw %%mm1, %%mm2                 \n\t" 
01926 
01927         "movq (%%"REG_a", %1), %%mm1            \n\t"
01928         "movq (%%"REG_a", %1, 2), %%mm3         \n\t"
01929         "movq %%mm1, %%mm4                      \n\t"
01930         "punpcklbw %%mm3, %%mm1                 \n\t"
01931         "punpckhbw %%mm3, %%mm4                 \n\t"
01932 
01933         "movq %%mm0, %%mm3                      \n\t"
01934         "punpcklwd %%mm1, %%mm0                 \n\t"
01935         "punpckhwd %%mm1, %%mm3                 \n\t"
01936         "movq %%mm2, %%mm1                      \n\t"
01937         "punpcklwd %%mm4, %%mm2                 \n\t"
01938         "punpckhwd %%mm4, %%mm1                 \n\t"
01939 
01940         "movd %%mm0, 128(%2)                    \n\t"
01941         "psrlq $32, %%mm0                       \n\t"
01942         "movd %%mm0, 144(%2)                    \n\t"
01943         "movd %%mm3, 160(%2)                    \n\t"
01944         "psrlq $32, %%mm3                       \n\t"
01945         "movd %%mm3, 176(%2)                    \n\t"
01946         "movd %%mm3, 48(%3)                     \n\t"
01947         "movd %%mm2, 192(%2)                    \n\t"
01948         "movd %%mm2, 64(%3)                     \n\t"
01949         "psrlq $32, %%mm2                       \n\t"
01950         "movd %%mm2, 80(%3)                     \n\t"
01951         "movd %%mm1, 96(%3)                     \n\t"
01952         "psrlq $32, %%mm1                       \n\t"
01953         "movd %%mm1, 112(%3)                    \n\t"
01954 
01955         "lea (%%"REG_a", %1, 4), %%"REG_a"      \n\t"
01956 
01957         "movq (%0, %1, 4), %%mm0                \n\t" 
01958         "movq (%%"REG_a"), %%mm1                \n\t" 
01959         "movq %%mm0, %%mm2                      \n\t" 
01960         "punpcklbw %%mm1, %%mm0                 \n\t" 
01961         "punpckhbw %%mm1, %%mm2                 \n\t" 
01962 
01963         "movq (%%"REG_a", %1), %%mm1            \n\t"
01964         "movq (%%"REG_a", %1, 2), %%mm3         \n\t"
01965         "movq %%mm1, %%mm4                      \n\t"
01966         "punpcklbw %%mm3, %%mm1                 \n\t"
01967         "punpckhbw %%mm3, %%mm4                 \n\t"
01968 
01969         "movq %%mm0, %%mm3                      \n\t"
01970         "punpcklwd %%mm1, %%mm0                 \n\t"
01971         "punpckhwd %%mm1, %%mm3                 \n\t"
01972         "movq %%mm2, %%mm1                      \n\t"
01973         "punpcklwd %%mm4, %%mm2                 \n\t"
01974         "punpckhwd %%mm4, %%mm1                 \n\t"
01975 
01976         "movd %%mm0, 132(%2)                    \n\t"
01977         "psrlq $32, %%mm0                       \n\t"
01978         "movd %%mm0, 148(%2)                    \n\t"
01979         "movd %%mm3, 164(%2)                    \n\t"
01980         "psrlq $32, %%mm3                       \n\t"
01981         "movd %%mm3, 180(%2)                    \n\t"
01982         "movd %%mm3, 52(%3)                     \n\t"
01983         "movd %%mm2, 196(%2)                    \n\t"
01984         "movd %%mm2, 68(%3)                     \n\t"
01985         "psrlq $32, %%mm2                       \n\t"
01986         "movd %%mm2, 84(%3)                     \n\t"
01987         "movd %%mm1, 100(%3)                    \n\t"
01988         "psrlq $32, %%mm1                       \n\t"
01989         "movd %%mm1, 116(%3)                    \n\t"
01990 
01991 
01992         :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2)
01993         : "%"REG_a
01994     );
01995 }
01996 
02000 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
02001 {
02002     __asm__(
02003         "lea (%0, %1), %%"REG_a"                \n\t"
02004         "lea (%%"REG_a",%1,4), %%"REG_d"        \n\t"
02005 
02006 
02007         "movq (%2), %%mm0                       \n\t" 
02008         "movq 16(%2), %%mm1                     \n\t" 
02009         "movq %%mm0, %%mm2                      \n\t" 
02010         "punpcklbw %%mm1, %%mm0                 \n\t" 
02011         "punpckhbw %%mm1, %%mm2                 \n\t" 
02012 
02013         "movq 32(%2), %%mm1                     \n\t"
02014         "movq 48(%2), %%mm3                     \n\t"
02015         "movq %%mm1, %%mm4                      \n\t"
02016         "punpcklbw %%mm3, %%mm1                 \n\t"
02017         "punpckhbw %%mm3, %%mm4                 \n\t"
02018 
02019         "movq %%mm0, %%mm3                      \n\t"
02020         "punpcklwd %%mm1, %%mm0                 \n\t"
02021         "punpckhwd %%mm1, %%mm3                 \n\t"
02022         "movq %%mm2, %%mm1                      \n\t"
02023         "punpcklwd %%mm4, %%mm2                 \n\t"
02024         "punpckhwd %%mm4, %%mm1                 \n\t"
02025 
02026         "movd %%mm0, (%0)                       \n\t"
02027         "psrlq $32, %%mm0                       \n\t"
02028         "movd %%mm0, (%%"REG_a")                \n\t"
02029         "movd %%mm3, (%%"REG_a", %1)            \n\t"
02030         "psrlq $32, %%mm3                       \n\t"
02031         "movd %%mm3, (%%"REG_a", %1, 2)         \n\t"
02032         "movd %%mm2, (%0, %1, 4)                \n\t"
02033         "psrlq $32, %%mm2                       \n\t"
02034         "movd %%mm2, (%%"REG_d")                \n\t"
02035         "movd %%mm1, (%%"REG_d", %1)            \n\t"
02036         "psrlq $32, %%mm1                       \n\t"
02037         "movd %%mm1, (%%"REG_d", %1, 2)         \n\t"
02038 
02039 
02040         "movq 64(%2), %%mm0                     \n\t" 
02041         "movq 80(%2), %%mm1                     \n\t" 
02042         "movq %%mm0, %%mm2                      \n\t" 
02043         "punpcklbw %%mm1, %%mm0                 \n\t" 
02044         "punpckhbw %%mm1, %%mm2                 \n\t" 
02045 
02046         "movq 96(%2), %%mm1                     \n\t"
02047         "movq 112(%2), %%mm3                    \n\t"
02048         "movq %%mm1, %%mm4                      \n\t"
02049         "punpcklbw %%mm3, %%mm1                 \n\t"
02050         "punpckhbw %%mm3, %%mm4                 \n\t"
02051 
02052         "movq %%mm0, %%mm3                      \n\t"
02053         "punpcklwd %%mm1, %%mm0                 \n\t"
02054         "punpckhwd %%mm1, %%mm3                 \n\t"
02055         "movq %%mm2, %%mm1                      \n\t"
02056         "punpcklwd %%mm4, %%mm2                 \n\t"
02057         "punpckhwd %%mm4, %%mm1                 \n\t"
02058 
02059         "movd %%mm0, 4(%0)                      \n\t"
02060         "psrlq $32, %%mm0                       \n\t"
02061         "movd %%mm0, 4(%%"REG_a")               \n\t"
02062         "movd %%mm3, 4(%%"REG_a", %1)           \n\t"
02063         "psrlq $32, %%mm3                       \n\t"
02064         "movd %%mm3, 4(%%"REG_a", %1, 2)        \n\t"
02065         "movd %%mm2, 4(%0, %1, 4)               \n\t"
02066         "psrlq $32, %%mm2                       \n\t"
02067         "movd %%mm2, 4(%%"REG_d")               \n\t"
02068         "movd %%mm1, 4(%%"REG_d", %1)           \n\t"
02069         "psrlq $32, %%mm1                       \n\t"
02070         "movd %%mm1, 4(%%"REG_d", %1, 2)        \n\t"
02071 
02072         :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src)
02073         : "%"REG_a, "%"REG_d
02074     );
02075 }
02076 #endif //HAVE_MMX
02077 
02078 
02079 #if !HAVE_ALTIVEC
02080 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
02081                                     uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
02082 {
02083     
02084     tempBlurredPast[127]= maxNoise[0];
02085     tempBlurredPast[128]= maxNoise[1];
02086     tempBlurredPast[129]= maxNoise[2];
02087 
02088 #define FAST_L2_DIFF
02089 
02090 #if HAVE_MMX2 || HAVE_AMD3DNOW
02091     __asm__ volatile(
02092         "lea (%2, %2, 2), %%"REG_a"             \n\t" 
02093         "lea (%2, %2, 4), %%"REG_d"             \n\t" 
02094         "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" 
02095 
02096 
02097 
02098 #ifdef L1_DIFF //needs mmx2
02099         "movq (%0), %%mm0                       \n\t" 
02100         "psadbw (%1), %%mm0                     \n\t" 
02101         "movq (%0, %2), %%mm1                   \n\t" 
02102         "psadbw (%1, %2), %%mm1                 \n\t" 
02103         "movq (%0, %2, 2), %%mm2                \n\t" 
02104         "psadbw (%1, %2, 2), %%mm2              \n\t" 
02105         "movq (%0, %%"REG_a"), %%mm3            \n\t" 
02106         "psadbw (%1, %%"REG_a"), %%mm3          \n\t" 
02107 
02108         "movq (%0, %2, 4), %%mm4                \n\t" 
02109         "paddw %%mm1, %%mm0                     \n\t"
02110         "psadbw (%1, %2, 4), %%mm4              \n\t" 
02111         "movq (%0, %%"REG_d"), %%mm5            \n\t" 
02112         "paddw %%mm2, %%mm0                     \n\t"
02113         "psadbw (%1, %%"REG_d"), %%mm5          \n\t" 
02114         "movq (%0, %%"REG_a", 2), %%mm6         \n\t" 
02115         "paddw %%mm3, %%mm0                     \n\t"
02116         "psadbw (%1, %%"REG_a", 2), %%mm6       \n\t" 
02117         "movq (%0, %%"REG_c"), %%mm7            \n\t" 
02118         "paddw %%mm4, %%mm0                     \n\t"
02119         "psadbw (%1, %%"REG_c"), %%mm7          \n\t" 
02120         "paddw %%mm5, %%mm6                     \n\t"
02121         "paddw %%mm7, %%mm6                     \n\t"
02122         "paddw %%mm6, %%mm0                     \n\t"
02123 #else //L1_DIFF
02124 #if defined (FAST_L2_DIFF)
02125         "pcmpeqb %%mm7, %%mm7                   \n\t"
02126         "movq "MANGLE(b80)", %%mm6              \n\t"
02127         "pxor %%mm0, %%mm0                      \n\t"
02128 #define REAL_L2_DIFF_CORE(a, b)\
02129         "movq " #a ", %%mm5                     \n\t"\
02130         "movq " #b ", %%mm2                     \n\t"\
02131         "pxor %%mm7, %%mm2                      \n\t"\
02132         PAVGB(%%mm2, %%mm5)\
02133         "paddb %%mm6, %%mm5                     \n\t"\
02134         "movq %%mm5, %%mm2                      \n\t"\
02135         "psllw $8, %%mm5                        \n\t"\
02136         "pmaddwd %%mm5, %%mm5                   \n\t"\
02137         "pmaddwd %%mm2, %%mm2                   \n\t"\
02138         "paddd %%mm2, %%mm5                     \n\t"\
02139         "psrld $14, %%mm5                       \n\t"\
02140         "paddd %%mm5, %%mm0                     \n\t"
02141 
02142 #else //defined (FAST_L2_DIFF)
02143         "pxor %%mm7, %%mm7                      \n\t"
02144         "pxor %%mm0, %%mm0                      \n\t"
02145 #define REAL_L2_DIFF_CORE(a, b)\
02146         "movq " #a ", %%mm5                     \n\t"\
02147         "movq " #b ", %%mm2                     \n\t"\
02148         "movq %%mm5, %%mm1                      \n\t"\
02149         "movq %%mm2, %%mm3                      \n\t"\
02150         "punpcklbw %%mm7, %%mm5                 \n\t"\
02151         "punpckhbw %%mm7, %%mm1                 \n\t"\
02152         "punpcklbw %%mm7, %%mm2                 \n\t"\
02153         "punpckhbw %%mm7, %%mm3                 \n\t"\
02154         "psubw %%mm2, %%mm5                     \n\t"\
02155         "psubw %%mm3, %%mm1                     \n\t"\
02156         "pmaddwd %%mm5, %%mm5                   \n\t"\
02157         "pmaddwd %%mm1, %%mm1                   \n\t"\
02158         "paddd %%mm1, %%mm5                     \n\t"\
02159         "paddd %%mm5, %%mm0                     \n\t"
02160 
02161 #endif //defined (FAST_L2_DIFF)
02162 
02163 #define L2_DIFF_CORE(a, b)  REAL_L2_DIFF_CORE(a, b)
02164 
02165 L2_DIFF_CORE((%0)          , (%1))
02166 L2_DIFF_CORE((%0, %2)      , (%1, %2))
02167 L2_DIFF_CORE((%0, %2, 2)   , (%1, %2, 2))
02168 L2_DIFF_CORE((%0, %%REGa)  , (%1, %%REGa))
02169 L2_DIFF_CORE((%0, %2, 4)   , (%1, %2, 4))
02170 L2_DIFF_CORE((%0, %%REGd)  , (%1, %%REGd))
02171 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
02172 L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
02173 
02174 #endif //L1_DIFF
02175 
02176         "movq %%mm0, %%mm4                      \n\t"
02177         "psrlq $32, %%mm0                       \n\t"
02178         "paddd %%mm0, %%mm4                     \n\t"
02179         "movd %%mm4, %%ecx                      \n\t"
02180         "shll $2, %%ecx                         \n\t"
02181         "mov %3, %%"REG_d"                      \n\t"
02182         "addl -4(%%"REG_d"), %%ecx              \n\t"
02183         "addl 4(%%"REG_d"), %%ecx               \n\t"
02184         "addl -1024(%%"REG_d"), %%ecx           \n\t"
02185         "addl $4, %%ecx                         \n\t"
02186         "addl 1024(%%"REG_d"), %%ecx            \n\t"
02187         "shrl $3, %%ecx                         \n\t"
02188         "movl %%ecx, (%%"REG_d")                \n\t"
02189 
02190 
02191 
02192 
02193         "cmpl 512(%%"REG_d"), %%ecx             \n\t"
02194         " jb 2f                                 \n\t"
02195         "cmpl 516(%%"REG_d"), %%ecx             \n\t"
02196         " jb 1f                                 \n\t"
02197 
02198         "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" 
02199         "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" 
02200         "movq (%0), %%mm0                       \n\t" 
02201         "movq (%0, %2), %%mm1                   \n\t" 
02202         "movq (%0, %2, 2), %%mm2                \n\t" 
02203         "movq (%0, %%"REG_a"), %%mm3            \n\t" 
02204         "movq (%0, %2, 4), %%mm4                \n\t" 
02205         "movq (%0, %%"REG_d"), %%mm5            \n\t" 
02206         "movq (%0, %%"REG_a", 2), %%mm6         \n\t" 
02207         "movq (%0, %%"REG_c"), %%mm7            \n\t" 
02208         "movq %%mm0, (%1)                       \n\t" 
02209         "movq %%mm1, (%1, %2)                   \n\t" 
02210         "movq %%mm2, (%1, %2, 2)                \n\t" 
02211         "movq %%mm3, (%1, %%"REG_a")            \n\t" 
02212         "movq %%mm4, (%1, %2, 4)                \n\t" 
02213         "movq %%mm5, (%1, %%"REG_d")            \n\t" 
02214         "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" 
02215         "movq %%mm7, (%1, %%"REG_c")            \n\t" 
02216         "jmp 4f                                 \n\t"
02217 
02218         "1:                                     \n\t"
02219         "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" 
02220         "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" 
02221         "movq (%0), %%mm0                       \n\t" 
02222         PAVGB((%1), %%mm0)                            
02223         "movq (%0, %2), %%mm1                   \n\t" 
02224         PAVGB((%1, %2), %%mm1)                        
02225         "movq (%0, %2, 2), %%mm2                \n\t" 
02226         PAVGB((%1, %2, 2), %%mm2)                     
02227         "movq (%0, %%"REG_a"), %%mm3            \n\t" 
02228         PAVGB((%1, %%REGa), %%mm3)                    
02229         "movq (%0, %2, 4), %%mm4                \n\t" 
02230         PAVGB((%1, %2, 4), %%mm4)                     
02231         "movq (%0, %%"REG_d"), %%mm5            \n\t" 
02232         PAVGB((%1, %%REGd), %%mm5)                    
02233         "movq (%0, %%"REG_a", 2), %%mm6         \n\t" 
02234         PAVGB((%1, %%REGa, 2), %%mm6)                 
02235         "movq (%0, %%"REG_c"), %%mm7            \n\t" 
02236         PAVGB((%1, %%REGc), %%mm7)                    
02237         "movq %%mm0, (%1)                       \n\t" 
02238         "movq %%mm1, (%1, %2)                   \n\t" 
02239         "movq %%mm2, (%1, %2, 2)                \n\t" 
02240         "movq %%mm3, (%1, %%"REG_a")            \n\t" 
02241         "movq %%mm4, (%1, %2, 4)                \n\t" 
02242         "movq %%mm5, (%1, %%"REG_d")            \n\t" 
02243         "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" 
02244         "movq %%mm7, (%1, %%"REG_c")            \n\t" 
02245         "movq %%mm0, (%0)                       \n\t" 
02246         "movq %%mm1, (%0, %2)                   \n\t" 
02247         "movq %%mm2, (%0, %2, 2)                \n\t" 
02248         "movq %%mm3, (%0, %%"REG_a")            \n\t" 
02249         "movq %%mm4, (%0, %2, 4)                \n\t" 
02250         "movq %%mm5, (%0, %%"REG_d")            \n\t" 
02251         "movq %%mm6, (%0, %%"REG_a", 2)         \n\t" 
02252         "movq %%mm7, (%0, %%"REG_c")            \n\t" 
02253         "jmp 4f                                 \n\t"
02254 
02255         "2:                                     \n\t"
02256         "cmpl 508(%%"REG_d"), %%ecx             \n\t"
02257         " jb 3f                                 \n\t"
02258 
02259         "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" 
02260         "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" 
02261         "movq (%0), %%mm0                       \n\t" 
02262         "movq (%0, %2), %%mm1                   \n\t" 
02263         "movq (%0, %2, 2), %%mm2                \n\t" 
02264         "movq (%0, %%"REG_a"), %%mm3            \n\t" 
02265         "movq (%1), %%mm4                       \n\t" 
02266         "movq (%1, %2), %%mm5                   \n\t" 
02267         "movq (%1, %2, 2), %%mm6                \n\t" 
02268         "movq (%1, %%"REG_a"), %%mm7            \n\t" 
02269         PAVGB(%%mm4, %%mm0)
02270         PAVGB(%%mm5, %%mm1)
02271         PAVGB(%%mm6, %%mm2)
02272         PAVGB(%%mm7, %%mm3)
02273         PAVGB(%%mm4, %%mm0)
02274         PAVGB(%%mm5, %%mm1)
02275         PAVGB(%%mm6, %%mm2)
02276         PAVGB(%%mm7, %%mm3)
02277         "movq %%mm0, (%1)                       \n\t" 
02278         "movq %%mm1, (%1, %2)                   \n\t" 
02279         "movq %%mm2, (%1, %2, 2)                \n\t" 
02280         "movq %%mm3, (%1, %%"REG_a")            \n\t" 
02281         "movq %%mm0, (%0)                       \n\t" 
02282         "movq %%mm1, (%0, %2)                   \n\t" 
02283         "movq %%mm2, (%0, %2, 2)                \n\t" 
02284         "movq %%mm3, (%0, %%"REG_a")            \n\t" 
02285 
02286         "movq (%0, %2, 4), %%mm0                \n\t" 
02287         "movq (%0, %%"REG_d"), %%mm1            \n\t" 
02288         "movq (%0, %%"REG_a", 2), %%mm2         \n\t" 
02289         "movq (%0, %%"REG_c"), %%mm3            \n\t" 
02290         "movq (%1, %2, 4), %%mm4                \n\t" 
02291         "movq (%1, %%"REG_d"), %%mm5            \n\t" 
02292         "movq (%1, %%"REG_a", 2), %%mm6         \n\t" 
02293         "movq (%1, %%"REG_c"), %%mm7            \n\t" 
02294         PAVGB(%%mm4, %%mm0)
02295         PAVGB(%%mm5, %%mm1)
02296         PAVGB(%%mm6, %%mm2)
02297         PAVGB(%%mm7, %%mm3)
02298         PAVGB(%%mm4, %%mm0)
02299         PAVGB(%%mm5, %%mm1)
02300         PAVGB(%%mm6, %%mm2)
02301         PAVGB(%%mm7, %%mm3)
02302         "movq %%mm0, (%1, %2, 4)                \n\t" 
02303         "movq %%mm1, (%1, %%"REG_d")            \n\t" 
02304         "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" 
02305         "movq %%mm3, (%1, %%"REG_c")            \n\t" 
02306         "movq %%mm0, (%0, %2, 4)                \n\t" 
02307         "movq %%mm1, (%0, %%"REG_d")            \n\t" 
02308         "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" 
02309         "movq %%mm3, (%0, %%"REG_c")            \n\t" 
02310         "jmp 4f                                 \n\t"
02311 
02312         "3:                                     \n\t"
02313         "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" 
02314         "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" 
02315         "movq (%0), %%mm0                       \n\t" 
02316         "movq (%0, %2), %%mm1                   \n\t" 
02317         "movq (%0, %2, 2), %%mm2                \n\t" 
02318         "movq (%0, %%"REG_a"), %%mm3            \n\t" 
02319         "movq (%1), %%mm4                       \n\t" 
02320         "movq (%1, %2), %%mm5                   \n\t" 
02321         "movq (%1, %2, 2), %%mm6                \n\t" 
02322         "movq (%1, %%"REG_a"), %%mm7            \n\t" 
02323         PAVGB(%%mm4, %%mm0)
02324         PAVGB(%%mm5, %%mm1)
02325         PAVGB(%%mm6, %%mm2)
02326         PAVGB(%%mm7, %%mm3)
02327         PAVGB(%%mm4, %%mm0)
02328         PAVGB(%%mm5, %%mm1)
02329         PAVGB(%%mm6, %%mm2)
02330         PAVGB(%%mm7, %%mm3)
02331         PAVGB(%%mm4, %%mm0)
02332         PAVGB(%%mm5, %%mm1)
02333         PAVGB(%%mm6, %%mm2)
02334         PAVGB(%%mm7, %%mm3)
02335         "movq %%mm0, (%1)                       \n\t" 
02336         "movq %%mm1, (%1, %2)                   \n\t" 
02337         "movq %%mm2, (%1, %2, 2)                \n\t" 
02338         "movq %%mm3, (%1, %%"REG_a")            \n\t" 
02339         "movq %%mm0, (%0)                       \n\t" 
02340         "movq %%mm1, (%0, %2)                   \n\t" 
02341         "movq %%mm2, (%0, %2, 2)                \n\t" 
02342         "movq %%mm3, (%0, %%"REG_a")            \n\t" 
02343 
02344         "movq (%0, %2, 4), %%mm0                \n\t" 
02345         "movq (%0, %%"REG_d"), %%mm1            \n\t" 
02346         "movq (%0, %%"REG_a", 2), %%mm2         \n\t" 
02347         "movq (%0, %%"REG_c"), %%mm3            \n\t" 
02348         "movq (%1, %2, 4), %%mm4                \n\t" 
02349         "movq (%1, %%"REG_d"), %%mm5            \n\t" 
02350         "movq (%1, %%"REG_a", 2), %%mm6         \n\t" 
02351         "movq (%1, %%"REG_c"), %%mm7            \n\t" 
02352         PAVGB(%%mm4, %%mm0)
02353         PAVGB(%%mm5, %%mm1)
02354         PAVGB(%%mm6, %%mm2)
02355         PAVGB(%%mm7, %%mm3)
02356         PAVGB(%%mm4, %%mm0)
02357         PAVGB(%%mm5, %%mm1)
02358         PAVGB(%%mm6, %%mm2)
02359         PAVGB(%%mm7, %%mm3)
02360         PAVGB(%%mm4, %%mm0)
02361         PAVGB(%%mm5, %%mm1)
02362         PAVGB(%%mm6, %%mm2)
02363         PAVGB(%%mm7, %%mm3)
02364         "movq %%mm0, (%1, %2, 4)                \n\t" 
02365         "movq %%mm1, (%1, %%"REG_d")            \n\t" 
02366         "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" 
02367         "movq %%mm3, (%1, %%"REG_c")            \n\t" 
02368         "movq %%mm0, (%0, %2, 4)                \n\t" 
02369         "movq %%mm1, (%0, %%"REG_d")            \n\t" 
02370         "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" 
02371         "movq %%mm3, (%0, %%"REG_c")            \n\t" 
02372 
02373         "4:                                     \n\t"
02374 
02375         :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
02376         : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
02377     );
02378 #else //HAVE_MMX2 || HAVE_AMD3DNOW
02379 {
02380     int y;
02381     int d=0;
02382 
02383     int i;
02384 
02385     for(y=0; y<8; y++){
02386         int x;
02387         for(x=0; x<8; x++){
02388             int ref= tempBlurred[ x + y*stride ];
02389             int cur= src[ x + y*stride ];
02390             int d1=ref - cur;
02391 
02392 
02393 
02394             d+= d1*d1;
02395 
02396         }
02397     }
02398     i=d;
02399     d=  (
02400         4*d
02401         +(*(tempBlurredPast-256))
02402         +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
02403         +(*(tempBlurredPast+256))
02404         +4)>>3;
02405     *tempBlurredPast=i;
02406 
02407 
02408 
02409 
02410 
02411 
02412 
02413 
02414 
02415     if(d > maxNoise[1]){
02416         if(d < maxNoise[2]){
02417             for(y=0; y<8; y++){
02418                 int x;
02419                 for(x=0; x<8; x++){
02420                     int ref= tempBlurred[ x + y*stride ];
02421                     int cur= src[ x + y*stride ];
02422                     tempBlurred[ x + y*stride ]=
02423                     src[ x + y*stride ]=
02424                         (ref + cur + 1)>>1;
02425                 }
02426             }
02427         }else{
02428             for(y=0; y<8; y++){
02429                 int x;
02430                 for(x=0; x<8; x++){
02431                     tempBlurred[ x + y*stride ]= src[ x + y*stride ];
02432                 }
02433             }
02434         }
02435     }else{
02436         if(d < maxNoise[0]){
02437             for(y=0; y<8; y++){
02438                 int x;
02439                 for(x=0; x<8; x++){
02440                     int ref= tempBlurred[ x + y*stride ];
02441                     int cur= src[ x + y*stride ];
02442                     tempBlurred[ x + y*stride ]=
02443                     src[ x + y*stride ]=
02444                         (ref*7 + cur + 4)>>3;
02445                 }
02446             }
02447         }else{
02448             for(y=0; y<8; y++){
02449                 int x;
02450                 for(x=0; x<8; x++){
02451                     int ref= tempBlurred[ x + y*stride ];
02452                     int cur= src[ x + y*stride ];
02453                     tempBlurred[ x + y*stride ]=
02454                     src[ x + y*stride ]=
02455                         (ref*3 + cur + 2)>>2;
02456                 }
02457             }
02458         }
02459     }
02460 }
02461 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
02462 }
02463 #endif //HAVE_ALTIVEC
02464 
02465 #if HAVE_MMX
02466 
02469 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
02470     int64_t dc_mask, eq_mask, both_masks;
02471     int64_t sums[10*8*2];
02472     src+= step*3; 
02473 
02474     __asm__ volatile(
02475         "movq %0, %%mm7                         \n\t"
02476         "movq %1, %%mm6                         \n\t"
02477         : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
02478         );
02479 
02480     __asm__ volatile(
02481         "lea (%2, %3), %%"REG_a"                \n\t"
02482 
02483 
02484 
02485         "movq (%2), %%mm0                       \n\t"
02486         "movq (%%"REG_a"), %%mm1                \n\t"
02487         "movq %%mm1, %%mm3                      \n\t"
02488         "movq %%mm1, %%mm4                      \n\t"
02489         "psubb %%mm1, %%mm0                     \n\t" 
02490         "paddb %%mm7, %%mm0                     \n\t"
02491         "pcmpgtb %%mm6, %%mm0                   \n\t"
02492 
02493         "movq (%%"REG_a",%3), %%mm2             \n\t"
02494         PMAXUB(%%mm2, %%mm4)
02495         PMINUB(%%mm2, %%mm3, %%mm5)
02496         "psubb %%mm2, %%mm1                     \n\t"
02497         "paddb %%mm7, %%mm1                     \n\t"
02498         "pcmpgtb %%mm6, %%mm1                   \n\t"
02499         "paddb %%mm1, %%mm0                     \n\t"
02500 
02501         "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
02502         PMAXUB(%%mm1, %%mm4)
02503         PMINUB(%%mm1, %%mm3, %%mm5)
02504         "psubb %%mm1, %%mm2                     \n\t"
02505         "paddb %%mm7, %%mm2                     \n\t"
02506         "pcmpgtb %%mm6, %%mm2                   \n\t"
02507         "paddb %%mm2, %%mm0                     \n\t"
02508 
02509         "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
02510 
02511         "movq (%2, %3, 4), %%mm2                \n\t"
02512         PMAXUB(%%mm2, %%mm4)
02513         PMINUB(%%mm2, %%mm3, %%mm5)
02514         "psubb %%mm2, %%mm1                     \n\t"
02515         "paddb %%mm7, %%mm1                     \n\t"
02516         "pcmpgtb %%mm6, %%mm1                   \n\t"
02517         "paddb %%mm1, %%mm0                     \n\t"
02518 
02519         "movq (%%"REG_a"), %%mm1                \n\t"
02520         PMAXUB(%%mm1, %%mm4)
02521         PMINUB(%%mm1, %%mm3, %%mm5)
02522         "psubb %%mm1, %%mm2                     \n\t"
02523         "paddb %%mm7, %%mm2                     \n\t"
02524         "pcmpgtb %%mm6, %%mm2                   \n\t"
02525         "paddb %%mm2, %%mm0                     \n\t"
02526 
02527         "movq (%%"REG_a", %3), %%mm2            \n\t"
02528         PMAXUB(%%mm2, %%mm4)
02529         PMINUB(%%mm2, %%mm3, %%mm5)
02530         "psubb %%mm2, %%mm1                     \n\t"
02531         "paddb %%mm7, %%mm1                     \n\t"
02532         "pcmpgtb %%mm6, %%mm1                   \n\t"
02533         "paddb %%mm1, %%mm0                     \n\t"
02534 
02535         "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
02536         PMAXUB(%%mm1, %%mm4)
02537         PMINUB(%%mm1, %%mm3, %%mm5)
02538         "psubb %%mm1, %%mm2                     \n\t"
02539         "paddb %%mm7, %%mm2                     \n\t"
02540         "pcmpgtb %%mm6, %%mm2                   \n\t"
02541         "paddb %%mm2, %%mm0                     \n\t"
02542 
02543         "movq (%2, %3, 8), %%mm2                \n\t"
02544         PMAXUB(%%mm2, %%mm4)
02545         PMINUB(%%mm2, %%mm3, %%mm5)
02546         "psubb %%mm2, %%mm1                     \n\t"
02547         "paddb %%mm7, %%mm1                     \n\t"
02548         "pcmpgtb %%mm6, %%mm1                   \n\t"
02549         "paddb %%mm1, %%mm0                     \n\t"
02550 
02551         "movq (%%"REG_a", %3, 4), %%mm1         \n\t"
02552         "psubb %%mm1, %%mm2                     \n\t"
02553         "paddb %%mm7, %%mm2                     \n\t"
02554         "pcmpgtb %%mm6, %%mm2                   \n\t"
02555         "paddb %%mm2, %%mm0                     \n\t"
02556         "psubusb %%mm3, %%mm4                   \n\t"
02557 
02558         "pxor %%mm6, %%mm6                      \n\t"
02559         "movq %4, %%mm7                         \n\t" 
02560         "paddusb %%mm7, %%mm7                   \n\t" 
02561         "psubusb %%mm4, %%mm7                   \n\t" 
02562         "pcmpeqb %%mm6, %%mm7                   \n\t" 
02563         "pcmpeqb %%mm6, %%mm7                   \n\t" 
02564         "movq %%mm7, %1                         \n\t"
02565 
02566         "movq %5, %%mm7                         \n\t"
02567         "punpcklbw %%mm7, %%mm7                 \n\t"
02568         "punpcklbw %%mm7, %%mm7                 \n\t"
02569         "punpcklbw %%mm7, %%mm7                 \n\t"
02570         "psubb %%mm0, %%mm6                     \n\t"
02571         "pcmpgtb %%mm7, %%mm6                   \n\t"
02572         "movq %%mm6, %0                         \n\t"
02573 
02574         : "=m" (eq_mask), "=m" (dc_mask)
02575         : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
02576         : "%"REG_a
02577     );
02578 
02579     both_masks = dc_mask & eq_mask;
02580 
02581     if(both_masks){
02582         x86_reg offset= -8*step;
02583         int64_t *temp_sums= sums;
02584 
02585         __asm__ volatile(
02586             "movq %2, %%mm0                         \n\t"  
02587             "pxor %%mm4, %%mm4                      \n\t"
02588 
02589             "movq (%0), %%mm6                       \n\t"
02590             "movq (%0, %1), %%mm5                   \n\t"
02591             "movq %%mm5, %%mm1                      \n\t"
02592             "movq %%mm6, %%mm2                      \n\t"
02593             "psubusb %%mm6, %%mm5                   \n\t"
02594             "psubusb %%mm1, %%mm2                   \n\t"
02595             "por %%mm5, %%mm2                       \n\t" 
02596             "psubusb %%mm2, %%mm0                   \n\t" 
02597             "pcmpeqb %%mm4, %%mm0                   \n\t" 
02598 
02599             "pxor %%mm6, %%mm1                      \n\t"
02600             "pand %%mm0, %%mm1                      \n\t"
02601             "pxor %%mm1, %%mm6                      \n\t"
02602             
02603 
02604             "movq (%0, %1, 8), %%mm5                \n\t"
02605             "add %1, %0                             \n\t" 
02606             "movq (%0, %1, 8), %%mm7                \n\t"
02607             "movq %%mm5, %%mm1                      \n\t"
02608             "movq %%mm7, %%mm2                      \n\t"
02609             "psubusb %%mm7, %%mm5                   \n\t"
02610             "psubusb %%mm1, %%mm2                   \n\t"
02611             "por %%mm5, %%mm2                       \n\t" 
02612             "movq %2, %%mm0                         \n\t"  
02613             "psubusb %%mm2, %%mm0                   \n\t" 
02614             "pcmpeqb %%mm4, %%mm0                   \n\t" 
02615 
02616             "pxor %%mm7, %%mm1                      \n\t"
02617             "pand %%mm0, %%mm1                      \n\t"
02618             "pxor %%mm1, %%mm7                      \n\t"
02619 
02620             "movq %%mm6, %%mm5                      \n\t"
02621             "punpckhbw %%mm4, %%mm6                 \n\t"
02622             "punpcklbw %%mm4, %%mm5                 \n\t"
02623             
02624 
02625             "movq %%mm5, %%mm0                      \n\t"
02626             "movq %%mm6, %%mm1                      \n\t"
02627             "psllw $2, %%mm0                        \n\t"
02628             "psllw $2, %%mm1                        \n\t"
02629             "paddw "MANGLE(w04)", %%mm0             \n\t"
02630             "paddw "MANGLE(w04)", %%mm1             \n\t"
02631 
// NEXT: asm fragment that adds one 8-byte row to the running word sums.
// The row at (%0) is loaded into both mm2 and mm3, then %0 is advanced by %1.
// mm2 keeps the low four bytes and mm3 the high four bytes, each interleaved
// with mm4 (zeroed by the pxor at the start of the enclosing asm statement, so
// every byte becomes a zero-extended 16-bit word).
// The widened values are accumulated into mm0 (low half) and mm1 (high half).
// Clobbers mm2 and mm3.
02632 #define NEXT\
02633             "movq (%0), %%mm2                       \n\t"\
02634             "movq (%0), %%mm3                       \n\t"\
02635             "add %1, %0                             \n\t"\
02636             "punpcklbw %%mm4, %%mm2                 \n\t"\
02637             "punpckhbw %%mm4, %%mm3                 \n\t"\
02638             "paddw %%mm2, %%mm0                     \n\t"\
02639             "paddw %%mm3, %%mm1                     \n\t"
02640 
// PREV: asm fragment that removes one 8-byte row from the running word sums.
// Identical to NEXT except for the final step: the row at (%0) is loaded into
// mm2 and mm3, %0 is advanced by %1, the bytes are widened to words by
// interleaving with mm4 (zeroed by the pxor at the start of the enclosing asm
// statement), and the widened values are subtracted from mm0 (low half) and
// mm1 (high half) instead of added.
// Clobbers mm2 and mm3.
02641 #define PREV\
02642             "movq (%0), %%mm2                       \n\t"\
02643             "movq (%0), %%mm3                       \n\t"\
02644             "add %1, %0                             \n\t"\
02645             "punpcklbw %%mm4, %%mm2                 \n\t"\
02646             "punpckhbw %%mm4, %%mm3                 \n\t"\
02647             "psubw %%mm2, %%mm0                     \n\t"\
02648             "psubw %%mm3, %%mm1                     \n\t"
02649 
02650 
02651             NEXT 
02652             NEXT 
02653             NEXT 
02654             "movq %%mm0, (%3)                       \n\t"
02655             "movq %%mm1, 8(%3)                      \n\t"
02656 
02657             NEXT 
02658             "psubw %%mm5, %%mm0                     \n\t"
02659             "psubw %%mm6, %%mm1                     \n\t"
02660             "movq %%mm0, 16(%3)                     \n\t"
02661             "movq %%mm1, 24(%3)                     \n\t"
02662 
02663             NEXT 
02664             "psubw %%mm5, %%mm0                     \n\t"
02665             "psubw %%mm6, %%mm1                     \n\t"
02666             "movq %%mm0, 32(%3)                     \n\t"
02667             "movq %%mm1, 40(%3)                     \n\t"
02668 
02669             NEXT 
02670             "psubw %%mm5, %%mm0                     \n\t"
02671             "psubw %%mm6, %%mm1                     \n\t"
02672             "movq %%mm0, 48(%3)                     \n\t"
02673             "movq %%mm1, 56(%3)                     \n\t"
02674 
02675             NEXT 
02676             "psubw %%mm5, %%mm0                     \n\t"
02677             "psubw %%mm6, %%mm1                     \n\t"
02678             "movq %%mm0, 64(%3)                     \n\t"
02679             "movq %%mm1, 72(%3)                     \n\t"
02680 
02681             "movq %%mm7, %%mm6                      \n\t"
02682             "punpckhbw %%mm4, %%mm7                 \n\t"
02683             "punpcklbw %%mm4, %%mm6                 \n\t"
02684 
02685             NEXT 
02686             "mov %4, %0                             \n\t"
02687             "add %1, %0                             \n\t"
02688             PREV 
02689             "movq %%mm0, 80(%3)                     \n\t"
02690             "movq %%mm1, 88(%3)                     \n\t"
02691 
02692             PREV 
02693             "paddw %%mm6, %%mm0                     \n\t"
02694             "paddw %%mm7, %%mm1                     \n\t"
02695             "movq %%mm0, 96(%3)                     \n\t"
02696             "movq %%mm1, 104(%3)                    \n\t"
02697 
02698             PREV 
02699             "paddw %%mm6, %%mm0                     \n\t"
02700             "paddw %%mm7, %%mm1                     \n\t"
02701             "movq %%mm0, 112(%3)                    \n\t"
02702             "movq %%mm1, 120(%3)                    \n\t"
02703 
02704             PREV 
02705             "paddw %%mm6, %%mm0                     \n\t"
02706             "paddw %%mm7, %%mm1                     \n\t"
02707             "movq %%mm0, 128(%3)                    \n\t"
02708             "movq %%mm1, 136(%3)                    \n\t"
02709 
02710             PREV 
02711             "paddw %%mm6, %%mm0                     \n\t"
02712             "paddw %%mm7, %%mm1                     \n\t"
02713             "movq %%mm0, 144(%3)                    \n\t"
02714             "movq %%mm1, 152(%3)                    \n\t"
02715 
02716             "mov %4, %0                             \n\t" 
02717 
02718             : "+&r"(src)
02719             : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
02720         );
02721 
02722         src+= step; 
02723 
02724         __asm__ volatile(
02725             "movq %4, %%mm6                         \n\t"
02726             "pcmpeqb %%mm5, %%mm5                   \n\t"
02727             "pxor %%mm6, %%mm5                      \n\t"
02728             "pxor %%mm7, %%mm7                      \n\t"
02729 
02730             "1:                                     \n\t"
02731             "movq (%1), %%mm0                       \n\t"
02732             "movq 8(%1), %%mm1                      \n\t"
02733             "paddw 32(%1), %%mm0                    \n\t"
02734             "paddw 40(%1), %%mm1                    \n\t"
02735             "movq (%0, %3), %%mm2                   \n\t"
02736             "movq %%mm2, %%mm3                      \n\t"
02737             "movq %%mm2, %%mm4                      \n\t"
02738             "punpcklbw %%mm7, %%mm2                 \n\t"
02739             "punpckhbw %%mm7, %%mm3                 \n\t"
02740             "paddw %%mm2, %%mm0                     \n\t"
02741             "paddw %%mm3, %%mm1                     \n\t"
02742             "paddw %%mm2, %%mm0                     \n\t"
02743             "paddw %%mm3, %%mm1                     \n\t"
02744             "psrlw $4, %%mm0                        \n\t"
02745             "psrlw $4, %%mm1                        \n\t"
02746             "packuswb %%mm1, %%mm0                  \n\t"
02747             "pand %%mm6, %%mm0                      \n\t"
02748             "pand %%mm5, %%mm4                      \n\t"
02749             "por %%mm4, %%mm0                       \n\t"
02750             "movq %%mm0, (%0, %3)                   \n\t"
02751             "add $16, %1                            \n\t"
02752             "add %2, %0                             \n\t"
02753             " js 1b                                 \n\t"
02754 
02755             : "+r"(offset), "+r"(temp_sums)
02756             : "r" ((x86_reg)step), "r"(src - offset), "m"(both_masks)
02757         );
02758     }else
02759         src+= step; 
02760 
02761     if(eq_mask != -1LL){
02762         uint8_t *temp_src= src;
02763         DECLARE_ALIGNED(8, uint64_t, tmp)[4]; 
02764         __asm__ volatile(
02765             "pxor %%mm7, %%mm7                      \n\t"
02766 
02767 
02768 
02769             "movq (%0), %%mm0                       \n\t"
02770             "movq %%mm0, %%mm1                      \n\t"
02771             "punpcklbw %%mm7, %%mm0                 \n\t" 
02772             "punpckhbw %%mm7, %%mm1                 \n\t" 
02773 
02774             "movq (%0, %1), %%mm2                   \n\t"
02775             "lea (%0, %1, 2), %%"REG_a"             \n\t"
02776             "movq %%mm2, %%mm3                      \n\t"
02777             "punpcklbw %%mm7, %%mm2                 \n\t" 
02778             "punpckhbw %%mm7, %%mm3                 \n\t" 
02779 
02780             "movq (%%"REG_a"), %%mm4                \n\t"
02781             "movq %%mm4, %%mm5                      \n\t"
02782             "punpcklbw %%mm7, %%mm4                 \n\t" 
02783             "punpckhbw %%mm7, %%mm5                 \n\t" 
02784 
02785             "paddw %%mm0, %%mm0                     \n\t" 
02786             "paddw %%mm1, %%mm1                     \n\t" 
02787             "psubw %%mm4, %%mm2                     \n\t" 
02788             "psubw %%mm5, %%mm3                     \n\t" 
02789             "psubw %%mm2, %%mm0                     \n\t" 
02790             "psubw %%mm3, %%mm1                     \n\t" 
02791 
02792             "psllw $2, %%mm2                        \n\t" 
02793             "psllw $2, %%mm3                        \n\t" 
02794             "psubw %%mm2, %%mm0                     \n\t" 
02795             "psubw %%mm3, %%mm1                     \n\t" 
02796 
02797             "movq (%%"REG_a", %1), %%mm2            \n\t"
02798             "movq %%mm2, %%mm3                      \n\t"
02799             "punpcklbw %%mm7, %%mm2                 \n\t" 
02800             "punpckhbw %%mm7, %%mm3                 \n\t" 
02801 
02802             "psubw %%mm2, %%mm0                     \n\t" 
02803             "psubw %%mm3, %%mm1                     \n\t" 
02804             "psubw %%mm2, %%mm0                     \n\t" 
02805             "psubw %%mm3, %%mm1                     \n\t" 
02806             "movq %%mm0, (%4)                       \n\t" 
02807             "movq %%mm1, 8(%4)                      \n\t" 
02808 
02809             "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
02810             "movq %%mm0, %%mm1                      \n\t"
02811             "punpcklbw %%mm7, %%mm0                 \n\t" 
02812             "punpckhbw %%mm7, %%mm1                 \n\t" 
02813 
02814             "psubw %%mm0, %%mm2                     \n\t" 
02815             "psubw %%mm1, %%mm3                     \n\t" 
02816             "movq %%mm2, 16(%4)                     \n\t" 
02817             "movq %%mm3, 24(%4)                     \n\t" 
02818             "paddw %%mm4, %%mm4                     \n\t" 
02819             "paddw %%mm5, %%mm5                     \n\t" 
02820             "psubw %%mm2, %%mm4                     \n\t" 
02821             "psubw %%mm3, %%mm5                     \n\t" 
02822 
02823             "lea (%%"REG_a", %1), %0                \n\t"
02824             "psllw $2, %%mm2                        \n\t" 
02825             "psllw $2, %%mm3                        \n\t" 
02826             "psubw %%mm2, %%mm4                     \n\t" 
02827             "psubw %%mm3, %%mm5                     \n\t" 
02828 
02829             "movq (%0, %1, 2), %%mm2                \n\t"
02830             "movq %%mm2, %%mm3                      \n\t"
02831             "punpcklbw %%mm7, %%mm2                 \n\t" 
02832             "punpckhbw %%mm7, %%mm3                 \n\t" 
02833             "psubw %%mm2, %%mm4                     \n\t" 
02834             "psubw %%mm3, %%mm5                     \n\t" 
02835             "psubw %%mm2, %%mm4                     \n\t" 
02836             "psubw %%mm3, %%mm5                     \n\t" 
02837 
02838             "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
02839             "punpcklbw %%mm7, %%mm6                 \n\t" 
02840             "psubw %%mm6, %%mm2                     \n\t" 
02841             "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
02842             "punpckhbw %%mm7, %%mm6                 \n\t" 
02843             "psubw %%mm6, %%mm3                     \n\t" 
02844 
02845             "paddw %%mm0, %%mm0                     \n\t" 
02846             "paddw %%mm1, %%mm1                     \n\t" 
02847             "psubw %%mm2, %%mm0                     \n\t" 
02848             "psubw %%mm3, %%mm1                     \n\t" 
02849 
02850             "psllw $2, %%mm2                        \n\t" 
02851             "psllw $2, %%mm3                        \n\t" 
02852             "psubw %%mm2, %%mm0                     \n\t" 
02853             "psubw %%mm3, %%mm1                     \n\t" 
02854 
02855             "movq (%0, %1, 4), %%mm2                \n\t"
02856             "movq %%mm2, %%mm3                      \n\t"
02857             "punpcklbw %%mm7, %%mm2                 \n\t" 
02858             "punpckhbw %%mm7, %%mm3                 \n\t" 
02859 
02860             "paddw %%mm2, %%mm2                     \n\t" 
02861             "paddw %%mm3, %%mm3                     \n\t" 
02862             "psubw %%mm2, %%mm0                     \n\t" 
02863             "psubw %%mm3, %%mm1                     \n\t" 
02864 
02865             "movq (%4), %%mm2                       \n\t" 
02866             "movq 8(%4), %%mm3                      \n\t" 
02867 
02868 #if HAVE_MMX2
02869             "movq %%mm7, %%mm6                      \n\t" 
02870             "psubw %%mm0, %%mm6                     \n\t"
02871             "pmaxsw %%mm6, %%mm0                    \n\t" 
02872             "movq %%mm7, %%mm6                      \n\t" 
02873             "psubw %%mm1, %%mm6                     \n\t"
02874             "pmaxsw %%mm6, %%mm1                    \n\t" 
02875             "movq %%mm7, %%mm6                      \n\t" 
02876             "psubw %%mm2, %%mm6                     \n\t"
02877             "pmaxsw %%mm6, %%mm2                    \n\t" 
02878             "movq %%mm7, %%mm6                      \n\t" 
02879             "psubw %%mm3, %%mm6                     \n\t"
02880             "pmaxsw %%mm6, %%mm3                    \n\t" 
02881 #else
02882             "movq %%mm7, %%mm6                      \n\t" 
02883             "pcmpgtw %%mm0, %%mm6                   \n\t"
02884             "pxor %%mm6, %%mm0                      \n\t"
02885             "psubw %%mm6, %%mm0                     \n\t" 
02886             "movq %%mm7, %%mm6                      \n\t" 
02887             "pcmpgtw %%mm1, %%mm6                   \n\t"
02888             "pxor %%mm6, %%mm1                      \n\t"
02889             "psubw %%mm6, %%mm1                     \n\t" 
02890             "movq %%mm7, %%mm6                      \n\t" 
02891             "pcmpgtw %%mm2, %%mm6                   \n\t"
02892             "pxor %%mm6, %%mm2                      \n\t"
02893             "psubw %%mm6, %%mm2                     \n\t" 
02894             "movq %%mm7, %%mm6                      \n\t" 
02895             "pcmpgtw %%mm3, %%mm6                   \n\t"
02896             "pxor %%mm6, %%mm3                      \n\t"
02897             "psubw %%mm6, %%mm3                     \n\t" 
02898 #endif
02899 
02900 #if HAVE_MMX2
02901             "pminsw %%mm2, %%mm0                    \n\t"
02902             "pminsw %%mm3, %%mm1                    \n\t"
02903 #else
02904             "movq %%mm0, %%mm6                      \n\t"
02905             "psubusw %%mm2, %%mm6                   \n\t"
02906             "psubw %%mm6, %%mm0                     \n\t"
02907             "movq %%mm1, %%mm6                      \n\t"
02908             "psubusw %%mm3, %%mm6                   \n\t"
02909             "psubw %%mm6, %%mm1                     \n\t"
02910 #endif
02911 
02912             "movd %2, %%mm2                         \n\t" 
02913             "punpcklbw %%mm7, %%mm2                 \n\t"
02914 
02915             "movq %%mm7, %%mm6                      \n\t" 
02916             "pcmpgtw %%mm4, %%mm6                   \n\t" 
02917             "pxor %%mm6, %%mm4                      \n\t"
02918             "psubw %%mm6, %%mm4                     \n\t" 
02919             "pcmpgtw %%mm5, %%mm7                   \n\t" 
02920             "pxor %%mm7, %%mm5                      \n\t"
02921             "psubw %%mm7, %%mm5                     \n\t" 
02922 
02923             "psllw $3, %%mm2                        \n\t" 
02924             "movq %%mm2, %%mm3                      \n\t" 
02925             "pcmpgtw %%mm4, %%mm2                   \n\t"
02926             "pcmpgtw %%mm5, %%mm3                   \n\t"
02927             "pand %%mm2, %%mm4                      \n\t"
02928             "pand %%mm3, %%mm5                      \n\t"
02929 
02930 
02931             "psubusw %%mm0, %%mm4                   \n\t" 
02932             "psubusw %%mm1, %%mm5                   \n\t" 
02933 
02934 
02935             "movq "MANGLE(w05)", %%mm2              \n\t" 
02936             "pmullw %%mm2, %%mm4                    \n\t"
02937             "pmullw %%mm2, %%mm5                    \n\t"
02938             "movq "MANGLE(w20)", %%mm2              \n\t" 
02939             "paddw %%mm2, %%mm4                     \n\t"
02940             "paddw %%mm2, %%mm5                     \n\t"
02941             "psrlw $6, %%mm4                        \n\t"
02942             "psrlw $6, %%mm5                        \n\t"
02943 
02944             "movq 16(%4), %%mm0                     \n\t" 
02945             "movq 24(%4), %%mm1                     \n\t" 
02946 
02947             "pxor %%mm2, %%mm2                      \n\t"
02948             "pxor %%mm3, %%mm3                      \n\t"
02949 
02950             "pcmpgtw %%mm0, %%mm2                   \n\t" 
02951             "pcmpgtw %%mm1, %%mm3                   \n\t" 
02952             "pxor %%mm2, %%mm0                      \n\t"
02953             "pxor %%mm3, %%mm1                      \n\t"
02954             "psubw %%mm2, %%mm0                     \n\t" 
02955             "psubw %%mm3, %%mm1                     \n\t" 
02956             "psrlw $1, %%mm0                        \n\t" 
02957             "psrlw $1, %%mm1                        \n\t" 
02958 
02959             "pxor %%mm6, %%mm2                      \n\t"
02960             "pxor %%mm7, %%mm3                      \n\t"
02961             "pand %%mm2, %%mm4                      \n\t"
02962             "pand %%mm3, %%mm5                      \n\t"
02963 
02964 #if HAVE_MMX2
02965             "pminsw %%mm0, %%mm4                    \n\t"
02966             "pminsw %%mm1, %%mm5                    \n\t"
02967 #else
02968             "movq %%mm4, %%mm2                      \n\t"
02969             "psubusw %%mm0, %%mm2                   \n\t"
02970             "psubw %%mm2, %%mm4                     \n\t"
02971             "movq %%mm5, %%mm2                      \n\t"
02972             "psubusw %%mm1, %%mm2                   \n\t"
02973             "psubw %%mm2, %%mm5                     \n\t"
02974 #endif
02975             "pxor %%mm6, %%mm4                      \n\t"
02976             "pxor %%mm7, %%mm5                      \n\t"
02977             "psubw %%mm6, %%mm4                     \n\t"
02978             "psubw %%mm7, %%mm5                     \n\t"
02979             "packsswb %%mm5, %%mm4                  \n\t"
02980             "movq %3, %%mm1                         \n\t"
02981             "pandn %%mm4, %%mm1                     \n\t"
02982             "movq (%0), %%mm0                       \n\t"
02983             "paddb   %%mm1, %%mm0                   \n\t"
02984             "movq %%mm0, (%0)                       \n\t"
02985             "movq (%0, %1), %%mm0                   \n\t"
02986             "psubb %%mm1, %%mm0                     \n\t"
02987             "movq %%mm0, (%0, %1)                   \n\t"
02988 
02989             : "+r" (temp_src)
02990             : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
02991             : "%"REG_a
02992         );
02993     }
02994 
02995 
02996 
02997 
02998 
02999 }
03000 #endif //HAVE_MMX
03001 
/* Forward declaration; the definition appears further down in this file. */
03002 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03003                                 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
03004 
03009 #undef REAL_SCALED_CPY
03010 #undef SCALED_CPY
03011 
/**
 * Copy a block of 8 rows from src to dst.
 *
 * MMX builds copy 8 bytes per row (one movq). When levelFix is non-zero each
 * byte is additionally rescaled: the first 8 bytes at packedOffsetAndScale are
 * loaded as a packed 16-bit offset (mm2) and the following 8 bytes as a packed
 * 16-bit scale (mm3).
 * NOTE(review): the caller in this file passes &c.packedYOffset, so the scale
 * is presumably the adjacent packedYScale field - confirm the struct layout.
 *
 * Non-MMX builds ignore levelFix and packedOffsetAndScale: both branches do a
 * plain memcpy of BLOCK_SIZE bytes for each of the 8 rows.
 *
 * @param dst                  destination of the first row
 * @param dstStride            distance in bytes between destination rows
 * @param src                  source of the first row
 * @param srcStride            distance in bytes between source rows
 * @param levelFix             non-zero selects the rescaling copy (MMX only)
 * @param packedOffsetAndScale pointer to the packed offset followed by the packed scale
 */
03012 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
03013                                      int levelFix, int64_t *packedOffsetAndScale)
03014 {
03015 #if !HAVE_MMX
03016     int i;
03017 #endif
03018     if(levelFix){
03019 #if HAVE_MMX
    /* Operands: %0/%1 = packedOffsetAndScale (pinned to the a register),
     * %2 = src, %3 = dst, %4 = srcStride, %5 = dstStride. */
03020     __asm__ volatile(
03021         "movq (%%"REG_a"), %%mm2        \n\t" /* mm2 = packed offset */
03022         "movq 8(%%"REG_a"), %%mm3       \n\t" /* mm3 = packed scale */
        /* The a register is reused from here on: a = src + srcStride, d = dst + dstStride. */
03023         "lea (%2,%4), %%"REG_a"         \n\t"
03024         "lea (%3,%5), %%"REG_d"         \n\t"
03025         "pxor %%mm4, %%mm4              \n\t"
        /* REAL_SCALED_CPY processes two rows per expansion.
         * MMX2 variant: duplicate each byte into a 16-bit word, take the high
         * half of the unsigned product with the scale, subtract the offset and
         * pack back to bytes with unsigned saturation. */
03026 #if HAVE_MMX2
03027 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
03028         "movq " #src1 ", %%mm0          \n\t"\
03029         "movq " #src1 ", %%mm5          \n\t"\
03030         "movq " #src2 ", %%mm1          \n\t"\
03031         "movq " #src2 ", %%mm6          \n\t"\
03032         "punpcklbw %%mm0, %%mm0         \n\t"\
03033         "punpckhbw %%mm5, %%mm5         \n\t"\
03034         "punpcklbw %%mm1, %%mm1         \n\t"\
03035         "punpckhbw %%mm6, %%mm6         \n\t"\
03036         "pmulhuw %%mm3, %%mm0           \n\t"\
03037         "pmulhuw %%mm3, %%mm5           \n\t"\
03038         "pmulhuw %%mm3, %%mm1           \n\t"\
03039         "pmulhuw %%mm3, %%mm6           \n\t"\
03040         "psubw %%mm2, %%mm0             \n\t"\
03041         "psubw %%mm2, %%mm5             \n\t"\
03042         "psubw %%mm2, %%mm1             \n\t"\
03043         "psubw %%mm2, %%mm6             \n\t"\
03044         "packuswb %%mm5, %%mm0          \n\t"\
03045         "packuswb %%mm6, %%mm1          \n\t"\
03046         "movq %%mm0, " #dst1 "          \n\t"\
03047         "movq %%mm1, " #dst2 "          \n\t"\
03048 
        /* Plain MMX variant: zero-extend bytes to words (mm4 is zero), subtract
         * the offset, shift left by 6, take the high half of the signed product
         * with the scale and pack back to bytes with unsigned saturation. */
03049 #else //HAVE_MMX2
03050 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                        \
03051         "movq " #src1 ", %%mm0          \n\t"\
03052         "movq " #src1 ", %%mm5          \n\t"\
03053         "punpcklbw %%mm4, %%mm0         \n\t"\
03054         "punpckhbw %%mm4, %%mm5         \n\t"\
03055         "psubw %%mm2, %%mm0             \n\t"\
03056         "psubw %%mm2, %%mm5             \n\t"\
03057         "movq " #src2 ", %%mm1          \n\t"\
03058         "psllw $6, %%mm0                \n\t"\
03059         "psllw $6, %%mm5                \n\t"\
03060         "pmulhw %%mm3, %%mm0            \n\t"\
03061         "movq " #src2 ", %%mm6          \n\t"\
03062         "pmulhw %%mm3, %%mm5            \n\t"\
03063         "punpcklbw %%mm4, %%mm1         \n\t"\
03064         "punpckhbw %%mm4, %%mm6         \n\t"\
03065         "psubw %%mm2, %%mm1             \n\t"\
03066         "psubw %%mm2, %%mm6             \n\t"\
03067         "psllw $6, %%mm1                \n\t"\
03068         "psllw $6, %%mm6                \n\t"\
03069         "pmulhw %%mm3, %%mm1            \n\t"\
03070         "pmulhw %%mm3, %%mm6            \n\t"\
03071         "packuswb %%mm5, %%mm0          \n\t"\
03072         "packuswb %%mm6, %%mm1          \n\t"\
03073         "movq %%mm0, " #dst1 "          \n\t"\
03074         "movq %%mm1, " #dst2 "          \n\t"\
03075 
03076 #endif //HAVE_MMX2
        /* Two-level macro so that the arguments are macro-expanded before being
         * stringified (REGa/REGd are presumably macros defined elsewhere). */
03077 #define SCALED_CPY(src1, src2, dst1, dst2)\
03078    REAL_SCALED_CPY(src1, src2, dst1, dst2)
03079 
        /* Rows 0-1, 2-3 and 4-5; then a/d advance by four rows for rows 6-7. */
03080 SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
03081 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
03082 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
03083         "lea (%%"REG_a",%4,4), %%"REG_a"        \n\t"
03084         "lea (%%"REG_d",%5,4), %%"REG_d"        \n\t"
03085 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
03086 
03087 
        /* The a register is declared as an output because the asm overwrites it. */
03088         : "=&a" (packedOffsetAndScale)
03089         : "0" (packedOffsetAndScale),
03090         "r"(src),
03091         "r"(dst),
03092         "r" ((x86_reg)srcStride),
03093         "r" ((x86_reg)dstStride)
        /* "memory": the asm reads *packedOffsetAndScale and the src rows and
         * writes the dst rows through pointers the compiler cannot see into. */
03094         : "%"REG_d, "memory"
03095     );
03096 #else //HAVE_MMX
03097     for(i=0; i<8; i++)
03098         memcpy( &(dst[dstStride*i]),
03099                 &(src[srcStride*i]), BLOCK_SIZE);
03100 #endif //HAVE_MMX
03101     }else{
03102 #if HAVE_MMX
    /* Unscaled copy. Operands: %0 = src, %1 = dst, %2 = srcStride, %3 = dstStride. */
03103     __asm__ volatile(
03104         "lea (%0,%2), %%"REG_a"                 \n\t"
03105         "lea (%1,%3), %%"REG_d"                 \n\t"
03106 
03107 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \
03108         "movq " #src1 ", %%mm0          \n\t"\
03109         "movq " #src2 ", %%mm1          \n\t"\
03110         "movq %%mm0, " #dst1 "          \n\t"\
03111         "movq %%mm1, " #dst2 "          \n\t"\
03112 
03113 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
03114    REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
03115 
        /* Same row addressing scheme as the scaled copy above. */
03116 SIMPLE_CPY((%0)       , (%0, %2)       , (%1)       , (%1, %3))
03117 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
03118 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
03119         "lea (%%"REG_a",%2,4), %%"REG_a"        \n\t"
03120         "lea (%%"REG_d",%3,4), %%"REG_d"        \n\t"
03121 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
03122 
03123         : : "r" (src),
03124         "r" (dst),
03125         "r" ((x86_reg)srcStride),
03126         "r" ((x86_reg)dstStride)
        /* "memory": src rows are read and dst rows written via register pointers. */
03127         : "%"REG_a, "%"REG_d, "memory"
03128     );
03129 #else //HAVE_MMX
03130     for(i=0; i<8; i++)
03131         memcpy( &(dst[dstStride*i]),
03132                 &(src[srcStride*i]), BLOCK_SIZE);
03133 #endif //HAVE_MMX
03134     }
03135 }
03136 
/**
 * Replicate the 8 bytes at src into the three rows directly before it,
 * i.e. to src - stride, src - 2*stride and src - 3*stride.
 *
 * @param src    first byte of the row to replicate
 * @param stride distance in bytes between consecutive rows
 */
03140 static inline void RENAME(duplicate)(uint8_t src[], int stride)
03141 {
03142 #if HAVE_MMX
    /* %0 = src (updated in place), %1 = -stride. After the add, %0 points one
     * row before src and the three stores walk further back from there. */
03143     __asm__ volatile(
03144         "movq (%0), %%mm0               \n\t"
03145         "add %1, %0                     \n\t"
03146         "movq %%mm0, (%0)               \n\t"
03147         "movq %%mm0, (%0, %1)           \n\t"
03148         "movq %%mm0, (%0, %1, 2)        \n\t"
03149         : "+r" (src)
03150         : "r" ((x86_reg)-stride)
        /* "memory": the stores go through a register pointer, so the compiler
         * must be told that memory it cannot see into is modified. */
        : "memory"
03151     );
03152 #else
03153     int i;
03154     uint8_t *p=src;
    /* Step back one row at a time and copy the original row there. */
03155     for(i=0; i<3; i++){
03156         p-= stride;
03157         memcpy(p, src, 8);
03158     }
03159 #endif
03160 }
03161 
/**
 * Post-process one image plane.
 *
 * The plane is copied from src to dst in 8x8 blocks (through blockCopy, with
 * level fixing when LEVEL_FIX is set in the mode) and the deinterlacing,
 * deblocking, deringing and temporal noise filters selected by the mode bits
 * are applied to dst. All work is done on a local copy of *c2, which is
 * written back to *c2 before returning.
 *
 * @param src       source plane
 * @param srcStride distance in bytes between source rows
 * @param dst       destination plane
 * @param dstStride distance in bytes between destination rows
 * @param width     plane width in pixels
 * @param height    plane height in pixels
 * @param QPs       quantizer table, indexed per block row/column after shifting
 * @param QPStride  distance in entries between rows of QPs
 * @param isColor   non-zero: use chromMode and the chroma subsample shifts;
 *                  zero: use lumMode and update the luma histogram
 * @param c2        context; read at entry, overwritten at exit
 */
03165 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03166                                 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
03167 {
03168     DECLARE_ALIGNED(8, PPContext, c)= *c2; /* aligned local working copy of the context */
03169     int x,y;
03170 #ifdef COMPILE_TIME_MODE
03171     const int mode= COMPILE_TIME_MODE;
03172 #else
03173     const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
03174 #endif
03175     int black=0, white=255; /* histogram cut-off levels; recomputed below for luma */
    /* Correction factor applied to QP as (QP*QPCorrecture + 256*128)>>16, so 256*256 leaves QP unchanged. */
03176     int QPCorrecture= 256*256;
03177 
03178     int copyAhead;
03179 #if HAVE_MMX
03180     int i;
03181 #endif
03182 
    /* Shifts that turn pixel coordinates into indices of the QP tables. */
03183     const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
03184     const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
03185 
03186     /* Shortcuts into buffers owned by the context. */
03187     uint64_t * const yHistogram= c.yHistogram;
    /* NOTE(review): for a negative stride the base is moved 23 rows in, presumably so that
     * rows addressed with the negative stride stay inside the buffer - confirm against the allocation. */
03188     uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
03189     uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
03190     
03191 
03192 #if HAVE_MMX
    /* Precompute, for indices 0..56, the DC offset and threshold replicated into all
     * 8 bytes of a 64-bit word (the multiplication by 0x0101... does the replication). */
03193     for(i=0; i<57; i++){
03194         int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
03195         int threshold= offset*2 + 1;
03196         c.mmxDcOffset[i]= 0x7F - offset;
03197         c.mmxDcThreshold[i]= 0x7F - threshold;
03198         c.mmxDcOffset[i]*= 0x0101010101010101LL;
03199         c.mmxDcThreshold[i]*= 0x0101010101010101LL;
03200     }
03201 #endif
03202 
    /* Pick copyAhead from the enabled filters; the first matching branch wins.
     * 8 is subtracted afterwards, so the stored value is relative to one block height. */
03203     if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
03204     else if(   (mode & LINEAR_BLEND_DEINT_FILTER)
03205             || (mode & FFMPEG_DEINT_FILTER)
03206             || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
03207     else if(   (mode & V_DEBLOCK)
03208             || (mode & LINEAR_IPOL_DEINT_FILTER)
03209             || (mode & MEDIAN_DEINT_FILTER)
03210             || (mode & V_A_DEBLOCK)) copyAhead=13;
03211     else if(mode & V_X1_FILTER) copyAhead=11;
03212 
03213     else if(mode & DERING) copyAhead=9;
03214     else copyAhead=8;
03215 
03216     copyAhead-= 8;
03217 
03218     if(!isColor){
        /* Luma plane: derive the level-fix scale and offset from the luma histogram. */
03219         uint64_t sum= 0;
03220         int i;
03221         uint64_t maxClipped;
03222         uint64_t clipped;
03223         double scale;
03224 
03225         c.frameNum++;
03226         /* On the first frame only, seed bin 0 with a value derived from the plane size. */
03227         if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
03228 
03229         for(i=0; i<256; i++){
03230             sum+= yHistogram[i];
03231         }
03232 
03233         /* Number of histogram samples that may be clipped. */
03234         maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
03235 
        /* Walk down from 255, removing bins, until fewer than maxClipped samples remain. */
03236         clipped= sum;
03237         for(black=255; black>0; black--){
03238             if(clipped < maxClipped) break;
03239             clipped-= yHistogram[black];
03240         }
03241 
        /* Same walk upwards from 0 to find the white level. */
03242         clipped= sum;
03243         for(white=0; white<256; white++){
03244             if(clipped < maxClipped) break;
03245             clipped-= yHistogram[white];
03246         }
03247 
        /* Factor that maps the [black, white] range onto [minAllowedY, maxAllowedY]. */
03248         scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
03249 
        /* The fixed-point format differs per build: 8 fractional bits for MMX2, 10 otherwise. */
03250 #if HAVE_MMX2
03251         c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
03252         c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
03253 #else
03254         c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
03255         c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
03256 #endif
03257 
        /* Replicate the 16-bit values into all four words of the 64-bit fields. */
03258         c.packedYOffset|= c.packedYOffset<<32;
03259         c.packedYOffset|= c.packedYOffset<<16;
03260 
03261         c.packedYScale|= c.packedYScale<<32;
03262         c.packedYScale|= c.packedYScale<<16;
03263 
03264         if(mode & LEVEL_FIX)        QPCorrecture= (int)(scale*256*256 + 0.5);
03265         else                        QPCorrecture= 256*256;
03266     }else{
        /* Chroma plane: scale 0x0100 in every word, zero offset, QP left unchanged. */
03267         c.packedYScale= 0x0100010001000100LL;
03268         c.packedYOffset= 0;
03269         QPCorrecture= 256*256;
03270     }
03271 
03272     /* Priming pass: block-copy the first 8 source rows into tempDst (no filter stage other than deinterlacing runs here). */
03273     y=-BLOCK_SIZE;
03274     {
03275         const uint8_t *srcBlock= &(src[y*srcStride]);
03276         uint8_t *dstBlock= tempDst + dstStride;
03277 
03278         
03279         
03280         
03281         for(x=0; x<width; x+=BLOCK_SIZE){
03282 
03283 #if HAVE_MMX2
03284 
03285 
03286 
03287 
03288 
03289 
03290 
            /* Prefetch: row = ((x>>2)&6) + copyAhead; touch src (non-temporal) and dst
             * at byte offset 32 in that row and in the following row. */
03291             __asm__(
03292                 "mov %4, %%"REG_a"              \n\t"
03293                 "shr $2, %%"REG_a"              \n\t"
03294                 "and $6, %%"REG_a"              \n\t"
03295                 "add %5, %%"REG_a"              \n\t"
03296                 "mov %%"REG_a", %%"REG_d"       \n\t"
03297                 "imul %1, %%"REG_a"             \n\t"
03298                 "imul %3, %%"REG_d"             \n\t"
03299                 "prefetchnta 32(%%"REG_a", %0)  \n\t"
03300                 "prefetcht0 32(%%"REG_d", %2)   \n\t"
03301                 "add %1, %%"REG_a"              \n\t"
03302                 "add %3, %%"REG_d"              \n\t"
03303                 "prefetchnta 32(%%"REG_a", %0)  \n\t"
03304                 "prefetcht0 32(%%"REG_d", %2)   \n\t"
03305                 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03306                 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03307                 : "%"REG_a, "%"REG_d
03308             );
03309 
03310 #elif HAVE_AMD3DNOW
03311 
03312 
03313 
03314 
03315 
03316 
03317 #endif
03318 
            /* srcBlock starts 8 rows before the plane, so +8 rows is source row 0. */
03319             RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
03320                               srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03321 
            /* Replicate that first row into the three rows before it. */
03322             RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
03323 
            /* At most one deinterlacer runs; the first matching mode bit wins. */
03324             if(mode & LINEAR_IPOL_DEINT_FILTER)
03325                 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03326             else if(mode & LINEAR_BLEND_DEINT_FILTER)
03327                 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03328             else if(mode & MEDIAN_DEINT_FILTER)
03329                 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03330             else if(mode & CUBIC_IPOL_DEINT_FILTER)
03331                 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03332             else if(mode & FFMPEG_DEINT_FILTER)
03333                 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03334             else if(mode & LOWPASS5_DEINT_FILTER)
03335                 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03336 
03337 
03338 
03339             dstBlock+=8;
03340             srcBlock+=8;
03341         }
        /* Copy the first copyAhead prepared rows from tempDst into dst
         * (linecpy is defined elsewhere; presumably it copies whole lines - confirm). */
03342         if(width==FFABS(dstStride))
03343             linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
03344         else{
03345             int i;
03346             for(i=0; i<copyAhead; i++){
03347                 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
03348             }
03349         }
03350     }
03351 
    /* Main pass: one stripe of BLOCK_SIZE rows per iteration. */
03352     for(y=0; y<height; y+=BLOCK_SIZE){
03353         
03354         const uint8_t *srcBlock= &(src[y*srcStride]);
03355         uint8_t *dstBlock= &(dst[y*dstStride]);
03356 #if HAVE_MMX
        /* Two transposed scratch blocks; they are swapped after every block. */
03357         uint8_t *tempBlock1= c.tempBlocks;
03358         uint8_t *tempBlock2= c.tempBlocks + 8;
03359 #endif
03360         const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
03361         int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
03362         int QP=0;
03363         
03364 
        /* Near the bottom of the plane: work in tempSrc/tempDst so that rows beyond the
         * image can be accessed; those rows are filled with the last image row. */
03365         if(y+15 >= height){
03366             int i;
03367             /* Copy the source rows that still exist, starting copyAhead rows into the stripe. */
03368 
03369             linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
03370                     FFMAX(height-y-copyAhead, 0), srcStride);
03371 
03372             /* Fill the remaining temp source rows with the last source row. */
03373             for(i=FFMAX(height-y, 8); i<copyAhead+8; i++)
03374                     memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), FFABS(srcStride));
03375 
03376             /* Copy destination rows, starting one row before the stripe, into tempDst. */
03377             linecpy(tempDst, dstBlock - dstStride, FFMIN(height-y+1, copyAhead+1), dstStride);
03378 
03379             /* Fill the remaining temp destination rows with the last destination row. */
03380             for(i=height-y+1; i<=copyAhead; i++)
03381                     memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), FFABS(dstStride));
03382 
03383             dstBlock= tempDst + dstStride;
03384             srcBlock= tempSrc;
03385         }
03386 
03387         
03388         
03389         
03390         for(x=0; x<width; x+=BLOCK_SIZE){
03391             const int stride= dstStride;
03392 #if HAVE_MMX
03393             uint8_t *tmpXchg;
03394 #endif
            /* Fetch QP and nonBQP for this block; for luma both are rescaled by
             * QPCorrecture and one sample of the block feeds the histogram. */
03395             if(isColor){
03396                 QP= QPptr[x>>qpHShift];
03397                 c.nonBQP= nonBQPptr[x>>qpHShift];
03398             }else{
03399                 QP= QPptr[x>>4];
03400                 QP= (QP* QPCorrecture + 256*128)>>16;
03401                 c.nonBQP= nonBQPptr[x>>4];
03402                 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
03403                 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
03404             }
03405             c.QP= QP;
03406 #if HAVE_MMX
            /* Store QP, saturated to a byte and replicated into all 8 bytes, in c.pQPb
             * (each packuswb doubles the number of copies). */
03407             __asm__ volatile(
03408                 "movd %1, %%mm7         \n\t"
03409                 "packuswb %%mm7, %%mm7  \n\t" 
03410                 "packuswb %%mm7, %%mm7  \n\t" 
03411                 "packuswb %%mm7, %%mm7  \n\t" 
03412                 "movq %%mm7, %0         \n\t"
03413                 : "=m" (c.pQPb)
03414                 : "r" (QP)
03415             );
03416 #endif
03417 
03418 
03419 #if HAVE_MMX2
03420 
03421 
03422 
03423 
03424 
03425 
03426 
            /* Same prefetch sequence as in the priming pass. */
03427             __asm__(
03428                 "mov %4, %%"REG_a"              \n\t"
03429                 "shr $2, %%"REG_a"              \n\t"
03430                 "and $6, %%"REG_a"              \n\t"
03431                 "add %5, %%"REG_a"              \n\t"
03432                 "mov %%"REG_a", %%"REG_d"       \n\t"
03433                 "imul %1, %%"REG_a"             \n\t"
03434                 "imul %3, %%"REG_d"             \n\t"
03435                 "prefetchnta 32(%%"REG_a", %0)  \n\t"
03436                 "prefetcht0 32(%%"REG_d", %2)   \n\t"
03437                 "add %1, %%"REG_a"              \n\t"
03438                 "add %3, %%"REG_d"              \n\t"
03439                 "prefetchnta 32(%%"REG_a", %0)  \n\t"
03440                 "prefetcht0 32(%%"REG_d", %2)   \n\t"
03441                 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03442                 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03443                 : "%"REG_a, "%"REG_d
03444             );
03445 
03446 #elif HAVE_AMD3DNOW
03447 
03448 
03449 
03450 
03451 
03452 
03453 #endif
03454 
            /* Copy the block that lies copyAhead rows below the current position. */
03455             RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
03456                               srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03457 
            /* At most one deinterlacer runs; the first matching mode bit wins. */
03458             if(mode & LINEAR_IPOL_DEINT_FILTER)
03459                 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03460             else if(mode & LINEAR_BLEND_DEINT_FILTER)
03461                 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03462             else if(mode & MEDIAN_DEINT_FILTER)
03463                 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03464             else if(mode & CUBIC_IPOL_DEINT_FILTER)
03465                 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03466             else if(mode & FFMPEG_DEINT_FILTER)
03467                 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03468             else if(mode & LOWPASS5_DEINT_FILTER)
03469                 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03470 
03471 
03472 
03473 
03474             /* Vertical filtering is skipped for the last stripe. */
03475             if(y + 8 < height){
03476                 if(mode & V_X1_FILTER)
03477                     RENAME(vertX1Filter)(dstBlock, stride, &c);
03478                 else if(mode & V_DEBLOCK){
                    /* The classification result selects the low-pass (1) or default (2) filter; other values filter nothing. */
03479                     const int t= RENAME(vertClassify)(dstBlock, stride, &c);
03480 
03481                     if(t==1)
03482                         RENAME(doVertLowPass)(dstBlock, stride, &c);
03483                     else if(t==2)
03484                         RENAME(doVertDefFilter)(dstBlock, stride, &c);
03485                 }else if(mode & V_A_DEBLOCK){
03486                     RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
03487                 }
03488             }
03489 
03490 #if HAVE_MMX
            /* MMX builds transpose the block into the scratch buffers so that the
             * vertical filter routines can be reused for horizontal filtering below. */
03491             RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
03492 #endif
03493             /* These stages act on data to the left of the current block, so the first block of a row is skipped. */
03494             if(x - 8 >= 0){
03495 #if HAVE_MMX
                /* The transposed scratch data is filtered with a stride of 16. */
03496                 if(mode & H_X1_FILTER)
03497                         RENAME(vertX1Filter)(tempBlock1, 16, &c);
03498                 else if(mode & H_DEBLOCK){
03499 
03500                     const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
03501 
03502                     if(t==1)
03503                         RENAME(doVertLowPass)(tempBlock1, 16, &c);
03504                     else if(t==2)
03505                         RENAME(doVertDefFilter)(tempBlock1, 16, &c);
03506                 }else if(mode & H_A_DEBLOCK){
03507                         RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
03508                 }
03509 
                /* Transpose the scratch data back into the image at dstBlock-4. */
03510                 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
03511 
03512 #else
                /* Non-MMX builds call horizontal filter routines directly on the image. */
03513                 if(mode & H_X1_FILTER)
03514                     horizX1Filter(dstBlock-4, stride, QP);
03515                 else if(mode & H_DEBLOCK){
03516 #if HAVE_ALTIVEC
                    /* AltiVec: transpose into a local buffer, filter it with the vertical
                     * routines and transpose back only when a filter was applied. */
03517                     DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
03518                     transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
03519 
03520                     const int t=vertClassify_altivec(tempBlock-48, 16, &c);
03521                     if(t==1) {
03522                         doVertLowPass_altivec(tempBlock-48, 16, &c);
03523                         transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03524                     }
03525                     else if(t==2) {
03526                         doVertDefFilter_altivec(tempBlock-48, 16, &c);
03527                         transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03528                     }
03529 #else
03530                     const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
03531 
03532                     if(t==1)
03533                         RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
03534                     else if(t==2)
03535                         RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
03536 #endif
03537                 }else if(mode & H_A_DEBLOCK){
03538                     RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
03539                 }
03540 #endif //HAVE_MMX
03541                 if(mode & DERING){
03542                 /* Deringing is applied one block to the left and one row up; skipped for the first stripe. */
03543                     if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
03544                 }
03545 
03546                 if(mode & TEMP_NOISE_FILTER)
03547                 {
                    /* Temporal noise reduction on the block to the left of the current one. */
03548                     RENAME(tempNoiseReducer)(dstBlock-8, stride,
03549                             c.tempBlurred[isColor] + y*dstStride + x,
03550                             c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
03551                             c.ppMode.maxTmpNoise);
03552                 }
03553             }
03554 
03555             dstBlock+=8;
03556             srcBlock+=8;
03557 
03558 #if HAVE_MMX
            /* Swap the two scratch blocks for the next iteration. */
03559             tmpXchg= tempBlock1;
03560             tempBlock1= tempBlock2;
03561             tempBlock2 = tmpXchg;
03562 #endif
03563         }
03564 
        /* The in-loop dering/noise calls work one block behind, so the last block of
         * the row is handled here; x and dstBlock still hold their end-of-loop values. */
03565         if(mode & DERING){
03566             if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
03567         }
03568 
03569         if((mode & TEMP_NOISE_FILTER)){
03570             RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
03571                     c.tempBlurred[isColor] + y*dstStride + x,
03572                     c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
03573                     c.ppMode.maxTmpNoise);
03574         }
03575 
03576         /* Bottom stripes were processed in tempDst; copy the valid rows back into dst. */
03577         if(y+15 >= height){
03578             uint8_t *dstBlock= &(dst[y*dstStride]);
03579             if(width==FFABS(dstStride))
03580                 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
03581             else{
03582                 int i;
03583                 for(i=0; i<height-y; i++){
03584                     memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
03585                 }
03586             }
03587         }
03588 
03589 
03590 
03591 
03592 
03593 
03594 
03595 
03596 
03597     }
    /* Leave MMX state so that later floating-point code can run. */
03598 #if   HAVE_AMD3DNOW
03599     __asm__ volatile("femms");
03600 #elif HAVE_MMX
03601     __asm__ volatile("emms");
03602 #endif
03603 
03604 #ifdef DEBUG_BRIGHTNESS
    /* Debug aid: draw the luma histogram and the black/white levels into dst by adding 128 to pixels. */
03605     if(!isColor){
03606         int max=1;
03607         int i;
03608         for(i=0; i<256; i++)
03609             if(yHistogram[i] > max) max=yHistogram[i];
03610 
03611         for(i=1; i<256; i++){
03612             int x;
03613             int start=yHistogram[i-1]/(max/256+1);
03614             int end=yHistogram[i]/(max/256+1);
03615             int inc= end > start ? 1 : -1;
03616             for(x=start; x!=end+inc; x+=inc)
03617                 dst[ i*dstStride + x]+=128;
03618         }
03619 
03620         for(i=0; i<100; i+=2){
03621             dst[ (white)*dstStride + i]+=128;
03622             dst[ (black)*dstStride + i]+=128;
03623         }
03624     }
03625 #endif
03626 
03627     *c2= c; /* write the updated context back to the caller */
03628 
03629 }