FFmpeg: libpostproc/postprocess.c Source File

00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less selfinvented filters so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the mainloop more flexible (variable number of blocks at once
00066         (the if/else stuff per block is slowing things down)
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use git log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include "libavutil/avassert.h"
00079 #include <inttypes.h>
00080 #include <stdio.h>
00081 #include <stdlib.h>
00082 #include <string.h>
00083 //#undef HAVE_MMXEXT_INLINE
00084 //#define HAVE_AMD3DNOW_INLINE
00085 //#undef HAVE_MMX_INLINE
00086 //#undef ARCH_X86
00087 //#define DEBUG_BRIGHTNESS
00088 #include "postprocess.h"
00089 #include "postprocess_internal.h"
00090 #include "libavutil/avstring.h"
00091 
00092 unsigned postproc_version(void)
00093 {
00094     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
00095     return LIBPOSTPROC_VERSION_INT;
00096 }
00097 
00098 const char *postproc_configuration(void)
00099 {
00100     return FFMPEG_CONFIGURATION;
00101 }
00102 
00103 const char *postproc_license(void)
00104 {
00105 #define LICENSE_PREFIX "libpostproc license: "
00106     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00107 }
00108 
00109 #if HAVE_ALTIVEC_H
00110 #include <altivec.h>
00111 #endif
00112 
/* Size in bytes of the scratch buffer in which pp_get_mode_by_name_and_quality()
 * tokenizes and expands the mode string. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter options[] array in pp_get_mode_by_name_and_quality(). */
#define OPTIONS_ARRAY_SIZE 10
/* Number of rows / columns the block filters in this file iterate over. */
#define BLOCK_SIZE 8
/* Not referenced in the visible part of this file; presumably used by the
 * included templates -- TODO confirm. */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00118 
#if ARCH_X86 && HAVE_INLINE_ASM
/*
 * 64-bit constants declared through DECLARE_ASM_CONST with an alignment
 * argument of 8.
 *   wNN: the 16-bit value 0x00NN repeated four times
 *   bNN: the byte value 0xNN repeated eight times
 * They are not referenced in the visible part of this file; presumably the
 * inline-asm code of the included templates uses them -- TODO confirm.
 */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared unconditionally (not only for x86); presumably the threshold of
 * the dering filter in the templates -- TODO confirm. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00131 
00132 
/*
 * Table of all filters selectable by name in a mode string.
 *
 * pp_get_mode_by_name_and_quality() compares the requested filter name
 * against both strings of every entry and, on a match, uses the entry's
 * chromDefault, minLumQuality, minChromQuality and mask fields.
 * The initializers are positional; the field order is fixed by
 * struct PPFilter, which is declared outside this file. Presumably it is
 * {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask}
 * -- TODO confirm against postprocess_internal.h.
 * The list is terminated by an entry whose shortName is NULL.
 */
static struct PPFilter filters[]=
{
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {"be", "bitexact",              1, 0, 0, BITEXACT},
    {NULL, NULL,0,0,0,0} //End Marker
};
00156 
/*
 * Alias table used while parsing a mode string.
 *
 * Entries come in pairs: element 2*i is an alias name and element 2*i + 1 is
 * the filter list that is spliced into the mode string in its place.
 * A single NULL terminates the table.
 */
static const char *replaceTable[] = {
    /* alias */     /* replacement */
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",

    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",

    "ac",           "ha:a:128:7,va:a,dr:a",

    NULL            /* end marker */
};
00166 
00167 
#if ARCH_X86 && HAVE_INLINE_ASM
/*
 * Thin wrappers that each emit exactly one x86 prefetch instruction for the
 * address passed in p. They return nothing and differ only in the
 * instruction that is issued.
 */

/** Emit "prefetchnta" for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht0" for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht1" for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

/** Emit "prefetcht2" for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif
00197 
00198 /* The horizontal functions exist only in C because the MMX
00199  * code is faster with vertical filters and transposing. */
00200 
00204 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00205 {
00206     int numEq= 0;
00207     int y;
00208     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00209     const int dcThreshold= dcOffset*2 + 1;
00210 
00211     for(y=0; y<BLOCK_SIZE; y++){
00212         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00216         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00217         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00218         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00219         src+= stride;
00220     }
00221     return numEq > c->ppMode.flatnessThreshold;
00222 }
00223 
00227 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00228 {
00229     int numEq= 0;
00230     int y;
00231     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00232     const int dcThreshold= dcOffset*2 + 1;
00233 
00234     src+= stride*4; // src points to begin of the 8x8 Block
00235     for(y=0; y<BLOCK_SIZE-1; y++){
00236         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00241         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00242         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00243         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00244         src+= stride;
00245     }
00246     return numEq > c->ppMode.flatnessThreshold;
00247 }
00248 
/**
 * Sampled range check over eight rows.
 *
 * Each row contributes one pixel pair; the column pairs cycle through
 * (0,5), (2,7), (4,1), (6,3) and repeat for rows 4..7. The check fails as
 * soon as a sampled difference falls outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride distance between rows, in bytes
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if all eight sampled differences are in range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t columnPair[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row * stride;
        const int a = line[columnPair[row & 3][0]];
        const int b = line[columnPair[row & 3][1]];

        /* the unsigned cast folds the two-sided range check into one compare */
        if ((unsigned)(a - b + 2*QP) > 4*QP)
            return 0;
    }

    return 1;
}
00264 
00265 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00266 {
00267     int x;
00268     src+= stride*4;
00269     for(x=0; x<BLOCK_SIZE; x+=4){
00270         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00271         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00272         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00273         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00274     }
00275     return 1;
00276 }
00277 
00278 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00279 {
00280     if( isHorizDC_C(src, stride, c) ){
00281         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00282             return 1;
00283         else
00284             return 0;
00285     }else{
00286         return 2;
00287     }
00288 }
00289 
00290 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00291 {
00292     if( isVertDC_C(src, stride, c) ){
00293         if( isVertMinMaxOk_C(src, stride, c->QP) )
00294             return 1;
00295         else
00296             return 0;
00297     }else{
00298         return 2;
00299     }
00300 }
00301 
00302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00303 {
00304     int y;
00305     for(y=0; y<BLOCK_SIZE; y++){
00306         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00307 
00308         if(FFABS(middleEnergy) < 8*c->QP){
00309             const int q=(dst[3] - dst[4])/2;
00310             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00311             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00312 
00313             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00314             d= FFMAX(d, 0);
00315 
00316             d= (5*d + 32) >> 6;
00317             d*= FFSIGN(-middleEnergy);
00318 
00319             if(q>0)
00320             {
00321                 d= d<0 ? 0 : d;
00322                 d= d>q ? q : d;
00323             }
00324             else
00325             {
00326                 d= d>0 ? 0 : d;
00327                 d= d<q ? q : d;
00328             }
00329 
00330             dst[3]-= d;
00331             dst[4]+= d;
00332         }
00333         dst+= stride;
00334     }
00335 }
00336 
00341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00342 {
00343     int y;
00344     for(y=0; y<BLOCK_SIZE; y++){
00345         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00346         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00347 
00348         int sums[10];
00349         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00350         sums[1] = sums[0] - first  + dst[3];
00351         sums[2] = sums[1] - first  + dst[4];
00352         sums[3] = sums[2] - first  + dst[5];
00353         sums[4] = sums[3] - first  + dst[6];
00354         sums[5] = sums[4] - dst[0] + dst[7];
00355         sums[6] = sums[5] - dst[1] + last;
00356         sums[7] = sums[6] - dst[2] + last;
00357         sums[8] = sums[7] - dst[3] + last;
00358         sums[9] = sums[8] - dst[4] + last;
00359 
00360         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00361         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00362         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00363         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00364         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00365         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00366         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00367         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00368 
00369         dst+= stride;
00370     }
00371 }
00372 
00381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00382 {
00383     int y;
00384     static uint64_t *lut= NULL;
00385     if(lut==NULL)
00386     {
00387         int i;
00388         lut = av_malloc(256*8);
00389         for(i=0; i<256; i++)
00390         {
00391             int v= i < 128 ? 2*i : 2*(i-256);
00392 /*
00393 //Simulate 112242211 9-Tap filter
00394             uint64_t a= (v/16)  & 0xFF;
00395             uint64_t b= (v/8)   & 0xFF;
00396             uint64_t c= (v/4)   & 0xFF;
00397             uint64_t d= (3*v/8) & 0xFF;
00398 */
00399 //Simulate piecewise linear interpolation
00400             uint64_t a= (v/16)   & 0xFF;
00401             uint64_t b= (v*3/16) & 0xFF;
00402             uint64_t c= (v*5/16) & 0xFF;
00403             uint64_t d= (7*v/16) & 0xFF;
00404             uint64_t A= (0x100 - a)&0xFF;
00405             uint64_t B= (0x100 - b)&0xFF;
00406             uint64_t C= (0x100 - c)&0xFF;
00407             uint64_t D= (0x100 - c)&0xFF;
00408 
00409             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00410                        (D<<24) | (C<<16) | (B<<8)  | (A);
00411             //lut[i] = (v<<32) | (v<<24);
00412         }
00413     }
00414 
00415     for(y=0; y<BLOCK_SIZE; y++){
00416         int a= src[1] - src[2];
00417         int b= src[3] - src[4];
00418         int c= src[5] - src[6];
00419 
00420         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00421 
00422         if(d < QP){
00423             int v = d * FFSIGN(-b);
00424 
00425             src[1] +=v/8;
00426             src[2] +=v/4;
00427             src[3] +=3*v/8;
00428             src[4] -=3*v/8;
00429             src[5] -=v/4;
00430             src[6] -=v/8;
00431         }
00432         src+=stride;
00433     }
00434 }
00435 
00439 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00440     int y;
00441     const int QP= c->QP;
00442     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00443     const int dcThreshold= dcOffset*2 + 1;
00444 //START_TIMER
00445     src+= step*4; // src points to begin of the 8x8 Block
00446     for(y=0; y<8; y++){
00447         int numEq= 0;
00448 
00449         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00450         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00451         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00452         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00453         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00454         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00455         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00456         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00457         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00458         if(numEq > c->ppMode.flatnessThreshold){
00459             int min, max, x;
00460 
00461             if(src[0] > src[step]){
00462                 max= src[0];
00463                 min= src[step];
00464             }else{
00465                 max= src[step];
00466                 min= src[0];
00467             }
00468             for(x=2; x<8; x+=2){
00469                 if(src[x*step] > src[(x+1)*step]){
00470                         if(src[x    *step] > max) max= src[ x   *step];
00471                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00472                 }else{
00473                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00474                         if(src[ x   *step] < min) min= src[ x   *step];
00475                 }
00476             }
00477             if(max-min < 2*QP){
00478                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00479                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00480 
00481                 int sums[10];
00482                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00483                 sums[1] = sums[0] - first       + src[3*step];
00484                 sums[2] = sums[1] - first       + src[4*step];
00485                 sums[3] = sums[2] - first       + src[5*step];
00486                 sums[4] = sums[3] - first       + src[6*step];
00487                 sums[5] = sums[4] - src[0*step] + src[7*step];
00488                 sums[6] = sums[5] - src[1*step] + last;
00489                 sums[7] = sums[6] - src[2*step] + last;
00490                 sums[8] = sums[7] - src[3*step] + last;
00491                 sums[9] = sums[8] - src[4*step] + last;
00492 
00493                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00494                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00495                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00496                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00497                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00498                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00499                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00500                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00501             }
00502         }else{
00503             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00504 
00505             if(FFABS(middleEnergy) < 8*QP){
00506                 const int q=(src[3*step] - src[4*step])/2;
00507                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00508                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00509 
00510                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00511                 d= FFMAX(d, 0);
00512 
00513                 d= (5*d + 32) >> 6;
00514                 d*= FFSIGN(-middleEnergy);
00515 
00516                 if(q>0){
00517                     d= d<0 ? 0 : d;
00518                     d= d>q ? q : d;
00519                 }else{
00520                     d= d>0 ? 0 : d;
00521                     d= d<q ? q : d;
00522                 }
00523 
00524                 src[3*step]-= d;
00525                 src[4*step]+= d;
00526             }
00527         }
00528 
00529         src += stride;
00530     }
00531 /*if(step==16){
00532     STOP_TIMER("step16")
00533 }else{
00534     STOP_TIMER("stepX")
00535 }*/
00536 }
00537 
/*
 * Template instantiation.
 *
 * postprocess_template.c is included once per implementation variant. Before
 * each inclusion RENAME() is redefined so that the generated functions get a
 * variant-specific suffix (_C, _altivec, _MMX, _MMX2, _3DNow), and the HAVE_*
 * macros are redefined to describe that variant.
 *
 * Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined
 * 3DNow+MMX2 version.
 */
// Plain C versions:
// the C code is always compiled because testing needs a bit-exact variant.
#define COMPILE_C

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if ARCH_X86 && HAVE_INLINE_ASM

// With CONFIG_RUNTIME_CPUDETECT every x86 variant is built; otherwise only
// the one selected by the HAVE_*_INLINE conditions below.
#if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif

#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */

// Start from "no SIMD at all"; each variant below switches on what it needs.
#undef HAVE_MMX_INLINE
#define HAVE_MMX_INLINE 0
#undef HAVE_MMXEXT_INLINE
#define HAVE_MMXEXT_INLINE 0
#undef HAVE_AMD3DNOW_INLINE
#define HAVE_AMD3DNOW_INLINE 0
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

// AltiVec versions
#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX_INLINE
#define HAVE_MMX_INLINE 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX_INLINE
#undef HAVE_MMXEXT_INLINE
#define HAVE_MMX_INLINE 1
#define HAVE_MMXEXT_INLINE 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX_INLINE
#undef HAVE_MMXEXT_INLINE
#undef HAVE_AMD3DNOW_INLINE
#define HAVE_MMX_INLINE 1
#define HAVE_MMXEXT_INLINE 0
#define HAVE_AMD3DNOW_INLINE 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Note: from here on the HAVE_* macros hold whatever the redefinitions above
// left behind, not the values from config.h, so do not rely on them to mean
// the configured CPU features.
00619 
00620 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00621         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00622 {
00623     PPContext *c= (PPContext *)vc;
00624     PPMode *ppMode= (PPMode *)vm;
00625     c->ppMode= *ppMode; //FIXME
00626 
00627     if(ppMode->lumMode & BITEXACT) {
00628         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00629         return;
00630     }
00631 
00632     // Using ifs here as they are faster than function pointers although the
00633     // difference would not be measurable here but it is much better because
00634     // someone might exchange the CPU whithout restarting MPlayer ;)
00635 #if CONFIG_RUNTIME_CPUDETECT
00636 #if ARCH_X86 && HAVE_INLINE_ASM
00637     // ordered per speed fastest first
00638     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00639         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00640     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00641         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00642     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00643         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00644     else
00645         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00646 #else
00647 #if HAVE_ALTIVEC
00648     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00649             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00650     else
00651 #endif
00652             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00653 #endif
00654 #else /* CONFIG_RUNTIME_CPUDETECT */
00655 #if   HAVE_MMXEXT_INLINE
00656             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00657 #elif HAVE_AMD3DNOW_INLINE
00658             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00659 #elif HAVE_MMX_INLINE
00660             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00661 #elif HAVE_ALTIVEC
00662             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00663 #else
00664             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00665 #endif
00666 #endif /* !CONFIG_RUNTIME_CPUDETECT */
00667 }
00668 
00669 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00670 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00671 
/**
 * Help text for the -pp command line option.
 *
 * The filter names listed here must match the shortName / longName strings
 * of the filters[] table exactly, because the parser compares them with
 * strcmp(): "ahdeblock", "avdeblock" and "forcequant" are the spellings the
 * table accepts. The "be" / "bitexact" entry makes postProcess() use the
 * plain C implementation.
 *
 * The declaration is a pointer for library versions below 52 and an array
 * from version 52 on.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"be     bitexact                                use the plain C (bit-exact) code\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00721 
00722 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00723 {
00724     char temp[GET_MODE_BUFFER_SIZE];
00725     char *p= temp;
00726     static const char filterDelimiters[] = ",/";
00727     static const char optionDelimiters[] = ":";
00728     struct PPMode *ppMode;
00729     char *filterToken;
00730 
00731     if (!name)  {
00732         av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
00733         return NULL;
00734     }
00735 
00736     if (!strcmp(name, "help")) {
00737         const char *p;
00738         for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
00739             av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
00740             av_log(NULL, AV_LOG_INFO, "%s", temp);
00741         }
00742         return NULL;
00743     }
00744 
00745     ppMode= av_malloc(sizeof(PPMode));
00746 
00747     ppMode->lumMode= 0;
00748     ppMode->chromMode= 0;
00749     ppMode->maxTmpNoise[0]= 700;
00750     ppMode->maxTmpNoise[1]= 1500;
00751     ppMode->maxTmpNoise[2]= 3000;
00752     ppMode->maxAllowedY= 234;
00753     ppMode->minAllowedY= 16;
00754     ppMode->baseDcDiff= 256/8;
00755     ppMode->flatnessThreshold= 56-16-1;
00756     ppMode->maxClippedThreshold= 0.01;
00757     ppMode->error=0;
00758 
00759     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00760     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00761 
00762     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00763 
00764     for(;;){
00765         char *filterName;
00766         int q= 1000000; //PP_QUALITY_MAX;
00767         int chrom=-1;
00768         int luma=-1;
00769         char *option;
00770         char *options[OPTIONS_ARRAY_SIZE];
00771         int i;
00772         int filterNameOk=0;
00773         int numOfUnknownOptions=0;
00774         int enable=1; //does the user want us to enabled or disabled the filter
00775 
00776         filterToken= strtok(p, filterDelimiters);
00777         if(filterToken == NULL) break;
00778         p+= strlen(filterToken) + 1; // p points to next filterToken
00779         filterName= strtok(filterToken, optionDelimiters);
00780         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00781 
00782         if(*filterName == '-'){
00783             enable=0;
00784             filterName++;
00785         }
00786 
00787         for(;;){ //for all options
00788             option= strtok(NULL, optionDelimiters);
00789             if(option == NULL) break;
00790 
00791             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00792             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00793             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00794             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00795             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00796             else{
00797                 options[numOfUnknownOptions] = option;
00798                 numOfUnknownOptions++;
00799             }
00800             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00801         }
00802         options[numOfUnknownOptions] = NULL;
00803 
00804         /* replace stuff from the replace Table */
00805         for(i=0; replaceTable[2*i]!=NULL; i++){
00806             if(!strcmp(replaceTable[2*i], filterName)){
00807                 int newlen= strlen(replaceTable[2*i + 1]);
00808                 int plen;
00809                 int spaceLeft;
00810 
00811                 p--, *p=',';
00812 
00813                 plen= strlen(p);
00814                 spaceLeft= p - temp + plen;
00815                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00816                     ppMode->error++;
00817                     break;
00818                 }
00819                 memmove(p + newlen, p, plen+1);
00820                 memcpy(p, replaceTable[2*i + 1], newlen);
00821                 filterNameOk=1;
00822             }
00823         }
00824 
00825         for(i=0; filters[i].shortName!=NULL; i++){
00826             if(   !strcmp(filters[i].longName, filterName)
00827                || !strcmp(filters[i].shortName, filterName)){
00828                 ppMode->lumMode &= ~filters[i].mask;
00829                 ppMode->chromMode &= ~filters[i].mask;
00830 
00831                 filterNameOk=1;
00832                 if(!enable) break; // user wants to disable it
00833 
00834                 if(q >= filters[i].minLumQuality && luma)
00835                     ppMode->lumMode|= filters[i].mask;
00836                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00837                     if(q >= filters[i].minChromQuality)
00838                             ppMode->chromMode|= filters[i].mask;
00839 
00840                 if(filters[i].mask == LEVEL_FIX){
00841                     int o;
00842                     ppMode->minAllowedY= 16;
00843                     ppMode->maxAllowedY= 234;
00844                     for(o=0; options[o]!=NULL; o++){
00845                         if(  !strcmp(options[o],"fullyrange")
00846                            ||!strcmp(options[o],"f")){
00847                             ppMode->minAllowedY= 0;
00848                             ppMode->maxAllowedY= 255;
00849                             numOfUnknownOptions--;
00850                         }
00851                     }
00852                 }
00853                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00854                 {
00855                     int o;
00856                     int numOfNoises=0;
00857 
00858                     for(o=0; options[o]!=NULL; o++){
00859                         char *tail;
00860                         ppMode->maxTmpNoise[numOfNoises]=
00861                             strtol(options[o], &tail, 0);
00862                         if(tail!=options[o]){
00863                             numOfNoises++;
00864                             numOfUnknownOptions--;
00865                             if(numOfNoises >= 3) break;
00866                         }
00867                     }
00868                 }
00869                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00870                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00871                     int o;
00872 
00873                     for(o=0; options[o]!=NULL && o<2; o++){
00874                         char *tail;
00875                         int val= strtol(options[o], &tail, 0);
00876                         if(tail==options[o]) break;
00877 
00878                         numOfUnknownOptions--;
00879                         if(o==0) ppMode->baseDcDiff= val;
00880                         else ppMode->flatnessThreshold= val;
00881                     }
00882                 }
00883                 else if(filters[i].mask == FORCE_QUANT){
00884                     int o;
00885                     ppMode->forcedQuant= 15;
00886 
00887                     for(o=0; options[o]!=NULL && o<1; o++){
00888                         char *tail;
00889                         int val= strtol(options[o], &tail, 0);
00890                         if(tail==options[o]) break;
00891 
00892                         numOfUnknownOptions--;
00893                         ppMode->forcedQuant= val;
00894                     }
00895                 }
00896             }
00897         }
00898         if(!filterNameOk) ppMode->error++;
00899         ppMode->error += numOfUnknownOptions;
00900     }
00901 
00902     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00903     if(ppMode->error){
00904         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00905         av_free(ppMode);
00906         return NULL;
00907     }
00908     return ppMode;
00909 }
00910 
00911 void pp_free_mode(pp_mode *mode){
00912     av_free(mode);
00913 }
00914 
/**
 * Throw away the buffer referenced by *p and replace it with a new one
 * obtained from av_mallocz().
 *
 * Nothing is copied from the old buffer to the new one.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      byte count passed to av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);            /* drop the previous buffer first */
    fresh = av_mallocz(size);
    *p    = fresh;          /* whatever av_mallocz() returned is stored as-is */
}
00919 
00920 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00921     int mbWidth = (width+15)>>4;
00922     int mbHeight= (height+15)>>4;
00923     int i;
00924 
00925     c->stride= stride;
00926     c->qpStride= qpStride;
00927 
00928     reallocAlign((void **)&c->tempDst, 8, stride*24);
00929     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00930     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00931     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00932     for(i=0; i<256; i++)
00933             c->yHistogram[i]= width*height/64*15/256;
00934 
00935     for(i=0; i<3; i++){
00936         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
00937         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00938         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00939     }
00940 
00941     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00942     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00943     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00944     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00945 }
00946 
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr; /* the name does not depend on the instance */
    return "postproc";
}
00950 
00951 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00952 
00953 pp_context *pp_get_context(int width, int height, int cpuCaps){
00954     PPContext *c= av_malloc(sizeof(PPContext));
00955     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00956     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00957 
00958     memset(c, 0, sizeof(PPContext));
00959     c->av_class = &av_codec_context_class;
00960     c->cpuCaps= cpuCaps;
00961     if(cpuCaps&PP_FORMAT){
00962         c->hChromaSubSample= cpuCaps&0x3;
00963         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00964     }else{
00965         c->hChromaSubSample= 1;
00966         c->vChromaSubSample= 1;
00967     }
00968 
00969     reallocBuffers(c, width, height, stride, qpStride);
00970 
00971     c->frameNum=-1;
00972 
00973     return c;
00974 }
00975 
00976 void pp_free_context(void *vc){
00977     PPContext *c = (PPContext*)vc;
00978     int i;
00979 
00980     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00981     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00982 
00983     av_free(c->tempBlocks);
00984     av_free(c->yHistogram);
00985     av_free(c->tempDst);
00986     av_free(c->tempSrc);
00987     av_free(c->deintTemp);
00988     av_free(c->stdQPTable);
00989     av_free(c->nonBQPTable);
00990     av_free(c->forcedQPTable);
00991 
00992     memset(c, 0, sizeof(PPContext));
00993 
00994     av_free(c);
00995 }
00996 
00997 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00998                      uint8_t * dst[3], const int dstStride[3],
00999                      int width, int height,
01000                      const QP_STORE_T *QP_store,  int QPStride,
01001                      pp_mode *vm,  void *vc, int pict_type)
01002 {
01003     int mbWidth = (width+15)>>4;
01004     int mbHeight= (height+15)>>4;
01005     PPMode *mode = (PPMode*)vm;
01006     PPContext *c = (PPContext*)vc;
01007     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01008     int absQPStride = FFABS(QPStride);
01009 
01010     // c->stride and c->QPStride are always positive
01011     if(c->stride < minStride || c->qpStride < absQPStride)
01012         reallocBuffers(c, width, height,
01013                        FFMAX(minStride, c->stride),
01014                        FFMAX(c->qpStride, absQPStride));
01015 
01016     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01017         int i;
01018         QP_store= c->forcedQPTable;
01019         absQPStride = QPStride = 0;
01020         if(mode->lumMode & FORCE_QUANT)
01021             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01022         else
01023             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01024     }
01025 
01026     if(pict_type & PP_PICT_TYPE_QP2){
01027         int i;
01028         const int count= mbHeight * absQPStride;
01029         for(i=0; i<(count>>2); i++){
01030             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01031         }
01032         for(i<<=2; i<count; i++){
01033             c->stdQPTable[i] = QP_store[i]>>1;
01034         }
01035         QP_store= c->stdQPTable;
01036         QPStride= absQPStride;
01037     }
01038 
01039     if(0){
01040         int x,y;
01041         for(y=0; y<mbHeight; y++){
01042             for(x=0; x<mbWidth; x++){
01043                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01044             }
01045             av_log(c, AV_LOG_INFO, "\n");
01046         }
01047         av_log(c, AV_LOG_INFO, "\n");
01048     }
01049 
01050     if((pict_type&7)!=3){
01051         if (QPStride >= 0){
01052             int i;
01053             const int count= mbHeight * QPStride;
01054             for(i=0; i<(count>>2); i++){
01055                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01056             }
01057             for(i<<=2; i<count; i++){
01058                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01059             }
01060         } else {
01061             int i,j;
01062             for(i=0; i<mbHeight; i++) {
01063                 for(j=0; j<absQPStride; j++) {
01064                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01065                 }
01066             }
01067         }
01068     }
01069 
01070     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01071            mode->lumMode, mode->chromMode);
01072 
01073     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01074                 width, height, QP_store, QPStride, 0, mode, c);
01075 
01076     width  = (width )>>c->hChromaSubSample;
01077     height = (height)>>c->vChromaSubSample;
01078 
01079     if(mode->chromMode){
01080         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01081                     width, height, QP_store, QPStride, 1, mode, c);
01082         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01083                     width, height, QP_store, QPStride, 2, mode, c);
01084     }
01085     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01086         linecpy(dst[1], src[1], height, srcStride[1]);
01087         linecpy(dst[2], src[2], height, srcStride[2]);
01088     }else{
01089         int y;
01090         for(y=0; y<height; y++){
01091             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01092             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01093         }
01094     }
01095 }