00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "avcodec.h"
00028 #include "dsputil.h"
00029
/* Rounding term used by the DC-only shortcut of the IDCT (half of 1 << 4). */
#define IdctAdjustBeforeShift 8

/*
 * Fixed-point transform constants: xCnSm is approximately
 * 65536 * cos(n * pi / 16) (equivalently 65536 * sin(m * pi / 16)).
 */
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785

/*
 * Multiply by a 16.16 fixed-point constant and drop the fractional bits.
 *
 * The product is formed in unsigned arithmetic so that operands too large
 * for a signed 32-bit product wrap around instead of triggering signed
 * overflow (undefined behaviour). For every operand pair whose product fits
 * in an int the result is the same as a plain signed multiply and shift.
 */
#define M(a, b) ((int)((unsigned)(a) * (unsigned)(b)) >> 16)

00040
00041 static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
00042 {
00043 int16_t *ip = input;
00044
00045 int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
00046 int Ed, Gd, Add, Bdd, Fd, Hd;
00047
00048 int i;
00049
00050
00051 for (i = 0; i < 8; i++) {
00052
00053 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
00054 A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
00055 B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
00056 C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
00057 D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
00058
00059 Ad = M(xC4S4, (A - C));
00060 Bd = M(xC4S4, (B - D));
00061
00062 Cd = A + C;
00063 Dd = B + D;
00064
00065 E = M(xC4S4, (ip[0] + ip[4]));
00066 F = M(xC4S4, (ip[0] - ip[4]));
00067
00068 G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
00069 H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
00070
00071 Ed = E - G;
00072 Gd = E + G;
00073
00074 Add = F + Ad;
00075 Bdd = Bd - H;
00076
00077 Fd = F - Ad;
00078 Hd = Bd + H;
00079
00080
00081 ip[0] = Gd + Cd ;
00082 ip[7] = Gd - Cd ;
00083
00084 ip[1] = Add + Hd;
00085 ip[2] = Add - Hd;
00086
00087 ip[3] = Ed + Dd ;
00088 ip[4] = Ed - Dd ;
00089
00090 ip[5] = Fd + Bdd;
00091 ip[6] = Fd - Bdd;
00092 }
00093
00094 ip += 8;
00095 }
00096
00097 ip = input;
00098
00099 for ( i = 0; i < 8; i++) {
00100
00101 if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
00102 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
00103
00104 A = M(xC1S7, ip[1*8]) + M(xC7S1, ip[7*8]);
00105 B = M(xC7S1, ip[1*8]) - M(xC1S7, ip[7*8]);
00106 C = M(xC3S5, ip[3*8]) + M(xC5S3, ip[5*8]);
00107 D = M(xC3S5, ip[5*8]) - M(xC5S3, ip[3*8]);
00108
00109 Ad = M(xC4S4, (A - C));
00110 Bd = M(xC4S4, (B - D));
00111
00112 Cd = A + C;
00113 Dd = B + D;
00114
00115 E = M(xC4S4, (ip[0*8] + ip[4*8])) + 8;
00116 F = M(xC4S4, (ip[0*8] - ip[4*8])) + 8;
00117
00118 if(type==1){
00119 E += 16*128;
00120 F += 16*128;
00121 }
00122
00123 G = M(xC2S6, ip[2*8]) + M(xC6S2, ip[6*8]);
00124 H = M(xC6S2, ip[2*8]) - M(xC2S6, ip[6*8]);
00125
00126 Ed = E - G;
00127 Gd = E + G;
00128
00129 Add = F + Ad;
00130 Bdd = Bd - H;
00131
00132 Fd = F - Ad;
00133 Hd = Bd + H;
00134
00135
00136 if(type==0){
00137 ip[0*8] = (Gd + Cd ) >> 4;
00138 ip[7*8] = (Gd - Cd ) >> 4;
00139
00140 ip[1*8] = (Add + Hd ) >> 4;
00141 ip[2*8] = (Add - Hd ) >> 4;
00142
00143 ip[3*8] = (Ed + Dd ) >> 4;
00144 ip[4*8] = (Ed - Dd ) >> 4;
00145
00146 ip[5*8] = (Fd + Bdd ) >> 4;
00147 ip[6*8] = (Fd - Bdd ) >> 4;
00148 }else if(type==1){
00149 dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4);
00150 dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4);
00151
00152 dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
00153 dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
00154
00155 dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4);
00156 dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4);
00157
00158 dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
00159 dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
00160 }else{
00161 dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4));
00162 dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4));
00163
00164 dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
00165 dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
00166
00167 dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4));
00168 dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4));
00169
00170 dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
00171 dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
00172 }
00173
00174 } else {
00175 if(type==0){
00176 ip[0*8] =
00177 ip[1*8] =
00178 ip[2*8] =
00179 ip[3*8] =
00180 ip[4*8] =
00181 ip[5*8] =
00182 ip[6*8] =
00183 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
00184 }else if(type==1){
00185 dst[0*stride]=
00186 dst[1*stride]=
00187 dst[2*stride]=
00188 dst[3*stride]=
00189 dst[4*stride]=
00190 dst[5*stride]=
00191 dst[6*stride]=
00192 dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20));
00193 }else{
00194 if(ip[0*8]){
00195 int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
00196 dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
00197 dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
00198 dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
00199 dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
00200 dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
00201 dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
00202 dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
00203 dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
00204 }
00205 }
00206 }
00207
00208 ip++;
00209 dst++;
00210 }
00211 }
00212
00213 void ff_vp3_idct_c(DCTELEM *block){
00214 idct(NULL, 0, block, 0);
00215 }
00216
00217 void ff_vp3_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block){
00218 idct(dest, line_size, block, 1);
00219 }
00220
00221 void ff_vp3_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block){
00222 idct(dest, line_size, block, 2);
00223 }
00224
00225 void ff_vp3_idct_dc_add_c(uint8_t *dest, int line_size, const DCTELEM *block){
00226 int i, dc = (block[0] + 15) >> 5;
00227
00228 for(i = 0; i < 8; i++){
00229 dest[0] = av_clip_uint8(dest[0] + dc);
00230 dest[1] = av_clip_uint8(dest[1] + dc);
00231 dest[2] = av_clip_uint8(dest[2] + dc);
00232 dest[3] = av_clip_uint8(dest[3] + dc);
00233 dest[4] = av_clip_uint8(dest[4] + dc);
00234 dest[5] = av_clip_uint8(dest[5] + dc);
00235 dest[6] = av_clip_uint8(dest[6] + dc);
00236 dest[7] = av_clip_uint8(dest[7] + dc);
00237 dest += line_size;
00238 }
00239 }
00240
/**
 * Loop filter across the boundary between the line at first_pixel and the
 * line above it, applied at 8 horizontally adjacent positions.
 *
 * At each position the samples two lines above, one line above, on, and one
 * line below first_pixel are combined into a filter value. That value is
 * mapped through bounding_values, then added to the sample one line above
 * and subtracted from the sample at first_pixel, both via av_clip_uint8().
 *
 * @param first_pixel     leftmost sample of the line just below the boundary
 * @param stride          distance, in bytes, between successive lines
 * @param bounding_values lookup table indexed by (filter_value + 4) >> 3,
 *                        which spans -127..128, so the pointer must allow
 *                        negative indices
 */
void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
{
    const int up = -stride;
    int x;

    for (x = 0; x < 8; x++) {
        uint8_t *px = first_pixel + x;
        int delta = (px[2 * up] - px[stride]) + 3 * (px[0] - px[up]);
        int adjust = bounding_values[(delta + 4) >> 3];

        px[up] = av_clip_uint8(px[up] + adjust);
        px[0]  = av_clip_uint8(px[0] - adjust);
    }
}
00256
/**
 * Loop filter across the boundary between the column at first_pixel and the
 * column to its left, applied on 8 successive lines.
 *
 * On each line the samples at offsets -2, -1, 0 and +1 from the current
 * position are combined into a filter value. That value is mapped through
 * bounding_values, then added to the sample at offset -1 and subtracted
 * from the sample at offset 0, both via av_clip_uint8().
 *
 * @param first_pixel     sample just right of the boundary on the first line
 * @param stride          distance, in bytes, between successive lines
 * @param bounding_values lookup table indexed by (filter_value + 4) >> 3,
 *                        which spans -127..128, so the pointer must allow
 *                        negative indices
 */
void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
{
    uint8_t *row = first_pixel;
    uint8_t *const stop = first_pixel + 8 * stride;

    while (row != stop) {
        int delta = (row[-2] - row[1]) + 3 * (row[0] - row[-1]);
        int adjust = bounding_values[(delta + 4) >> 3];

        row[-1] = av_clip_uint8(row[-1] + adjust);
        row[0]  = av_clip_uint8(row[0] - adjust);

        row += stride;
    }
}