00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00062 #include "libavutil/common.h"
00063 #include "dsputil.h"
00064
00065 #include "bit_depth_template.c"
00066
/* Basic configuration and arithmetic helper macros used by the DCT code below. */
/* DCTSIZE: number of elements in one row and in one column of a block. */
00067 #define DCTSIZE 8
/* Sample bit depth. BIT_DEPTH is not defined in the visible source;
 * NOTE(review): presumably supplied by the including file or by
 * bit_depth_template.c -- confirm. */
00068 #define BITS_IN_JSAMPLE BIT_DEPTH
/* GLOBAL(x) expands to its argument unchanged; it wraps the return type of
 * the non-static functions defined below. */
00069 #define GLOBAL(x) x
/* Plain right shift; applied to signed values by DESCALE. */
00070 #define RIGHT_SHIFT(x, n) ((x) >> (n))
/* Plain multiplication of the two arguments. */
00071 #define MULTIPLY16C16(var,const) ((var)*(const))
00072
/* DESCALE(x, n): shift x right by n bits.  The active branch first adds
 * 1 << (n - 1), i.e. half of the divisor, so the result is rounded rather
 * than truncated.  The other branch (plain shift) is disabled by "#if 1". */
00073 #if 1 //def USE_ACCURATE_ROUNDING
00074 #define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
00075 #else
00076 #define DESCALE(x,n) RIGHT_SHIFT(x, n)
00077 #endif
00078
00079
00080
00081
00082
00083
/* Compile-time guard: the transforms below index exactly eight elements per
 * row and per column, so any other DCTSIZE is rejected. */
00084 #if DCTSIZE != 8
00085 #error "Sorry, this code only copes with 8x8 DCTs."
00086 #endif
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
/*
 * Fixed-point scaling parameters.
 *
 *   CONST_BITS - number of fractional bits in the FIX_* multiplier constants.
 *   PASS1_BITS - extra bits of scaling left in the output of the row pass.
 *   OUT_SHIFT  - bits removed by the column pass (in addition to CONST_BITS
 *                for the outputs that went through a multiplication).
 *
 * For 8-bit samples the row pass keeps 4 extra bits and the column pass
 * removes exactly those; otherwise only 1 extra bit is kept and the column
 * pass removes one bit more than that.
 *
 * The names are #undef'd before being defined.
 * NOTE(review): presumably because this template is included more than once
 * with different BIT_DEPTH values -- confirm against the including file.
 */
00123 #undef CONST_BITS
00124 #undef PASS1_BITS
00125 #undef OUT_SHIFT
00126
00127 #if BITS_IN_JSAMPLE == 8
00128 #define CONST_BITS 13
00129 #define PASS1_BITS 4
00130 #define OUT_SHIFT PASS1_BITS
00131 #else
00132 #define CONST_BITS 13
00133 #define PASS1_BITS 1
00134 #define OUT_SHIFT (PASS1_BITS + 1)
00135 #endif
00136
00137
00138
00139
00140
00141
00142
00143
/*
 * Multiplier constants in fixed point.  FIX_a_bcdefghij stands for the real
 * value a.bcdefghij; in the CONST_BITS == 13 branch each literal is that
 * value multiplied by 2^13 (8192) and rounded to the nearest integer,
 * e.g. 0.541196100 * 8192 = 4433.48 -> 4433.
 *
 * NOTE(review): the #else branch relies on a FIX() macro that is not defined
 * in the visible part of this file.  It is never selected here because both
 * configurations above define CONST_BITS as 13.
 */
00144 #if CONST_BITS == 13
00145 #define FIX_0_298631336 ((int32_t) 2446)
00146 #define FIX_0_390180644 ((int32_t) 3196)
00147 #define FIX_0_541196100 ((int32_t) 4433)
00148 #define FIX_0_765366865 ((int32_t) 6270)
00149 #define FIX_0_899976223 ((int32_t) 7373)
00150 #define FIX_1_175875602 ((int32_t) 9633)
00151 #define FIX_1_501321110 ((int32_t) 12299)
00152 #define FIX_1_847759065 ((int32_t) 15137)
00153 #define FIX_1_961570560 ((int32_t) 16069)
00154 #define FIX_2_053119869 ((int32_t) 16819)
00155 #define FIX_2_562915447 ((int32_t) 20995)
00156 #define FIX_3_072711026 ((int32_t) 25172)
00157 #else
00158 #define FIX_0_298631336 FIX(0.298631336)
00159 #define FIX_0_390180644 FIX(0.390180644)
00160 #define FIX_0_541196100 FIX(0.541196100)
00161 #define FIX_0_765366865 FIX(0.765366865)
00162 #define FIX_0_899976223 FIX(0.899976223)
00163 #define FIX_1_175875602 FIX(1.175875602)
00164 #define FIX_1_501321110 FIX(1.501321110)
00165 #define FIX_1_847759065 FIX(1.847759065)
00166 #define FIX_1_961570560 FIX(1.961570560)
00167 #define FIX_2_053119869 FIX(2.053119869)
00168 #define FIX_2_562915447 FIX(2.562915447)
00169 #define FIX_3_072711026 FIX(3.072711026)
00170 #endif
00171
00172
00173
00174
00175
00176
00177
00178
00179
/*
 * MULTIPLY(var, const): multiply a variable by one of the FIX_* constants.
 * Both branches expand to an ordinary multiplication, because MULTIPLY16C16
 * as defined in this file is itself ((var)*(const)).  With the settings
 * chosen above the first branch is never taken anyway: 8-bit samples use
 * PASS1_BITS == 4, which fails the PASS1_BITS<=2 test.
 */
00180 #if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
00181 #define MULTIPLY(var,const) MULTIPLY16C16(var,const)
00182 #else
00183 #define MULTIPLY(var,const) ((var) * (const))
00184 #endif
00185
00186
00187 static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
00188 {
00189 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00190 int tmp10, tmp11, tmp12, tmp13;
00191 int z1, z2, z3, z4, z5;
00192 DCTELEM *dataptr;
00193 int ctr;
00194
00195
00196
00197
00198
00199 dataptr = data;
00200 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00201 tmp0 = dataptr[0] + dataptr[7];
00202 tmp7 = dataptr[0] - dataptr[7];
00203 tmp1 = dataptr[1] + dataptr[6];
00204 tmp6 = dataptr[1] - dataptr[6];
00205 tmp2 = dataptr[2] + dataptr[5];
00206 tmp5 = dataptr[2] - dataptr[5];
00207 tmp3 = dataptr[3] + dataptr[4];
00208 tmp4 = dataptr[3] - dataptr[4];
00209
00210
00211
00212
00213
00214 tmp10 = tmp0 + tmp3;
00215 tmp13 = tmp0 - tmp3;
00216 tmp11 = tmp1 + tmp2;
00217 tmp12 = tmp1 - tmp2;
00218
00219 dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
00220 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
00221
00222 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00223 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00224 CONST_BITS-PASS1_BITS);
00225 dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00226 CONST_BITS-PASS1_BITS);
00227
00228
00229
00230
00231
00232
00233 z1 = tmp4 + tmp7;
00234 z2 = tmp5 + tmp6;
00235 z3 = tmp4 + tmp6;
00236 z4 = tmp5 + tmp7;
00237 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
00238
00239 tmp4 = MULTIPLY(tmp4, FIX_0_298631336);
00240 tmp5 = MULTIPLY(tmp5, FIX_2_053119869);
00241 tmp6 = MULTIPLY(tmp6, FIX_3_072711026);
00242 tmp7 = MULTIPLY(tmp7, FIX_1_501321110);
00243 z1 = MULTIPLY(z1, - FIX_0_899976223);
00244 z2 = MULTIPLY(z2, - FIX_2_562915447);
00245 z3 = MULTIPLY(z3, - FIX_1_961570560);
00246 z4 = MULTIPLY(z4, - FIX_0_390180644);
00247
00248 z3 += z5;
00249 z4 += z5;
00250
00251 dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
00252 dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
00253 dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
00254 dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
00255
00256 dataptr += DCTSIZE;
00257 }
00258 }
00259
00260
00261
00262
00263
00264 GLOBAL(void)
00265 FUNC(ff_jpeg_fdct_islow)(DCTELEM *data)
00266 {
00267 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00268 int tmp10, tmp11, tmp12, tmp13;
00269 int z1, z2, z3, z4, z5;
00270 DCTELEM *dataptr;
00271 int ctr;
00272
00273 FUNC(row_fdct)(data);
00274
00275
00276
00277
00278
00279
00280 dataptr = data;
00281 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00282 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
00283 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
00284 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
00285 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
00286 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
00287 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
00288 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
00289 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
00290
00291
00292
00293
00294
00295 tmp10 = tmp0 + tmp3;
00296 tmp13 = tmp0 - tmp3;
00297 tmp11 = tmp1 + tmp2;
00298 tmp12 = tmp1 - tmp2;
00299
00300 dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
00301 dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
00302
00303 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00304 dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00305 CONST_BITS + OUT_SHIFT);
00306 dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00307 CONST_BITS + OUT_SHIFT);
00308
00309
00310
00311
00312
00313
00314 z1 = tmp4 + tmp7;
00315 z2 = tmp5 + tmp6;
00316 z3 = tmp4 + tmp6;
00317 z4 = tmp5 + tmp7;
00318 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
00319
00320 tmp4 = MULTIPLY(tmp4, FIX_0_298631336);
00321 tmp5 = MULTIPLY(tmp5, FIX_2_053119869);
00322 tmp6 = MULTIPLY(tmp6, FIX_3_072711026);
00323 tmp7 = MULTIPLY(tmp7, FIX_1_501321110);
00324 z1 = MULTIPLY(z1, - FIX_0_899976223);
00325 z2 = MULTIPLY(z2, - FIX_2_562915447);
00326 z3 = MULTIPLY(z3, - FIX_1_961570560);
00327 z4 = MULTIPLY(z4, - FIX_0_390180644);
00328
00329 z3 += z5;
00330 z4 += z5;
00331
00332 dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT);
00333 dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT);
00334 dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT);
00335 dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT);
00336
00337 dataptr++;
00338 }
00339 }
00340
00341
00342
00343
00344
00345
00346 GLOBAL(void)
00347 FUNC(ff_fdct248_islow)(DCTELEM *data)
00348 {
00349 int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00350 int tmp10, tmp11, tmp12, tmp13;
00351 int z1;
00352 DCTELEM *dataptr;
00353 int ctr;
00354
00355 FUNC(row_fdct)(data);
00356
00357
00358
00359
00360
00361
00362 dataptr = data;
00363 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00364 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
00365 tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
00366 tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
00367 tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
00368 tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
00369 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
00370 tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
00371 tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
00372
00373 tmp10 = tmp0 + tmp3;
00374 tmp11 = tmp1 + tmp2;
00375 tmp12 = tmp1 - tmp2;
00376 tmp13 = tmp0 - tmp3;
00377
00378 dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
00379 dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
00380
00381 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00382 dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00383 CONST_BITS+OUT_SHIFT);
00384 dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00385 CONST_BITS+OUT_SHIFT);
00386
00387 tmp10 = tmp4 + tmp7;
00388 tmp11 = tmp5 + tmp6;
00389 tmp12 = tmp5 - tmp6;
00390 tmp13 = tmp4 - tmp7;
00391
00392 dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
00393 dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
00394
00395 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00396 dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00397 CONST_BITS + OUT_SHIFT);
00398 dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00399 CONST_BITS + OUT_SHIFT);
00400
00401 dataptr++;
00402 }
00403 }