00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 #include "config.h"
00051 #include "libavcodec/fft.h"
00052 #include "fft_table.h"
00053
00058 #if HAVE_INLINE_ASM
/**
 * FFT kernel for MIPS using FPU inline assembly; z is transformed in place.
 *
 * The work is split into three stages:
 *   1. sum/difference butterflies on groups of four complex values (plain C),
 *   2. groups of eight complex values (inline asm),
 *   3. combining passes for sub-transform sizes 1 << nbits, nbits = 4 .. s->nbits,
 *      using factors read from ff_cos_65536.
 * Sub-transform positions inside z come from fft_offsets_lut.
 *
 * @param s context; only s->nbits is read here
 * @param z complex data, read and overwritten
 *
 * NOTE(review): every asm block addresses z with hard-coded byte offsets
 * (0, 4, 8, ...), i.e. it assumes FFTComplex is two consecutive 4-byte floats
 * (re at +0, im at +4) -- confirm against the definition in fft.h.
 */
00059 static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
00060 {
00061 int nbits, i, n, num_transforms, offset, step;
00062 int n4, n2, n34;
00063 FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00064 FFTComplex *tmpz;
00065 float w_re, w_im;
00066 float *w_re_ptr, *w_im_ptr;
00067 const int fft_size = (1 << s->nbits);
00068 int s_n = s->nbits;
00069 int tem1, tem2;
00070 float pom, pom1, pom2, pom3;
00071 float temp, temp1, temp3, temp4;
00072 FFTComplex * tmpz_n2, * tmpz_n34, * tmpz_n4;
00073 FFTComplex * tmpz_n2_i, * tmpz_n34_i, * tmpz_n4_i, * tmpz_i;
00074
      /*
       * Integer-only asm: num_transforms = (10923 >> (16 - s->nbits)) | 1.
       * As a side effect s_n is left holding 16 - s->nbits.
       */
00078 __asm__ volatile (
00079 "li %[tem1], 16 \n\t"
00080 "sub %[s_n], %[tem1], %[s_n] \n\t"
00081 "li %[tem2], 10923 \n\t"
00082 "srav %[tem2], %[tem2], %[s_n] \n\t"
00083 "ori %[num_t],%[tem2], 1 \n\t"
00084 : [num_t]"=r"(num_transforms), [s_n]"+r"(s_n),
00085 [tem1]"=&r"(tem1), [tem2]"=&r"(tem2)
00086 );
00087
00088
      /* Stage 1: butterflies on tmpz[0..3] for each group of four complex values. */
00089 for (n=0; n<num_transforms; n++) {
00090 offset = fft_offsets_lut[n] << 2;
00091 tmpz = z + offset;
00092
00093 tmp1 = tmpz[0].re + tmpz[1].re;
00094 tmp5 = tmpz[2].re + tmpz[3].re;
00095 tmp2 = tmpz[0].im + tmpz[1].im;
00096 tmp6 = tmpz[2].im + tmpz[3].im;
00097 tmp3 = tmpz[0].re - tmpz[1].re;
00098 tmp8 = tmpz[2].im - tmpz[3].im;
00099 tmp4 = tmpz[0].im - tmpz[1].im;
00100 tmp7 = tmpz[2].re - tmpz[3].re;
00101
00102 tmpz[0].re = tmp1 + tmp5;
00103 tmpz[2].re = tmp1 - tmp5;
00104 tmpz[0].im = tmp2 + tmp6;
00105 tmpz[2].im = tmp2 - tmp6;
00106 tmpz[1].re = tmp3 + tmp8;
00107 tmpz[3].re = tmp3 - tmp8;
00108 tmpz[1].im = tmp4 - tmp7;
00109 tmpz[3].im = tmp4 + tmp7;
00110
00111 }
00112
      /* Transforms shorter than 8 points are complete after stage 1. */
00113 if (fft_size < 8)
00114 return;
00115
00116 num_transforms = (num_transforms >> 1) | 1;
00117
      /*
       * Stage 2: one asm block per group of eight complex values.
       * Byte offsets 0..60 relative to tmpz address tmpz[0..7] (re/im pairs).
       * Loads are interleaved with arithmetic, so several values are loaded
       * twice because their registers are reused in between.
       */
00118 for (n=0; n<num_transforms; n++) {
00119 offset = fft_offsets_lut[n] << 3;
00120 tmpz = z + offset;
00121
00122 __asm__ volatile (
00123 "lwc1 %[tmp1], 32(%[tmpz]) \n\t"
00124 "lwc1 %[pom], 40(%[tmpz]) \n\t"
00125 "lwc1 %[tmp3], 48(%[tmpz]) \n\t"
00126 "lwc1 %[pom1], 56(%[tmpz]) \n\t"
00127 "lwc1 %[tmp2], 36(%[tmpz]) \n\t"
00128 "lwc1 %[pom2], 44(%[tmpz]) \n\t"
00129 "lwc1 %[pom3], 60(%[tmpz]) \n\t"
00130 "lwc1 %[tmp4], 52(%[tmpz]) \n\t"
00131 "add.s %[tmp1], %[tmp1], %[pom] \n\t"
00132 "add.s %[tmp3], %[tmp3], %[pom1] \n\t"
00133 "add.s %[tmp2], %[tmp2], %[pom2] \n\t"
00134 "lwc1 %[pom], 40(%[tmpz]) \n\t"
00135 "add.s %[tmp4], %[tmp4], %[pom3] \n\t"
00136 "add.s %[tmp5], %[tmp1], %[tmp3] \n\t"
00137 "sub.s %[tmp7], %[tmp1], %[tmp3] \n\t"
00138 "lwc1 %[tmp1], 32(%[tmpz]) \n\t"
00139 "lwc1 %[pom1], 44(%[tmpz]) \n\t"
00140 "add.s %[tmp6], %[tmp2], %[tmp4] \n\t"
00141 "sub.s %[tmp8], %[tmp2], %[tmp4] \n\t"
00142 "lwc1 %[tmp2], 36(%[tmpz]) \n\t"
00143 "lwc1 %[pom2], 56(%[tmpz]) \n\t"
00144 "lwc1 %[pom3], 60(%[tmpz]) \n\t"
00145 "lwc1 %[tmp3], 48(%[tmpz]) \n\t"
00146 "lwc1 %[tmp4], 52(%[tmpz]) \n\t"
00147 "sub.s %[tmp1], %[tmp1], %[pom] \n\t"
00148 "lwc1 %[pom], 0(%[tmpz]) \n\t"
00149 "sub.s %[tmp2], %[tmp2], %[pom1] \n\t"
00150 "sub.s %[tmp3], %[tmp3], %[pom2] \n\t"
00151 "lwc1 %[pom2], 4(%[tmpz]) \n\t"
00152 "sub.s %[pom1], %[pom], %[tmp5] \n\t"
00153 "sub.s %[tmp4], %[tmp4], %[pom3] \n\t"
00154 "add.s %[pom3], %[pom], %[tmp5] \n\t"
00155 "sub.s %[pom], %[pom2], %[tmp6] \n\t"
00156 "add.s %[pom2], %[pom2], %[tmp6] \n\t"
00157 "swc1 %[pom1], 32(%[tmpz]) \n\t"
00158 "swc1 %[pom3], 0(%[tmpz]) \n\t"
00159 "swc1 %[pom], 36(%[tmpz]) \n\t"
00160 "swc1 %[pom2], 4(%[tmpz]) \n\t"
00161 "lwc1 %[pom1], 16(%[tmpz]) \n\t"
00162 "lwc1 %[pom3], 20(%[tmpz]) \n\t"
      /* 0.7071067812 ~= sqrt(1/2); it scales the terms that update tmpz[1], [3], [5], [7] */
00163 "li.s %[pom], 0.7071067812 \n\t"
00164 "add.s %[temp1],%[tmp1], %[tmp2] \n\t"
00165 "sub.s %[temp], %[pom1], %[tmp8] \n\t"
00166 "add.s %[pom2], %[pom3], %[tmp7] \n\t"
00167 "sub.s %[temp3],%[tmp3], %[tmp4] \n\t"
00168 "sub.s %[temp4],%[tmp2], %[tmp1] \n\t"
00169 "swc1 %[temp], 48(%[tmpz]) \n\t"
00170 "swc1 %[pom2], 52(%[tmpz]) \n\t"
00171 "add.s %[pom1], %[pom1], %[tmp8] \n\t"
00172 "sub.s %[pom3], %[pom3], %[tmp7] \n\t"
00173 "add.s %[tmp3], %[tmp3], %[tmp4] \n\t"
00174 "mul.s %[tmp5], %[pom], %[temp1] \n\t"
00175 "mul.s %[tmp7], %[pom], %[temp3] \n\t"
00176 "mul.s %[tmp6], %[pom], %[temp4] \n\t"
00177 "mul.s %[tmp8], %[pom], %[tmp3] \n\t"
00178 "swc1 %[pom1], 16(%[tmpz]) \n\t"
00179 "swc1 %[pom3], 20(%[tmpz]) \n\t"
00180 "add.s %[tmp1], %[tmp5], %[tmp7] \n\t"
00181 "sub.s %[tmp3], %[tmp5], %[tmp7] \n\t"
00182 "add.s %[tmp2], %[tmp6], %[tmp8] \n\t"
00183 "sub.s %[tmp4], %[tmp6], %[tmp8] \n\t"
00184 "lwc1 %[temp], 8(%[tmpz]) \n\t"
00185 "lwc1 %[temp1],12(%[tmpz]) \n\t"
00186 "lwc1 %[pom], 24(%[tmpz]) \n\t"
00187 "lwc1 %[pom2], 28(%[tmpz]) \n\t"
00188 "sub.s %[temp4],%[temp], %[tmp1] \n\t"
00189 "sub.s %[temp3],%[temp1], %[tmp2] \n\t"
00190 "add.s %[temp], %[temp], %[tmp1] \n\t"
00191 "add.s %[temp1],%[temp1], %[tmp2] \n\t"
00192 "sub.s %[pom1], %[pom], %[tmp4] \n\t"
00193 "add.s %[pom3], %[pom2], %[tmp3] \n\t"
00194 "add.s %[pom], %[pom], %[tmp4] \n\t"
00195 "sub.s %[pom2], %[pom2], %[tmp3] \n\t"
00196 "swc1 %[temp4],40(%[tmpz]) \n\t"
00197 "swc1 %[temp3],44(%[tmpz]) \n\t"
00198 "swc1 %[temp], 8(%[tmpz]) \n\t"
00199 "swc1 %[temp1],12(%[tmpz]) \n\t"
00200 "swc1 %[pom1], 56(%[tmpz]) \n\t"
00201 "swc1 %[pom3], 60(%[tmpz]) \n\t"
00202 "swc1 %[pom], 24(%[tmpz]) \n\t"
00203 "swc1 %[pom2], 28(%[tmpz]) \n\t"
00204 : [tmp1]"=&f"(tmp1), [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2),
00205 [tmp3]"=&f"(tmp3), [tmp2]"=&f"(tmp2), [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5), [tmp7]"=&f"(tmp7),
00206 [tmp6]"=&f"(tmp6), [tmp8]"=&f"(tmp8), [pom3]"=&f"(pom3),[temp]"=&f"(temp), [temp1]"=&f"(temp1),
00207 [temp3]"=&f"(temp3), [temp4]"=&f"(temp4)
00208 : [tmpz]"r"(tmpz)
00209 : "memory"
00210 );
00211 }
00212
      /*
       * Stage 3 setup: step is the stride (in floats) into ff_cos_65536 for the
       * current pass, n4 is a quarter of the current sub-transform size
       * (4 when nbits == 4). Both are updated at the end of every pass.
       */
00213 step = 1 << (MAX_LOG2_NFFT - 4);
00214 n4 = 4;
00215
00216 for (nbits=4; nbits<=s->nbits; nbits++) {
00217
00218
00219
      /* num_transforms = (num_transforms >> 1) | 1, done in asm */
00220 __asm__ volatile (
00221 "sra %[num_t], %[num_t], 1 \n\t"
00222 "ori %[num_t], %[num_t], 1 \n\t"
00223
00224 : [num_t] "+r" (num_transforms)
00225 );
00226 n2 = 2 * n4;
00227 n34 = 3 * n4;
00228
00229 for (n=0; n<num_transforms; n++) {
00230 offset = fft_offsets_lut[n] << nbits;
00231 tmpz = z + offset;
00232
      /* Start of the second, third (n4) and fourth quarter of this sub-transform. */
00233 tmpz_n2 = tmpz + n2;
00234 tmpz_n4 = tmpz + n4;
00235 tmpz_n34 = tmpz + n34;
00236
      /*
       * Element 0 of each quarter: pure add/sub butterfly, no multiplications
       * are issued in this block.
       */
00237 __asm__ volatile (
00238 "lwc1 %[pom1], 0(%[tmpz_n2]) \n\t"
00239 "lwc1 %[pom], 0(%[tmpz_n34]) \n\t"
00240 "lwc1 %[pom2], 4(%[tmpz_n2]) \n\t"
00241 "lwc1 %[pom3], 4(%[tmpz_n34]) \n\t"
00242 "lwc1 %[temp1],0(%[tmpz]) \n\t"
00243 "lwc1 %[temp3],4(%[tmpz]) \n\t"
00244 "add.s %[tmp5], %[pom1], %[pom] \n\t"
00245 "sub.s %[tmp1], %[pom1], %[pom] \n\t"
00246 "add.s %[tmp6], %[pom2], %[pom3] \n\t"
00247 "sub.s %[tmp2], %[pom2], %[pom3] \n\t"
00248 "sub.s %[temp], %[temp1], %[tmp5] \n\t"
00249 "add.s %[temp1],%[temp1], %[tmp5] \n\t"
00250 "sub.s %[temp4],%[temp3], %[tmp6] \n\t"
00251 "add.s %[temp3],%[temp3], %[tmp6] \n\t"
00252 "swc1 %[temp], 0(%[tmpz_n2]) \n\t"
00253 "swc1 %[temp1],0(%[tmpz]) \n\t"
00254 "lwc1 %[pom1], 0(%[tmpz_n4]) \n\t"
00255 "swc1 %[temp4],4(%[tmpz_n2]) \n\t"
00256 "lwc1 %[temp], 4(%[tmpz_n4]) \n\t"
00257 "swc1 %[temp3],4(%[tmpz]) \n\t"
00258 "sub.s %[pom], %[pom1], %[tmp2] \n\t"
00259 "add.s %[pom1], %[pom1], %[tmp2] \n\t"
00260 "add.s %[temp1],%[temp], %[tmp1] \n\t"
00261 "sub.s %[temp], %[temp], %[tmp1] \n\t"
00262 "swc1 %[pom], 0(%[tmpz_n34]) \n\t"
00263 "swc1 %[pom1], 0(%[tmpz_n4]) \n\t"
00264 "swc1 %[temp1],4(%[tmpz_n34]) \n\t"
00265 "swc1 %[temp], 4(%[tmpz_n4]) \n\t"
00266 : [tmp5]"=&f"(tmp5),
00267 [tmp1]"=&f"(tmp1), [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2),
00268 [tmp2]"=&f"(tmp2), [tmp6]"=&f"(tmp6), [pom3]"=&f"(pom3),
00269 [temp]"=&f"(temp), [temp1]"=&f"(temp1), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4)
00270 : [tmpz]"r"(tmpz), [tmpz_n2]"r"(tmpz_n2), [tmpz_n34]"r"(tmpz_n34), [tmpz_n4]"r"(tmpz_n4)
00271 : "memory"
00272 );
00273
      /*
       * Factor pointers for i = 1: w_re walks forward from ff_cos_65536 + step,
       * w_im walks backward from ff_cos_65536 + MAX_FFT_SIZE/4 - step; both
       * move by step floats per iteration.
       */
00274 w_re_ptr = (float*)(ff_cos_65536 + step);
00275 w_im_ptr = (float*)(ff_cos_65536 + MAX_FFT_SIZE/4 - step);
00276
00277 for (i=1; i<n4; i++) {
00278 w_re = w_re_ptr[0];
00279 w_im = w_im_ptr[0];
00280 tmpz_n2_i = tmpz_n2 + i;
00281 tmpz_n4_i = tmpz_n4 + i;
00282 tmpz_n34_i= tmpz_n34 + i;
00283 tmpz_i = tmpz + i;
00284
      /*
       * With msub.s computing fs*ft - fr and madd.s computing fs*ft + fr:
       *   (tmp1, tmp2) = tmpz_n2[i]  * (w_re - i*w_im)
       *   (tmp3, tmp4) = tmpz_n34[i] * (w_re + i*w_im)
       * followed by the same add/sub butterfly as for element 0.
       */
00285 __asm__ volatile (
00286 "lwc1 %[temp], 0(%[tmpz_n2_i]) \n\t"
00287 "lwc1 %[temp1], 4(%[tmpz_n2_i]) \n\t"
00288 "lwc1 %[pom], 0(%[tmpz_n34_i]) \n\t"
00289 "lwc1 %[pom1], 4(%[tmpz_n34_i]) \n\t"
00290 "mul.s %[temp3], %[w_im], %[temp] \n\t"
00291 "mul.s %[temp4], %[w_im], %[temp1] \n\t"
00292 "mul.s %[pom2], %[w_im], %[pom1] \n\t"
00293 "mul.s %[pom3], %[w_im], %[pom] \n\t"
00294 "msub.s %[tmp2], %[temp3], %[w_re], %[temp1] \n\t"
00295 "madd.s %[tmp1], %[temp4], %[w_re], %[temp] \n\t"
00296 "msub.s %[tmp3], %[pom2], %[w_re], %[pom] \n\t"
00297 "madd.s %[tmp4], %[pom3], %[w_re], %[pom1] \n\t"
00298 "lwc1 %[temp], 0(%[tmpz_i]) \n\t"
00299 "lwc1 %[pom], 4(%[tmpz_i]) \n\t"
00300 "add.s %[tmp5], %[tmp1], %[tmp3] \n\t"
00301 "sub.s %[tmp1], %[tmp1], %[tmp3] \n\t"
00302 "add.s %[tmp6], %[tmp2], %[tmp4] \n\t"
00303 "sub.s %[tmp2], %[tmp2], %[tmp4] \n\t"
00304 "sub.s %[temp1], %[temp], %[tmp5] \n\t"
00305 "add.s %[temp], %[temp], %[tmp5] \n\t"
00306 "sub.s %[pom1], %[pom], %[tmp6] \n\t"
00307 "add.s %[pom], %[pom], %[tmp6] \n\t"
00308 "lwc1 %[temp3], 0(%[tmpz_n4_i]) \n\t"
00309 "lwc1 %[pom2], 4(%[tmpz_n4_i]) \n\t"
00310 "swc1 %[temp1], 0(%[tmpz_n2_i]) \n\t"
00311 "swc1 %[temp], 0(%[tmpz_i]) \n\t"
00312 "swc1 %[pom1], 4(%[tmpz_n2_i]) \n\t"
00313 "swc1 %[pom] , 4(%[tmpz_i]) \n\t"
00314 "sub.s %[temp4], %[temp3], %[tmp2] \n\t"
00315 "add.s %[pom3], %[pom2], %[tmp1] \n\t"
00316 "add.s %[temp3], %[temp3], %[tmp2] \n\t"
00317 "sub.s %[pom2], %[pom2], %[tmp1] \n\t"
00318 "swc1 %[temp4], 0(%[tmpz_n34_i]) \n\t"
00319 "swc1 %[pom3], 4(%[tmpz_n34_i]) \n\t"
00320 "swc1 %[temp3], 0(%[tmpz_n4_i]) \n\t"
00321 "swc1 %[pom2], 4(%[tmpz_n4_i]) \n\t"
00322 : [tmp1]"=&f"(tmp1), [tmp2]"=&f" (tmp2), [temp]"=&f"(temp), [tmp3]"=&f"(tmp3),
00323 [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5), [tmp6]"=&f"(tmp6),
00324 [temp1]"=&f"(temp1), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
00325 [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2), [pom3]"=&f"(pom3)
00326 : [w_re]"f"(w_re), [w_im]"f"(w_im),
00327 [tmpz_i]"r"(tmpz_i),[tmpz_n2_i]"r"(tmpz_n2_i),
00328 [tmpz_n34_i]"r"(tmpz_n34_i), [tmpz_n4_i]"r"(tmpz_n4_i)
00329 : "memory"
00330 );
00331 w_re_ptr += step;
00332 w_im_ptr -= step;
00333 }
00334 }
      /* Next pass: sub-transforms are twice as long, so the table stride halves. */
00335 step >>= 1;
00336 n4 <<= 1;
00337 }
00338 }
00339
00344 static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
00345 {
00346 int k, n8, n4, n2, n, j;
00347 const uint16_t *revtab = s->revtab;
00348 const FFTSample *tcos = s->tcos;
00349 const FFTSample *tsin = s->tsin;
00350 const FFTSample *in1, *in2, *in3, *in4;
00351 FFTComplex *z = (FFTComplex *)output;
00352
00353 int j1;
00354 const float *tcos1, *tsin1, *tcos2, *tsin2;
00355 float temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
00356 temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
00357 FFTComplex *z1, *z2;
00358
00359 n = 1 << s->mdct_bits;
00360 n2 = n >> 1;
00361 n4 = n >> 2;
00362 n8 = n >> 3;
00363
00364
00365 in1 = input;
00366 in2 = input + n2 - 1;
00367 in3 = input + 2;
00368 in4 = input + n2 - 3;
00369
00370 tcos1 = tcos;
00371 tsin1 = tsin;
00372
00373
00374 for(k = 0; k < n4; k += 2) {
00375 j = revtab[k ];
00376 j1 = revtab[k + 1];
00377
00378 __asm__ volatile (
00379 "lwc1 %[temp1], 0(%[in2]) \t\n"
00380 "lwc1 %[temp2], 0(%[tcos1]) \t\n"
00381 "lwc1 %[temp3], 0(%[tsin1]) \t\n"
00382 "lwc1 %[temp4], 0(%[in1]) \t\n"
00383 "lwc1 %[temp5], 0(%[in4]) \t\n"
00384 "mul.s %[temp9], %[temp1], %[temp2] \t\n"
00385 "mul.s %[temp10], %[temp1], %[temp3] \t\n"
00386 "lwc1 %[temp6], 4(%[tcos1]) \t\n"
00387 "lwc1 %[temp7], 4(%[tsin1]) \t\n"
00388 "nmsub.s %[temp9], %[temp9], %[temp4], %[temp3] \t\n"
00389 "madd.s %[temp10], %[temp10], %[temp4], %[temp2] \t\n"
00390 "mul.s %[temp11], %[temp5], %[temp6] \t\n"
00391 "mul.s %[temp12], %[temp5], %[temp7] \t\n"
00392 "lwc1 %[temp8], 0(%[in3]) \t\n"
00393 "addiu %[tcos1], %[tcos1], 8 \t\n"
00394 "addiu %[tsin1], %[tsin1], 8 \t\n"
00395 "addiu %[in1], %[in1], 16 \t\n"
00396 "nmsub.s %[temp11], %[temp11], %[temp8], %[temp7] \t\n"
00397 "madd.s %[temp12], %[temp12], %[temp8], %[temp6] \t\n"
00398 "addiu %[in2], %[in2], -16 \t\n"
00399 "addiu %[in3], %[in3], 16 \t\n"
00400 "addiu %[in4], %[in4], -16 \t\n"
00401
00402 : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
00403 [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
00404 [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
00405 [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
00406 [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
00407 [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
00408 [tsin1]"+r"(tsin1), [tcos1]"+r"(tcos1),
00409 [in1]"+r"(in1), [in2]"+r"(in2),
00410 [in3]"+r"(in3), [in4]"+r"(in4)
00411 );
00412
00413 z[j ].re = temp9;
00414 z[j ].im = temp10;
00415 z[j1].re = temp11;
00416 z[j1].im = temp12;
00417 }
00418
00419 s->fft_calc(s, z);
00420
00421
00422
00423 for(k = 0; k < n8; k += 2) {
00424 tcos1 = &tcos[n8 - k - 2];
00425 tsin1 = &tsin[n8 - k - 2];
00426 tcos2 = &tcos[n8 + k];
00427 tsin2 = &tsin[n8 + k];
00428 z1 = &z[n8 - k - 2];
00429 z2 = &z[n8 + k ];
00430
00431 __asm__ volatile (
00432 "lwc1 %[temp1], 12(%[z1]) \t\n"
00433 "lwc1 %[temp2], 4(%[tsin1]) \t\n"
00434 "lwc1 %[temp3], 4(%[tcos1]) \t\n"
00435 "lwc1 %[temp4], 8(%[z1]) \t\n"
00436 "lwc1 %[temp5], 4(%[z1]) \t\n"
00437 "mul.s %[temp9], %[temp1], %[temp2] \t\n"
00438 "mul.s %[temp10], %[temp1], %[temp3] \t\n"
00439 "lwc1 %[temp6], 0(%[tsin1]) \t\n"
00440 "lwc1 %[temp7], 0(%[tcos1]) \t\n"
00441 "nmsub.s %[temp9], %[temp9], %[temp4], %[temp3] \t\n"
00442 "madd.s %[temp10], %[temp10], %[temp4], %[temp2] \t\n"
00443 "mul.s %[temp11], %[temp5], %[temp6] \t\n"
00444 "mul.s %[temp12], %[temp5], %[temp7] \t\n"
00445 "lwc1 %[temp8], 0(%[z1]) \t\n"
00446 "lwc1 %[temp1], 4(%[z2]) \t\n"
00447 "lwc1 %[temp2], 0(%[tsin2]) \t\n"
00448 "lwc1 %[temp3], 0(%[tcos2]) \t\n"
00449 "nmsub.s %[temp11], %[temp11], %[temp8], %[temp7] \t\n"
00450 "madd.s %[temp12], %[temp12], %[temp8], %[temp6] \t\n"
00451 "mul.s %[temp13], %[temp1], %[temp2] \t\n"
00452 "mul.s %[temp14], %[temp1], %[temp3] \t\n"
00453 "lwc1 %[temp4], 0(%[z2]) \t\n"
00454 "lwc1 %[temp5], 12(%[z2]) \t\n"
00455 "lwc1 %[temp6], 4(%[tsin2]) \t\n"
00456 "lwc1 %[temp7], 4(%[tcos2]) \t\n"
00457 "nmsub.s %[temp13], %[temp13], %[temp4], %[temp3] \t\n"
00458 "madd.s %[temp14], %[temp14], %[temp4], %[temp2] \t\n"
00459 "mul.s %[temp15], %[temp5], %[temp6] \t\n"
00460 "mul.s %[temp16], %[temp5], %[temp7] \t\n"
00461 "lwc1 %[temp8], 8(%[z2]) \t\n"
00462 "nmsub.s %[temp15], %[temp15], %[temp8], %[temp7] \t\n"
00463 "madd.s %[temp16], %[temp16], %[temp8], %[temp6] \t\n"
00464 : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
00465 [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
00466 [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
00467 [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
00468 [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
00469 [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
00470 [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
00471 [temp15]"=&f"(temp15), [temp16]"=&f"(temp16)
00472 : [z1]"r"(z1), [z2]"r"(z2),
00473 [tsin1]"r"(tsin1), [tcos1]"r"(tcos1),
00474 [tsin2]"r"(tsin2), [tcos2]"r"(tcos2)
00475 );
00476
00477 z1[1].re = temp9;
00478 z1[1].im = temp14;
00479 z2[0].re = temp13;
00480 z2[0].im = temp10;
00481
00482 z1[0].re = temp11;
00483 z1[0].im = temp16;
00484 z2[1].re = temp15;
00485 z2[1].im = temp12;
00486 }
00487 }
00488 #endif
00489
00495 static void ff_imdct_calc_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
00496 {
00497 int k;
00498 int n = 1 << s->mdct_bits;
00499 int n2 = n >> 1;
00500 int n4 = n >> 2;
00501
00502 ff_imdct_half_mips(s, output+n4, input);
00503
00504 for(k = 0; k < n4; k+=4) {
00505 output[k] = -output[n2-k-1];
00506 output[k+1] = -output[n2-k-2];
00507 output[k+2] = -output[n2-k-3];
00508 output[k+3] = -output[n2-k-4];
00509
00510 output[n-k-1] = output[n2+k];
00511 output[n-k-2] = output[n2+k+1];
00512 output[n-k-3] = output[n2+k+2];
00513 output[n-k-4] = output[n2+k+3];
00514 }
00515 }
00516
00517 av_cold void ff_fft_init_mips(FFTContext *s)
00518 {
00519 int n=0;
00520
00521 ff_fft_lut_init(fft_offsets_lut, 0, 1 << 16, &n);
00522
00523 #if HAVE_INLINE_ASM
00524 s->fft_calc = ff_fft_calc_mips;
00525 #endif
00526 #if CONFIG_MDCT
00527 s->imdct_calc = ff_imdct_calc_mips;
00528 s->imdct_half = ff_imdct_half_mips;
00529 #endif
00530 }