00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavcodec/dsputil.h"
00024
00025 #include "dsputil_ppc.h"
00026
00027 #include "dsputil_altivec.h"
00028
/* CPU feature flags; dsputil_init_ppc() ORs FF_MM_ALTIVEC into this. */
int mm_flags = 0;

/**
 * Report which SIMD extensions can be used on the running CPU.
 *
 * @return FF_MM_ALTIVEC when the build has AltiVec enabled and
 *         has_altivec() succeeds at run time, 0 otherwise.
 */
int mm_support(void)
{
#if HAVE_ALTIVEC
    if (has_altivec()) {
        return FF_MM_ALTIVEC;
    }
#endif
    return 0;
}
00041
#if CONFIG_POWERPC_PERF
#include <stdio.h>

/* Accumulated PMC statistics, indexed as [pmc][function][statistic]. */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];

/*
 * Printable function names, indexed by the same function index used for
 * the middle dimension of perfdata (see powerpc_display_perf_report()).
 * NOTE(review): the order presumably has to match the powerpc_perf_*
 * enumeration declared elsewhere -- confirm when adding entries.
 *
 * The entries are string literals, so the table is declared with plain
 * const char pointers rather than writable unsigned char pointers.
 */
static const char *const perfname[] = {
    "ff_fft_calc_altivec",
    "gmc1_altivec",
    "dct_unquantize_h263_altivec",
    "fdct_altivec",
    "idct_add_altivec",
    "idct_put_altivec",
    "put_pixels16_altivec",
    "avg_pixels16_altivec",
    "avg_pixels8_altivec",
    "put_pixels8_xy2_altivec",
    "put_no_rnd_pixels8_xy2_altivec",
    "put_pixels16_xy2_altivec",
    "put_no_rnd_pixels16_xy2_altivec",
    "hadamard8_diff8x8_altivec",
    "hadamard8_diff16_altivec",
    "avg_pixels8_xy2_altivec",
    "clear_blocks_dcbz32_ppc",
    "clear_blocks_dcbz128_ppc",
    "put_h264_chroma_mc8_altivec",
    "avg_h264_chroma_mc8_altivec",
    "put_h264_qpel16_h_lowpass_altivec",
    "avg_h264_qpel16_h_lowpass_altivec",
    "put_h264_qpel16_v_lowpass_altivec",
    "avg_h264_qpel16_v_lowpass_altivec",
    "put_h264_qpel16_hv_lowpass_altivec",
    "avg_h264_qpel16_hv_lowpass_altivec",
    ""
};
#endif
00076
#if CONFIG_POWERPC_PERF
/**
 * Log the collected performance-counter statistics.
 *
 * For every (function, PMC) pair whose sample count is non-zero, one
 * record with the minimum, maximum, average and sample count is written
 * through av_log() at AV_LOG_INFO level.
 */
void powerpc_display_perf_report(void)
{
    int i, j;

    av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
    for (i = 0; i < powerpc_perf_total; i++) {
        for (j = 0; j < POWERPC_NUM_PMC_ENABLED; j++) {
            const unsigned long long *stat = perfdata[j][i];

            /* Nothing was recorded for this pair; also avoids 0/0 below. */
            if (stat[powerpc_data_num] == 0) {
                continue;
            }

            /*
             * The counters are unsigned long long while the format uses
             * PRIu64; cast explicitly so the argument types match the
             * conversions even where uint64_t is not unsigned long long.
             */
            av_log(NULL, AV_LOG_INFO,
                   " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
                   perfname[i],
                   j + 1,
                   (uint64_t)stat[powerpc_data_min],
                   (uint64_t)stat[powerpc_data_max],
                   (double)stat[powerpc_data_sum] /
                   (double)stat[powerpc_data_num],
                   (uint64_t)stat[powerpc_data_num]);
        }
    }
}
#endif
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00120 {
00121 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
00122 register int misal = ((unsigned long)blocks & 0x00000010);
00123 register int i = 0;
00124 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
00125 #if 1
00126 if (misal) {
00127 ((unsigned long*)blocks)[0] = 0L;
00128 ((unsigned long*)blocks)[1] = 0L;
00129 ((unsigned long*)blocks)[2] = 0L;
00130 ((unsigned long*)blocks)[3] = 0L;
00131 i += 16;
00132 }
00133 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00134 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00135 }
00136 if (misal) {
00137 ((unsigned long*)blocks)[188] = 0L;
00138 ((unsigned long*)blocks)[189] = 0L;
00139 ((unsigned long*)blocks)[190] = 0L;
00140 ((unsigned long*)blocks)[191] = 0L;
00141 i += 16;
00142 }
00143 #else
00144 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00145 #endif
00146 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
00147 }
00148
00149
00150
00151 #if HAVE_DCBZL
00152 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00153 {
00154 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
00155 register int misal = ((unsigned long)blocks & 0x0000007f);
00156 register int i = 0;
00157 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
00158 #if 1
00159 if (misal) {
00160
00161
00162
00163 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00164 }
00165 else
00166 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00167 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00168 }
00169 #else
00170 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00171 #endif
00172 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
00173 }
00174 #else
00175 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00176 {
00177 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00178 }
00179 #endif
00180
#if HAVE_DCBZL
/**
 * Measure how many bytes a single dcbzl instruction zeroes.
 *
 * A 1024-byte buffer is filled with 0xFF, one dcbzl is issued on the
 * address in its middle, and the zero bytes in the buffer are counted.
 *
 * @return number of zero bytes found afterwards, or 0 if the scratch
 *         buffer could not be allocated
 */
static long check_dcbzl_effect(void)
{
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero = 0;
    long i;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /*
     * The "memory" clobber tells the compiler that the instruction
     * modifies the buffer; without it the reads below could legally be
     * satisfied from the values just stored by memset().
     */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++) {
        if (fakedata[i] == (char)0) {
            count++;
        }
    }

    av_free(fakedata);

    return count;
}
#else
/**
 * Variant used when dcbzl is not available at build time.
 *
 * @return always 0
 */
static long check_dcbzl_effect(void)
{
    return 0;
}
#endif
00222
/**
 * Issue a dcbt cache-touch hint for h addresses, starting at mem and
 * advancing by stride bytes each time.
 *
 * A non-positive h issues no hints. A do/while (--h) loop would instead
 * run until h wraps around, overflowing a signed int on the way.
 *
 * @param mem    first address to touch
 * @param stride byte distance between consecutive addresses
 * @param h      number of addresses to touch
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    const uint8_t *p = mem;

    for (; h > 0; h--) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (p));
        p += stride;
    }
}
00231
00232 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00233 {
00234
00235 c->prefetch = prefetch_ppc;
00236 switch (check_dcbzl_effect()) {
00237 case 32:
00238 c->clear_blocks = clear_blocks_dcbz32_ppc;
00239 break;
00240 case 128:
00241 c->clear_blocks = clear_blocks_dcbz128_ppc;
00242 break;
00243 default:
00244 break;
00245 }
00246
00247 #if HAVE_ALTIVEC
00248 if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
00249
00250 if (has_altivec()) {
00251 mm_flags |= FF_MM_ALTIVEC;
00252
00253 dsputil_init_altivec(c, avctx);
00254 if(CONFIG_VC1_DECODER)
00255 vc1dsp_init_altivec(c, avctx);
00256 float_init_altivec(c, avctx);
00257 int_init_altivec(c, avctx);
00258 c->gmc1 = gmc1_altivec;
00259
00260 #if CONFIG_ENCODERS
00261 if (avctx->dct_algo == FF_DCT_AUTO ||
00262 avctx->dct_algo == FF_DCT_ALTIVEC) {
00263 c->fdct = fdct_altivec;
00264 }
00265 #endif //CONFIG_ENCODERS
00266
00267 if (avctx->lowres==0) {
00268 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00269 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
00270 c->idct_put = idct_put_altivec;
00271 c->idct_add = idct_add_altivec;
00272 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00273 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
00274 avctx->idct_algo==FF_IDCT_VP3){
00275 c->idct_put = ff_vp3_idct_put_altivec;
00276 c->idct_add = ff_vp3_idct_add_altivec;
00277 c->idct = ff_vp3_idct_altivec;
00278 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00279 }
00280 }
00281
00282 #if CONFIG_POWERPC_PERF
00283 {
00284 int i, j;
00285 for (i = 0 ; i < powerpc_perf_total ; i++) {
00286 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
00287 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
00288 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
00289 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
00290 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
00291 }
00292 }
00293 }
00294 #endif
00295 }
00296 #endif
00297 }