00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088 #include <stdio.h>
00089 #include <stdlib.h>
00090 #include <string.h>
00091 #include <inttypes.h>
00092 #include <assert.h>
00093 #include "config.h"
00094 #include "libswscale/rgb2rgb.h"
00095 #include "libswscale/swscale.h"
00096 #include "libswscale/swscale_internal.h"
00097 #include "libavutil/cpu.h"
00098 #include "libavutil/pixdesc.h"
00099 #include "yuv2rgb_altivec.h"
00100
/*
 * Make sure these two build-time switches are not defined; nothing in the
 * code below tests them.
 */
#undef PROFILE_THE_BEAST
#undef INC_SCALING

/* Short aliases for raw 8-bit samples. */
typedef unsigned char ubyte;
typedef signed char   sbyte;
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144 static
00145 const vector unsigned char
00146 perm_rgb_0 = {0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
00147 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
00148 perm_rgb_1 = {0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
00149 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
00150 perm_rgb_2 = {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
00151 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
00152 perm_rgb_3 = {0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
00153 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
00154
/*
 * Interleave three 16-byte channel vectors into three vectors holding 16
 * packed 3-byte pixels.
 *
 * Note the reversed parameter order: the bytes of every output pixel are
 * taken from x0, then x1, then x2.  y0, y1 and y2 receive packed bytes
 * 0..15, 16..31 and 32..47 respectively.  The arguments are expanded more
 * than once, so they must be free of side effects.
 */
#define vec_merge3(x2,x1,x0,y0,y1,y2)                                   \
do {                                                                    \
    __typeof__(x0) m3_pair_hi, m3_carry, m3_pair_lo;                    \
    /* x0/x1 of pixels 0..7, byte-interleaved */                        \
    m3_pair_hi = vec_mergeh (x0, x1);                                   \
    y0         = vec_perm (m3_pair_hi, x2, perm_rgb_0);                 \
    /* leftover bytes of pixels 5..7 plus x2[8..15] for later use */    \
    m3_carry   = vec_perm (m3_pair_hi, x2, perm_rgb_1);                 \
    /* x0/x1 of pixels 8..15, byte-interleaved */                       \
    m3_pair_lo = vec_mergel (x0, x1);                                   \
    y1         = vec_perm (m3_pair_lo, m3_carry, perm_rgb_2);           \
    y2         = vec_perm (m3_pair_lo, m3_carry, perm_rgb_3);           \
} while (0)
00165
/*
 * Store 16 pixels as packed 3-byte triples whose memory byte order is
 * x2, x1, x0 (so passing R,G,B yields B,G,R), then advance ptr by the three
 * vectors (48 bytes) written.  ptr must be a modifiable pointer to vector;
 * vec_st() ignores the low four address bits, so the destination is expected
 * to be 16-byte aligned.
 */
#define vec_mstbgr24(x0,x1,x2,ptr)                                      \
do {                                                                    \
    __typeof__(x0) bgr24_lo, bgr24_mid, bgr24_hi;                       \
    vec_merge3 (x0, x1, x2, bgr24_lo, bgr24_mid, bgr24_hi);             \
    vec_st (bgr24_lo,  0*16, ptr);                                      \
    vec_st (bgr24_mid, 1*16, ptr);                                      \
    vec_st (bgr24_hi,  2*16, ptr);                                      \
    ptr += 3;                                                           \
} while (0)
00174
/*
 * Store 16 pixels as packed 3-byte triples whose memory byte order is
 * x0, x1, x2 (so passing R,G,B yields R,G,B), then advance ptr by the three
 * vectors (48 bytes) written.  ptr must be a modifiable pointer to vector;
 * vec_st() ignores the low four address bits, so the destination is expected
 * to be 16-byte aligned.
 */
#define vec_mstrgb24(x0,x1,x2,ptr)                                      \
do {                                                                    \
    __typeof__(x0) rgb24_lo, rgb24_mid, rgb24_hi;                       \
    vec_merge3 (x2, x1, x0, rgb24_lo, rgb24_mid, rgb24_hi);             \
    vec_st (rgb24_lo,  0*16, ptr);                                      \
    vec_st (rgb24_mid, 1*16, ptr);                                      \
    vec_st (rgb24_hi,  2*16, ptr);                                      \
    ptr += 3;                                                           \
} while (0)
00183
00184
00185
00186
00187
/*
 * Store 16 pixels as packed 4-byte quads whose memory byte order is
 * x0, x1, x2, x3, then advance ptr by the four vectors (64 bytes) written.
 *
 * T is the vector type of the four channel arguments.  The arguments are
 * expanded twice (once for pixels 0..7, once for pixels 8..15) and must be
 * free of side effects.  vec_st() ignores the low four address bits, so the
 * destination is expected to be 16-byte aligned.
 */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                       \
do {                                                                          \
    T rgb32_ab, rgb32_cd, rgb32_q0, rgb32_q1;                                 \
    /* pixels 0..7: byte pairs (x0,x1) and (x2,x3) */                         \
    rgb32_ab = vec_mergeh (x0, x1);                                           \
    rgb32_cd = vec_mergeh (x2, x3);                                           \
    /* merging the pairs as 16-bit units yields x0,x1,x2,x3 per pixel */      \
    rgb32_q0 = (T)vec_mergeh ((vector unsigned short)rgb32_ab,                \
                              (vector unsigned short)rgb32_cd);               \
    rgb32_q1 = (T)vec_mergel ((vector unsigned short)rgb32_ab,                \
                              (vector unsigned short)rgb32_cd);               \
    vec_st (rgb32_q0, 0*16, (T *)ptr);                                        \
    vec_st (rgb32_q1, 1*16, (T *)ptr);                                        \
    /* pixels 8..15 */                                                        \
    rgb32_ab = vec_mergel (x0, x1);                                           \
    rgb32_cd = vec_mergel (x2, x3);                                           \
    rgb32_q0 = (T)vec_mergeh ((vector unsigned short)rgb32_ab,                \
                              (vector unsigned short)rgb32_cd);               \
    rgb32_q1 = (T)vec_mergel ((vector unsigned short)rgb32_ab,                \
                              (vector unsigned short)rgb32_cd);               \
    vec_st (rgb32_q0, 2*16, (T *)ptr);                                        \
    vec_st (rgb32_q1, 3*16, (T *)ptr);                                        \
    ptr += 4;                                                                 \
} while (0)
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
/*
 * Widen bytes 0..7 of x to eight 16-bit lanes.  Every lane is built from a
 * zero byte (selector 0x10, taken from the all-zero second operand) followed
 * by one byte of x, i.e. an unsigned extension on a big-endian lane layout.
 */
#define vec_unh(x)                                                      \
    ((vector signed short)                                              \
     vec_perm (x, (__typeof__(x)){0},                                   \
               ((vector unsigned char){                                 \
                   0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03,      \
                   0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07})))
/*
 * Widen bytes 8..15 of x to eight 16-bit lanes.  Every lane is built from a
 * zero byte (selector 0x10, taken from the all-zero second operand) followed
 * by one byte of x, i.e. an unsigned extension on a big-endian lane layout.
 */
#define vec_unl(x)                                                      \
    ((vector signed short)                                              \
     vec_perm (x, (__typeof__(x)){0},                                   \
               ((vector unsigned char){                                 \
                   0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B,      \
                   0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F})))
00233
/* Clamp every signed 16-bit lane of x to the range [16, 235]. */
#define vec_clip_s16(x)                                                         \
    vec_min (vec_max (x, ((vector signed short){ 16, 16, 16, 16,                \
                                                 16, 16, 16, 16})),             \
             ((vector signed short){235,235,235,235,235,235,235,235}))
00237
/*
 * Pack two vectors of signed 16-bit lanes into one vector of 16 unsigned
 * bytes, clamping to [0, 255]: negative lanes are first raised to zero, then
 * the unsigned saturating pack limits the upper end.
 */
#define vec_packclp(x,y)                                                \
    (vector unsigned char)vec_packs                                     \
        ((vector unsigned short)vec_max (x, vec_splat_s16 (0)),         \
         (vector unsigned short)vec_max (y, vec_splat_s16 (0)))
00242
00243
00244
00245
00246 static inline void cvtyuvtoRGB (SwsContext *c,
00247 vector signed short Y, vector signed short U, vector signed short V,
00248 vector signed short *R, vector signed short *G, vector signed short *B)
00249 {
00250 vector signed short vx,ux,uvx;
00251
00252 Y = vec_mradds (Y, c->CY, c->OY);
00253 U = vec_sub (U,(vector signed short)
00254 vec_splat((vector signed short){128},0));
00255 V = vec_sub (V,(vector signed short)
00256 vec_splat((vector signed short){128},0));
00257
00258
00259 ux = vec_sl (U, c->CSHIFT);
00260 *B = vec_mradds (ux, c->CBU, Y);
00261
00262
00263 vx = vec_sl (V, c->CSHIFT);
00264 *R = vec_mradds (vx, c->CRV, Y);
00265
00266
00267 uvx = vec_mradds (U, c->CGU, Y);
00268 *G = vec_mradds (V, c->CGV, uvx);
00269 }
00270
00271
00272
00273
00274
00275
00276
00277
00278
/*
 * DEFCSP420_CVT(name, out_pixels) defines
 *
 *     static int altivec_<name>(SwsContext *c,
 *                               const unsigned char **in, int *instrides,
 *                               int srcSliceY, int srcSliceH,
 *                               unsigned char **oplanes, int *outstrides);
 *
 * The generated function converts a slice of planar YUV (separate Y, U and V
 * planes in in[0..2], one chroma sample per 2x2 luma block) to packed RGB in
 * oplanes[0], starting at output row srcSliceY, and returns srcSliceH.
 * out_pixels(R, G, B, ptr) must store 16 pixels and advance ptr (see the
 * out_* macros).
 *
 * Two rows are produced per outer iteration (srcSliceH / 2 iterations) and
 * c->srcW / 16 blocks of 16 pixels per row, so a width that is not a
 * multiple of 16 or an odd slice height leaves the remainder unconverted.
 * Inputs may be unaligned (vec_lvsl/vec_perm realignment, which also reads
 * the following 16 bytes); the output is written with vec_st() and is
 * expected to be 16-byte aligned.
 *
 * Output rows are addressed from outstrides[0] for every row pair.  Earlier
 * revisions advanced the output pointers by the stride on top of the bytes
 * already written, which is only right when the stride equals the packed row
 * width.
 */
#define DEFCSP420_CVT(name,out_pixels)                                         \
static int altivec_##name (SwsContext *c,                                      \
                           const unsigned char **in, int *instrides,           \
                           int srcSliceY, int srcSliceH,                       \
                           unsigned char **oplanes, int *outstrides)           \
{                                                                              \
    int w = c->srcW;                                                           \
    int h = srcSliceH;                                                         \
    int i,j;                                                                   \
    int instrides_scl[3];                                                      \
    vector unsigned char y0,y1;                                                \
                                                                               \
    vector signed char  u,v;                                                   \
                                                                               \
    vector signed short Y0,Y1,Y2,Y3;                                           \
    vector signed short U,V;                                                   \
    vector signed short vx,ux,uvx;                                             \
    vector signed short vx0,ux0,uvx0;                                          \
    vector signed short vx1,ux1,uvx1;                                          \
    vector signed short R0,G0,B0;                                              \
    vector signed short R1,G1,B1;                                              \
    vector unsigned char R,G,B;                                                \
                                                                               \
    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;                   \
    vector unsigned char align_perm;                                           \
                                                                               \
    /* local copies of the conversion coefficients */                          \
    vector signed short                                                        \
        lCY  = c->CY,                                                          \
        lOY  = c->OY,                                                          \
        lCRV = c->CRV,                                                         \
        lCBU = c->CBU,                                                         \
        lCGU = c->CGU,                                                         \
        lCGV = c->CGV;                                                         \
                                                                               \
    vector unsigned short lCSHIFT = c->CSHIFT;                                 \
                                                                               \
    /* even and odd luma row of the current pair, plus shared chroma */        \
    const ubyte *y1i = in[0];                                                  \
    const ubyte *y2i = in[0]+instrides[0];                                     \
    const ubyte *ui  = in[1];                                                  \
    const ubyte *vi  = in[2];                                                  \
                                                                               \
    /* start of the even output row of the current pair */                     \
    unsigned char *dst_pair = oplanes[0]+srcSliceY*outstrides[0];              \
    vector unsigned char *oute, *outo;                                         \
                                                                               \
    /* distance from the end of the consumed samples to the next row pair */   \
    instrides_scl[0] = instrides[0]*2-w;                                       \
    instrides_scl[1] = instrides[1]-w/2;                                       \
    instrides_scl[2] = instrides[2]-w/2;                                       \
                                                                               \
    for (i=0;i<h/2;i++) {                                                      \
        oute = (vector unsigned char *) dst_pair;                              \
        outo = (vector unsigned char *)(dst_pair+outstrides[0]);               \
                                                                               \
        /* data-stream touch-for-store hints for the two output rows */        \
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);                 \
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);                 \
                                                                               \
        for (j=0;j<w/16;j++) {                                                 \
                                                                               \
            y1ivP = (const vector unsigned char *)y1i;                         \
            y2ivP = (const vector unsigned char *)y2i;                         \
            uivP  = (const vector unsigned char *)ui;                          \
            vivP  = (const vector unsigned char *)vi;                          \
                                                                               \
            /* unaligned loads: two aligned vectors realigned by vec_perm */   \
            align_perm = vec_lvsl (0, y1i);                                    \
            y0 = (vector unsigned char)                                        \
                 vec_perm (y1ivP[0], y1ivP[1], align_perm);                    \
                                                                               \
            align_perm = vec_lvsl (0, y2i);                                    \
            y1 = (vector unsigned char)                                        \
                 vec_perm (y2ivP[0], y2ivP[1], align_perm);                    \
                                                                               \
            align_perm = vec_lvsl (0, ui);                                     \
            u = (vector signed char)                                           \
                vec_perm (uivP[0], uivP[1], align_perm);                       \
                                                                               \
            align_perm = vec_lvsl (0, vi);                                     \
            v = (vector signed char)                                           \
                vec_perm (vivP[0], vivP[1], align_perm);                       \
                                                                               \
            /* remove the 128 chroma bias (wraps into signed bytes) */         \
            u = (vector signed char)                                           \
                vec_sub (u,(vector signed char)                                \
                         vec_splat((vector signed char){128},0));              \
            v = (vector signed char)                                           \
                vec_sub (v,(vector signed char)                                \
                         vec_splat((vector signed char){128},0));              \
                                                                               \
            /* only the first 8 chroma samples serve these 16 pixels */        \
            U = vec_unpackh (u);                                               \
            V = vec_unpackh (v);                                               \
                                                                               \
            Y0 = vec_unh (y0);                                                 \
            Y1 = vec_unl (y0);                                                 \
            Y2 = vec_unh (y1);                                                 \
            Y3 = vec_unl (y1);                                                 \
                                                                               \
            Y0 = vec_mradds (Y0, lCY, lOY);                                    \
            Y1 = vec_mradds (Y1, lCY, lOY);                                    \
            Y2 = vec_mradds (Y2, lCY, lOY);                                    \
            Y3 = vec_mradds (Y3, lCY, lOY);                                    \
                                                                               \
            /* blue term; merge(x,x) doubles each sample horizontally */       \
            ux  = vec_sl (U, lCSHIFT);                                         \
            ux  = vec_mradds (ux, lCBU, (vector signed short){0});             \
            ux0 = vec_mergeh (ux,ux);                                          \
            ux1 = vec_mergel (ux,ux);                                          \
                                                                               \
            /* red term */                                                     \
            vx  = vec_sl (V, lCSHIFT);                                         \
            vx  = vec_mradds (vx, lCRV, (vector signed short){0});             \
            vx0 = vec_mergeh (vx,vx);                                          \
            vx1 = vec_mergel (vx,vx);                                          \
                                                                               \
            /* green term */                                                   \
            uvx  = vec_mradds (U, lCGU, (vector signed short){0});             \
            uvx  = vec_mradds (V, lCGV, uvx);                                  \
            uvx0 = vec_mergeh (uvx,uvx);                                       \
            uvx1 = vec_mergel (uvx,uvx);                                       \
                                                                               \
            /* even row */                                                     \
            R0 = vec_add (Y0,vx0);                                             \
            G0 = vec_add (Y0,uvx0);                                            \
            B0 = vec_add (Y0,ux0);                                             \
            R1 = vec_add (Y1,vx1);                                             \
            G1 = vec_add (Y1,uvx1);                                            \
            B1 = vec_add (Y1,ux1);                                             \
                                                                               \
            R  = vec_packclp (R0,R1);                                          \
            G  = vec_packclp (G0,G1);                                          \
            B  = vec_packclp (B0,B1);                                          \
                                                                               \
            out_pixels(R,G,B,oute);                                            \
                                                                               \
            /* odd row, same chroma terms */                                   \
            R0 = vec_add (Y2,vx0);                                             \
            G0 = vec_add (Y2,uvx0);                                            \
            B0 = vec_add (Y2,ux0);                                             \
            R1 = vec_add (Y3,vx1);                                             \
            G1 = vec_add (Y3,uvx1);                                            \
            B1 = vec_add (Y3,ux1);                                             \
            R  = vec_packclp (R0,R1);                                          \
            G  = vec_packclp (G0,G1);                                          \
            B  = vec_packclp (B0,B1);                                          \
                                                                               \
            out_pixels(R,G,B,outo);                                            \
                                                                               \
            y1i += 16;                                                         \
            y2i += 16;                                                         \
            ui  += 8;                                                          \
            vi  += 8;                                                          \
                                                                               \
        }                                                                      \
                                                                               \
        /* next pair of output rows, independent of bytes written above */     \
        dst_pair += 2*outstrides[0];                                           \
                                                                               \
        ui  += instrides_scl[1];                                               \
        vi  += instrides_scl[2];                                               \
        y1i += instrides_scl[0];                                               \
        y2i += instrides_scl[0];                                               \
    }                                                                          \
    return srcSliceH;                                                          \
}
00442
00443
/*
 * Pixel-store adapters: out_<layout>(R, G, B, ptr) stores 16 pixels in the
 * named byte order and advances ptr past them.
 *
 * The 32-bit layouts carry a fully opaque alpha byte.  A brace initializer
 * such as (T){255} sets only lane 0 and zero-fills the remaining lanes, so
 * the value has to be broadcast with vec_splat(); without it 15 of every 16
 * pixels would be written with alpha 0.
 */
#define vec_opaque_alpha(a) ((__typeof__ (a))vec_splat ((__typeof__ (a)){255}, 0))

#define out_abgr(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),vec_opaque_alpha(a),c,b,a,ptr)
#define out_bgra(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),c,b,a,vec_opaque_alpha(a),ptr)
#define out_rgba(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),a,b,c,vec_opaque_alpha(a),ptr)
#define out_argb(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),vec_opaque_alpha(a),a,b,c,ptr)
#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
00450
00451 DEFCSP420_CVT (yuv2_abgr, out_abgr)
00452 DEFCSP420_CVT (yuv2_bgra, out_bgra)
00453 DEFCSP420_CVT (yuv2_rgba, out_rgba)
00454 DEFCSP420_CVT (yuv2_argb, out_argb)
00455 DEFCSP420_CVT (yuv2_rgb24, out_rgb24)
00456 DEFCSP420_CVT (yuv2_bgr24, out_bgr24)
00457
00458
00459
00460
00461 static
00462 const vector unsigned char
00463 demux_u = {0x10,0x00,0x10,0x00,
00464 0x10,0x04,0x10,0x04,
00465 0x10,0x08,0x10,0x08,
00466 0x10,0x0c,0x10,0x0c},
00467 demux_v = {0x10,0x02,0x10,0x02,
00468 0x10,0x06,0x10,0x06,
00469 0x10,0x0A,0x10,0x0A,
00470 0x10,0x0E,0x10,0x0E},
00471 demux_y = {0x10,0x01,0x10,0x03,
00472 0x10,0x05,0x10,0x07,
00473 0x10,0x09,0x10,0x0B,
00474 0x10,0x0D,0x10,0x0F};
00475
00476
00477
00478
00479 static int altivec_uyvy_rgb32 (SwsContext *c,
00480 const unsigned char **in, int *instrides,
00481 int srcSliceY, int srcSliceH,
00482 unsigned char **oplanes, int *outstrides)
00483 {
00484 int w = c->srcW;
00485 int h = srcSliceH;
00486 int i,j;
00487 vector unsigned char uyvy;
00488 vector signed short Y,U,V;
00489 vector signed short R0,G0,B0,R1,G1,B1;
00490 vector unsigned char R,G,B;
00491 vector unsigned char *out;
00492 const ubyte *img;
00493
00494 img = in[0];
00495 out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
00496
00497 for (i=0;i<h;i++) {
00498 for (j=0;j<w/16;j++) {
00499 uyvy = vec_ld (0, img);
00500 U = (vector signed short)
00501 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00502
00503 V = (vector signed short)
00504 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00505
00506 Y = (vector signed short)
00507 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00508
00509 cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
00510
00511 uyvy = vec_ld (16, img);
00512 U = (vector signed short)
00513 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00514
00515 V = (vector signed short)
00516 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00517
00518 Y = (vector signed short)
00519 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00520
00521 cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
00522
00523 R = vec_packclp (R0,R1);
00524 G = vec_packclp (G0,G1);
00525 B = vec_packclp (B0,B1);
00526
00527
00528 out_rgba (R,G,B,out);
00529
00530 img += 32;
00531 }
00532 }
00533 return srcSliceH;
00534 }
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
00545 {
00546 if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
00547 return NULL;
00548
00549
00550
00551
00552
00553
00554
00555
00556 if ((c->srcW & 0xf) != 0) return NULL;
00557
00558 switch (c->srcFormat) {
00559 case PIX_FMT_YUV410P:
00560 case PIX_FMT_YUV420P:
00561
00562 case PIX_FMT_GRAY8:
00563 case PIX_FMT_NV12:
00564 case PIX_FMT_NV21:
00565 if ((c->srcH & 0x1) != 0)
00566 return NULL;
00567
00568 switch(c->dstFormat) {
00569 case PIX_FMT_RGB24:
00570 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00571 return altivec_yuv2_rgb24;
00572 case PIX_FMT_BGR24:
00573 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00574 return altivec_yuv2_bgr24;
00575 case PIX_FMT_ARGB:
00576 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00577 return altivec_yuv2_argb;
00578 case PIX_FMT_ABGR:
00579 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00580 return altivec_yuv2_abgr;
00581 case PIX_FMT_RGBA:
00582 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00583 return altivec_yuv2_rgba;
00584 case PIX_FMT_BGRA:
00585 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00586 return altivec_yuv2_bgra;
00587 default: return NULL;
00588 }
00589 break;
00590
00591 case PIX_FMT_UYVY422:
00592 switch(c->dstFormat) {
00593 case PIX_FMT_BGR32:
00594 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00595 return altivec_uyvy_rgb32;
00596 default: return NULL;
00597 }
00598 break;
00599
00600 }
00601 return NULL;
00602 }
00603
00604 void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int brightness, int contrast, int saturation)
00605 {
00606 union {
00607 DECLARE_ALIGNED(16, signed short, tmp)[8];
00608 vector signed short vec;
00609 } buf;
00610
00611 buf.tmp[0] = ((0xffffLL) * contrast>>8)>>9;
00612 buf.tmp[1] = -256*brightness;
00613 buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);
00614 buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);
00615 buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));
00616 buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));
00617
00618
00619 c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
00620 c->CY = vec_splat ((vector signed short)buf.vec, 0);
00621 c->OY = vec_splat ((vector signed short)buf.vec, 1);
00622 c->CRV = vec_splat ((vector signed short)buf.vec, 2);
00623 c->CBU = vec_splat ((vector signed short)buf.vec, 3);
00624 c->CGU = vec_splat ((vector signed short)buf.vec, 4);
00625 c->CGV = vec_splat ((vector signed short)buf.vec, 5);
00626 return;
00627 }
00628
00629
00630 static av_always_inline void
00631 ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
00632 const int16_t **lumSrc, int lumFilterSize,
00633 const int16_t *chrFilter, const int16_t **chrUSrc,
00634 const int16_t **chrVSrc, int chrFilterSize,
00635 const int16_t **alpSrc, uint8_t *dest,
00636 int dstW, int dstY, enum PixelFormat target)
00637 {
00638 int i,j;
00639 vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
00640 vector signed short R0,G0,B0,R1,G1,B1;
00641
00642 vector unsigned char R,G,B;
00643 vector unsigned char *out,*nout;
00644
00645 vector signed short RND = vec_splat_s16(1<<3);
00646 vector unsigned short SCL = vec_splat_u16(4);
00647 DECLARE_ALIGNED(16, unsigned int, scratch)[16];
00648
00649 vector signed short *YCoeffs, *CCoeffs;
00650
00651 YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
00652 CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
00653
00654 out = (vector unsigned char *)dest;
00655
00656 for (i=0; i<dstW; i+=16) {
00657 Y0 = RND;
00658 Y1 = RND;
00659
00660 for (j=0; j<lumFilterSize; j++) {
00661 X0 = vec_ld (0, &lumSrc[j][i]);
00662 X1 = vec_ld (16, &lumSrc[j][i]);
00663 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00664 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00665 }
00666
00667 U = RND;
00668 V = RND;
00669
00670 for (j=0; j<chrFilterSize; j++) {
00671 X = vec_ld (0, &chrUSrc[j][i/2]);
00672 U = vec_mradds (X, CCoeffs[j], U);
00673 X = vec_ld (0, &chrVSrc[j][i/2]);
00674 V = vec_mradds (X, CCoeffs[j], V);
00675 }
00676
00677
00678 Y0 = vec_sra (Y0, SCL);
00679 Y1 = vec_sra (Y1, SCL);
00680 U = vec_sra (U, SCL);
00681 V = vec_sra (V, SCL);
00682
00683 Y0 = vec_clip_s16 (Y0);
00684 Y1 = vec_clip_s16 (Y1);
00685 U = vec_clip_s16 (U);
00686 V = vec_clip_s16 (V);
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697 U0 = vec_mergeh (U,U);
00698 V0 = vec_mergeh (V,V);
00699
00700 U1 = vec_mergel (U,U);
00701 V1 = vec_mergel (V,V);
00702
00703 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00704 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00705
00706 R = vec_packclp (R0,R1);
00707 G = vec_packclp (G0,G1);
00708 B = vec_packclp (B0,B1);
00709
00710 switch(target) {
00711 case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
00712 case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
00713 case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
00714 case PIX_FMT_ARGB: out_argb (R,G,B,out); break;
00715 case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
00716 case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
00717 default:
00718 {
00719
00720
00721 static int printed_error_message;
00722 if (!printed_error_message) {
00723 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00724 av_get_pix_fmt_name(c->dstFormat));
00725 printed_error_message=1;
00726 }
00727 return;
00728 }
00729 }
00730 }
00731
00732 if (i < dstW) {
00733 i -= 16;
00734
00735 Y0 = RND;
00736 Y1 = RND;
00737
00738 for (j=0; j<lumFilterSize; j++) {
00739 X0 = vec_ld (0, &lumSrc[j][i]);
00740 X1 = vec_ld (16, &lumSrc[j][i]);
00741 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00742 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00743 }
00744
00745 U = RND;
00746 V = RND;
00747
00748 for (j=0; j<chrFilterSize; j++) {
00749 X = vec_ld (0, &chrUSrc[j][i/2]);
00750 U = vec_mradds (X, CCoeffs[j], U);
00751 X = vec_ld (0, &chrVSrc[j][i/2]);
00752 V = vec_mradds (X, CCoeffs[j], V);
00753 }
00754
00755
00756 Y0 = vec_sra (Y0, SCL);
00757 Y1 = vec_sra (Y1, SCL);
00758 U = vec_sra (U, SCL);
00759 V = vec_sra (V, SCL);
00760
00761 Y0 = vec_clip_s16 (Y0);
00762 Y1 = vec_clip_s16 (Y1);
00763 U = vec_clip_s16 (U);
00764 V = vec_clip_s16 (V);
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775 U0 = vec_mergeh (U,U);
00776 V0 = vec_mergeh (V,V);
00777
00778 U1 = vec_mergel (U,U);
00779 V1 = vec_mergel (V,V);
00780
00781 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00782 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00783
00784 R = vec_packclp (R0,R1);
00785 G = vec_packclp (G0,G1);
00786 B = vec_packclp (B0,B1);
00787
00788 nout = (vector unsigned char *)scratch;
00789 switch(target) {
00790 case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
00791 case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
00792 case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
00793 case PIX_FMT_ARGB: out_argb (R,G,B,nout); break;
00794 case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
00795 case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
00796 default:
00797
00798 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00799 av_get_pix_fmt_name(c->dstFormat));
00800 return;
00801 }
00802
00803 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
00804 }
00805
00806 }
00807
/*
 * YUV2PACKEDX_WRAPPER(suffix, pixfmt) defines the externally visible
 * function ff_yuv2<suffix>_X_altivec(), which forwards all of its arguments
 * to ff_yuv2packedX_altivec() with pixfmt as the output layout.
 */
#define YUV2PACKEDX_WRAPPER(suffix, pixfmt)                                   \
void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c,                           \
                                     const int16_t *lumFilter,                \
                                     const int16_t **lumSrc,                  \
                                     int lumFilterSize,                       \
                                     const int16_t *chrFilter,                \
                                     const int16_t **chrUSrc,                 \
                                     const int16_t **chrVSrc,                 \
                                     int chrFilterSize,                       \
                                     const int16_t **alpSrc,                  \
                                     uint8_t *dest,                           \
                                     int dstW, int dstY)                      \
{                                                                             \
    ff_yuv2packedX_altivec(c,                                                 \
                           lumFilter, lumSrc, lumFilterSize,                  \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize,        \
                           alpSrc, dest, dstW, dstY,                          \
                           pixfmt);                                           \
}
00820
00821 YUV2PACKEDX_WRAPPER(abgr, PIX_FMT_ABGR);
00822 YUV2PACKEDX_WRAPPER(bgra, PIX_FMT_BGRA);
00823 YUV2PACKEDX_WRAPPER(argb, PIX_FMT_ARGB);
00824 YUV2PACKEDX_WRAPPER(rgba, PIX_FMT_RGBA);
00825 YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
00826 YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);