00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 
00064 
00065 
00066 
00067 
00068 
00069 
00070 
00071 
00072 
00073 
00074 
00075 
00076 
00077 
00078 
00079 
00080 
00081 
00082 
00083 
00084 
00085 
00086 
00087 
00088 #include <stdio.h>
00089 #include <stdlib.h>
00090 #include <string.h>
00091 #include <inttypes.h>
00092 #include <assert.h>
00093 #include "config.h"
00094 #include "libswscale/rgb2rgb.h"
00095 #include "libswscale/swscale.h"
00096 #include "libswscale/swscale_internal.h"
00097 #include "libavutil/cpu.h"
00098 #include "libavutil/pixdesc.h"
00099 #include "yuv2rgb_altivec.h"
00100 
00101 #undef PROFILE_THE_BEAST
00102 #undef INC_SCALING
00103 
/* Shorthand names for single-byte quantities used by the converters below. */
typedef signed char   sbyte;   /* signed byte   */
typedef unsigned char ubyte;   /* unsigned byte */
00106 
00107 
00108 
00109 
00110 
00111 
00112 
00113 
00114 
00115 
00116 
00117 
00118 
00119 
00120 
00121 
00122 
00123 
00124 
00125 
00126 
00127 
00128 
00129 
00130 
00131 
00132 
00133 
00134 
00135 
00136 
00137 
00138 
00139 
00140 
00141 
00142 
00143 
00144 static
00145 const vector unsigned char
00146   perm_rgb_0 = {0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
00147                 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
00148   perm_rgb_1 = {0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
00149                 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
00150   perm_rgb_2 = {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
00151                 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
00152   perm_rgb_3 = {0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
00153                 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
00154 
/*
 * Interleave three byte vectors into three vectors of packed 3-byte groups.
 *
 * x0 and x1 are merged pairwise first (x0 byte, x1 byte), then x2 is woven
 * in behind each pair with the perm_rgb_* tables, giving the byte order
 * x0,x1,x2,x0,x1,x2,...  Note the parameter list names the inputs in the
 * order x2,x1,x0.  Results are written to y0, y1 and y2.
 */
#define vec_merge3(x2,x1,x0,y0,y1,y2)                      \
do {                                                       \
    __typeof__(x0) m3_hi, m3_mix, m3_lo;                   \
    /* first 8 (x0,x1) pairs */                            \
    m3_hi  = vec_mergeh (x0, x1);                          \
    y0     = vec_perm (m3_hi, x2, perm_rgb_0);             \
    /* left-over pairs plus the rest of x2 */              \
    m3_mix = vec_perm (m3_hi, x2, perm_rgb_1);             \
    /* last 8 (x0,x1) pairs */                             \
    m3_lo  = vec_mergel (x0, x1);                          \
    y1     = vec_perm (m3_lo, m3_mix, perm_rgb_2);         \
    y2     = vec_perm (m3_lo, m3_mix, perm_rgb_3);         \
} while (0)
00165 
/*
 * Interleave x0, x1, x2 with vec_merge3(x0,x1,x2,...) and store the three
 * resulting vectors (48 bytes) at ptr, then advance ptr by three vectors.
 * ptr must be a modifiable pointer to a vector type.
 */
#define vec_mstbgr24(x0,x1,x2,ptr)                   \
do {                                                 \
    __typeof__(x0) bgr_a, bgr_b, bgr_c;              \
    vec_merge3 (x0, x1, x2, bgr_a, bgr_b, bgr_c);    \
    vec_st (bgr_a, 0*16, ptr);                       \
    vec_st (bgr_b, 1*16, ptr);                       \
    vec_st (bgr_c, 2*16, ptr);                       \
    ptr += 3;                                        \
} while (0)
00174 
/*
 * Same as vec_mstbgr24() but with the first and third planes swapped:
 * interleave via vec_merge3(x2,x1,x0,...), store the three resulting
 * vectors (48 bytes) at ptr and advance ptr by three vectors.
 * ptr must be a modifiable pointer to a vector type.
 */
#define vec_mstrgb24(x0,x1,x2,ptr)                   \
do {                                                 \
    __typeof__(x0) rgb_a, rgb_b, rgb_c;              \
    vec_merge3 (x2, x1, x0, rgb_a, rgb_b, rgb_c);    \
    vec_st (rgb_a, 0*16, ptr);                       \
    vec_st (rgb_b, 1*16, ptr);                       \
    vec_st (rgb_c, 2*16, ptr);                       \
    ptr += 3;                                        \
} while (0)
00183 
00184 
00185 
00186 
00187 
/*
 * Interleave four byte vectors into 16 packed 4-byte pixels
 * (byte order x0,x1,x2,x3 within each pixel), store the 64 bytes at ptr
 * and advance ptr by four elements.
 *
 * T is the vector type of the inputs.  Bytes are first merged pairwise
 * (x0 with x1, x2 with x3); the pairs are then merged as 16-bit units.
 */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                         \
do {                                                                            \
    T q01, q23, qfirst, qsecond;                                                \
    /* pixels 0..7 */                                                           \
    q01     = vec_mergeh (x0,x1);                                               \
    q23     = vec_mergeh (x2,x3);                                               \
    qfirst  = (T)vec_mergeh ((vector unsigned short)q01,                        \
                             (vector unsigned short)q23);                       \
    qsecond = (T)vec_mergel ((vector unsigned short)q01,                        \
                             (vector unsigned short)q23);                       \
    vec_st (qfirst,   0, (T *)ptr);                                             \
    vec_st (qsecond, 16, (T *)ptr);                                             \
    /* pixels 8..15 */                                                          \
    q01     = vec_mergel (x0,x1);                                               \
    q23     = vec_mergel (x2,x3);                                               \
    qfirst  = (T)vec_mergeh ((vector unsigned short)q01,                        \
                             (vector unsigned short)q23);                       \
    qsecond = (T)vec_mergel ((vector unsigned short)q01,                        \
                             (vector unsigned short)q23);                       \
    vec_st (qfirst,  32, (T *)ptr);                                             \
    vec_st (qsecond, 48, (T *)ptr);                                             \
    ptr += 4;                                                                   \
} while (0)
00205 
00206 
00207 
00208 
00209 
00210 
00211 
00212 
00213 
00214 
00215 
00216 
00217 
00218 
00219 
00220 
00221 
00222 
/*
 * Zero-extend bytes to 16-bit lanes.
 *
 * The permute pairs each source byte with a byte taken from an all-zero
 * second operand (index 0x10), zero byte first.  vec_unh() widens bytes
 * 0..7 of x, vec_unl() widens bytes 8..15.
 */
#define vec_unh(x)                                                   \
    (vector signed short)                                            \
        vec_perm (x, (__typeof__(x)){0},                             \
                  ((vector unsigned char){                           \
                      0x10,0x00, 0x10,0x01, 0x10,0x02, 0x10,0x03,    \
                      0x10,0x04, 0x10,0x05, 0x10,0x06, 0x10,0x07}))

#define vec_unl(x)                                                   \
    (vector signed short)                                            \
        vec_perm (x, (__typeof__(x)){0},                             \
                  ((vector unsigned char){                           \
                      0x10,0x08, 0x10,0x09, 0x10,0x0A, 0x10,0x0B,    \
                      0x10,0x0C, 0x10,0x0D, 0x10,0x0E, 0x10,0x0F}))
00233 
/* Clamp every signed 16-bit lane of x to the range [16, 235]. */
#define vec_clip_s16(x)                                                          \
    vec_min (vec_max (x, ((vector signed short){ 16, 16, 16, 16,                 \
                                                 16, 16, 16, 16})),              \
             ((vector signed short){235,235,235,235,                             \
                                    235,235,235,235}))
00237 
/*
 * Pack two vectors of signed 16-bit lanes into one vector of unsigned bytes.
 * Negative lanes are raised to 0 first; vec_packs() on the unsigned view
 * then saturates values above 255.
 */
#define vec_packclp(x,y)                                                  \
    (vector unsigned char)                                                \
        vec_packs ((vector unsigned short)                                \
                       vec_max (x, ((vector signed short){0})),           \
                   (vector unsigned short)                                \
                       vec_max (y, ((vector signed short){0})))
00242 
00243 
00244 
00245 
00246 static inline void cvtyuvtoRGB (SwsContext *c,
00247                                 vector signed short Y, vector signed short U, vector signed short V,
00248                                 vector signed short *R, vector signed short *G, vector signed short *B)
00249 {
00250     vector signed   short vx,ux,uvx;
00251 
00252     Y = vec_mradds (Y, c->CY, c->OY);
00253     U  = vec_sub (U,(vector signed short)
00254                     vec_splat((vector signed short){128},0));
00255     V  = vec_sub (V,(vector signed short)
00256                     vec_splat((vector signed short){128},0));
00257 
00258     
00259     ux = vec_sl (U, c->CSHIFT);
00260     *B = vec_mradds (ux, c->CBU, Y);
00261 
00262     
00263     vx = vec_sl (V, c->CSHIFT);
00264     *R = vec_mradds (vx, c->CRV, Y);
00265 
00266     
00267     uvx = vec_mradds (U, c->CGU, Y);
00268     *G  = vec_mradds (V, c->CGV, uvx);
00269 }
00270 
00271 
00272 
00273 
00274 
00275 
00276 
00277 
00278 
/*
 * Generate altivec_<name>(): a planar Y/U/V to packed RGB slice converter.
 *
 * Each outer iteration consumes two luma rows and one row each of U and V,
 * and produces two output rows through out_pixels(R,G,B,ptr).  Each inner
 * iteration handles 16 pixels per row (16 luma bytes and 8 chroma bytes),
 * so only w/16 full groups of a row are converted.
 *
 * Input rows are located with instrides[].  Output rows are located with
 * outstrides[0]: both row pointers are recomputed from the stride for every
 * row pair, so destination lines with padding are handled correctly.
 * (Previously the pointers were advanced by one stride on top of the pixels
 * already written, which is only right when the stride equals the packed
 * row size.)
 *
 * The stores use vec_st(), which ignores the low four address bits, so the
 * destination rows are expected to be 16-byte aligned.
 *
 * Returns srcSliceH.
 */
#define DEFCSP420_CVT(name,out_pixels)                                  \
static int altivec_##name (SwsContext *c,                               \
                           const unsigned char **in, int *instrides,    \
                           int srcSliceY,        int srcSliceH,         \
                           unsigned char **oplanes, int *outstrides)    \
{                                                                       \
    int w = c->srcW;                                                    \
    int h = srcSliceH;                                                  \
    int i,j;                                                            \
    int instrides_scl[3];                                               \
    vector unsigned char y0,y1;                                         \
                                                                        \
    vector signed char  u,v;                                            \
                                                                        \
    vector signed short Y0,Y1,Y2,Y3;                                    \
    vector signed short U,V;                                            \
    vector signed short vx,ux,uvx;                                      \
    vector signed short vx0,ux0,uvx0;                                   \
    vector signed short vx1,ux1,uvx1;                                   \
    vector signed short R0,G0,B0;                                       \
    vector signed short R1,G1,B1;                                       \
    vector unsigned char R,G,B;                                         \
                                                                        \
    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;            \
    vector unsigned char align_perm;                                    \
                                                                        \
    /* local copies of the conversion coefficients */                   \
    vector signed short                                                 \
        lCY  = c->CY,                                                   \
        lOY  = c->OY,                                                   \
        lCRV = c->CRV,                                                  \
        lCBU = c->CBU,                                                  \
        lCGU = c->CGU,                                                  \
        lCGV = c->CGV;                                                  \
                                                                        \
    vector unsigned short lCSHIFT = c->CSHIFT;                          \
                                                                        \
    /* even luma row, odd luma row, and the shared chroma rows */       \
    const unsigned char *y1i   = in[0];                                 \
    const unsigned char *y2i   = in[0]+instrides[0];                    \
    const unsigned char *ui    = in[1];                                 \
    const unsigned char *vi    = in[2];                                 \
                                                                        \
    /* first destination row of this slice */                           \
    unsigned char *dst_base = oplanes[0]+srcSliceY*outstrides[0];       \
    vector unsigned char *oute, *outo;                                  \
                                                                        \
    /* distance from the end of the consumed samples to the next row */ \
    /* (luma skips two rows at a time, chroma one)                   */ \
    instrides_scl[0] = instrides[0]*2-w;                                \
    instrides_scl[1] = instrides[1]-w/2;                                \
    instrides_scl[2] = instrides[2]-w/2;                                \
                                                                        \
    for (i=0;i<h/2;i++) {                                               \
        /* even and odd output rows of this row pair */                 \
        oute = (vector unsigned char *)                                 \
                   (dst_base+(2*i)*outstrides[0]);                      \
        outo = (vector unsigned char *)                                 \
                   (dst_base+(2*i+1)*outstrides[0]);                    \
                                                                        \
        /* data-stream prefetch hints for the two output rows */        \
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);          \
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);          \
                                                                        \
        for (j=0;j<w/16;j++) {                                          \
                                                                        \
            y1ivP = (const vector unsigned char *)y1i;                  \
            y2ivP = (const vector unsigned char *)y2i;                  \
            uivP  = (const vector unsigned char *)ui;                   \
            vivP  = (const vector unsigned char *)vi;                   \
                                                                        \
            /* unaligned loads: two aligned vectors + lvsl permute */   \
            align_perm = vec_lvsl (0, y1i);                             \
            y0 = (vector unsigned char)                                 \
                 vec_perm (y1ivP[0], y1ivP[1], align_perm);             \
                                                                        \
            align_perm = vec_lvsl (0, y2i);                             \
            y1 = (vector unsigned char)                                 \
                 vec_perm (y2ivP[0], y2ivP[1], align_perm);             \
                                                                        \
            align_perm = vec_lvsl (0, ui);                              \
            u = (vector signed char)                                    \
                vec_perm (uivP[0], uivP[1], align_perm);                \
                                                                        \
            align_perm = vec_lvsl (0, vi);                              \
            v = (vector signed char)                                    \
                vec_perm (vivP[0], vivP[1], align_perm);                \
                                                                        \
            /* centre chroma around zero */                             \
            u  = (vector signed char)                                   \
                 vec_sub (u,(vector signed char)                        \
                          vec_splat((vector signed char){128},0));      \
            v  = (vector signed char)                                   \
                 vec_sub (v,(vector signed char)                        \
                          vec_splat((vector signed char){128},0));      \
                                                                        \
            /* sign-extend the first 8 chroma samples to 16 bits */     \
            U  = vec_unpackh (u);                                       \
            V  = vec_unpackh (v);                                       \
                                                                        \
            /* zero-extend the 2x16 luma samples to 16 bits */          \
            Y0 = vec_unh (y0);                                          \
            Y1 = vec_unl (y0);                                          \
            Y2 = vec_unh (y1);                                          \
            Y3 = vec_unl (y1);                                          \
                                                                        \
            Y0 = vec_mradds (Y0, lCY, lOY);                             \
            Y1 = vec_mradds (Y1, lCY, lOY);                             \
            Y2 = vec_mradds (Y2, lCY, lOY);                             \
            Y3 = vec_mradds (Y3, lCY, lOY);                             \
                                                                        \
            /* blue term; each value duplicated for 2 pixels */         \
            ux = vec_sl (U, lCSHIFT);                                   \
            ux = vec_mradds (ux, lCBU, (vector signed short){0});       \
            ux0  = vec_mergeh (ux,ux);                                  \
            ux1  = vec_mergel (ux,ux);                                  \
                                                                        \
            /* red term; each value duplicated for 2 pixels */          \
            vx = vec_sl (V, lCSHIFT);                                   \
            vx = vec_mradds (vx, lCRV, (vector signed short){0});       \
            vx0  = vec_mergeh (vx,vx);                                  \
            vx1  = vec_mergel (vx,vx);                                  \
                                                                        \
            /* green term; each value duplicated for 2 pixels */        \
            uvx = vec_mradds (U, lCGU, (vector signed short){0});       \
            uvx = vec_mradds (V, lCGV, uvx);                            \
            uvx0 = vec_mergeh (uvx,uvx);                                \
            uvx1 = vec_mergel (uvx,uvx);                                \
                                                                        \
            /* even row */                                              \
            R0 = vec_add (Y0,vx0);                                      \
            G0 = vec_add (Y0,uvx0);                                     \
            B0 = vec_add (Y0,ux0);                                      \
            R1 = vec_add (Y1,vx1);                                      \
            G1 = vec_add (Y1,uvx1);                                     \
            B1 = vec_add (Y1,ux1);                                      \
                                                                        \
            R  = vec_packclp (R0,R1);                                   \
            G  = vec_packclp (G0,G1);                                   \
            B  = vec_packclp (B0,B1);                                   \
                                                                        \
            out_pixels(R,G,B,oute);                                     \
                                                                        \
            /* odd row, same chroma terms */                            \
            R0 = vec_add (Y2,vx0);                                      \
            G0 = vec_add (Y2,uvx0);                                     \
            B0 = vec_add (Y2,ux0);                                      \
            R1 = vec_add (Y3,vx1);                                      \
            G1 = vec_add (Y3,uvx1);                                     \
            B1 = vec_add (Y3,ux1);                                      \
            R  = vec_packclp (R0,R1);                                   \
            G  = vec_packclp (G0,G1);                                   \
            B  = vec_packclp (B0,B1);                                   \
                                                                        \
            out_pixels(R,G,B,outo);                                     \
                                                                        \
            y1i  += 16;                                                 \
            y2i  += 16;                                                 \
            ui   += 8;                                                  \
            vi   += 8;                                                  \
                                                                        \
        }                                                               \
                                                                        \
        /* step the input pointers to the next row pair */              \
        ui    += instrides_scl[1];                                      \
        vi    += instrides_scl[2];                                      \
        y1i   += instrides_scl[0];                                      \
        y2i   += instrides_scl[0];                                      \
    }                                                                   \
    return srcSliceH;                                                   \
}
00442 
00443 
00444 #define out_abgr(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),c,b,a,ptr)
00445 #define out_bgra(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),c,b,a,((__typeof__ (a)){255}),ptr)
00446 #define out_rgba(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),a,b,c,((__typeof__ (a)){255}),ptr)
00447 #define out_argb(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,b,c,ptr)
00448 #define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
00449 #define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
00450 
00451 DEFCSP420_CVT (yuv2_abgr, out_abgr)
00452 DEFCSP420_CVT (yuv2_bgra, out_bgra)
00453 DEFCSP420_CVT (yuv2_rgba, out_rgba)
00454 DEFCSP420_CVT (yuv2_argb, out_argb)
00455 DEFCSP420_CVT (yuv2_rgb24,  out_rgb24)
00456 DEFCSP420_CVT (yuv2_bgr24,  out_bgr24)
00457 
00458 
00459 
00460 
00461 static
00462 const vector unsigned char
00463     demux_u = {0x10,0x00,0x10,0x00,
00464                0x10,0x04,0x10,0x04,
00465                0x10,0x08,0x10,0x08,
00466                0x10,0x0c,0x10,0x0c},
00467     demux_v = {0x10,0x02,0x10,0x02,
00468                0x10,0x06,0x10,0x06,
00469                0x10,0x0A,0x10,0x0A,
00470                0x10,0x0E,0x10,0x0E},
00471     demux_y = {0x10,0x01,0x10,0x03,
00472                0x10,0x05,0x10,0x07,
00473                0x10,0x09,0x10,0x0B,
00474                0x10,0x0D,0x10,0x0F};
00475 
00476 
00477 
00478 
00479 static int altivec_uyvy_rgb32 (SwsContext *c,
00480                                const unsigned char **in, int *instrides,
00481                                int srcSliceY,        int srcSliceH,
00482                                unsigned char **oplanes, int *outstrides)
00483 {
00484     int w = c->srcW;
00485     int h = srcSliceH;
00486     int i,j;
00487     vector unsigned char uyvy;
00488     vector signed   short Y,U,V;
00489     vector signed   short R0,G0,B0,R1,G1,B1;
00490     vector unsigned char  R,G,B;
00491     vector unsigned char *out;
00492     const ubyte *img;
00493 
00494     img = in[0];
00495     out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
00496 
00497     for (i=0;i<h;i++) {
00498         for (j=0;j<w/16;j++) {
00499             uyvy = vec_ld (0, img);
00500             U = (vector signed short)
00501                 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00502 
00503             V = (vector signed short)
00504                 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00505 
00506             Y = (vector signed short)
00507                 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00508 
00509             cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
00510 
00511             uyvy = vec_ld (16, img);
00512             U = (vector signed short)
00513                 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00514 
00515             V = (vector signed short)
00516                 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00517 
00518             Y = (vector signed short)
00519                 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00520 
00521             cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
00522 
00523             R  = vec_packclp (R0,R1);
00524             G  = vec_packclp (G0,G1);
00525             B  = vec_packclp (B0,B1);
00526 
00527             
00528             out_rgba (R,G,B,out);
00529 
00530             img += 32;
00531         }
00532     }
00533     return srcSliceH;
00534 }
00535 
00536 
00537 
00538 
00539 
00540 
00541 
00542 
00543 
00544 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
00545 {
00546     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
00547         return NULL;
00548 
00549     
00550 
00551 
00552 
00553 
00554 
00555 
00556     if ((c->srcW & 0xf) != 0)    return NULL;
00557 
00558     switch (c->srcFormat) {
00559     case PIX_FMT_YUV410P:
00560     case PIX_FMT_YUV420P:
00561     
00562     case PIX_FMT_GRAY8:
00563     case PIX_FMT_NV12:
00564     case PIX_FMT_NV21:
00565         if ((c->srcH & 0x1) != 0)
00566             return NULL;
00567 
00568         switch(c->dstFormat) {
00569         case PIX_FMT_RGB24:
00570             av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00571             return altivec_yuv2_rgb24;
00572         case PIX_FMT_BGR24:
00573             av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00574             return altivec_yuv2_bgr24;
00575         case PIX_FMT_ARGB:
00576             av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00577             return altivec_yuv2_argb;
00578         case PIX_FMT_ABGR:
00579             av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00580             return altivec_yuv2_abgr;
00581         case PIX_FMT_RGBA:
00582             av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00583             return altivec_yuv2_rgba;
00584         case PIX_FMT_BGRA:
00585             av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00586             return altivec_yuv2_bgra;
00587         default: return NULL;
00588         }
00589         break;
00590 
00591     case PIX_FMT_UYVY422:
00592         switch(c->dstFormat) {
00593         case PIX_FMT_BGR32:
00594             av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00595             return altivec_uyvy_rgb32;
00596         default: return NULL;
00597         }
00598         break;
00599 
00600     }
00601     return NULL;
00602 }
00603 
00604 void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int brightness, int contrast, int saturation)
00605 {
00606     union {
00607         DECLARE_ALIGNED(16, signed short, tmp)[8];
00608         vector signed short vec;
00609     } buf;
00610 
00611     buf.tmp[0] =  ((0xffffLL) * contrast>>8)>>9;                        
00612     buf.tmp[1] =  -256*brightness;                                      
00613     buf.tmp[2] =  (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);   
00614     buf.tmp[3] =  (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);   
00615     buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));  
00616     buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));  
00617 
00618 
00619     c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
00620     c->CY   = vec_splat ((vector signed short)buf.vec, 0);
00621     c->OY   = vec_splat ((vector signed short)buf.vec, 1);
00622     c->CRV  = vec_splat ((vector signed short)buf.vec, 2);
00623     c->CBU  = vec_splat ((vector signed short)buf.vec, 3);
00624     c->CGU  = vec_splat ((vector signed short)buf.vec, 4);
00625     c->CGV  = vec_splat ((vector signed short)buf.vec, 5);
00626     return;
00627 }
00628 
00629 
00630 static av_always_inline void
00631 ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
00632                        const int16_t **lumSrc, int lumFilterSize,
00633                        const int16_t *chrFilter, const int16_t **chrUSrc,
00634                        const int16_t **chrVSrc, int chrFilterSize,
00635                        const int16_t **alpSrc, uint8_t *dest,
00636                        int dstW, int dstY, enum PixelFormat target)
00637 {
00638     int i,j;
00639     vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
00640     vector signed short R0,G0,B0,R1,G1,B1;
00641 
00642     vector unsigned char R,G,B;
00643     vector unsigned char *out,*nout;
00644 
00645     vector signed short   RND = vec_splat_s16(1<<3);
00646     vector unsigned short SCL = vec_splat_u16(4);
00647     DECLARE_ALIGNED(16, unsigned int, scratch)[16];
00648 
00649     vector signed short *YCoeffs, *CCoeffs;
00650 
00651     YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
00652     CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
00653 
00654     out = (vector unsigned char *)dest;
00655 
00656     for (i=0; i<dstW; i+=16) {
00657         Y0 = RND;
00658         Y1 = RND;
00659         
00660         for (j=0; j<lumFilterSize; j++) {
00661             X0 = vec_ld (0,  &lumSrc[j][i]);
00662             X1 = vec_ld (16, &lumSrc[j][i]);
00663             Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00664             Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00665         }
00666 
00667         U = RND;
00668         V = RND;
00669         
00670         for (j=0; j<chrFilterSize; j++) {
00671             X  = vec_ld (0, &chrUSrc[j][i/2]);
00672             U  = vec_mradds (X, CCoeffs[j], U);
00673             X  = vec_ld (0, &chrVSrc[j][i/2]);
00674             V  = vec_mradds (X, CCoeffs[j], V);
00675         }
00676 
00677         
00678         Y0 = vec_sra (Y0, SCL);
00679         Y1 = vec_sra (Y1, SCL);
00680         U  = vec_sra (U,  SCL);
00681         V  = vec_sra (V,  SCL);
00682 
00683         Y0 = vec_clip_s16 (Y0);
00684         Y1 = vec_clip_s16 (Y1);
00685         U  = vec_clip_s16 (U);
00686         V  = vec_clip_s16 (V);
00687 
00688         
00689 
00690 
00691 
00692 
00693 
00694 
00695 
00696 
00697         U0 = vec_mergeh (U,U);
00698         V0 = vec_mergeh (V,V);
00699 
00700         U1 = vec_mergel (U,U);
00701         V1 = vec_mergel (V,V);
00702 
00703         cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00704         cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00705 
00706         R  = vec_packclp (R0,R1);
00707         G  = vec_packclp (G0,G1);
00708         B  = vec_packclp (B0,B1);
00709 
00710         switch(target) {
00711         case PIX_FMT_ABGR:  out_abgr  (R,G,B,out); break;
00712         case PIX_FMT_BGRA:  out_bgra  (R,G,B,out); break;
00713         case PIX_FMT_RGBA:  out_rgba  (R,G,B,out); break;
00714         case PIX_FMT_ARGB:  out_argb  (R,G,B,out); break;
00715         case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
00716         case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
00717         default:
00718             {
00719                 
00720 
00721                 static int printed_error_message;
00722                 if (!printed_error_message) {
00723                     av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00724                            av_get_pix_fmt_name(c->dstFormat));
00725                     printed_error_message=1;
00726                 }
00727                 return;
00728             }
00729         }
00730     }
00731 
00732     if (i < dstW) {
00733         i -= 16;
00734 
00735         Y0 = RND;
00736         Y1 = RND;
00737         
00738         for (j=0; j<lumFilterSize; j++) {
00739             X0 = vec_ld (0,  &lumSrc[j][i]);
00740             X1 = vec_ld (16, &lumSrc[j][i]);
00741             Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00742             Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00743         }
00744 
00745         U = RND;
00746         V = RND;
00747         
00748         for (j=0; j<chrFilterSize; j++) {
00749             X  = vec_ld (0, &chrUSrc[j][i/2]);
00750             U  = vec_mradds (X, CCoeffs[j], U);
00751             X  = vec_ld (0, &chrVSrc[j][i/2]);
00752             V  = vec_mradds (X, CCoeffs[j], V);
00753         }
00754 
00755         
00756         Y0 = vec_sra (Y0, SCL);
00757         Y1 = vec_sra (Y1, SCL);
00758         U  = vec_sra (U,  SCL);
00759         V  = vec_sra (V,  SCL);
00760 
00761         Y0 = vec_clip_s16 (Y0);
00762         Y1 = vec_clip_s16 (Y1);
00763         U  = vec_clip_s16 (U);
00764         V  = vec_clip_s16 (V);
00765 
00766         
00767 
00768 
00769 
00770 
00771 
00772 
00773 
00774 
00775         U0 = vec_mergeh (U,U);
00776         V0 = vec_mergeh (V,V);
00777 
00778         U1 = vec_mergel (U,U);
00779         V1 = vec_mergel (V,V);
00780 
00781         cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00782         cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00783 
00784         R  = vec_packclp (R0,R1);
00785         G  = vec_packclp (G0,G1);
00786         B  = vec_packclp (B0,B1);
00787 
00788         nout = (vector unsigned char *)scratch;
00789         switch(target) {
00790         case PIX_FMT_ABGR:  out_abgr  (R,G,B,nout); break;
00791         case PIX_FMT_BGRA:  out_bgra  (R,G,B,nout); break;
00792         case PIX_FMT_RGBA:  out_rgba  (R,G,B,nout); break;
00793         case PIX_FMT_ARGB:  out_argb  (R,G,B,nout); break;
00794         case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
00795         case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
00796         default:
00797             
00798             av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00799                    av_get_pix_fmt_name(c->dstFormat));
00800             return;
00801         }
00802 
00803         memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
00804     }
00805 
00806 }
00807 
00808 #define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
00809 void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
00810                             const int16_t **lumSrc, int lumFilterSize, \
00811                             const int16_t *chrFilter, const int16_t **chrUSrc, \
00812                             const int16_t **chrVSrc, int chrFilterSize, \
00813                             const int16_t **alpSrc, uint8_t *dest, \
00814                             int dstW, int dstY) \
00815 { \
00816     ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
00817                            chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00818                            alpSrc, dest, dstW, dstY, pixfmt); \
00819 }
00820 
00821 YUV2PACKEDX_WRAPPER(abgr,  PIX_FMT_ABGR);
00822 YUV2PACKEDX_WRAPPER(bgra,  PIX_FMT_BGRA);
00823 YUV2PACKEDX_WRAPPER(argb,  PIX_FMT_ARGB);
00824 YUV2PACKEDX_WRAPPER(rgba,  PIX_FMT_RGBA);
00825 YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
00826 YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);