40 #define GET_PERM(per1, per2, pix) {\ 
   41     per1 = vec_lvsl(0, pix);\ 
   42     per2 = vec_add(per1, vec_splat_u8(1));\ 
   44 #define LOAD_PIX(v, iv, pix, per1, per2) {\ 
   45     vector unsigned char pix2l  = vec_ld(0,  pix);\ 
   46     vector unsigned char pix2r  = vec_ld(16, pix);\ 
   47     v  = vec_perm(pix2l, pix2r, per1);\ 
   48     iv = vec_perm(pix2l, pix2r, per2);\ 
   51 #define GET_PERM(per1, per2, pix) {} 
   52 #define LOAD_PIX(v, iv, pix, per1, per2) {\ 
   53     v  = vec_vsx_ld(0,  pix);\ 
   54     iv = vec_vsx_ld(1,  pix);\ 
   61     int __attribute__((aligned(16))) 
s = 0;
 
   62     const vector 
unsigned char zero =
 
   63         (const vector 
unsigned char) vec_splat_u8(0);
 
   64     vector 
unsigned int sad = (vector 
unsigned int) vec_splat_u32(0);
 
   65     vector 
signed int sumdiffs;
 
   66     vector 
unsigned char perm1, perm2, pix2v, pix2iv;
 
   68     GET_PERM(perm1, perm2, pix2);
 
   69     for (i = 0; i < h; i++) {
 
   73         vector 
unsigned char pix1v  = vec_ld(0,  pix1);
 
   74         LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2);
 
   77         vector 
unsigned char avgv = vec_avg(pix2v, pix2iv);
 
   80         vector 
unsigned char t5 = vec_sub(vec_max(pix1v, avgv),
 
   81                                           vec_min(pix1v, avgv));
 
   84         sad = vec_sum4s(t5, sad);
 
   90     sumdiffs = vec_sums((vector 
signed int) sad, (vector 
signed int) 
zero);
 
   91     sumdiffs = vec_splat(sumdiffs, 3);
 
   92     vec_ste(sumdiffs, 0, &
s);
 
   98                             ptrdiff_t stride, 
int h)
 
  101     int  __attribute__((aligned(16))) 
s = 0;
 
  102     const vector 
unsigned char zero =
 
  103         (const vector 
unsigned char) vec_splat_u8(0);
 
  104     vector 
unsigned char pix1v, pix3v, avgv, t5;
 
  105     vector 
unsigned int sad = (vector 
unsigned int) vec_splat_u32(0);
 
  106     vector 
signed int sumdiffs;
 
  117     vector 
unsigned char pix2v = VEC_LD(0, pix2);
 
  119     for (i = 0; i < h; i++) {
 
  123         pix1v = vec_ld(0,  pix1);
 
  124         pix3v = VEC_LD(0,  pix3);
 
  127         avgv = vec_avg(pix2v, pix3v);
 
  130         t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
 
  133         sad = vec_sum4s(t5, sad);
 
  141     sumdiffs = vec_sums((vector 
signed int) sad, (vector 
signed int) 
zero);
 
  142     sumdiffs = vec_splat(sumdiffs, 3);
 
  143     vec_ste(sumdiffs, 0, &
s);
 
  148                              ptrdiff_t stride, 
int h)
 
  151     int  __attribute__((aligned(16))) 
s = 0;
 
  153     const vector 
unsigned char zero =
 
  154         (const vector 
unsigned char) vec_splat_u8(0);
 
  155     const vector 
unsigned short two =
 
  156         (const vector 
unsigned short) vec_splat_u16(2);
 
  157     vector 
unsigned char avgv, t5;
 
  158     vector 
unsigned char pix1v, pix3v, pix3iv;
 
  159     vector 
unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
 
  160     vector 
unsigned short avghv, avglv;
 
  161     vector 
unsigned int sad = (vector 
unsigned int) vec_splat_u32(0);
 
  162     vector 
signed int sumdiffs;
 
  163     vector 
unsigned char perm1, perm2, pix2v, pix2iv;
 
  164     GET_PERM(perm1, perm2, pix2);
 
  173     LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2);
 
  174     vector 
unsigned short pix2hv  =
 
  175         (vector 
unsigned short) VEC_MERGEH(
zero, pix2v);
 
  176     vector 
unsigned short pix2lv  =
 
  177         (vector 
unsigned short) VEC_MERGEL(zero, pix2v);
 
  178     vector 
unsigned short pix2ihv =
 
  179         (vector 
unsigned short) VEC_MERGEH(zero, pix2iv);
 
  180     vector 
unsigned short pix2ilv =
 
  181         (vector 
unsigned short) VEC_MERGEL(zero, pix2iv);
 
  183     vector 
unsigned short t1 = vec_add(pix2hv, pix2ihv);
 
  184     vector 
unsigned short t2 = vec_add(pix2lv, pix2ilv);
 
  185     vector 
unsigned short t3, 
t4;
 
  187     for (i = 0; i < h; i++) {
 
  191         pix1v  = vec_ld(0, pix1);
 
  192         LOAD_PIX(pix3v, pix3iv, pix3, perm1, perm2);
 
  201         pix3hv  = (vector 
unsigned short) VEC_MERGEH(zero, pix3v);
 
  202         pix3lv  = (vector 
unsigned short) VEC_MERGEL(zero, pix3v);
 
  203         pix3ihv = (vector 
unsigned short) VEC_MERGEH(zero, pix3iv);
 
  204         pix3ilv = (vector 
unsigned short) VEC_MERGEL(zero, pix3iv);
 
  207         t3 = vec_add(pix3hv, pix3ihv);
 
  208         t4 = vec_add(pix3lv, pix3ilv);
 
  210         avghv = vec_sr(vec_add(vec_add(
t1, t3), two), two);
 
  211         avglv = vec_sr(vec_add(vec_add(
t2, t4), two), two);
 
  214         avgv = vec_pack(avghv, avglv);
 
  217         t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
 
  220         sad = vec_sum4s(t5, sad);
 
  229     sumdiffs = vec_sums((vector 
signed int) sad, (vector 
signed int) zero);
 
  230     sumdiffs = vec_splat(sumdiffs, 3);
 
  231     vec_ste(sumdiffs, 0, &
s);
 
  237                          ptrdiff_t stride, 
int h)
 
  240     int  __attribute__((aligned(16))) 
s;
 
  241     const vector 
unsigned int zero =
 
  242         (const vector 
unsigned int) vec_splat_u32(0);
 
  243     vector 
unsigned int sad = (vector 
unsigned int) vec_splat_u32(0);
 
  244     vector 
signed int sumdiffs;
 
  246     for (i = 0; i < h; i++) {
 
  248         vector 
unsigned char t1 =vec_ld(0, pix1);
 
  249         vector 
unsigned char t2 = VEC_LD(0, pix2);
 
  252         vector 
unsigned char t3 = vec_max(t1, t2);
 
  253         vector 
unsigned char t4 = vec_min(t1, t2);
 
  254         vector 
unsigned char t5 = vec_sub(t3, t4);
 
  257         sad = vec_sum4s(t5, sad);
 
  264     sumdiffs = vec_sums((vector 
signed int) sad, (vector 
signed int) zero);
 
  265     sumdiffs = vec_splat(sumdiffs, 3);
 
  266     vec_ste(sumdiffs, 0, &
s);
 
  272                         ptrdiff_t stride, 
int h)
 
  275     int  __attribute__((aligned(16))) 
s;
 
  276     const vector 
unsigned int zero =
 
  277         (const vector 
unsigned int) vec_splat_u32(0);
 
  278     const vector 
unsigned char permclear =
 
  279         (vector 
unsigned char)
 
  280         { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
 
  281     vector 
unsigned int sad = (vector 
unsigned int) vec_splat_u32(0);
 
  282     vector 
signed int sumdiffs;
 
  284     for (i = 0; i < 
h; i++) {
 
  288         vector 
unsigned char pix1l = VEC_LD(0, pix1);
 
  289         vector 
unsigned char pix2l = VEC_LD(0, pix2);
 
  290         vector 
unsigned char t1 = vec_and(pix1l, permclear);
 
  291         vector 
unsigned char t2 = vec_and(pix2l, permclear);
 
  294         vector 
unsigned char t3 = vec_max(t1, t2);
 
  295         vector 
unsigned char t4 = vec_min(t1, t2);
 
  296         vector 
unsigned char t5 = vec_sub(t3, t4);
 
  299         sad = vec_sum4s(t5, sad);
 
  306     sumdiffs = vec_sums((vector 
signed int) sad, (vector 
signed int) zero);
 
  307     sumdiffs = vec_splat(sumdiffs, 3);
 
  308     vec_ste(sumdiffs, 0, &
s);
 
  316                         ptrdiff_t stride, 
int h)
 
  319     int  __attribute__((aligned(16))) 
s;
 
  320     const vector 
unsigned int zero =
 
  321         (const vector 
unsigned int) vec_splat_u32(0);
 
  322     const vector 
unsigned char permclear =
 
  323         (vector 
unsigned char)
 
  324         { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
 
  325     vector 
unsigned int sum = (vector 
unsigned int) vec_splat_u32(0);
 
  326     vector 
signed int sumsqr;
 
  328     for (i = 0; i < 
h; i++) {
 
  332         vector 
unsigned char t1 = vec_and(VEC_LD(0, pix1), permclear);
 
  333         vector 
unsigned char t2 = vec_and(VEC_LD(0, pix2), permclear);
 
  339         vector 
unsigned char t3 = vec_max(t1, t2);
 
  340         vector 
unsigned char t4 = vec_min(t1, t2);
 
  341         vector 
unsigned char t5 = vec_sub(t3, t4);
 
  344         sum = vec_msum(t5, t5, sum);
 
  351     sumsqr = vec_sums((vector 
signed int) sum, (vector 
signed int) zero);
 
  352     sumsqr = vec_splat(sumsqr, 3);
 
  353     vec_ste(sumsqr, 0, &
s);
 
  361                          ptrdiff_t stride, 
int h)
 
  364     int  __attribute__((aligned(16))) 
s;
 
  365     const vector 
unsigned int zero =
 
  366         (const vector 
unsigned int) vec_splat_u32(0);
 
  367     vector 
unsigned int sum = (vector 
unsigned int) vec_splat_u32(0);
 
  368     vector 
signed int sumsqr;
 
  370     for (i = 0; i < h; i++) {
 
  372         vector 
unsigned char t1 = vec_ld(0, pix1);
 
  373         vector 
unsigned char t2 = VEC_LD(0, pix2);
 
  379         vector 
unsigned char t3 = vec_max(t1, t2);
 
  380         vector 
unsigned char t4 = vec_min(t1, t2);
 
  381         vector 
unsigned char t5 = vec_sub(t3, t4);
 
  384         sum = vec_msum(t5, t5, sum);
 
  391     sumsqr = vec_sums((vector 
signed int) sum, (vector 
signed int) zero);
 
  392     sumsqr = vec_splat(sumsqr, 3);
 
  394     vec_ste(sumsqr, 0, &
s);
 
  401     int __attribute__((aligned(16))) sum;
 
  402     register const vector 
unsigned char vzero =
 
  403         (const vector 
unsigned char) vec_splat_u8(0);
 
  404     register vector 
signed short temp0, temp1, temp2, temp3, temp4,
 
  407         register const vector 
signed short vprod1 =
 
  408             (
const vector 
signed short) { 1, -1, 1, -1, 1, -1, 1, -1 };
 
  409         register const vector 
signed short vprod2 =
 
  410             (
const vector 
signed short) { 1, 1, -1, -1, 1, 1, -1, -1 };
 
  411         register const vector 
signed short vprod3 =
 
  412             (
const vector 
signed short) { 1, 1, 1, 1, -1, -1, -1, -1 };
 
  413         register const vector 
unsigned char perm1 =
 
  414             (
const vector 
unsigned char)
 
  415             { 0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
 
  416               0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D };
 
  417         register const vector 
unsigned char perm2 =
 
  418             (
const vector 
unsigned char)
 
  419             { 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
 
  420               0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B };
 
  421         register const vector 
unsigned char perm3 =
 
  422             (
const vector 
unsigned char)
 
  423             { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
 
  424               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
 
  427 #define ONEITERBUTTERFLY(i, res)                                            \ 
  429         register vector unsigned char srcO =  unaligned_load(stride * i, src);  \ 
  430         register vector unsigned char dstO = unaligned_load(stride * i, dst);\ 
  434         register vector signed short srcV =                                 \ 
  435             (vector signed short) VEC_MERGEH((vector signed char) vzero,    \ 
  436                                              (vector signed char) srcO);    \ 
  437         register vector signed short dstV =                                 \ 
  438             (vector signed short) VEC_MERGEH((vector signed char) vzero,    \ 
  439                                              (vector signed char) dstO);    \ 
  442         register vector signed short but0 = vec_sub(srcV, dstV);            \ 
  443         register vector signed short op1  = vec_perm(but0, but0, perm1);    \ 
  444         register vector signed short but1 = vec_mladd(but0, vprod1, op1);   \ 
  445         register vector signed short op2  = vec_perm(but1, but1, perm2);    \ 
  446         register vector signed short but2 = vec_mladd(but1, vprod2, op2);   \ 
  447         register vector signed short op3  = vec_perm(but2, but2, perm3);    \ 
  448         res  = vec_mladd(but2, vprod3, op3);                                \ 
  451         ONEITERBUTTERFLY(0, temp0);
 
  452         ONEITERBUTTERFLY(1, temp1);
 
  453         ONEITERBUTTERFLY(2, temp2);
 
  454         ONEITERBUTTERFLY(3, temp3);
 
  455         ONEITERBUTTERFLY(4, temp4);
 
  456         ONEITERBUTTERFLY(5, temp5);
 
  457         ONEITERBUTTERFLY(6, temp6);
 
  458         ONEITERBUTTERFLY(7, temp7);
 
  460 #undef ONEITERBUTTERFLY 
  462         register vector 
signed int vsum;
 
  463         register vector 
signed short line0  = vec_add(temp0, temp1);
 
  464         register vector 
signed short line1  = vec_sub(temp0, temp1);
 
  465         register vector 
signed short line2  = vec_add(temp2, temp3);
 
  466         register vector 
signed short line3  = vec_sub(temp2, temp3);
 
  467         register vector 
signed short line4  = vec_add(temp4, temp5);
 
  468         register vector 
signed short line5  = vec_sub(temp4, temp5);
 
  469         register vector 
signed short line6  = vec_add(temp6, temp7);
 
  470         register vector 
signed short line7  = vec_sub(temp6, temp7);
 
  472         register vector 
signed short line0B = vec_add(line0, line2);
 
  473         register vector 
signed short line2B = vec_sub(line0, line2);
 
  474         register vector 
signed short line1B = vec_add(line1, line3);
 
  475         register vector 
signed short line3B = vec_sub(line1, line3);
 
  476         register vector 
signed short line4B = vec_add(line4, line6);
 
  477         register vector 
signed short line6B = vec_sub(line4, line6);
 
  478         register vector 
signed short line5B = vec_add(line5, line7);
 
  479         register vector 
signed short line7B = vec_sub(line5, line7);
 
  481         register vector 
signed short line0C = vec_add(line0B, line4B);
 
  482         register vector 
signed short line4C = vec_sub(line0B, line4B);
 
  483         register vector 
signed short line1C = vec_add(line1B, line5B);
 
  484         register vector 
signed short line5C = vec_sub(line1B, line5B);
 
  485         register vector 
signed short line2C = vec_add(line2B, line6B);
 
  486         register vector 
signed short line6C = vec_sub(line2B, line6B);
 
  487         register vector 
signed short line3C = vec_add(line3B, line7B);
 
  488         register vector 
signed short line7C = vec_sub(line3B, line7B);
 
  490         vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
 
  491         vsum = vec_sum4s(vec_abs(line1C), vsum);
 
  492         vsum = vec_sum4s(vec_abs(line2C), vsum);
 
  493         vsum = vec_sum4s(vec_abs(line3C), vsum);
 
  494         vsum = vec_sum4s(vec_abs(line4C), vsum);
 
  495         vsum = vec_sum4s(vec_abs(line5C), vsum);
 
  496         vsum = vec_sum4s(vec_abs(line6C), vsum);
 
  497         vsum = vec_sum4s(vec_abs(line7C), vsum);
 
  498         vsum = vec_sums(vsum, (vector 
signed int) vzero);
 
  499         vsum = vec_splat(vsum, 3);
 
  501         vec_ste(vsum, 0, &sum);
 
  525                                       uint8_t *src, ptrdiff_t stride, 
int h)
 
  527     int __attribute__((aligned(16))) sum;
 
  528     register vector 
signed short 
  529         temp0 __asm__ ("
v0"),
 
  530         temp1 __asm__ ("v1"),
 
  531         temp2 __asm__ ("v2"),
 
  532         temp3 __asm__ ("v3"),
 
  533         temp4 __asm__ ("v4"),
 
  534         temp5 __asm__ ("v5"),
 
  535         temp6 __asm__ ("v6"),
 
  536         temp7 __asm__ ("v7");
 
  537     register vector 
signed short 
  538         temp0S __asm__ ("v8"),
 
  539         temp1S __asm__ ("v9"),
 
  540         temp2S __asm__ ("v10"),
 
  541         temp3S __asm__ ("v11"),
 
  542         temp4S __asm__ ("v12"),
 
  543         temp5S __asm__ ("v13"),
 
  544         temp6S __asm__ ("v14"),
 
  545         temp7S __asm__ ("v15");
 
  546     register const vector 
unsigned char vzero __asm__ ("v31") =
 
  547         (const vector 
unsigned char) vec_splat_u8(0);
 
  549         register const vector 
signed short vprod1 __asm__ (
"v16") =
 
  550             (
const vector 
signed short) { 1, -1, 1, -1, 1, -1, 1, -1 };
 
  552         register const vector 
signed short vprod2 __asm__ (
"v17") =
 
  553             (
const vector 
signed short) { 1, 1, -1, -1, 1, 1, -1, -1 };
 
  555         register const vector 
signed short vprod3 __asm__ (
"v18") =
 
  556             (
const vector 
signed short) { 1, 1, 1, 1, -1, -1, -1, -1 };
 
  558         register const vector 
unsigned char perm1 __asm__ (
"v19") =
 
  559             (
const vector 
unsigned char)
 
  560             { 0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
 
  561               0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D };
 
  563         register const vector 
unsigned char perm2 __asm__ (
"v20") =
 
  564             (
const vector 
unsigned char)
 
  565             { 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
 
  566               0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B };
 
  568         register const vector 
unsigned char perm3 __asm__ (
"v21") =
 
  569             (
const vector 
unsigned char)
 
  570             { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
 
  571               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
 
  573 #define ONEITERBUTTERFLY(i, res1, res2)                                     \ 
  575         register vector unsigned char srcO __asm__ ("v22") =                \ 
  576             unaligned_load(stride * i, src);                                    \ 
  577         register vector unsigned char dstO __asm__ ("v23") =                \ 
  578             unaligned_load(stride * i, dst);\ 
  581         register vector signed short srcV __asm__ ("v24") =                 \ 
  582             (vector signed short) VEC_MERGEH((vector signed char) vzero,    \ 
  583                                              (vector signed char) srcO);    \ 
  584         register vector signed short dstV __asm__ ("v25") =                 \ 
  585             (vector signed short) VEC_MERGEH((vector signed char) vzero,    \ 
  586                                              (vector signed char) dstO);    \ 
  587         register vector signed short srcW __asm__ ("v26") =                 \ 
  588             (vector signed short) VEC_MERGEL((vector signed char) vzero,    \ 
  589                                              (vector signed char) srcO);    \ 
  590         register vector signed short dstW __asm__ ("v27") =                 \ 
  591             (vector signed short) VEC_MERGEL((vector signed char) vzero,    \ 
  592                                              (vector signed char) dstO);    \ 
  595         register vector signed short but0  __asm__ ("v28") =                \ 
  596             vec_sub(srcV, dstV);                                            \ 
  597         register vector signed short but0S __asm__ ("v29") =                \ 
  598             vec_sub(srcW, dstW);                                            \ 
  599         register vector signed short op1   __asm__ ("v30") =                \ 
  600             vec_perm(but0, but0, perm1);                                    \ 
  601         register vector signed short but1  __asm__ ("v22") =                \ 
  602             vec_mladd(but0, vprod1, op1);                                   \ 
  603         register vector signed short op1S  __asm__ ("v23") =                \ 
  604             vec_perm(but0S, but0S, perm1);                                  \ 
  605         register vector signed short but1S __asm__ ("v24") =                \ 
  606             vec_mladd(but0S, vprod1, op1S);                                 \ 
  607         register vector signed short op2   __asm__ ("v25") =                \ 
  608             vec_perm(but1, but1, perm2);                                    \ 
  609         register vector signed short but2  __asm__ ("v26") =                \ 
  610             vec_mladd(but1, vprod2, op2);                                   \ 
  611         register vector signed short op2S  __asm__ ("v27") =                \ 
  612             vec_perm(but1S, but1S, perm2);                                  \ 
  613         register vector signed short but2S __asm__ ("v28") =                \ 
  614             vec_mladd(but1S, vprod2, op2S);                                 \ 
  615         register vector signed short op3   __asm__ ("v29") =                \ 
  616             vec_perm(but2, but2, perm3);                                    \ 
  617         register vector signed short op3S  __asm__ ("v30") =                \ 
  618             vec_perm(but2S, but2S, perm3);                                  \ 
  619         res1 = vec_mladd(but2, vprod3, op3);                                \ 
  620         res2 = vec_mladd(but2S, vprod3, op3S);                              \ 
  623         ONEITERBUTTERFLY(0, temp0, temp0S);
 
  624         ONEITERBUTTERFLY(1, temp1, temp1S);
 
  625         ONEITERBUTTERFLY(2, temp2, temp2S);
 
  626         ONEITERBUTTERFLY(3, temp3, temp3S);
 
  627         ONEITERBUTTERFLY(4, temp4, temp4S);
 
  628         ONEITERBUTTERFLY(5, temp5, temp5S);
 
  629         ONEITERBUTTERFLY(6, temp6, temp6S);
 
  630         ONEITERBUTTERFLY(7, temp7, temp7S);
 
  632 #undef ONEITERBUTTERFLY 
  634         register vector 
signed int vsum;
 
  636         register vector 
signed short line0  = vec_add(temp0, temp1);
 
  637         register vector 
signed short line1  = vec_sub(temp0, temp1);
 
  638         register vector 
signed short line2  = vec_add(temp2, temp3);
 
  639         register vector 
signed short line3  = vec_sub(temp2, temp3);
 
  640         register vector 
signed short line4  = vec_add(temp4, temp5);
 
  641         register vector 
signed short line5  = vec_sub(temp4, temp5);
 
  642         register vector 
signed short line6  = vec_add(temp6, temp7);
 
  643         register vector 
signed short line7  = vec_sub(temp6, temp7);
 
  645         register vector 
signed short line0B = vec_add(line0, line2);
 
  646         register vector 
signed short line2B = vec_sub(line0, line2);
 
  647         register vector 
signed short line1B = vec_add(line1, line3);
 
  648         register vector 
signed short line3B = vec_sub(line1, line3);
 
  649         register vector 
signed short line4B = vec_add(line4, line6);
 
  650         register vector 
signed short line6B = vec_sub(line4, line6);
 
  651         register vector 
signed short line5B = vec_add(line5, line7);
 
  652         register vector 
signed short line7B = vec_sub(line5, line7);
 
  654         register vector 
signed short line0C = vec_add(line0B, line4B);
 
  655         register vector 
signed short line4C = vec_sub(line0B, line4B);
 
  656         register vector 
signed short line1C = vec_add(line1B, line5B);
 
  657         register vector 
signed short line5C = vec_sub(line1B, line5B);
 
  658         register vector 
signed short line2C = vec_add(line2B, line6B);
 
  659         register vector 
signed short line6C = vec_sub(line2B, line6B);
 
  660         register vector 
signed short line3C = vec_add(line3B, line7B);
 
  661         register vector 
signed short line7C = vec_sub(line3B, line7B);
 
  663         register vector 
signed short line0S = vec_add(temp0S, temp1S);
 
  664         register vector 
signed short line1S = vec_sub(temp0S, temp1S);
 
  665         register vector 
signed short line2S = vec_add(temp2S, temp3S);
 
  666         register vector 
signed short line3S = vec_sub(temp2S, temp3S);
 
  667         register vector 
signed short line4S = vec_add(temp4S, temp5S);
 
  668         register vector 
signed short line5S = vec_sub(temp4S, temp5S);
 
  669         register vector 
signed short line6S = vec_add(temp6S, temp7S);
 
  670         register vector 
signed short line7S = vec_sub(temp6S, temp7S);
 
  672         register vector 
signed short line0BS = vec_add(line0S, line2S);
 
  673         register vector 
signed short line2BS = vec_sub(line0S, line2S);
 
  674         register vector 
signed short line1BS = vec_add(line1S, line3S);
 
  675         register vector 
signed short line3BS = vec_sub(line1S, line3S);
 
  676         register vector 
signed short line4BS = vec_add(line4S, line6S);
 
  677         register vector 
signed short line6BS = vec_sub(line4S, line6S);
 
  678         register vector 
signed short line5BS = vec_add(line5S, line7S);
 
  679         register vector 
signed short line7BS = vec_sub(line5S, line7S);
 
  681         register vector 
signed short line0CS = vec_add(line0BS, line4BS);
 
  682         register vector 
signed short line4CS = vec_sub(line0BS, line4BS);
 
  683         register vector 
signed short line1CS = vec_add(line1BS, line5BS);
 
  684         register vector 
signed short line5CS = vec_sub(line1BS, line5BS);
 
  685         register vector 
signed short line2CS = vec_add(line2BS, line6BS);
 
  686         register vector 
signed short line6CS = vec_sub(line2BS, line6BS);
 
  687         register vector 
signed short line3CS = vec_add(line3BS, line7BS);
 
  688         register vector 
signed short line7CS = vec_sub(line3BS, line7BS);
 
  690         vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
 
  691         vsum = vec_sum4s(vec_abs(line1C), vsum);
 
  692         vsum = vec_sum4s(vec_abs(line2C), vsum);
 
  693         vsum = vec_sum4s(vec_abs(line3C), vsum);
 
  694         vsum = vec_sum4s(vec_abs(line4C), vsum);
 
  695         vsum = vec_sum4s(vec_abs(line5C), vsum);
 
  696         vsum = vec_sum4s(vec_abs(line6C), vsum);
 
  697         vsum = vec_sum4s(vec_abs(line7C), vsum);
 
  699         vsum = vec_sum4s(vec_abs(line0CS), vsum);
 
  700         vsum = vec_sum4s(vec_abs(line1CS), vsum);
 
  701         vsum = vec_sum4s(vec_abs(line2CS), vsum);
 
  702         vsum = vec_sum4s(vec_abs(line3CS), vsum);
 
  703         vsum = vec_sum4s(vec_abs(line4CS), vsum);
 
  704         vsum = vec_sum4s(vec_abs(line5CS), vsum);
 
  705         vsum = vec_sum4s(vec_abs(line6CS), vsum);
 
  706         vsum = vec_sum4s(vec_abs(line7CS), vsum);
 
  707         vsum = vec_sums(vsum, (vector 
signed int) vzero);
 
  708         vsum = vec_splat(vsum, 3);
 
  710         vec_ste(vsum, 0, &sum);
 
  716                                     uint8_t *src, ptrdiff_t stride, 
int h)
 
  718     int score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
 
  723         score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
 
  735     c->
pix_abs[0][1] = sad16_x2_altivec;
 
  736     c->
pix_abs[0][2] = sad16_y2_altivec;
 
  737     c->
pix_abs[0][3] = sad16_xy2_altivec;
 
  738     c->
pix_abs[0][0] = sad16_altivec;
 
  739     c->
pix_abs[1][0] = sad8_altivec;
 
  741     c->
sad[0] = sad16_altivec;
 
  742     c->
sad[1] = sad8_altivec;
 
  743     c->
sse[0] = sse16_altivec;
 
  744     c->
sse[1] = sse8_altivec;
 
av_cold void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx)
 
Macro definitions for various function/variable attributes. 
 
me_cmp_func hadamard8_diff[6]
 
#define PPC_ALTIVEC(flags)
 
me_cmp_func pix_abs[2][4]
 
Libavcodec external API header. 
 
Main external API structure. 
 
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU. 
 
Contains misc utility macros and inline functions. 
 
GLint GLenum GLboolean GLsizei stride