/* Declares the prototype of one chroma loop-filter routine named
 * ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>. The expansion is a
 * declaration only (it ends in ';'); no function body is generated. */
33 #define LFC_FUNC(DIR, DEPTH, OPT) \ 
   34 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q); 
   /* Declares the prototype of one luma loop-filter routine named
    * ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>. The signature carries an
    * `int beta` argument between `stride` and `tc`. Declaration only. */
   36 #define LFL_FUNC(DIR, DEPTH, OPT) \ 
   37 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q); 
   /* Declares the `h` and `v` chroma loop-filter prototypes for one
    * depth/opt pair by expanding LFC_FUNC twice. The `type` parameter is
    * accepted but does not appear in the expansion. */
   39 #define LFC_FUNCS(type, depth, opt) \ 
   40     LFC_FUNC(h, depth, opt)  \ 
   41     LFC_FUNC(v, depth, opt) 
   /* Declares the `h` and `v` luma loop-filter prototypes for one
    * depth/opt pair by expanding LFL_FUNC twice. The `type` parameter is
    * accepted but does not appear in the expansion. */
   43 #define LFL_FUNCS(type, depth, opt) \ 
   44     LFL_FUNC(h, depth, opt)  \ 
   45     LFL_FUNC(v, depth, opt) 
   /* Declares three prototypes ff_hevc_idct_<W>_dc_<depth>_<opt> for
    * depth = 8, 10 and 12, each taking an int16_t coefficient pointer.
    * The last declaration has no trailing ';', so every use of the macro
    * has to supply one. */
   63 #define IDCT_DC_FUNCS(W, opt) \ 
   64 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \ 
   65 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \ 
   66 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs) 
   /* Declares the prototypes ff_hevc_idct_<N>x<N>_<depth>_<opt> for
    * N = 4, 8, 16, 32 and depth = 8, 10 (eight declarations in total), each
    * taking (int16_t *coeffs, int col_limit).
    * Unlike IDCT_DC_FUNCS, the expansion already ends with ';'. */
   75 #define IDCT_FUNCS(opt)                                             \ 
   76 void ff_hevc_idct_4x4_8_    ## opt(int16_t *coeffs, int col_limit); \ 
   77 void ff_hevc_idct_4x4_10_   ## opt(int16_t *coeffs, int col_limit); \ 
   78 void ff_hevc_idct_8x8_8_    ## opt(int16_t *coeffs, int col_limit); \ 
   79 void ff_hevc_idct_8x8_10_   ## opt(int16_t *coeffs, int col_limit); \ 
   80 void ff_hevc_idct_16x16_8_  ## opt(int16_t *coeffs, int col_limit); \ 
   81 void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \ 
   82 void ff_hevc_idct_32x32_8_  ## opt(int16_t *coeffs, int col_limit); \ 
   83 void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit); 
   /* Alias so that DECL_HV_FILTER(pel) resolves to the qpel filter table:
    * the pasted name ff_hevc_pel_filters is rewritten to
    * ff_hevc_qpel_filters. */
   89 #define ff_hevc_pel_filters ff_hevc_qpel_filters 
   /* Declares two locals, `hf` and `vf`, pointing at the entries of
    * ff_hevc_<f>_filters selected by `mx` and `my` respectively. Both `mx`
    * and `my` must already be in scope where the macro is expanded. */
   90 #define DECL_HV_FILTER(f)                                  \ 
   91     const uint8_t *hf = ff_hevc_ ## f ## _filters[mx];     \ 
   92     const uint8_t *vf = ff_hevc_ ## f ## _filters[my]; 
   94 #define FW_PUT(p, a, b, depth, opt) \ 
   95 static void hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,    \ 
   96                                                int height, intptr_t mx, intptr_t my,int width)               \ 
   99     ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \ 
  102 #define FW_PUT_UNI(p, a, b, depth, opt) \ 
  103 static void hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride,                        \ 
  104                                                         const uint8_t *src, ptrdiff_t srcstride,             \ 
  105                                                         int height, intptr_t mx, intptr_t my, int width)     \ 
  108     ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width);   \ 
  111 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL 
  /* Emits both forwarding wrappers for one (a, b, depth, opt) combination:
   * FW_PUT produces hevc_put_<a>_<depth>_<opt> and FW_PUT_UNI produces
   * hevc_put_uni_<a>_<depth>_<opt>. All arguments are passed through
   * unchanged. */
  113 #define FW_PUT_FUNCS(p, a, b, depth, opt) \ 
  114     FW_PUT(p, a, b, depth, opt) \ 
  115     FW_PUT_UNI(p, a, b, depth, opt) 
  /* Wrapper pair for the plain-copy case at width `w`: the HEVC-side name
   * component is pel_pixels<w>, the forwarded-to name component is
   * pixels<w>, and `pel` is passed as the filter-table selector. */
  117 #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt) 
  /* Emits the horizontal and vertical wrapper pairs for filter family
   * `npel` with `n` taps at width `w`: HEVC-side name components are
   * <npel>_h<w> / <npel>_v<w>, forwarded-to name components are
   * <n>tap_h<w> / <n>tap_v<w>. */
  119 #define FW_DIR(npel, n, w, depth, opt) \ 
  120     FW_PUT_FUNCS(npel, npel ## _h##w,  n ## tap_h##w,  depth, opt) \ 
  121     FW_PUT_FUNCS(npel, npel ## _v##w,  n ## tap_v##w,  depth, opt) 
  /* Emits the combined horizontal+vertical wrapper pair for filter family
   * `npel` with `n` taps at width `w` (<npel>_hv<w> forwarding to
   * <n>tap_hv<w>). */
  123 #define FW_DIR_HV(npel, n, w, depth, opt) \ 
  124     FW_PUT_FUNCS(npel, npel ## _hv##w,  n ## tap_hv##w,  depth, opt) 
  /* EPEL specialisations of FW_DIR / FW_DIR_HV: family name `epel`,
   * 4 taps. FW_EPEL_FUNCS emits the h, v and hv wrapper pairs together. */
  138 #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt) 
  139 #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt) 
  140 #define FW_EPEL_FUNCS(w, depth, opt) \ 
  141     FW_EPEL(w, depth, opt)           \ 
  142     FW_EPEL_HV(w, depth, opt) 
  /* Instantiate the sse4 EPEL wrappers (h, v and hv; put and put_uni):
   * widths 4, 6, 8 and 16 at 8 bits, widths 4, 6 and 8 at 10 and 12 bits. */
  146 FW_EPEL_FUNCS(4,   8, sse4)
 
  147 FW_EPEL_FUNCS(6,   8, sse4)
 
  148 FW_EPEL_FUNCS(8,   8, sse4)
 
  149 FW_EPEL_FUNCS(16,  8, sse4)
 
  150 FW_EPEL_FUNCS(4,  10, sse4)
 
  151 FW_EPEL_FUNCS(6,  10, sse4)
 
  152 FW_EPEL_FUNCS(8,  10, sse4)
 
  153 FW_EPEL_FUNCS(4,  12, sse4)
 
  154 FW_EPEL_FUNCS(6,  12, sse4)
 
  155 FW_EPEL_FUNCS(8,  12, sse4)
 
  /* QPEL specialisations of FW_DIR / FW_DIR_HV: family name `qpel`,
   * 8 taps. FW_QPEL_FUNCS emits the h, v and hv wrapper pairs together. */
  157 #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt) 
  158 #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt) 
  159 #define FW_QPEL_FUNCS(w, depth, opt) \ 
  160     FW_QPEL(w, depth, opt)           \ 
  161     FW_QPEL_HV(w, depth, opt) 
  /* Instantiate the sse4 QPEL wrappers (h, v and hv; put and put_uni) for
   * widths 4 and 8 at 8, 10 and 12 bits. */
  166 FW_QPEL_FUNCS(4,   8, sse4)
 
  167 FW_QPEL_FUNCS(8,   8, sse4)
 
  168 FW_QPEL_FUNCS(4,  10, sse4)
 
  169 FW_QPEL_FUNCS(8,  10, sse4)
 
  170 FW_QPEL_FUNCS(4,  12, sse4)
 
  171 FW_QPEL_FUNCS(8,  12, sse4)
 
  173 #if HAVE_AVX2_EXTERNAL 
  /* avx2 wrapper instantiations visible in this section. Note that the
   * pel_pixels16 10-bit case uses FW_PUT directly, so only the plain `put`
   * wrapper is generated for it here (no put_uni). The remaining lines
   * generate put + put_uni pairs: EPEL h/v at width 16 (10-bit), EPEL hv at
   * width 32 (8-bit) and 16 (10-bit), QPEL h/v and hv at width 16 (10-bit). */
  176 FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)
 
  179 FW_EPEL(16, 10, avx2)
 
  181 FW_EPEL_HV(32,  8, avx2)
 
  182 FW_EPEL_HV(16, 10, avx2)
 
  185 FW_QPEL(16, 10, avx2)
 
  187 FW_QPEL_HV(16, 10, avx2)
 
  192 #define mc_rep_func(name, bitd, step, W, opt) \ 
  193 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst,                                                  \ 
  194                                             const uint8_t *_src, ptrdiff_t _srcstride, int height,              \ 
  195                                             intptr_t mx, intptr_t my, int width)                                \ 
  199     for (i = 0; i < W; i += step) {                                                                             \ 
  200         const uint8_t *src  = _src + (i * ((bitd + 7) / 8));                                                    \ 
  202         hevc_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width);                    \ 
  205 #define mc_rep_uni_func(name, bitd, step, W, opt) \ 
  206 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride,                         \ 
  207                                                 const uint8_t *_src, ptrdiff_t _srcstride, int height,          \ 
  208                                                 intptr_t mx, intptr_t my, int width)                            \ 
  212     for (i = 0; i < W; i += step) {                                                                             \ 
  213         const uint8_t *src = _src + (i * ((bitd + 7) / 8));                                                     \ 
  214         dst = _dst + (i * ((bitd + 7) / 8));                                                                    \ 
  215         hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride,                             \ 
  216                                                       height, mx, my, width);                                   \ 
  219 #define mc_rep_bi_func(name, bitd, step, W, opt) \ 
  220 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src,  \ 
  221                                                ptrdiff_t _srcstride, const int16_t *_src2,                      \ 
  222                                                int height, intptr_t mx, intptr_t my, int width)                 \ 
  226     for (i = 0; i < W ; i += step) {                                                                            \ 
  227         const uint8_t *src  = _src + (i * ((bitd + 7) / 8));                                                    \ 
  228         const int16_t *src2 = _src2 + i;                                                                        \ 
  229         dst  = _dst + (i * ((bitd + 7) / 8));                                                                   \ 
  230         ff_hevc_put_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2,                     \ 
  231                                                      height, mx, my, width);                                    \ 
  /* Emits all three width-W repeat wrappers (put, put_uni and put_bi) for
   * one (name, bitd, step, W, opt) combination by expanding mc_rep_func,
   * mc_rep_uni_func and mc_rep_bi_func with identical arguments. */
  235 #define mc_rep_funcs(name, bitd, step, W, opt)        \ 
  236     mc_rep_func(name, bitd, step, W, opt)            \ 
  237     mc_rep_uni_func(name, bitd, step, W, opt)        \ 
  238     mc_rep_bi_func(name, bitd, step, W, opt) 
  240 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \ 
  241 static void hevc_put_##name##W##_##bitd##_##opt(int16_t *dst,                                                   \ 
  242                                                 const uint8_t *src, ptrdiff_t _srcstride, int height,           \ 
  243                                                 intptr_t mx, intptr_t my, int width)                            \ 
  245     hevc_put_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width);                       \ 
  246     hevc_put_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)),                      \ 
  247                                             _srcstride, height, mx, my, width);                                 \ 
  249 #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ 
  250 static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride,                          \ 
  251                                                 const uint8_t *src, ptrdiff_t _srcstride, int height,           \ 
  252                                                 intptr_t mx, intptr_t my, int width)                            \ 
  254     hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width);     \ 
  255     hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride,                 \ 
  256                                                    src + (step1 * ((bitd + 7) / 8)), _srcstride,                \ 
  257                                                    height, mx, my, width);                                      \ 
  259 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \ 
  260 static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,    \ 
  261                                                ptrdiff_t _srcstride, const int16_t *src2,                       \ 
  262                                                int height, intptr_t mx, intptr_t my, int width)                 \ 
  264     ff_hevc_put_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\ 
  265     ff_hevc_put_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride,                  \ 
  266                                                   src + (step1 * ((bitd + 7) / 8)), _srcstride,                 \ 
  267                                                   src2 + step1, height, mx, my, width);                         \ 
  /* Emits all three two-piece wrappers (put, put_uni and put_bi) for one
   * (name, bitd, step1, step2, W, opt) combination by expanding
   * mc_rep_func2, mc_rep_uni_func2 and mc_rep_bi_func2 with identical
   * arguments. */
  270 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \ 
  271     mc_rep_func2(name, bitd, step1, step2, W, opt)      \ 
  272     mc_rep_uni_func2(name, bitd, step1, step2, W, opt)  \ 
  273     mc_rep_bi_func2(name, bitd, step1, step2, W, opt) 
  275 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL 
  277 #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                                       \ 
  278 static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride,       \ 
  279                                             int height, intptr_t mx, intptr_t my, int width)                  \ 
  282     hevc_put_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width);                         \ 
  283     hevc_put_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width);         \ 
  286 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                                    \ 
  287 static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,  \ 
  288                                                ptrdiff_t _srcstride, const int16_t *src2,                     \ 
  289                                                int height, intptr_t mx, intptr_t my, int width)               \ 
  291     ff_hevc_put_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2,                          \ 
  292                                               height, mx, my, width);                                         \ 
  293     ff_hevc_put_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,     \ 
  294                                               height, mx, my, width);                                         \ 
  297 #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                                   \ 
  298 static void hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride,                        \ 
  299                                                 const uint8_t *src, ptrdiff_t _srcstride, int height,         \ 
  300                                                 intptr_t mx, intptr_t my, int width)                          \ 
  302     hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride,                               \ 
  303                                                height, mx, my, width);                                        \ 
  304     hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride,                 \ 
  305                                                height, mx, my, width);                                        \ 
  /* Emits the 10-bit mixed-instruction-set wrappers for put, put_bi and
   * put_uni in one go by expanding mc_rep_mix_10, mc_bi_rep_mix_10 and
   * mc_uni_rep_mix_10 with identical arguments. */
  308 #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4)   \ 
  309 mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)            \ 
  310 mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)         \ 
  311 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) 
  313 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                                \ 
  314 static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride,        \ 
  315                                            int height, intptr_t mx, intptr_t my, int width)                   \ 
  318     hevc_put_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width);                          \ 
  319     hevc_put_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width);          \ 
  322 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                             \ 
  323 static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,   \ 
  324                                               ptrdiff_t _srcstride, const int16_t *src2,                      \ 
  325                                               int height, intptr_t mx, intptr_t my, int width)                \ 
  327     ff_hevc_put_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride,                                 \ 
  328                                              src2, height, mx, my, width);                                    \ 
  329     ff_hevc_put_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride,                   \ 
  330                                              src2+width2, height, mx, my, width);                             \ 
  333 #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                            \ 
  334 static void hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride,                             \ 
  335                                                const uint8_t *src, ptrdiff_t _srcstride, int height,          \ 
  336                                                intptr_t mx, intptr_t my, int width)                           \ 
  338     hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride,                                \ 
  339                                               height, mx, my, width);                                         \ 
  340     hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride,                  \ 
  341                                               height, mx, my, width);                                         \ 
  /* Emits the 8-bit mixed-instruction-set wrappers for put, put_bi and
   * put_uni in one go by expanding mc_rep_mix_8, mc_bi_rep_mix_8 and
   * mc_uni_rep_mix_8 with identical arguments. */
  344 #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2)   \ 
  345 mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)            \ 
  346 mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)         \ 
  347 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) 
  349 #if HAVE_AVX2_EXTERNAL 
  /* Mixed avx2/sse4 wrappers. The generated functions carry the avx2
   * suffix (opt1); each one calls the avx2 routine of width `width2` first
   * and then the sse4 routine of width `width3` on the remainder.
   *   - 8-bit:  width 48 = avx2 32 + sse4 16.
   *   - 10-bit: width 24 = avx2 16 + sse4 8, with 32 passed as the offset
   *     that is added to the uint8_t source/destination pointers.
   * For 10-bit pel_pixels only the put and put_bi variants are
   * instantiated here (no put_uni). */
  351 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
 
  352 mc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4)
 
  353 mc_rep_mixs_8(epel_h ,    48, 32, 16, avx2, sse4)
 
  354 mc_rep_mixs_8(epel_v ,    48, 32, 16, avx2, sse4)
 
  356 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
 
  357 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
 
  358 mc_rep_mixs_10(epel_hv,   24, 16, 8, avx2, sse4, 32)
 
  359 mc_rep_mixs_10(epel_h ,   24, 16, 8, avx2, sse4, 32)
 
  360 mc_rep_mixs_10(epel_v ,   24, 16, 8, avx2, sse4, 32)
 
  363 mc_rep_mixs_10(qpel_h ,   24, 16, 8, avx2, sse4, 32)
 
  364 mc_rep_mixs_10(qpel_v ,   24, 16, 8, avx2, sse4, 32)
 
  365 mc_rep_mixs_10(qpel_hv,   24, 16, 8, avx2, sse4, 32)
 
  401 mc_rep_mixs_8(qpel_h ,  48, 32, 16, avx2, sse4)
 
  404 mc_rep_mixs_8(qpel_v,  48, 32, 16, avx2, sse4)
 
  538 #define mc_rep_uni_w(bitd, step, W, opt) \ 
  539 void ff_hevc_put_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ 
  540                                            int height, int denom,  int _wx, int _ox)                \ 
  544     for (i = 0; i < W; i += step) {                                                                                     \ 
  545         const int16_t *src = _src + i;                                                                                  \ 
  546         dst= _dst + (i * ((bitd + 7) / 8));                                                                             \ 
  547         ff_hevc_put_uni_w##step##_##bitd##_##opt(dst, dststride, src,                               \ 
  548                                                  height, denom, _wx, _ox);                          \ 
  /* Build the wider sse4 ff_hevc_put_uni_w<W>_<bitd> routines out of
   * narrower ones, for bit depths 8, 10 and 12: width 12 is assembled from
   * the width-6 routine, widths 16, 24, 32, 48 and 64 from the width-8
   * routine. */
  552 mc_rep_uni_w(8, 6, 12, sse4)
 
  553 mc_rep_uni_w(8, 8, 16, sse4)
 
  554 mc_rep_uni_w(8, 8, 24, sse4)
 
  555 mc_rep_uni_w(8, 8, 32, sse4)
 
  556 mc_rep_uni_w(8, 8, 48, sse4)
 
  557 mc_rep_uni_w(8, 8, 64, sse4)
 
  559 mc_rep_uni_w(10, 6, 12, sse4)
 
  560 mc_rep_uni_w(10, 8, 16, sse4)
 
  561 mc_rep_uni_w(10, 8, 24, sse4)
 
  562 mc_rep_uni_w(10, 8, 32, sse4)
 
  563 mc_rep_uni_w(10, 8, 48, sse4)
 
  564 mc_rep_uni_w(10, 8, 64, sse4)
 
  566 mc_rep_uni_w(12, 6, 12, sse4)
 
  567 mc_rep_uni_w(12, 8, 16, sse4)
 
  568 mc_rep_uni_w(12, 8, 24, sse4)
 
  569 mc_rep_uni_w(12, 8, 32, sse4)
 
  570 mc_rep_uni_w(12, 8, 48, sse4)
 
  571 mc_rep_uni_w(12, 8, 64, sse4)
 
  573 #define mc_rep_bi_w(bitd, step, W, opt) \ 
  574 void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ 
  575                                           const int16_t *_src2, int height,                        \ 
  576                                           int denom,  int _wx0,  int _wx1, int _ox0, int _ox1)     \ 
  580     for (i = 0; i < W; i += step) {                                                                                     \ 
  581         const int16_t *src  = _src  + i;                                                                                \ 
  582         const int16_t *src2 = _src2 + i;                                                                                \ 
  583         dst  = _dst  + (i * ((bitd + 7) / 8));                                                                          \ 
  584         ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2,                         \ 
  585                                                 height, denom, _wx0, _wx1, _ox0, _ox1);            \ 
  /* Build the wider sse4 ff_hevc_put_bi_w<W>_<bitd> routines out of
   * narrower ones, for bit depths 8, 10 and 12: width 12 is assembled from
   * the width-6 routine, widths 16, 24, 32, 48 and 64 from the width-8
   * routine. */
  589 mc_rep_bi_w(8, 6, 12, sse4)
 
  590 mc_rep_bi_w(8, 8, 16, sse4)
 
  591 mc_rep_bi_w(8, 8, 24, sse4)
 
  592 mc_rep_bi_w(8, 8, 32, sse4)
 
  593 mc_rep_bi_w(8, 8, 48, sse4)
 
  594 mc_rep_bi_w(8, 8, 64, sse4)
 
  596 mc_rep_bi_w(10, 6, 12, sse4)
 
  597 mc_rep_bi_w(10, 8, 16, sse4)
 
  598 mc_rep_bi_w(10, 8, 24, sse4)
 
  599 mc_rep_bi_w(10, 8, 32, sse4)
 
  600 mc_rep_bi_w(10, 8, 48, sse4)
 
  601 mc_rep_bi_w(10, 8, 64, sse4)
 
  603 mc_rep_bi_w(12, 6, 12, sse4)
 
  604 mc_rep_bi_w(12, 8, 16, sse4)
 
  605 mc_rep_bi_w(12, 8, 24, sse4)
 
  606 mc_rep_bi_w(12, 8, 32, sse4)
 
  607 mc_rep_bi_w(12, 8, 48, sse4)
 
  608 mc_rep_bi_w(12, 8, 64, sse4)
 
  610 #define mc_uni_w_func(name, bitd, W, opt) \ 
  611 static void hevc_put_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,          \ 
  612                                                       const uint8_t *_src, ptrdiff_t _srcstride,    \ 
  613                                                       int height, int denom,                        \ 
  615                                                       intptr_t mx, intptr_t my, int width)          \ 
  617     LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]);                                            \ 
  618     hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width);             \ 
  619     ff_hevc_put_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);         \ 
  /* Emits the hevc_put_uni_w_<name><W>_<bitd>_<opt> wrappers for the eight
   * widths 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this
   * list and has to be instantiated separately where needed. */
  622 #define mc_uni_w_funcs(name, bitd, opt)      \ 
  623         mc_uni_w_func(name, bitd, 4, opt)    \ 
  624         mc_uni_w_func(name, bitd, 8, opt)    \ 
  625         mc_uni_w_func(name, bitd, 12, opt)   \ 
  626         mc_uni_w_func(name, bitd, 16, opt)   \ 
  627         mc_uni_w_func(name, bitd, 24, opt)   \ 
  628         mc_uni_w_func(name, bitd, 32, opt)   \ 
  629         mc_uni_w_func(name, bitd, 48, opt)   \ 
  630         mc_uni_w_func(name, bitd, 64, opt) 
  /* sse4 uni-weighted wrappers for bit depths 8, 10 and 12. Every filter
   * kind gets the eight standard widths via mc_uni_w_funcs; pel_pixels and
   * the three epel kinds additionally get a width-6 wrapper, the qpel kinds
   * do not. */
  632 mc_uni_w_funcs(pel_pixels, 8, sse4)
 
  633 mc_uni_w_func(pel_pixels, 8, 6, sse4)
 
  634 mc_uni_w_funcs(epel_h, 8, sse4)
 
  635 mc_uni_w_func(epel_h, 8, 6, sse4)
 
  636 mc_uni_w_funcs(epel_v, 8, sse4)
 
  637 mc_uni_w_func(epel_v, 8, 6, sse4)
 
  638 mc_uni_w_funcs(epel_hv, 8, sse4)
 
  639 mc_uni_w_func(epel_hv, 8, 6, sse4)
 
  640 mc_uni_w_funcs(qpel_h, 8, sse4)
 
  641 mc_uni_w_funcs(qpel_v, 8, sse4)
 
  642 mc_uni_w_funcs(qpel_hv, 8, sse4)
 
  644 mc_uni_w_funcs(pel_pixels, 10, sse4)
 
  645 mc_uni_w_func(pel_pixels, 10, 6, sse4)
 
  646 mc_uni_w_funcs(epel_h, 10, sse4)
 
  647 mc_uni_w_func(epel_h, 10, 6, sse4)
 
  648 mc_uni_w_funcs(epel_v, 10, sse4)
 
  649 mc_uni_w_func(epel_v, 10, 6, sse4)
 
  650 mc_uni_w_funcs(epel_hv, 10, sse4)
 
  651 mc_uni_w_func(epel_hv, 10, 6, sse4)
 
  652 mc_uni_w_funcs(qpel_h, 10, sse4)
 
  653 mc_uni_w_funcs(qpel_v, 10, sse4)
 
  654 mc_uni_w_funcs(qpel_hv, 10, sse4)
 
  656 mc_uni_w_funcs(pel_pixels, 12, sse4)
 
  657 mc_uni_w_func(pel_pixels, 12, 6, sse4)
 
  658 mc_uni_w_funcs(epel_h, 12, sse4)
 
  659 mc_uni_w_func(epel_h, 12, 6, sse4)
 
  660 mc_uni_w_funcs(epel_v, 12, sse4)
 
  661 mc_uni_w_func(epel_v, 12, 6, sse4)
 
  662 mc_uni_w_funcs(epel_hv, 12, sse4)
 
  663 mc_uni_w_func(epel_hv, 12, 6, sse4)
 
  664 mc_uni_w_funcs(qpel_h, 12, sse4)
 
  665 mc_uni_w_funcs(qpel_v, 12, sse4)
 
  666 mc_uni_w_funcs(qpel_hv, 12, sse4)
 
  668 #define mc_bi_w_func(name, bitd, W, opt) \ 
  669 static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,            \ 
  670                                                      const uint8_t *_src, ptrdiff_t _srcstride,      \ 
  671                                                      const int16_t *_src2,                           \ 
  672                                                      int height, int denom,                          \ 
  673                                                      int _wx0, int _wx1, int _ox0, int _ox1,         \ 
  674                                                      intptr_t mx, intptr_t my, int width)            \ 
  676     LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]);                                             \ 
  677     hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width);              \ 
  678     ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2,                              \ 
  679                                          height, denom, _wx0, _wx1, _ox0, _ox1);                     \ 
  /* Emits the hevc_put_bi_w_<name><W>_<bitd>_<opt> wrappers for the eight
   * widths 4, 8, 12, 16, 24, 32, 48 and 64. Width 6 is not part of this
   * list and has to be instantiated separately where needed. */
  682 #define mc_bi_w_funcs(name, bitd, opt)      \ 
  683         mc_bi_w_func(name, bitd, 4, opt)    \ 
  684         mc_bi_w_func(name, bitd, 8, opt)    \ 
  685         mc_bi_w_func(name, bitd, 12, opt)   \ 
  686         mc_bi_w_func(name, bitd, 16, opt)   \ 
  687         mc_bi_w_func(name, bitd, 24, opt)   \ 
  688         mc_bi_w_func(name, bitd, 32, opt)   \ 
  689         mc_bi_w_func(name, bitd, 48, opt)   \ 
  690         mc_bi_w_func(name, bitd, 64, opt) 
  /* sse4 bi-weighted wrappers for bit depths 8, 10 and 12. Every filter
   * kind gets the eight standard widths via mc_bi_w_funcs; pel_pixels and
   * the three epel kinds additionally get a width-6 wrapper, the qpel kinds
   * do not. */
  692 mc_bi_w_funcs(pel_pixels, 8, sse4)
 
  693 mc_bi_w_func(pel_pixels, 8, 6, sse4)
 
  694 mc_bi_w_funcs(epel_h, 8, sse4)
 
  695 mc_bi_w_func(epel_h, 8, 6, sse4)
 
  696 mc_bi_w_funcs(epel_v, 8, sse4)
 
  697 mc_bi_w_func(epel_v, 8, 6, sse4)
 
  698 mc_bi_w_funcs(epel_hv, 8, sse4)
 
  699 mc_bi_w_func(epel_hv, 8, 6, sse4)
 
  700 mc_bi_w_funcs(qpel_h, 8, sse4)
 
  701 mc_bi_w_funcs(qpel_v, 8, sse4)
 
  702 mc_bi_w_funcs(qpel_hv, 8, sse4)
 
  704 mc_bi_w_funcs(pel_pixels, 10, sse4)
 
  705 mc_bi_w_func(pel_pixels, 10, 6, sse4)
 
  706 mc_bi_w_funcs(epel_h, 10, sse4)
 
  707 mc_bi_w_func(epel_h, 10, 6, sse4)
 
  708 mc_bi_w_funcs(epel_v, 10, sse4)
 
  709 mc_bi_w_func(epel_v, 10, 6, sse4)
 
  710 mc_bi_w_funcs(epel_hv, 10, sse4)
 
  711 mc_bi_w_func(epel_hv, 10, 6, sse4)
 
  712 mc_bi_w_funcs(qpel_h, 10, sse4)
 
  713 mc_bi_w_funcs(qpel_v, 10, sse4)
 
  714 mc_bi_w_funcs(qpel_hv, 10, sse4)
 
  716 mc_bi_w_funcs(pel_pixels, 12, sse4)
 
  717 mc_bi_w_func(pel_pixels, 12, 6, sse4)
 
  718 mc_bi_w_funcs(epel_h, 12, sse4)
 
  719 mc_bi_w_func(epel_h, 12, 6, sse4)
 
  720 mc_bi_w_funcs(epel_v, 12, sse4)
 
  721 mc_bi_w_func(epel_v, 12, 6, sse4)
 
  722 mc_bi_w_funcs(epel_hv, 12, sse4)
 
  723 mc_bi_w_func(epel_hv, 12, 6, sse4)
 
  724 mc_bi_w_funcs(qpel_h, 12, sse4)
 
  725 mc_bi_w_funcs(qpel_v, 12, sse4)
 
  726 mc_bi_w_funcs(qpel_hv, 12, sse4)
 
  727 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL 
  /* Declares the five prototypes ff_hevc_sao_band_filter_<W>_<bitd>_<opt>
   * for W = 8, 16, 32, 48 and 64. All share the same signature with
   * separate destination and source strides. The expansion ends with ';',
   * so uses of the macro need no trailing semicolon. */
  729 #define SAO_BAND_FILTER_FUNCS(bitd, opt)                                                                                   \ 
  730 void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,  \ 
  731                                               const int16_t *sao_offset_val, int sao_left_class, int width, int height);         \ 
  732 void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 
  733                                                const int16_t *sao_offset_val, int sao_left_class, int width, int height);        \ 
  734 void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 
  735                                                const int16_t *sao_offset_val, int sao_left_class, int width, int height);        \ 
  736 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 
  737                                                const int16_t *sao_offset_val, int sao_left_class, int width, int height);        \ 
  738 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \ 
  739                                                const int16_t *sao_offset_val, int sao_left_class, int width, int height); 
  751 #define SAO_BAND_INIT(bitd, opt) do {                                       \ 
  752     c->sao_band_filter[0]      = ff_hevc_sao_band_filter_8_##bitd##_##opt;  \ 
  753     c->sao_band_filter[1]      = ff_hevc_sao_band_filter_16_##bitd##_##opt; \ 
  754     c->sao_band_filter[2]      = ff_hevc_sao_band_filter_32_##bitd##_##opt; \ 
  755     c->sao_band_filter[3]      = ff_hevc_sao_band_filter_48_##bitd##_##opt; \ 
  756     c->sao_band_filter[4]      = ff_hevc_sao_band_filter_64_##bitd##_##opt; \ 
  /* Declares the five prototypes ff_hevc_sao_edge_filter_<W>_<bitd>_<opt>
   * for W = 8, 16, 32, 48 and 64. All share one signature:
   *   (uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
   *    const int16_t *sao_offset_val, int eo, int width, int height)
   * The expansion ends with ';', so uses of the macro need no trailing
   * semicolon.
   *
   * The final line deliberately carries no line continuation: a trailing
   * backslash there would make the macro absorb whatever line follows it. */
  #define SAO_EDGE_FILTER_FUNCS(bitd, opt)                                                                      \
  void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,       \
                                                const int16_t *sao_offset_val, int eo, int width, int height);  \
  void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,      \
                                                 const int16_t *sao_offset_val, int eo, int width, int height); \
  void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,      \
                                                 const int16_t *sao_offset_val, int eo, int width, int height); \
  void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,      \
                                                 const int16_t *sao_offset_val, int eo, int width, int height); \
  void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,      \
                                                 const int16_t *sao_offset_val, int eo, int width, int height);
  778 #define SAO_EDGE_INIT(bitd, opt) do {                                       \ 
  779     c->sao_edge_filter[0]      = ff_hevc_sao_edge_filter_8_##bitd##_##opt;  \ 
  780     c->sao_edge_filter[1]      = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \ 
  781     c->sao_edge_filter[2]      = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \ 
  782     c->sao_edge_filter[3]      = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \ 
  783     c->sao_edge_filter[4]      = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \ 
  /* Fills one [idx1][idx2][idx3] slot in each of the five parallel tables
   * <dst>, <dst>_bi, <dst>_uni, <dst>_uni_w and <dst>_bi_w with the matching
   * routine for (name, D, opt):
   *   <dst>        <- hevc_put_<name>_<D>_<opt>
   *   <dst>_bi     <- ff_hevc_put_bi_<name>_<D>_<opt>
   *   <dst>_uni    <- hevc_put_uni_<name>_<D>_<opt>
   *   <dst>_uni_w  <- hevc_put_uni_w_<name>_<D>_<opt>
   *   <dst>_bi_w   <- hevc_put_bi_w_<name>_<D>_<opt>
   *
   * The five assignments are wrapped in do { } while (0) so the macro acts
   * as a single statement (e.g. under an unbraced `if`), in line with
   * SAO_BAND_INIT. Callers still terminate the invocation with ';'. */
  #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt)                                  \
      do {                                                                               \
          dst          [idx1][idx2][idx3] = hevc_put_       ## name ## _ ## D ## _##opt; \
          dst ## _bi   [idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \
          dst ## _uni  [idx1][idx2][idx3] = hevc_put_uni_   ## name ## _ ## D ## _##opt; \
          dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \
          dst ## _bi_w [idx1][idx2][idx3] = hevc_put_bi_w_  ## name ## _ ## D ## _##opt; \
      } while (0)
  /* Registers one filter kind (`fname`) at position [my][mx] for all nine
   * EPEL block widths. The first table index selects the width:
   *   1 -> 4, 2 -> 6, 3 -> 8, 4 -> 12, 5 -> 16, 6 -> 24, 7 -> 32,
   *   8 -> 48, 9 -> 64
   * Each line delegates to PEL_LINK, which fills the five parallel tables
   * derived from `pointer`.
   *
   * The nine statements are wrapped in do { } while (0) so the macro acts
   * as a single statement (e.g. under an unbraced `if`), in line with
   * SAO_BAND_INIT. Callers still terminate the invocation with ';'. */
  #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt)                \
      do {                                                             \
          PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt );      \
          PEL_LINK(pointer, 2, my , mx , fname##6 ,  bitd, opt );      \
          PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt );      \
          PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt );      \
          PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt );      \
          PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt );      \
          PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt );      \
          PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt );      \
          PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt );      \
      } while (0)
  /* Registers one filter kind (`fname`) at position [my][mx] for the eight
   * QPEL block widths. The first table index selects the width:
   *   1 -> 4, 3 -> 8, 4 -> 12, 5 -> 16, 6 -> 24, 7 -> 32, 8 -> 48, 9 -> 64
   * Index 2 (width 6 in EPEL_LINKS) is not written by this macro.
   * Each line delegates to PEL_LINK, which fills the five parallel tables
   * derived from `pointer`.
   *
   * The eight statements are wrapped in do { } while (0) so the macro acts
   * as a single statement (e.g. under an unbraced `if`), in line with
   * SAO_BAND_INIT. Callers still terminate the invocation with ';'. */
  #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)                \
      do {                                                             \
          PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt );      \
          PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt );      \
          PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt );      \
          PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt );      \
          PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt );      \
          PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt );      \
          PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt );      \
          PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt );      \
      } while (0)
  819             c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
 
  824             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
 
  825             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
 
  827                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
 
  828                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
 
  830                 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
 
  831                 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
 
  835             c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
 
  836             c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
 
  837             c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
 
  839             c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
 
  840             c->idct[1]    = ff_hevc_idct_8x8_8_sse2;
 
  848                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
 
  849                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
 
  853 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 
  856             EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
 
  857             EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
 
  858             EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
 
  859             EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);
 
  861             QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
 
  862             QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
 
  863             QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
 
  864             QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
 
  868             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
 
  869             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
 
  871                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
 
  872                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
 
  874                 c->idct[2] = ff_hevc_idct_16x16_8_avx;
 
  875                 c->idct[3] = ff_hevc_idct_32x32_8_avx;
 
  879             c->idct[0] = ff_hevc_idct_4x4_8_avx;
 
  880             c->idct[1] = ff_hevc_idct_8x8_8_avx;
 
  887             c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
 
  888             c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
 
  890 #if HAVE_AVX2_EXTERNAL 
  892             c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
 
  893             c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
 
  896             c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
 
  897             c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
 
  898             c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
 
  900             c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_8_avx2;
 
  901             c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_8_avx2;
 
  902             c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_8_avx2;
 
  904             c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
 
  905             c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
 
  906             c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
 
  908             c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
 
  909             c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
 
  910             c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
 
  912             c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
 
  913             c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
 
  914             c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
 
  916             c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2;
 
  917             c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2;
 
  918             c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2;
 
  920             c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_8_avx2;
 
  921             c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_8_avx2;
 
  922             c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_8_avx2;
 
  924             c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_8_avx2;
 
  925             c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_8_avx2;
 
  926             c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_8_avx2;
 
  928             c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2;
 
  929             c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2;
 
  930             c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_8_avx2;
 
  932             c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_8_avx2;
 
  933             c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_8_avx2;
 
  934             c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_8_avx2;
 
  936             c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_8_avx2;
 
  937             c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_8_avx2;
 
  938             c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_8_avx2;
 
  940             c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2;
 
  941             c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2;
 
  942             c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_8_avx2;
 
  944             c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_8_avx2;
 
  945             c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_8_avx2;
 
  946             c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_8_avx2;
 
  948             c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_8_avx2;
 
  949             c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_8_avx2;
 
  950             c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_8_avx2;
 
  952             c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2;
 
  953             c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2;
 
  954             c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_8_avx2;
 
  956             c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_8_avx2;
 
  957             c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_8_avx2;
 
  958             c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_8_avx2;
 
  960             c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_8_avx2;
 
  961             c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_8_avx2;
 
  962             c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_8_avx2;
 
  964             c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_8_avx2;
 
  965             c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_8_avx2;
 
  966             c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_8_avx2;
 
  968             c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_8_avx2;
 
  969             c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_8_avx2;
 
  970             c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_8_avx2;
 
  972             c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2;
 
  973             c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2;
 
  974             c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_8_avx2;
 
  976             c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2;
 
  977             c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2;
 
  978             c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2;
 
  983             c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
 
  984             c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
 
  985             c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
 
 1001             c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
 
 1004             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
 
 1005             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
 
 1007                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
 
 1008                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
 
 1010                 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
 
 1011                 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
 
 1016             c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
 
 1017             c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
 
 1018             c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
 
 1020             c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
 
 1021             c->idct[1]    = ff_hevc_idct_8x8_10_sse2;
 
 1028             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
 
 1029             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
 
 1031 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 
 1033             EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
 
 1034             EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
 
 1035             EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
 
 1036             EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);
 
 1038             QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
 
 1039             QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
 
 1040             QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
 
 1041             QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
 
 1045             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
 
 1046             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
 
 1048                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
 
 1049                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
 
 1051                 c->idct[2] = ff_hevc_idct_16x16_10_avx;
 
 1052                 c->idct[3] = ff_hevc_idct_32x32_10_avx;
 
 1055             c->idct[0] = ff_hevc_idct_4x4_10_avx;
 
 1056             c->idct[1] = ff_hevc_idct_8x8_10_avx;
 
 1061             c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
 
 1063 #if HAVE_AVX2_EXTERNAL 
 1065             c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
 
 1066             c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
 
 1069             c->put_hevc_epel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
 
 1070             c->put_hevc_epel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
 
 1071             c->put_hevc_epel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
 
 1072             c->put_hevc_epel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
 
 1073             c->put_hevc_epel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
 
 1075             c->put_hevc_qpel[5][0][0] = hevc_put_pel_pixels16_10_avx2;
 
 1076             c->put_hevc_qpel[6][0][0] = hevc_put_pel_pixels24_10_avx2;
 
 1077             c->put_hevc_qpel[7][0][0] = hevc_put_pel_pixels32_10_avx2;
 
 1078             c->put_hevc_qpel[8][0][0] = hevc_put_pel_pixels48_10_avx2;
 
 1079             c->put_hevc_qpel[9][0][0] = hevc_put_pel_pixels64_10_avx2;
 
 1081             c->put_hevc_epel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
 
 1082             c->put_hevc_epel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
 
 1083             c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
 
 1084             c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
 
 1085             c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
 
 1087             c->put_hevc_qpel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2;
 
 1088             c->put_hevc_qpel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2;
 
 1089             c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2;
 
 1090             c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2;
 
 1091             c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2;
 
 1093             c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
 
 1094             c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
 
 1095             c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
 
 1096             c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
 
 1097             c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
 
 1098             c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2;
 
 1099             c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2;
 
 1100             c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2;
 
 1101             c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2;
 
 1102             c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2;
 
 1104             c->put_hevc_epel[5][0][1] = hevc_put_epel_h16_10_avx2;
 
 1105             c->put_hevc_epel[6][0][1] = hevc_put_epel_h24_10_avx2;
 
 1106             c->put_hevc_epel[7][0][1] = hevc_put_epel_h32_10_avx2;
 
 1107             c->put_hevc_epel[8][0][1] = hevc_put_epel_h48_10_avx2;
 
 1108             c->put_hevc_epel[9][0][1] = hevc_put_epel_h64_10_avx2;
 
 1110             c->put_hevc_epel_uni[5][0][1] = hevc_put_uni_epel_h16_10_avx2;
 
 1111             c->put_hevc_epel_uni[6][0][1] = hevc_put_uni_epel_h24_10_avx2;
 
 1112             c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_10_avx2;
 
 1113             c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_10_avx2;
 
 1114             c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_10_avx2;
 
 1116             c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2;
 
 1117             c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2;
 
 1118             c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_10_avx2;
 
 1119             c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_10_avx2;
 
 1120             c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_10_avx2;
 
 1122             c->put_hevc_epel[5][1][0] = hevc_put_epel_v16_10_avx2;
 
 1123             c->put_hevc_epel[6][1][0] = hevc_put_epel_v24_10_avx2;
 
 1124             c->put_hevc_epel[7][1][0] = hevc_put_epel_v32_10_avx2;
 
 1125             c->put_hevc_epel[8][1][0] = hevc_put_epel_v48_10_avx2;
 
 1126             c->put_hevc_epel[9][1][0] = hevc_put_epel_v64_10_avx2;
 
 1128             c->put_hevc_epel_uni[5][1][0] = hevc_put_uni_epel_v16_10_avx2;
 
 1129             c->put_hevc_epel_uni[6][1][0] = hevc_put_uni_epel_v24_10_avx2;
 
 1130             c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_10_avx2;
 
 1131             c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_10_avx2;
 
 1132             c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_10_avx2;
 
 1134             c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2;
 
 1135             c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2;
 
 1136             c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_10_avx2;
 
 1137             c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_10_avx2;
 
 1138             c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_10_avx2;
 
 1140             c->put_hevc_epel[5][1][1] = hevc_put_epel_hv16_10_avx2;
 
 1141             c->put_hevc_epel[6][1][1] = hevc_put_epel_hv24_10_avx2;
 
 1142             c->put_hevc_epel[7][1][1] = hevc_put_epel_hv32_10_avx2;
 
 1143             c->put_hevc_epel[8][1][1] = hevc_put_epel_hv48_10_avx2;
 
 1144             c->put_hevc_epel[9][1][1] = hevc_put_epel_hv64_10_avx2;
 
 1146             c->put_hevc_epel_uni[5][1][1] = hevc_put_uni_epel_hv16_10_avx2;
 
 1147             c->put_hevc_epel_uni[6][1][1] = hevc_put_uni_epel_hv24_10_avx2;
 
 1148             c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_10_avx2;
 
 1149             c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_10_avx2;
 
 1150             c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_10_avx2;
 
 1152             c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2;
 
 1153             c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2;
 
 1154             c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_10_avx2;
 
 1155             c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_10_avx2;
 
 1156             c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_10_avx2;
 
 1158             c->put_hevc_qpel[5][0][1] = hevc_put_qpel_h16_10_avx2;
 
 1159             c->put_hevc_qpel[6][0][1] = hevc_put_qpel_h24_10_avx2;
 
 1160             c->put_hevc_qpel[7][0][1] = hevc_put_qpel_h32_10_avx2;
 
 1161             c->put_hevc_qpel[8][0][1] = hevc_put_qpel_h48_10_avx2;
 
 1162             c->put_hevc_qpel[9][0][1] = hevc_put_qpel_h64_10_avx2;
 
 1164             c->put_hevc_qpel_uni[5][0][1] = hevc_put_uni_qpel_h16_10_avx2;
 
 1165             c->put_hevc_qpel_uni[6][0][1] = hevc_put_uni_qpel_h24_10_avx2;
 
 1166             c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_10_avx2;
 
 1167             c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_10_avx2;
 
 1168             c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_10_avx2;
 
 1170             c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2;
 
 1171             c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2;
 
 1172             c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_10_avx2;
 
 1173             c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_10_avx2;
 
 1174             c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_10_avx2;
 
 1176             c->put_hevc_qpel[5][1][0] = hevc_put_qpel_v16_10_avx2;
 
 1177             c->put_hevc_qpel[6][1][0] = hevc_put_qpel_v24_10_avx2;
 
 1178             c->put_hevc_qpel[7][1][0] = hevc_put_qpel_v32_10_avx2;
 
 1179             c->put_hevc_qpel[8][1][0] = hevc_put_qpel_v48_10_avx2;
 
 1180             c->put_hevc_qpel[9][1][0] = hevc_put_qpel_v64_10_avx2;
 
 1182             c->put_hevc_qpel_uni[5][1][0] = hevc_put_uni_qpel_v16_10_avx2;
 
 1183             c->put_hevc_qpel_uni[6][1][0] = hevc_put_uni_qpel_v24_10_avx2;
 
 1184             c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_10_avx2;
 
 1185             c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_10_avx2;
 
 1186             c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_10_avx2;
 
 1188             c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2;
 
 1189             c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2;
 
 1190             c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_10_avx2;
 
 1191             c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_10_avx2;
 
 1192             c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_10_avx2;
 
 1194             c->put_hevc_qpel[5][1][1] = hevc_put_qpel_hv16_10_avx2;
 
 1195             c->put_hevc_qpel[6][1][1] = hevc_put_qpel_hv24_10_avx2;
 
 1196             c->put_hevc_qpel[7][1][1] = hevc_put_qpel_hv32_10_avx2;
 
 1197             c->put_hevc_qpel[8][1][1] = hevc_put_qpel_hv48_10_avx2;
 
 1198             c->put_hevc_qpel[9][1][1] = hevc_put_qpel_hv64_10_avx2;
 
 1200             c->put_hevc_qpel_uni[5][1][1] = hevc_put_uni_qpel_hv16_10_avx2;
 
 1201             c->put_hevc_qpel_uni[6][1][1] = hevc_put_uni_qpel_hv24_10_avx2;
 
 1202             c->put_hevc_qpel_uni[7][1][1] = hevc_put_uni_qpel_hv32_10_avx2;
 
 1203             c->put_hevc_qpel_uni[8][1][1] = hevc_put_uni_qpel_hv48_10_avx2;
 
 1204             c->put_hevc_qpel_uni[9][1][1] = hevc_put_uni_qpel_hv64_10_avx2;
 
 1206             c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2;
 
 1207             c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2;
 
 1208             c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2;
 
 1209             c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2;
 
 1210             c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2;
 
 1222             c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
 
 1225             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
 
 1226             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
 
 1228                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
 
 1229                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
 
 1234             c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
 
 1235             c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
 
 1236             c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
 
 1239             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
 
 1240             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
 
 1242 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 
 1244             EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
 
 1245             EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h,     12, sse4);
 
 1246             EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v,     12, sse4);
 
 1247             EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv,    12, sse4);
 
 1249             QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
 
 1250             QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h,     12, sse4);
 
 1251             QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
 
 1252             QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
 
 1256             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
 
 1257             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
 
 1259                 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
 
 1260                 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
 
 1265             c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
 
 1268             c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
 
 1269             c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;