44     int filter_height_down = (raw_my & 3) ? 3 : 0;
 
   45     int full_my            = (raw_my >> 2) + y_offset;
 
   46     int bottom             = full_my + filter_height_down + 
height;
 
   50     return FFMAX(0, bottom);
 
   54                                      int16_t refs[2][48], 
int n,
 
   55                                      int height, 
int y_offset, 
int list0,
 
   56                                      int list1, 
int *nrefs)
 
   69         if (
ref->parent->tf.progress != 
h->cur_pic.tf.progress ||
 
   70             (
ref->reference & 3) != 
h->picture_structure) {
 
   72             if (refs[0][ref_n] < 0)
 
   74             refs[0][ref_n] = 
FFMAX(refs[0][ref_n], my);
 
   82         if (
ref->parent->tf.progress != 
h->cur_pic.tf.progress ||
 
   83             (
ref->reference & 3) != 
h->picture_structure) {
 
   85             if (refs[1][ref_n] < 0)
 
   87             refs[1][ref_n] = 
FFMAX(refs[1][ref_n], my);
 
   99     const int mb_xy   = sl->
mb_xy;
 
  100     const int mb_type = 
h->cur_pic.mb_type[mb_xy];
 
  102     int nrefs[2] = { 0 };
 
  105     memset(refs, -1, 
sizeof(refs));
 
  125         for (
i = 0; 
i < 4; 
i++) {
 
  128             int y_offset          = (
i & 2) << 2;
 
  132                                   IS_DIR(sub_mb_type, 0, 0),
 
  133                                   IS_DIR(sub_mb_type, 0, 1),
 
  137                                   IS_DIR(sub_mb_type, 0, 0),
 
  138                                   IS_DIR(sub_mb_type, 0, 1),
 
  141                                   IS_DIR(sub_mb_type, 0, 0),
 
  142                                   IS_DIR(sub_mb_type, 0, 1),
 
  146                                   IS_DIR(sub_mb_type, 0, 0),
 
  147                                   IS_DIR(sub_mb_type, 0, 1),
 
  150                                   IS_DIR(sub_mb_type, 0, 0),
 
  151                                   IS_DIR(sub_mb_type, 0, 1),
 
  156                 for (j = 0; j < 4; j++) {
 
  157                     int sub_y_offset = y_offset + 2 * (j & 2);
 
  159                                       IS_DIR(sub_mb_type, 0, 0),
 
  160                                       IS_DIR(sub_mb_type, 0, 1),
 
  174                 int pic_height        = 16 * 
h->mb_height >> ref_field_picture;
 
  182                                              FFMIN((row >> 1) - !(row & 1),
 
  186                                              FFMIN((row >> 1), pic_height - 1),
 
  190                                              FFMIN(row * 2 + ref_field,
 
  195                                              FFMIN(row, pic_height - 1),
 
  199                                              FFMIN(row, pic_height - 1),
 
  210                                          uint8_t *dest_y, uint8_t *dest_cb,
 
  212                                          int src_x_offset, 
int src_y_offset,
 
  215                                          int pixel_shift, 
int chroma_idc)
 
  219     const int luma_xy = (mx & 3) + ((my & 3) << 2);
 
  222     uint8_t *src_cb, *src_cr;
 
  224     int extra_height = 0;
 
  226     const int full_mx    = mx >> 2;
 
  227     const int full_my    = my >> 2;
 
  228     const int pic_width  = 16 * 
h->mb_width;
 
  229     const int pic_height = 16 * 
h->mb_height >> 
MB_FIELD(sl);
 
  237     if (full_mx                <          0 - extra_width  ||
 
  238         full_my                <          0 - extra_height ||
 
  239         full_mx + 16  > pic_width  + extra_width  ||
 
  240         full_my + 16  > pic_height + extra_height) {
 
  244                                  16 + 5, 16 + 5 , full_mx - 2,
 
  245                                  full_my - 2, pic_width, pic_height);
 
  257     if (chroma_idc == 3 ) {
 
  264                                      full_mx - 2, full_my - 2,
 
  265                                      pic_width, pic_height);
 
  268         qpix_op[luma_xy](dest_cb, src_cb, sl->
mb_linesize); 
 
  278                                      full_mx - 2, full_my - 2,
 
  279                                      pic_width, pic_height);
 
  282         qpix_op[luma_xy](dest_cr, src_cr, sl->
mb_linesize); 
 
  288     ysh = 3 - (chroma_idc == 2 );
 
  289     if (chroma_idc == 1  && 
MB_FIELD(sl)) {
 
  292         emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
 
  295     src_cb = pic->
data[1] + ((mx >> 3) * (1 << pixel_shift)) +
 
  297     src_cr = pic->
data[2] + ((mx >> 3) * (1 << pixel_shift)) +
 
  303                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
 
  304                                  pic_width >> 1, pic_height >> (chroma_idc == 1 ));
 
  308               height >> (chroma_idc == 1 ),
 
  309               mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
 
  314                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
 
  315                                  pic_width >> 1, pic_height >> (chroma_idc == 1 ));
 
  319               mx & 7, ((
unsigned)my << (chroma_idc == 2 )) & 7);
 
  325                                          uint8_t *dest_y, uint8_t *dest_cb,
 
  327                                          int x_offset, 
int y_offset,
 
  332                                          int list0, 
int list1,
 
  333                                          int pixel_shift, 
int chroma_idc)
 
  338     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
 
  339     if (chroma_idc == 3 ) {
 
  340         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
 
  341         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
 
  342     } 
else if (chroma_idc == 2 ) {
 
  343         dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
 
  344         dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
 
  346         dest_cb += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
 
  347         dest_cr += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
 
  349     x_offset += 8 * sl->
mb_x;
 
  355                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
 
  356                     qpix_op, chroma_op, pixel_shift, chroma_idc);
 
  359         chroma_op = chroma_avg;
 
  365                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
 
  366                     qpix_op, chroma_op, pixel_shift, chroma_idc);
 
  373                                               uint8_t *dest_y, uint8_t *dest_cb,
 
  375                                               int x_offset, 
int y_offset,
 
  382                                               int list0, 
int list1,
 
  383                                               int pixel_shift, 
int chroma_idc)
 
  387     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
 
  388     if (chroma_idc == 3 ) {
 
  390         chroma_weight_avg = luma_weight_avg;
 
  391         chroma_weight_op  = luma_weight_op;
 
  392         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
 
  393         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->
mb_linesize;
 
  394     } 
else if (chroma_idc == 2 ) {
 
  396         dest_cb      += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
 
  397         dest_cr      += (x_offset << pixel_shift) + 2 * y_offset * sl->
mb_uvlinesize;
 
  399         chroma_height = 
height >> 1;
 
  400         dest_cb      += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
 
  401         dest_cr      += (x_offset << pixel_shift) + y_offset * sl->
mb_uvlinesize;
 
  403     x_offset += 8 * sl->
mb_x;
 
  406     if (list0 && list1) {
 
  416                     dest_y, dest_cb, dest_cr,
 
  417                     x_offset, y_offset, qpix_put, chroma_put,
 
  418                     pixel_shift, chroma_idc);
 
  420                     tmp_y, tmp_cb, tmp_cr,
 
  421                     x_offset, y_offset, qpix_put, chroma_put,
 
  422                     pixel_shift, chroma_idc);
 
  426             int weight1 = 64 - weight0;
 
  428                             height, 5, weight0, weight1, 0);
 
  431                                   chroma_height, 5, weight0, weight1, 0);
 
  433                                   chroma_height, 5, weight0, weight1, 0);
 
  443                 chroma_weight_avg(dest_cb, tmp_cb, sl->
mb_uvlinesize, chroma_height,
 
  449                 chroma_weight_avg(dest_cr, tmp_cr, sl->
mb_uvlinesize, chroma_height,
 
  458         int list     = list1 ? 1 : 0;
 
  462                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
 
  463                     qpix_put, chroma_put, pixel_shift, chroma_idc);
 
  485                                              int list, 
int pixel_shift,
 
  495         int off       =  mx * (1<< pixel_shift) +
 
  499         if (chroma_idc == 3 ) {
 
  503             off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->
mb_x&7))*sl->
uvlinesize;
 
  504             h->vdsp.prefetch(
src[1] + off, 
src[2] - 
src[1], 2);
 
  511                                             uint8_t *src_cb, uint8_t *src_cr,
 
  512                                             int linesize, 
int uvlinesize,
 
  513                                             int xchg, 
int chroma444,
 
  514                                             int simple, 
int pixel_shift)
 
  519     uint8_t *top_border_m1;
 
  532         deblock_topleft = 
h->slice_table[sl->
mb_xy - 1 - 
h->mb_stride] == sl->
slice_num;
 
  535         deblock_topleft = (sl->
mb_x > 0);
 
  539     src_y  -= linesize   + 1 + pixel_shift;
 
  540     src_cb -= uvlinesize + 1 + pixel_shift;
 
  541     src_cr -= uvlinesize + 1 + pixel_shift;
 
  546 #define XCHG(a, b, xchg)                        \ 
  549             AV_SWAP64(b + 0, a + 0);            \ 
  550             AV_SWAP64(b + 8, a + 8);            \ 
  560         if (deblock_topleft) {
 
  561             XCHG(top_border_m1 + (8 << pixel_shift),
 
  562                  src_y - (7 << pixel_shift), 1);
 
  564         XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
 
  565         XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
 
  566         if (sl->
mb_x + 1 < 
h->mb_width) {
 
  568                  src_y + (17 << pixel_shift), 1);
 
  572                 if (deblock_topleft) {
 
  573                     XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
 
  574                     XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
 
  576                 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
 
  577                 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
 
  578                 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
 
  579                 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
 
  580                 if (sl->
mb_x + 1 < 
h->mb_width) {
 
  581                     XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
 
  582                     XCHG(sl->
top_borders[top_idx][sl->
mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
 
  585                 if (deblock_topleft) {
 
  586                     XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
 
  587                     XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
 
  589                 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
 
  590                 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
 
  599     if (high_bit_depth) {
 
  608     if (high_bit_depth) {
 
  616                                                        int mb_type, 
int simple,
 
  617                                                        int transform_bypass,
 
  619                                                        const int *block_offset,
 
  621                                                        uint8_t *dest_y, 
int p)
 
  624     void (*idct_dc_add)(uint8_t *dst, int16_t *
block, 
int stride);
 
  627     block_offset += 16 * p;
 
  630             if (transform_bypass) {
 
  632                 idct_add    = 
h->h264dsp.h264_add_pixels8_clear;
 
  634                 idct_dc_add = 
h->h264dsp.h264_idct8_dc_add;
 
  637             for (
i = 0; 
i < 16; 
i += 4) {
 
  638                 uint8_t *
const ptr = dest_y + block_offset[
i];
 
  640                 if (transform_bypass && 
h->ps.sps->profile_idc == 244 && dir <= 1) {
 
  641                     if (
h->x264_build < 151
U) {
 
  642                         h->hpc.pred8x8l_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
 
  644                         h->hpc.pred8x8l_filter_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift),
 
  645                                                         (sl-> topleft_samples_available << 
i) & 0x8000,
 
  652                         if (nnz == 1 && 
dctcoef_get(sl->
mb, pixel_shift, 
i * 16 + p * 256))
 
  653                             idct_dc_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
 
  655                             idct_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
 
  660             if (transform_bypass) {
 
  662                 idct_add     = 
h->h264dsp.h264_add_pixels4_clear;
 
  664                 idct_dc_add = 
h->h264dsp.h264_idct_dc_add;
 
  667             for (
i = 0; 
i < 16; 
i++) {
 
  668                 uint8_t *
const ptr = dest_y + block_offset[
i];
 
  671                 if (transform_bypass && 
h->ps.sps->profile_idc == 244 && dir <= 1) {
 
  672                     h->hpc.pred4x4_add[dir](ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
 
  680                         if (!topright_avail) {
 
  682                                 tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
 
  683                                 topright = (uint8_t *)&tr_high;
 
  685                                 tr       = ptr[3 - linesize] * 0x01010101
u;
 
  686                                 topright = (uint8_t *)&tr;
 
  689                             topright = ptr + (4 << pixel_shift) - linesize;
 
  693                     h->hpc.pred4x4[dir](ptr, topright, linesize);
 
  696                         if (nnz == 1 && 
dctcoef_get(sl->
mb, pixel_shift, 
i * 16 + p * 256))
 
  697                             idct_dc_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
 
  699                             idct_add(ptr, sl->
mb + (
i * 16 + p * 256 << pixel_shift), linesize);
 
  707             if (!transform_bypass)
 
  708                 h->h264dsp.h264_luma_dc_dequant_idct(sl->
mb + (p * 256 << pixel_shift),
 
  710                                                      h->ps.pps->dequant4_coeff[p][qscale][0]);
 
  712                 static const uint8_t dc_mapping[16] = {
 
  713                      0 * 16,  1 * 16,  4 * 16,  5 * 16,
 
  714                      2 * 16,  3 * 16,  6 * 16,  7 * 16,
 
  715                      8 * 16,  9 * 16, 12 * 16, 13 * 16,
 
  716                     10 * 16, 11 * 16, 14 * 16, 15 * 16
 
  718                 for (
i = 0; 
i < 16; 
i++)
 
  720                                 pixel_shift, dc_mapping[
i],
 
  729                                                     int mb_type, 
int simple,
 
  730                                                     int transform_bypass,
 
  732                                                     const int *block_offset,
 
  734                                                     uint8_t *dest_y, 
int p)
 
  738     block_offset += 16 * p;
 
  741             if (transform_bypass) {
 
  742                 if (
h->ps.sps->profile_idc == 244 &&
 
  746                                                                    sl->
mb + (p * 256 << pixel_shift),
 
  749                     for (
i = 0; 
i < 16; 
i++)
 
  752                             h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[
i],
 
  753                                                               sl->
mb + (
i * 16 + p * 256 << pixel_shift),
 
  757                 h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
 
  758                                                 sl->
mb + (p * 256 << pixel_shift),
 
  762         } 
else if (sl->
cbp & 15) {
 
  763             if (transform_bypass) {
 
  764                 const int di = 
IS_8x8DCT(mb_type) ? 4 : 1;
 
  766                     : 
h->h264dsp.h264_add_pixels4_clear;
 
  767                 for (
i = 0; 
i < 16; 
i += di)
 
  770                                  sl->
mb + (
i * 16 + p * 256 << pixel_shift),
 
  774                     h->h264dsp.h264_idct8_add4(dest_y, block_offset,
 
  775                                                sl->
mb + (p * 256 << pixel_shift),
 
  779                     h->h264dsp.h264_idct_add16(dest_y, block_offset,
 
  780                                                sl->
mb + (p * 256 << pixel_shift),
 
  802     const int mb_xy   = sl->
mb_xy;
 
  803     const int mb_type = 
h->cur_pic.mb_type[mb_xy];
 
  804     int is_complex    = CONFIG_SMALL || sl->
is_complex ||
 
  808         if (is_complex || 
h->pixel_shift)
 
  809             hl_decode_mb_444_complex(
h, sl);
 
  811             hl_decode_mb_444_simple_8(
h, sl);
 
  812     } 
else if (is_complex) {
 
  813         hl_decode_mb_complex(
h, sl);
 
  814     } 
else if (
h->pixel_shift) {
 
  815         hl_decode_mb_simple_16(
h, sl);
 
  817         hl_decode_mb_simple_8(
h, sl);