00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "dsputil.h"
00031 #include "avcodec.h"
00032 #include "mpegvideo.h"
00033 #include "h264.h"
00034 #include "h264data.h"
00035 #include "h264_mvpred.h"
00036 #include "golomb.h"
00037 #include "mathops.h"
00038 #include "rectangle.h"
00039 #include "thread.h"
00040 #include "vdpau_internal.h"
00041 #include "libavutil/avassert.h"
00042
00043 #include "cabac.h"
00044
00045
00046 #include <assert.h>
00047
/* rem6[q] = q % 6 for every legal QP value; selects the row of the
 * dequant coefficient init tables (see init_dequant*_coeff_table). */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
00051
/* div6[q] = q / 6 for every legal QP value; provides the left-shift
 * applied to the dequant coefficients (see init_dequant*_coeff_table). */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00055
/* Candidate pixel formats for hardware-accelerated (or full-range JPEG)
 * 4:2:0 H.264 decoding, in order of preference, PIX_FMT_NONE-terminated. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};
00062
00063 void ff_h264_write_back_intra_pred_mode(H264Context *h){
00064 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
00065
00066 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
00067 mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
00068 mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
00069 mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
00070 }
00071
00075 int ff_h264_check_intra4x4_pred_mode(H264Context *h){
00076 MpegEncContext * const s = &h->s;
00077 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00078 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00079 int i;
00080
00081 if(!(h->top_samples_available&0x8000)){
00082 for(i=0; i<4; i++){
00083 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00084 if(status<0){
00085 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00086 return -1;
00087 } else if(status){
00088 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00089 }
00090 }
00091 }
00092
00093 if((h->left_samples_available&0x8888)!=0x8888){
00094 static const int mask[4]={0x8000,0x2000,0x80,0x20};
00095 for(i=0; i<4; i++){
00096 if(!(h->left_samples_available&mask[i])){
00097 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00098 if(status<0){
00099 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00100 return -1;
00101 } else if(status){
00102 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00103 }
00104 }
00105 }
00106 }
00107
00108 return 0;
00109 }
00110
/**
 * Check an intra 16x16 / chroma prediction mode against neighbour
 * availability and remap it to an equivalent usable mode when needed.
 *
 * @param h         decoder context
 * @param mode      requested prediction mode (valid range 0..6)
 * @param is_chroma nonzero when validating a chroma mode (enables the
 *                  partial-left-availability remap)
 * @return the (possibly remapped) mode, or -1 on error
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
    MpegEncContext * const s = &h->s;
    /* remap tables: -1 = mode needs that neighbour and has no substitute,
     * other values replace the mode with a DC-style fallback */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    if(mode > 6U) { /* unsigned compare also rejects negative modes */
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        /* only part of the left edge present (MBAFF / constrained intra):
         * use the special half-left DC modes */
        if(is_chroma && (h->left_samples_available&0x8080)){
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00145
00146 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
00147 int i, si, di;
00148 uint8_t *dst;
00149 int bufidx;
00150
00151
00152 h->nal_ref_idc= src[0]>>5;
00153 h->nal_unit_type= src[0]&0x1F;
00154
00155 src++; length--;
00156
00157 #if HAVE_FAST_UNALIGNED
00158 # if HAVE_FAST_64BIT
00159 # define RS 7
00160 for(i=0; i+1<length; i+=9){
00161 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
00162 # else
00163 # define RS 3
00164 for(i=0; i+1<length; i+=5){
00165 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
00166 # endif
00167 continue;
00168 if(i>0 && !src[i]) i--;
00169 while(src[i]) i++;
00170 #else
00171 # define RS 0
00172 for(i=0; i+1<length; i+=2){
00173 if(src[i]) continue;
00174 if(i>0 && src[i-1]==0) i--;
00175 #endif
00176 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
00177 if(src[i+2]!=3){
00178
00179 length=i;
00180 }
00181 break;
00182 }
00183 i-= RS;
00184 }
00185
00186 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
00187 si=h->rbsp_buffer_size[bufidx];
00188 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00189 dst= h->rbsp_buffer[bufidx];
00190 if(si != h->rbsp_buffer_size[bufidx])
00191 memset(dst + length, 0, FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00192
00193 if (dst == NULL){
00194 return NULL;
00195 }
00196
00197 if(i>=length-1){
00198 *dst_length= length;
00199 *consumed= length+1;
00200 if(h->s.avctx->flags2 & CODEC_FLAG2_FAST){
00201 return src;
00202 }else{
00203 memcpy(dst, src, length);
00204 return dst;
00205 }
00206 }
00207
00208
00209 memcpy(dst, src, i);
00210 si=di=i;
00211 while(si+2<length){
00212
00213 if(src[si+2]>3){
00214 dst[di++]= src[si++];
00215 dst[di++]= src[si++];
00216 }else if(src[si]==0 && src[si+1]==0){
00217 if(src[si+2]==3){
00218 dst[di++]= 0;
00219 dst[di++]= 0;
00220 si+=3;
00221 continue;
00222 }else
00223 goto nsc;
00224 }
00225
00226 dst[di++]= src[si++];
00227 }
00228 while(si<length)
00229 dst[di++]= src[si++];
00230 nsc:
00231
00232 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
00233
00234 *dst_length= di;
00235 *consumed= si + 1;
00236
00237 return dst;
00238 }
00239
00244 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00245 int v= *src;
00246 int r;
00247
00248 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00249
00250 for(r=1; r<9; r++){
00251 if(v&1) return r;
00252 v>>=1;
00253 }
00254 return 0;
00255 }
00256
00257 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00258 int y_offset, int list){
00259 int raw_my= h->mv_cache[list][ scan8[n] ][1];
00260 int filter_height= (raw_my&3) ? 2 : 0;
00261 int full_my= (raw_my>>2) + y_offset;
00262 int top = full_my - filter_height, bottom = full_my + height + filter_height;
00263
00264 return FFMAX(abs(top), bottom);
00265 }
00266
00267 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
00268 int y_offset, int list0, int list1, int *nrefs){
00269 MpegEncContext * const s = &h->s;
00270 int my;
00271
00272 y_offset += 16*(s->mb_y >> MB_FIELD);
00273
00274 if(list0){
00275 int ref_n = h->ref_cache[0][ scan8[n] ];
00276 Picture *ref= &h->ref_list[0][ref_n];
00277
00278
00279
00280
00281 if(ref->thread_opaque != s->current_picture.thread_opaque ||
00282 (ref->reference&3) != s->picture_structure) {
00283 my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
00284 if (refs[0][ref_n] < 0) nrefs[0] += 1;
00285 refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
00286 }
00287 }
00288
00289 if(list1){
00290 int ref_n = h->ref_cache[1][ scan8[n] ];
00291 Picture *ref= &h->ref_list[1][ref_n];
00292
00293 if(ref->thread_opaque != s->current_picture.thread_opaque ||
00294 (ref->reference&3) != s->picture_structure) {
00295 my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
00296 if (refs[1][ref_n] < 0) nrefs[1] += 1;
00297 refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
00298 }
00299 }
00300 }
00301
/**
 * Wait until all reference frames are available for MC operations.
 * Collects, per partition, the lowest reference row each motion vector
 * needs, then blocks on ff_thread_await_progress() for each pending
 * reference picture (frame-threaded decoding only).
 *
 * @param h the H264 context
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    int refs[2][48];      /* lowest row needed, per list and ref index; -1 = unused */
    int nrefs[2] = {0};   /* count of distinct pending refs per list */
    int ref, list;

    memset(refs, -1, sizeof(refs));

    /* pass 1: walk the partition tree and accumulate the needed rows */
    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    /* pass 2: wait on every pending reference, translating our row into
     * the reference's own row/field numbering */
    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF; /* an MBAFF MB pair spans two MB rows */
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ /* frame referencing two fields */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ /* field referencing a frame */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){ /* field referencing a field */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{ /* frame referencing a frame */
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00391
#if 0
/**
 * DCT-transform the 16 luma DC values.
 * NOTE(review): dead code, disabled with #if 0. It references a 'stride'
 * identifier that is not defined in this scope, so it would not compile
 * if re-enabled as-is — confirm intent before resurrecting.
 */
static void h264_luma_dc_dct_c(DCTELEM *block){
    int i;
    int temp[16];
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
00431
00432 #undef xStride
00433 #undef stride
00434
#if 0
/**
 * 2x2 Hadamard transform of the chroma DC values.
 * NOTE(review): dead code, disabled with #if 0 — kept for reference only.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00457
/**
 * Motion-compensate one direction (one reference list) of one partition:
 * quarter-pel luma interpolation plus chroma MC, with edge emulation when
 * the (filter-extended) source area crosses the picture border.
 *
 * @param square        nonzero if the partition is square (one qpix_op call)
 * @param chroma_height chroma rows to interpolate (4:2:0 path)
 * @param delta         byte offset to the second half of a non-square part
 * @param list          reference list (0 or 1)
 * @param chroma444     nonzero: chroma uses the luma code path
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int src_x_offset, int src_y_offset,
                               qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
                               int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;  /* quarter-pel x */
    int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;        /* quarter-pel y */
    const int luma_xy= (mx&3) + ((my&3)<<2);  /* sub-pel phase selects the qpel function */
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel interpolation reads 2 extra samples on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        /* source area leaves the padded picture: build an emulated edge */
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
        src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize);
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma444){
        /* 4:4:4: chroma planes use the luma interpolation path */
        src_cb = pic->data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize);
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize);
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    if(MB_FIELD){
        /* chroma offset when predicting from a field of opposite parity */
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
00542
00543 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
00544 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00545 int x_offset, int y_offset,
00546 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00547 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00548 int list0, int list1, int pixel_shift, int chroma444){
00549 MpegEncContext * const s = &h->s;
00550 qpel_mc_func *qpix_op= qpix_put;
00551 h264_chroma_mc_func chroma_op= chroma_put;
00552
00553 dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00554 if(chroma444){
00555 dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00556 dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00557 }else{
00558 dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
00559 dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
00560 }
00561 x_offset += 8*s->mb_x;
00562 y_offset += 8*(s->mb_y >> MB_FIELD);
00563
00564 if(list0){
00565 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
00566 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
00567 dest_y, dest_cb, dest_cr, x_offset, y_offset,
00568 qpix_op, chroma_op, pixel_shift, chroma444);
00569
00570 qpix_op= qpix_avg;
00571 chroma_op= chroma_avg;
00572 }
00573
00574 if(list1){
00575 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
00576 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
00577 dest_y, dest_cb, dest_cr, x_offset, y_offset,
00578 qpix_op, chroma_op, pixel_shift, chroma444);
00579 }
00580 }
00581
/**
 * Weighted motion compensation of one partition. Bi-directional parts are
 * predicted into a scratch buffer and blended with the biweight functions
 * (implicit or explicit weights); uni-directional parts are predicted in
 * place and scaled with the unidirectional weight functions.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                                    uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                                    int x_offset, int y_offset,
                                    qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                                    h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                                    h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                                    int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;

    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        /* 4:4:4: chroma is weighted exactly like luma */
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op  = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
    }
    /* block offset -> frame coordinates */
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list 0 in place, list 1 into the scratchpad, then blend */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);

        if(h->use_weight == 2){
            /* implicit weighting: weights sum to 64, denominator 5 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit weighting: per-ref weights and offsets from the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        /* uni-directional: predict in place, then scale */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma444);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00655
00656 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
00657 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00658 int x_offset, int y_offset,
00659 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00660 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00661 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00662 int list0, int list1, int pixel_shift, int chroma444){
00663 if((h->use_weight==2 && list0 && list1
00664 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00665 || h->use_weight==1)
00666 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00667 x_offset, y_offset, qpix_put, chroma_put,
00668 weight_op[0], weight_op[3], weight_avg[0],
00669 weight_avg[3], list0, list1, pixel_shift, chroma444);
00670 else
00671 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00672 x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00673 chroma_avg, list0, list1, pixel_shift, chroma444);
00674 }
00675
00676 static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
00677
00678
00679 MpegEncContext * const s = &h->s;
00680 const int refn = h->ref_cache[list][scan8[0]];
00681 if(refn >= 0){
00682 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
00683 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
00684 uint8_t **src= h->ref_list[list][refn].data;
00685 int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
00686 s->dsp.prefetch(src[0]+off, s->linesize, 4);
00687 if(chroma444){
00688 s->dsp.prefetch(src[1]+off, s->linesize, 4);
00689 s->dsp.prefetch(src[2]+off, s->linesize, 4);
00690 }else{
00691 off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
00692 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
00693 }
00694 }
00695 }
00696
/**
 * Perform motion compensation for the whole macroblock, walking the
 * partition tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and
 * issuing one mc_part() per partition. Waits for reference-frame decode
 * progress first when frame threading is active.
 *
 * The qpix/chroma function tables are indexed by block size
 * ([0]=16x16, [1]=8x8, [2]=4x4); weight tables likewise by partition.
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma444);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
    }else if(IS_16X8(mb_type)){
        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else if(IS_8X16(mb_type)){
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma444);
                }
            }
        }
    }

    prefetch_motion(h, 1, pixel_shift, chroma444);
}
00797
/* Instantiate bit-depth-specific wrappers around hl_motion() so that the
 * pixel_shift argument (sh: 0 for 8-bit, 1 for 16-bit samples) becomes a
 * compile-time constant inside the always-inlined body. */
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                       uint8_t *dest_y, \
                                       uint8_t *dest_cb, uint8_t *dest_cr, \
                                       qpel_mc_func (*qpix_put)[16], \
                                       h264_chroma_mc_func (*chroma_put), \
                                       qpel_mc_func (*qpix_avg)[16], \
                                       h264_chroma_mc_func (*chroma_avg), \
                                       h264_weight_func *weight_op, \
                                       h264_biweight_func *weight_avg, \
                                       int chroma444) \
{ \
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
00815
00816 static void free_tables(H264Context *h, int free_rbsp){
00817 int i;
00818 H264Context *hx;
00819
00820 av_freep(&h->intra4x4_pred_mode);
00821 av_freep(&h->chroma_pred_mode_table);
00822 av_freep(&h->cbp_table);
00823 av_freep(&h->mvd_table[0]);
00824 av_freep(&h->mvd_table[1]);
00825 av_freep(&h->direct_table);
00826 av_freep(&h->non_zero_count);
00827 av_freep(&h->slice_table_base);
00828 h->slice_table= NULL;
00829 av_freep(&h->list_counts);
00830
00831 av_freep(&h->mb2b_xy);
00832 av_freep(&h->mb2br_xy);
00833
00834 for(i = 0; i < MAX_THREADS; i++) {
00835 hx = h->thread_context[i];
00836 if(!hx) continue;
00837 av_freep(&hx->top_borders[1]);
00838 av_freep(&hx->top_borders[0]);
00839 av_freep(&hx->s.obmc_scratchpad);
00840 if (free_rbsp){
00841 av_freep(&hx->rbsp_buffer[1]);
00842 av_freep(&hx->rbsp_buffer[0]);
00843 hx->rbsp_buffer_size[0] = 0;
00844 hx->rbsp_buffer_size[1] = 0;
00845 }
00846 if (i) av_freep(&h->thread_context[i]);
00847 }
00848 }
00849
/**
 * Build the 8x8 dequantization tables for all QP values from the PPS
 * scaling matrices. Identical scaling matrices share one table.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* reuse an earlier buffer when the scaling matrices match */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];
            int idx = rem6[q];
            /* (x>>3)|((x&7)<<3) transposes the 8x8 index (raster -> column order) */
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00875
00876 static void init_dequant4_coeff_table(H264Context *h){
00877 int i,j,q,x;
00878 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
00879 for(i=0; i<6; i++ ){
00880 h->dequant4_coeff[i] = h->dequant4_buffer[i];
00881 for(j=0; j<i; j++){
00882 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
00883 h->dequant4_coeff[i] = h->dequant4_buffer[j];
00884 break;
00885 }
00886 }
00887 if(j<i)
00888 continue;
00889
00890 for(q=0; q<max_qp+1; q++){
00891 int shift = div6[q] + 2;
00892 int idx = rem6[q];
00893 for(x=0; x<16; x++)
00894 h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
00895 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
00896 h->pps.scaling_matrix4[i][x]) << shift;
00897 }
00898 }
00899 }
00900
00901 static void init_dequant_tables(H264Context *h){
00902 int i,x;
00903 init_dequant4_coeff_table(h);
00904 if(h->pps.transform_8x8_mode)
00905 init_dequant8_coeff_table(h);
00906 if(h->sps.transform_bypass){
00907 for(i=0; i<6; i++)
00908 for(x=0; x<16; x++)
00909 h->dequant4_coeff[i][0][x] = 1<<6;
00910 if(h->pps.transform_8x8_mode)
00911 for(i=0; i<6; i++)
00912 for(x=0; x<64; x++)
00913 h->dequant8_coeff[i][0][x] = 1<<6;
00914 }
00915 }
00916
00917
/**
 * Allocate the per-context H.264 tables (non-zero-count, slice table, CBP,
 * chroma pred modes, MVD, direct-mode, list counts) plus the macroblock ->
 * block-coordinate maps, and initialize the dequant tables on first use.
 *
 * @return 0 on success, -1 on allocation failure (everything allocated so
 *         far is released via free_tables()).
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra mb row as top padding */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* per-thread row storage: 2 mb rows for each slice-decoding thread */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice"; slice_table points past the padding row */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO the per-row tables wrap every two mb rows */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00961
00965 static void clone_tables(H264Context *dst, H264Context *src, int i){
00966 MpegEncContext * const s = &src->s;
00967 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
00968 dst->non_zero_count = src->non_zero_count;
00969 dst->slice_table = src->slice_table;
00970 dst->cbp_table = src->cbp_table;
00971 dst->mb2b_xy = src->mb2b_xy;
00972 dst->mb2br_xy = src->mb2br_xy;
00973 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
00974 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
00975 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
00976 dst->direct_table = src->direct_table;
00977 dst->list_counts = src->list_counts;
00978
00979 dst->s.obmc_scratchpad = NULL;
00980 ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
00981 }
00982
00987 static int context_init(H264Context *h){
00988 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
00989 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
00990
00991 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
00992 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
00993
00994 return 0;
00995 fail:
00996 return -1;
00997 }
00998
00999 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
01000
01001 static av_cold void common_init(H264Context *h){
01002 MpegEncContext * const s = &h->s;
01003
01004 s->width = s->avctx->width;
01005 s->height = s->avctx->height;
01006 s->codec_id= s->avctx->codec->id;
01007
01008 s->avctx->bits_per_raw_sample = 8;
01009
01010 ff_h264dsp_init(&h->h264dsp,
01011 s->avctx->bits_per_raw_sample);
01012 ff_h264_pred_init(&h->hpc, s->codec_id,
01013 s->avctx->bits_per_raw_sample);
01014
01015 h->dequant_coeff_pps= -1;
01016 s->unrestricted_mv=1;
01017 s->decode=1;
01018
01019 dsputil_init(&s->dsp, s->avctx);
01020
01021 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
01022 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
01023 }
01024
01025 int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
01026 {
01027 AVCodecContext *avctx = h->s.avctx;
01028
01029 if(!buf || size <= 0)
01030 return -1;
01031
01032 if(buf[0] == 1){
01033 int i, cnt, nalsize;
01034 const unsigned char *p = buf;
01035
01036 h->is_avc = 1;
01037
01038 if(size < 7) {
01039 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01040 return -1;
01041 }
01042
01043
01044 h->nal_length_size = 2;
01045
01046 cnt = *(p+5) & 0x1f;
01047 p += 6;
01048 for (i = 0; i < cnt; i++) {
01049 nalsize = AV_RB16(p) + 2;
01050 if(nalsize > size - (p-buf))
01051 return -1;
01052 if(decode_nal_units(h, p, nalsize) < 0) {
01053 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01054 return -1;
01055 }
01056 p += nalsize;
01057 }
01058
01059 cnt = *(p++);
01060 for (i = 0; i < cnt; i++) {
01061 nalsize = AV_RB16(p) + 2;
01062 if(nalsize > size - (p-buf))
01063 return -1;
01064 if (decode_nal_units(h, p, nalsize) < 0) {
01065 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01066 return -1;
01067 }
01068 p += nalsize;
01069 }
01070
01071 h->nal_length_size = (buf[4] & 0x03) + 1;
01072 } else {
01073 h->is_avc = 0;
01074 if(decode_nal_units(h, buf, size) < 0)
01075 return -1;
01076 }
01077 return 0;
01078 }
01079
/**
 * AVCodec.init callback: set decoder defaults, parse extradata (if any),
 * and derive initial reordering delay from the SPS.
 * @return 0 on success, -1 if the extradata could not be decoded.
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    /* set defaults */
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
        s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* 8-bit until an SPS declares a higher luma bit depth */
    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 counts in fields: two ticks per frame */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size))
        return -1;

    /* the SPS (from extradata) may already tell us the reorder depth */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01128
01129 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
01130 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
01131 {
01132 int i;
01133
01134 for (i=0; i<count; i++){
01135 assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
01136 IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
01137 !from[i]));
01138 to[i] = REBASE_PICTURE(from[i], new_base, old_base);
01139 }
01140 }
01141
01142 static void copy_parameter_set(void **to, void **from, int count, int size)
01143 {
01144 int i;
01145
01146 for (i=0; i<count; i++){
01147 if (to[i] && !from[i]) av_freep(&to[i]);
01148 else if (from[i] && !to[i]) to[i] = av_malloc(size);
01149
01150 if (from[i]) memcpy(to[i], from[i], size);
01151 }
01152 }
01153
01154 static int decode_init_thread_copy(AVCodecContext *avctx){
01155 H264Context *h= avctx->priv_data;
01156
01157 if (!avctx->is_copy) return 0;
01158 memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01159 memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01160
01161 return 0;
01162 }
01163
/* copy the fields from start_field up to (but not including) end_field */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)

/**
 * Frame-threading: synchronize decoder state from the previous thread's
 * context (src) into this one (dst) before dst decodes its frame.
 * On the first call the destination is fully (re)built.
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    // first call: copy everything and allocate fresh per-thread resources
    if(!inited){
        /* release parameter sets inherited by the struct copy below */
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        // copy the H264Context tail that follows the embedded MpegEncContext
        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext));
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        /* rbsp buffers are per-thread; do not share the source's */
        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    // extradata/NAL handling
    h->is_avc = h1->is_avc;

    // SPS/PPS: deep-copy the tables, shallow-copy the active sets
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps = h1->pps;

    // dequantization matrices: copy the buffers, then re-point the
    // per-slot pointers at our own buffers preserving the aliasing
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    // POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    // reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list, intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* Picture pointers copied above point into the source context;
     * rebase them onto this context */
    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    if(!s->dropable) {
        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb = h->poc_msb;
        h->prev_poc_lsb = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return 0;
}
01256
/**
 * Per-frame setup: start the MPV frame and error resilience, compute the
 * block -> pixel offset tables for the current linesizes, and reset the
 * per-picture state.
 * @return 0 on success, -1 if MPV_frame_start() failed.
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);

    /* these get set correctly during header parsing / ref marking */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* luma offsets: [0..15] frame (4*linesize rows), [48..63] field (8*linesize) */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    /* chroma offsets, frame and field variants */
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* lazily allocate the per-thread scratchpads (linesize known only now) */
    for(i = 0; i < thread_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* -1 = "no slice decoded here yet" */
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    /* INT_MAX marks the field POCs as not-yet-known; decode_postinit()
     * waits for both before scheduling output */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01317
/**
 * Run after the first slice header of a picture has been decoded: derive
 * interlacing/repeat flags from SEI pic_struct, insert the picture into the
 * delayed-output buffer, and pick the next picture to output (POC order).
 *
 * @param setup_finished if set, signal the frame-threading framework that
 *                       setup of this frame is complete.
 */
static void decode_postinit(H264Context *h, int setup_finished){
    MpegEncContext * const s = &h->s;
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;

    if (h->next_output_pic) return;

    /* both field POCs must be known (paired fields) before we can decide
     * output order; ff_h264_frame_start() initializes them to INT_MAX */
    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
        return;
    }

    cur->interlaced_frame = 0;
    cur->repeat_pict = 0;

    /* signal interlacing/pulldown from SEI picture timing when present */
    if(h->sps.pic_struct_present_flag){
        switch (h->sei_pic_struct)
        {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
                cur->interlaced_frame = 1;
            else
                /* ambiguous: assume same interlacing as the previous frame */
                cur->interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            /* one extra field for telecine */
            cur->repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            cur->repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->repeat_pict = 4;
            break;
        }

        /* SEI clock timestamp ct_type overrides for frame/field pic_structs */
        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
    }else{
        /* no SEI timing: derive from the coding mode */
        cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
    }
    h->prev_interlaced_frame = cur->interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]){
        /* field order follows POC order */
        cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
    }else{
        if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
            /* equal POCs: fall back to SEI pic_struct */
            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->top_field_first = 1;
            else
                cur->top_field_first = 0;
        }else{
            /* progressive, no timing info */
            cur->top_field_first = 0;
        }
    }

    /* grow the reorder delay if the SPS declares more reorder frames */
    if(h->sps.bitstream_restriction_flag
       && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    /* strict mode without restriction info: assume the worst-case delay */
    if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
       && !h->sps.bitstream_restriction_flag){
        s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
        s->low_delay= 0;
    }

    pics = 0;
    while(h->delayed_pic[pics]) pics++;

    av_assert0(pics <= MAX_DELAYED_PIC_COUNT);

    h->delayed_pic[pics++] = cur;
    if(cur->reference == 0)
        cur->reference = DELAYED_PIC_REF;

    /* pick the lowest-POC delayed picture, stopping at keyframes/resets
     * since pictures after them cannot precede them in output order */
    out = h->delayed_pic[0];
    out_idx = 0;
    for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
        if(h->delayed_pic[i]->poc < out->poc){
            out = h->delayed_pic[i];
            out_idx = i;
        }
    if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc= INT_MIN;
    out_of_order = out->poc < h->next_outputed_poc;

    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        { }
    else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
       || (s->low_delay &&
        ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
         || cur->pict_type == AV_PICTURE_TYPE_B)))
    {
        /* the declared delay was too small: increase it heuristically */
        s->low_delay = 0;
        s->avctx->has_b_frames++;
    }

    if(out_of_order || pics > s->avctx->has_b_frames){
        out->reference &= ~DELAYED_PIC_REF;
        out->owner2 = s;
        /* remove the chosen picture from the delayed buffer */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
    }
    if(!out_of_order && pics > s->avctx->has_b_frames){
        h->next_output_pic = out;
        if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    }else{
        av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
    }

    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
}
01476
/**
 * Save the bottom row(s) of the current macroblock into top_borders[] so
 * the deblocking filter of the row below can still see the pre-filter
 * pixels. Handles 8/16-bit samples (pixel_shift), 4:2:0 vs 4:4:4 chroma,
 * grayscale, and MBAFF field/frame pairs.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;

    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* bottom mb of a pair: for frame-coded pairs, save row 15 into
             * the alternate border slot */
            if(!MB_MBAFF){
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else {
                        /* 4:2:0: chroma blocks are 8 rows tall */
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    /* There are two lines saved, the line above the top macroblock of a pair,
     * and the line above the bottom macroblock. */
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* NOTE(review): 4:4:4 path uses linesize for chroma here (planes
             * share the luma geometry) — matches the copies above */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01551
/**
 * Swap (xchg=1) or copy back (xchg=0) the saved top border pixels with the
 * current macroblock's top edge, so intra prediction sees pre-deblock
 * neighbours. Called before and after intra prediction when the in-loop
 * filter is enabled.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        /* filter across slice boundaries disabled: only neighbours in the
         * same slice count */
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* swap 8 (or 16 for high bit depth) bytes, or plain copy when !xchg */
#define XCHG(a,b,xchg)\
if (pixel_shift) {\
    if (xchg) {\
        AV_SWAP64(b+0,a+0);\
        AV_SWAP64(b+8,a+8);\
    } else {\
        AV_COPY128(b,a); \
    }\
} else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* NOTE(review): the 4:4:4 path does not re-check deblock_top
             * while the 4:2:0 path below does — intentional per upstream;
             * confirm before changing */
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01636
01637 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01638 if (high_bit_depth) {
01639 return AV_RN32A(((int32_t*)mb) + index);
01640 } else
01641 return AV_RN16A(mb + index);
01642 }
01643
01644 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01645 if (high_bit_depth) {
01646 AV_WN32A(((int32_t*)mb) + index, value);
01647 } else
01648 AV_WN16A(mb + index, value);
01649 }
01650
/**
 * Intra prediction + residual add for one luma plane (plane p; p>0 only for
 * 4:4:4 where chroma planes are decoded like luma). Dispatches between
 * 4x4/8x8 intra, 16x16 intra, transform bypass (lossless) and SVQ3.
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                            int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    /* High 4:4:4 lossless: vertical/horizontal prediction is
                     * fused with the residual add */
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                               (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            /* single nonzero coefficient at DC: cheaper add */
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* unavailable top-right: replicate the last
                                 * available top pixel */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        /* 16x16 intra: whole-mb prediction, then transform the DC plane */
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* lossless: scatter the DC values to each block's
                     * position 0 in raster-within-8x8 order */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01753
/**
 * Residual (IDCT + add) pass for one luma plane of a non-intra4x4
 * macroblock; the intra4x4 case already added residuals during prediction
 * in hl_decode_mb_predict_luma().
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                            int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        /* High 4:4:4 lossless V/H prediction: fused add */
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){
                /* inter (or other) mb with at least one coded luma 8x8 */
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        }else{
            /* SVQ3 residual path */
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01803
01804 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
01805 MpegEncContext * const s = &h->s;
01806 const int mb_x= s->mb_x;
01807 const int mb_y= s->mb_y;
01808 const int mb_xy= h->mb_xy;
01809 const int mb_type= s->current_picture.mb_type[mb_xy];
01810 uint8_t *dest_y, *dest_cb, *dest_cr;
01811 int linesize, uvlinesize ;
01812 int i, j;
01813 int *block_offset = &h->block_offset[0];
01814 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
01815
01816 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
01817 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
01818
01819 dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
01820 dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
01821 dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
01822
01823 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
01824 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
01825
01826 h->list_counts[mb_xy]= h->list_count;
01827
01828 if (!simple && MB_FIELD) {
01829 linesize = h->mb_linesize = s->linesize * 2;
01830 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
01831 block_offset = &h->block_offset[48];
01832 if(mb_y&1){
01833 dest_y -= s->linesize*15;
01834 dest_cb-= s->uvlinesize*7;
01835 dest_cr-= s->uvlinesize*7;
01836 }
01837 if(FRAME_MBAFF) {
01838 int list;
01839 for(list=0; list<h->list_count; list++){
01840 if(!USES_LIST(mb_type, list))
01841 continue;
01842 if(IS_16X16(mb_type)){
01843 int8_t *ref = &h->ref_cache[list][scan8[0]];
01844 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
01845 }else{
01846 for(i=0; i<16; i+=4){
01847 int ref = h->ref_cache[list][scan8[i]];
01848 if(ref >= 0)
01849 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
01850 }
01851 }
01852 }
01853 }
01854 } else {
01855 linesize = h->mb_linesize = s->linesize;
01856 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
01857
01858 }
01859
01860 if (!simple && IS_INTRA_PCM(mb_type)) {
01861 if (pixel_shift) {
01862 const int bit_depth = h->sps.bit_depth_luma;
01863 int j;
01864 GetBitContext gb;
01865 init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
01866
01867 for (i = 0; i < 16; i++) {
01868 uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize);
01869 for (j = 0; j < 16; j++)
01870 tmp_y[j] = get_bits(&gb, bit_depth);
01871 }
01872 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01873 if (!h->sps.chroma_format_idc) {
01874 for (i = 0; i < 8; i++) {
01875 uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
01876 for (j = 0; j < 8; j++) {
01877 tmp_cb[j] = 1 << (bit_depth - 1);
01878 }
01879 }
01880 for (i = 0; i < 8; i++) {
01881 uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
01882 for (j = 0; j < 8; j++) {
01883 tmp_cr[j] = 1 << (bit_depth - 1);
01884 }
01885 }
01886 } else {
01887 for (i = 0; i < 8; i++) {
01888 uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
01889 for (j = 0; j < 8; j++)
01890 tmp_cb[j] = get_bits(&gb, bit_depth);
01891 }
01892 for (i = 0; i < 8; i++) {
01893 uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
01894 for (j = 0; j < 8; j++)
01895 tmp_cr[j] = get_bits(&gb, bit_depth);
01896 }
01897 }
01898 }
01899 } else {
01900 for (i=0; i<16; i++) {
01901 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
01902 }
01903 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01904 if (!h->sps.chroma_format_idc) {
01905 for (i = 0; i < 8; i++) {
01906 memset(dest_cb + i*uvlinesize, 128, 8);
01907 memset(dest_cr + i*uvlinesize, 128, 8);
01908 }
01909 } else {
01910 for (i = 0; i < 8; i++) {
01911 memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8);
01912 memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8);
01913 }
01914 }
01915 }
01916 }
01917 } else {
01918 if(IS_INTRA(mb_type)){
01919 if(h->deblocking_filter)
01920 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);
01921
01922 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01923 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
01924 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
01925 }
01926
01927 hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
01928
01929 if(h->deblocking_filter)
01930 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
01931 }else if(is_h264){
01932 if (pixel_shift) {
01933 hl_motion_16(h, dest_y, dest_cb, dest_cr,
01934 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
01935 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
01936 h->h264dsp.weight_h264_pixels_tab,
01937 h->h264dsp.biweight_h264_pixels_tab, 0);
01938 } else
01939 hl_motion_8(h, dest_y, dest_cb, dest_cr,
01940 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
01941 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
01942 h->h264dsp.weight_h264_pixels_tab,
01943 h->h264dsp.biweight_h264_pixels_tab, 0);
01944 }
01945
01946 hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
01947
01948 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
01949 uint8_t *dest[2] = {dest_cb, dest_cr};
01950 if(transform_bypass){
01951 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
01952 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
01953 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
01954 }else{
01955 idct_add = s->dsp.add_pixels4;
01956 for(j=1; j<3; j++){
01957 for(i=j*16; i<j*16+4; i++){
01958 if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
01959 idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
01960 }
01961 }
01962 }
01963 }else{
01964 if(is_h264){
01965 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
01966 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
01967 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
01968 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
01969 h->h264dsp.h264_idct_add8(dest, block_offset,
01970 h->mb, uvlinesize,
01971 h->non_zero_count_cache);
01972 }
01973 #if CONFIG_SVQ3_DECODER
01974 else{
01975 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
01976 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
01977 for(j=1; j<3; j++){
01978 for(i=j*16; i<j*16+4; i++){
01979 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
01980 uint8_t * const ptr= dest[j-1] + block_offset[i];
01981 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
01982 }
01983 }
01984 }
01985 }
01986 #endif
01987 }
01988 }
01989 }
01990 if(h->cbp || IS_INTRA(mb_type))
01991 {
01992 s->dsp.clear_blocks(h->mb);
01993 s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
01994 }
01995 }
01996
/**
 * Decode one macroblock of 4:4:4 content.  All three planes have luma
 * geometry here, so chroma is reconstructed by the same "luma" helpers,
 * parameterized by a plane index p.
 *
 * @param h           decoder context
 * @param simple      nonzero selects the fast path: no field/MBAFF handling,
 *                    no IPCM, no transform bypass, no CODEC_FLAG_GRAY
 * @param pixel_shift 1 when samples are 16 bit (bit depth > 8), else 0
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* decode only plane 0 when gray-only output was requested */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        /* all planes use the luma stride/size in 4:4:4 */
        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: one MB row advances two frame lines */
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) /* bottom field: rewind the pointers one field */
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* remap cached reference indices of field MBs inside an MBAFF
             * frame so they address individual fields of the frame refs */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* IPCM macroblock: raw samples were stored into h->mb by the
         * parser; copy them out verbatim (3 * 256 samples) */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* the deblocking filter needs the unfiltered top/left border
             * samples, so swap them in around intra prediction */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            /* inter macroblock: motion compensation, variant chosen by
             * sample size (8 vs 16 bit) */
            if (pixel_shift) {
                hl_motion_16(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
            } else
                hl_motion_8(h, dest[0], dest[1], dest[2],
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 1);
        }

        /* add the residual to the (intra-predicted or motion-compensated)
         * samples of every decoded plane */
        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    if(h->cbp || IS_INTRA(mb_type))
    {
        /* reset the coefficient buffer for the next macroblock */
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02099
/**
 * Instantiate the non-4:4:4 fast-path decoders:
 * hl_decode_mb_simple_8() for 8 bit content (pixel_shift 0) and
 * hl_decode_mb_simple_16() for high bit depth content (pixel_shift 1).
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);
02109
/**
 * Slow-path non-4:4:4 macroblock decode (handles field/MBAFF, IPCM,
 * transform bypass, gray decoding); bit depth taken from the context.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02116
/**
 * Slow-path 4:4:4 macroblock decode; bit depth taken from the context.
 */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02120
/**
 * Fast-path 4:4:4 macroblock decode for 8 bit content (pixel_shift 0).
 */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02124
02125 void ff_h264_hl_decode_mb(H264Context *h){
02126 MpegEncContext * const s = &h->s;
02127 const int mb_xy= h->mb_xy;
02128 const int mb_type= s->current_picture.mb_type[mb_xy];
02129 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02130
02131 if (CHROMA444) {
02132 if(is_complex || h->pixel_shift)
02133 hl_decode_mb_444_complex(h);
02134 else
02135 hl_decode_mb_444_simple(h);
02136 } else if (is_complex) {
02137 hl_decode_mb_complex(h);
02138 } else if (h->pixel_shift) {
02139 hl_decode_mb_simple_16(h);
02140 } else
02141 hl_decode_mb_simple_8(h);
02142 }
02143
02144 static int pred_weight_table(H264Context *h){
02145 MpegEncContext * const s = &h->s;
02146 int list, i;
02147 int luma_def, chroma_def;
02148
02149 h->use_weight= 0;
02150 h->use_weight_chroma= 0;
02151 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02152 if(h->sps.chroma_format_idc)
02153 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02154 luma_def = 1<<h->luma_log2_weight_denom;
02155 chroma_def = 1<<h->chroma_log2_weight_denom;
02156
02157 for(list=0; list<2; list++){
02158 h->luma_weight_flag[list] = 0;
02159 h->chroma_weight_flag[list] = 0;
02160 for(i=0; i<h->ref_count[list]; i++){
02161 int luma_weight_flag, chroma_weight_flag;
02162
02163 luma_weight_flag= get_bits1(&s->gb);
02164 if(luma_weight_flag){
02165 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
02166 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
02167 if( h->luma_weight[i][list][0] != luma_def
02168 || h->luma_weight[i][list][1] != 0) {
02169 h->use_weight= 1;
02170 h->luma_weight_flag[list]= 1;
02171 }
02172 }else{
02173 h->luma_weight[i][list][0]= luma_def;
02174 h->luma_weight[i][list][1]= 0;
02175 }
02176
02177 if(h->sps.chroma_format_idc){
02178 chroma_weight_flag= get_bits1(&s->gb);
02179 if(chroma_weight_flag){
02180 int j;
02181 for(j=0; j<2; j++){
02182 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
02183 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
02184 if( h->chroma_weight[i][list][j][0] != chroma_def
02185 || h->chroma_weight[i][list][j][1] != 0) {
02186 h->use_weight_chroma= 1;
02187 h->chroma_weight_flag[list]= 1;
02188 }
02189 }
02190 }else{
02191 int j;
02192 for(j=0; j<2; j++){
02193 h->chroma_weight[i][list][j][0]= chroma_def;
02194 h->chroma_weight[i][list][j][1]= 0;
02195 }
02196 }
02197 }
02198 }
02199 if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
02200 }
02201 h->use_weight= h->use_weight || h->use_weight_chroma;
02202 return 0;
02203 }
02204
/**
 * Initialize the implicit weighting factors used for B-slice prediction
 * (H.264 section 8.4.2.3.2): each (ref0, ref1) pair gets a weight derived
 * from the POC distances of the two references to the current picture.
 *
 * @param field  -1: frame call, fills both field sub-weights with the frame
 *               weight; 0/1: fills only that field's sub-weight for the
 *               extra per-field reference entries (stored from index 16 on)
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i] = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
        /* shortcut: with one reference in each list placed symmetrically
         * around the current picture, all weights degenerate to 32/32 --
         * disable weighted prediction entirely */
        if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
           && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
            h->use_weight= 0;
            h->use_weight_chroma= 0;
            return;
        }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        /* field pass: the per-field reference entries occupy indices
         * 16..16+2*ref_count-1 of ref_list (two fields per frame ref) */
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    /* 2 marks "implicit" mode for the motion compensation code */
    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;  /* default: equal weighting */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                /* spec 8.4.2.3.2: w1 = 64 - DistScaleFactor>>2 (here kept
                 * at >>8 scale), clipped back to 32 when out of range */
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02270
02274 static void idr(H264Context *h){
02275 ff_h264_remove_all_refs(h);
02276 h->prev_frame_num= 0;
02277 h->prev_frame_num_offset= 0;
02278 h->prev_poc_msb=
02279 h->prev_poc_lsb= 0;
02280 }
02281
02282
02283 static void flush_dpb(AVCodecContext *avctx){
02284 H264Context *h= avctx->priv_data;
02285 int i;
02286 for(i=0; i<=MAX_DELAYED_PIC_COUNT; i++) {
02287 if(h->delayed_pic[i])
02288 h->delayed_pic[i]->reference= 0;
02289 h->delayed_pic[i]= NULL;
02290 }
02291 h->outputed_poc=h->next_outputed_poc= INT_MIN;
02292 h->prev_interlaced_frame = 1;
02293 idr(h);
02294 if(h->s.current_picture_ptr)
02295 h->s.current_picture_ptr->reference= 0;
02296 h->s.first_field= 0;
02297 ff_h264_reset_sei(h);
02298 ff_mpeg_flush(avctx);
02299 }
02300
/**
 * Derive the picture order count of the current picture from the slice
 * header fields, following H.264 section 8.2.1 for the three POC types.
 * Fills in field_poc[0/1] and poc of the current Picture.
 *
 * @return 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* unwrap frame_num (it is coded modulo max_frame_num) */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        /* type 0 (8.2.1.1): pic_order_cnt_lsb is coded; reconstruct the
         * MSB part by detecting wrap-around relative to the previous LSB */
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;

        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* type 1 (8.2.1.2): POC follows a periodic pattern described in
         * the SPS, plus per-slice delta corrections */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ];

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* type 2 (8.2.1.3): POC derived directly from frame_num;
         * non-reference pictures sit one below the following reference */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* for field pictures only the coded field's POC is set here */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02377
02378
/**
 * Build the runtime coefficient scan tables.  Each T(x) transposes a scan
 * position (swaps its row/column fields), converting the spec tables to
 * the decoder's internal (transposed) block layout.  The *_q0 pointers
 * select the untransposed spec tables, used for qscale==0 blocks when the
 * SPS enables transform bypass (lossless).
 */
static void init_scan_tables(H264Context *h){
    int i;
    for(i=0; i<16; i++){
#define T(x) (x>>2) | ((x<<2) & 0xF)   /* transpose a 4x4 scan position */
        h->zigzag_scan[i] = T(zigzag_scan[i]);
        h-> field_scan[i] = T( field_scan[i]);
#undef T
    }
    for(i=0; i<64; i++){
#define T(x) (x>>3) | ((x&7)<<3)       /* transpose an 8x8 scan position */
        h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
        h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
        h->field_scan8x8[i] = T(field_scan8x8[i]);
        h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
#undef T
    }
    if(h->sps.transform_bypass){
        /* lossless blocks skip the transform, so they use the raw tables */
        h->zigzag_scan_q0 = zigzag_scan;
        h->zigzag_scan8x8_q0 = ff_zigzag_direct;
        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
        h->field_scan_q0 = field_scan;
        h->field_scan8x8_q0 = field_scan8x8;
        h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
    }else{
        h->zigzag_scan_q0 = h->zigzag_scan;
        h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
        h->field_scan_q0 = h->field_scan;
        h->field_scan8x8_q0 = h->field_scan8x8;
        h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
    }
}
02414
/**
 * Finish decoding the current field or frame: report completion to other
 * threads, run reference picture marking (when it was not deferred to the
 * frame-threading setup step), close the hwaccel/VDPAU frame and end the
 * MPV picture.
 *
 * @param in_setup nonzero when called from the frame-threading setup path
 */
static void field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    s->mb_y= 0;

    if (!in_setup && !s->dropable)
        /* tell frame-threaded consumers the whole picture is available */
        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                 s->picture_structure==PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    /* with frame threading, ref marking and POC/frame_num bookkeeping run
     * during the setup step (in_setup); otherwise they run here */
    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        if(!s->dropable) {
            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /* NOTE(review): error concealment is only run for complete frames;
     * field pictures are skipped here -- presumably the concealment code
     * cannot handle individual fields, confirm before changing */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;
}
02465
02469 static void clone_slice(H264Context *dst, H264Context *src)
02470 {
02471 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
02472 dst->s.current_picture_ptr = src->s.current_picture_ptr;
02473 dst->s.current_picture = src->s.current_picture;
02474 dst->s.linesize = src->s.linesize;
02475 dst->s.uvlinesize = src->s.uvlinesize;
02476 dst->s.first_field = src->s.first_field;
02477
02478 dst->prev_poc_msb = src->prev_poc_msb;
02479 dst->prev_poc_lsb = src->prev_poc_lsb;
02480 dst->prev_frame_num_offset = src->prev_frame_num_offset;
02481 dst->prev_frame_num = src->prev_frame_num;
02482 dst->short_ref_count = src->short_ref_count;
02483
02484 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
02485 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
02486 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
02487 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
02488
02489 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
02490 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
02491 }
02492
02500 int ff_h264_get_profile(SPS *sps)
02501 {
02502 int profile = sps->profile_idc;
02503
02504 switch(sps->profile_idc) {
02505 case FF_PROFILE_H264_BASELINE:
02506
02507 profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02508 break;
02509 case FF_PROFILE_H264_HIGH_10:
02510 case FF_PROFILE_H264_HIGH_422:
02511 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02512
02513 profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02514 break;
02515 }
02516
02517 return profile;
02518 }
02519
02529 static int decode_slice_header(H264Context *h, H264Context *h0){
02530 MpegEncContext * const s = &h->s;
02531 MpegEncContext * const s0 = &h0->s;
02532 unsigned int first_mb_in_slice;
02533 unsigned int pps_id;
02534 int num_ref_idx_active_override_flag;
02535 unsigned int slice_type, tmp, i, j;
02536 int default_ref_list_done = 0;
02537 int last_pic_structure;
02538
02539 s->dropable= h->nal_ref_idc == 0;
02540
02541
02542 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02543 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02544 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02545 }else{
02546 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02547 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02548 }
02549
02550 first_mb_in_slice= get_ue_golomb(&s->gb);
02551
02552 if(first_mb_in_slice == 0){
02553 if(h0->current_slice && FIELD_PICTURE){
02554 field_end(h, 1);
02555 }
02556
02557 h0->current_slice = 0;
02558 if (!s0->first_field)
02559 s->current_picture_ptr= NULL;
02560 }
02561
02562 slice_type= get_ue_golomb_31(&s->gb);
02563 if(slice_type > 9){
02564 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02565 return -1;
02566 }
02567 if(slice_type > 4){
02568 slice_type -= 5;
02569 h->slice_type_fixed=1;
02570 }else
02571 h->slice_type_fixed=0;
02572
02573 slice_type= golomb_to_pict_type[ slice_type ];
02574 if (slice_type == AV_PICTURE_TYPE_I
02575 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02576 default_ref_list_done = 1;
02577 }
02578 h->slice_type= slice_type;
02579 h->slice_type_nos= slice_type & 3;
02580
02581 s->pict_type= h->slice_type;
02582
02583 pps_id= get_ue_golomb(&s->gb);
02584 if(pps_id>=MAX_PPS_COUNT){
02585 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02586 return -1;
02587 }
02588 if(!h0->pps_buffers[pps_id]) {
02589 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02590 return -1;
02591 }
02592 h->pps= *h0->pps_buffers[pps_id];
02593
02594 if(!h0->sps_buffers[h->pps.sps_id]) {
02595 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02596 return -1;
02597 }
02598 h->sps = *h0->sps_buffers[h->pps.sps_id];
02599
02600 s->avctx->profile = ff_h264_get_profile(&h->sps);
02601 s->avctx->level = h->sps.level_idc;
02602 s->avctx->refs = h->sps.ref_frame_count;
02603
02604 if(h == h0 && h->dequant_coeff_pps != pps_id){
02605 h->dequant_coeff_pps = pps_id;
02606 init_dequant_tables(h);
02607 }
02608
02609 s->mb_width= h->sps.mb_width;
02610 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02611
02612 h->b_stride= s->mb_width*4;
02613
02614 s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02615 if(h->sps.frame_mbs_only_flag)
02616 s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02617 else
02618 s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02619
02620 if (FFALIGN(s->avctx->width, 16) == s->width &&
02621 FFALIGN(s->avctx->height, 16) == s->height) {
02622 s->width = s->avctx->width;
02623 s->height = s->avctx->height;
02624 }
02625
02626 if (s->context_initialized
02627 && ( s->width != s->avctx->width || s->height != s->avctx->height
02628 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02629 if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
02630 av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02631 return AVERROR_PATCHWELCOME;
02632 }
02633 free_tables(h, 0);
02634 flush_dpb(s->avctx);
02635 MPV_common_end(s);
02636 h->list_count = 0;
02637 }
02638 if (!s->context_initialized) {
02639 if (h != h0) {
02640 av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02641 return -1;
02642 }
02643
02644 avcodec_set_dimensions(s->avctx, s->width, s->height);
02645 s->avctx->sample_aspect_ratio= h->sps.sar;
02646 av_assert0(s->avctx->sample_aspect_ratio.den);
02647
02648 h->s.avctx->coded_width = 16*s->mb_width;
02649 h->s.avctx->coded_height = 16*s->mb_height;
02650
02651 if(h->sps.video_signal_type_present_flag){
02652 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02653 if(h->sps.colour_description_present_flag){
02654 s->avctx->color_primaries = h->sps.color_primaries;
02655 s->avctx->color_trc = h->sps.color_trc;
02656 s->avctx->colorspace = h->sps.colorspace;
02657 }
02658 }
02659
02660 if(h->sps.timing_info_present_flag){
02661 int64_t den= h->sps.time_scale;
02662 if(h->x264_build < 44U)
02663 den *= 2;
02664 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02665 h->sps.num_units_in_tick, den, 1<<30);
02666 }
02667
02668 switch (h->sps.bit_depth_luma) {
02669 case 9 :
02670 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
02671 break;
02672 case 10 :
02673 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
02674 break;
02675 default:
02676 if (CHROMA444){
02677 s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02678 }else{
02679 s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02680 s->avctx->codec->pix_fmts ?
02681 s->avctx->codec->pix_fmts :
02682 s->avctx->color_range == AVCOL_RANGE_JPEG ?
02683 hwaccel_pixfmt_list_h264_jpeg_420 :
02684 ff_hwaccel_pixfmt_list_420);
02685 }
02686 }
02687
02688 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02689
02690 if (MPV_common_init(s) < 0) {
02691 av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02692 return -1;
02693 }
02694 s->first_field = 0;
02695 h->prev_interlaced_frame = 1;
02696
02697 init_scan_tables(h);
02698 if (ff_h264_alloc_tables(h) < 0) {
02699 av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02700 return AVERROR(ENOMEM);
02701 }
02702
02703 if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02704 if (context_init(h) < 0) {
02705 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02706 return -1;
02707 }
02708 } else {
02709 for(i = 1; i < s->avctx->thread_count; i++) {
02710 H264Context *c;
02711 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02712 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02713 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02714 c->h264dsp = h->h264dsp;
02715 c->sps = h->sps;
02716 c->pps = h->pps;
02717 c->pixel_shift = h->pixel_shift;
02718 init_scan_tables(c);
02719 clone_tables(c, h, i);
02720 }
02721
02722 for(i = 0; i < s->avctx->thread_count; i++)
02723 if (context_init(h->thread_context[i]) < 0) {
02724 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02725 return -1;
02726 }
02727 }
02728 }
02729
02730 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02731
02732 h->mb_mbaff = 0;
02733 h->mb_aff_frame = 0;
02734 last_pic_structure = s0->picture_structure;
02735 if(h->sps.frame_mbs_only_flag){
02736 s->picture_structure= PICT_FRAME;
02737 }else{
02738 if(get_bits1(&s->gb)) {
02739 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb);
02740 } else {
02741 s->picture_structure= PICT_FRAME;
02742 h->mb_aff_frame = h->sps.mb_aff;
02743 }
02744 }
02745 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02746
02747 if(h0->current_slice == 0){
02748
02749 if(h->frame_num != h->prev_frame_num) {
02750 int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
02751
02752 if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02753
02754 if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02755 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02756 if (unwrap_prev_frame_num < 0)
02757 unwrap_prev_frame_num += max_frame_num;
02758
02759 h->prev_frame_num = unwrap_prev_frame_num;
02760 }
02761 }
02762
02763 while(h->frame_num != h->prev_frame_num &&
02764 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
02765 Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02766 av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02767 if (ff_h264_frame_start(h) < 0)
02768 return -1;
02769 h->prev_frame_num++;
02770 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02771 s->current_picture_ptr->frame_num= h->prev_frame_num;
02772 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02773 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02774 ff_generate_sliding_window_mmcos(h);
02775 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
02776
02777
02778
02779
02780
02781
02782 if (h->short_ref_count) {
02783 if (prev) {
02784 av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
02785 (const uint8_t**)prev->data, prev->linesize,
02786 s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
02787 h->short_ref[0]->poc = prev->poc+2;
02788 }
02789 h->short_ref[0]->frame_num = h->prev_frame_num;
02790 }
02791 }
02792
02793
02794 if (s0->first_field) {
02795 assert(s0->current_picture_ptr);
02796 assert(s0->current_picture_ptr->data[0]);
02797 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
02798
02799
02800 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02801
02802
02803
02804
02805 s0->current_picture_ptr = NULL;
02806 s0->first_field = FIELD_PICTURE;
02807
02808 } else {
02809 if (s0->current_picture_ptr->frame_num != h->frame_num) {
02810
02811
02812
02813
02814
02815
02816 s0->first_field = 1;
02817 s0->current_picture_ptr = NULL;
02818
02819 } else {
02820
02821 s0->first_field = 0;
02822 }
02823 }
02824
02825 } else {
02826
02827 assert(!s0->current_picture_ptr);
02828 s0->first_field = FIELD_PICTURE;
02829 }
02830
02831 if(!FIELD_PICTURE || s0->first_field) {
02832 if (ff_h264_frame_start(h) < 0) {
02833 s0->first_field = 0;
02834 return -1;
02835 }
02836 } else {
02837 ff_release_unused_pictures(s, 0);
02838 }
02839 }
02840 if(h != h0)
02841 clone_slice(h, h0);
02842
02843 s->current_picture_ptr->frame_num= h->frame_num;
02844
02845 assert(s->mb_num == s->mb_width * s->mb_height);
02846 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
02847 first_mb_in_slice >= s->mb_num){
02848 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
02849 return -1;
02850 }
02851 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
02852 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
02853 if (s->picture_structure == PICT_BOTTOM_FIELD)
02854 s->resync_mb_y = s->mb_y = s->mb_y + 1;
02855 assert(s->mb_y < s->mb_height);
02856
02857 if(s->picture_structure==PICT_FRAME){
02858 h->curr_pic_num= h->frame_num;
02859 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
02860 }else{
02861 h->curr_pic_num= 2*h->frame_num + 1;
02862 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
02863 }
02864
02865 if(h->nal_unit_type == NAL_IDR_SLICE){
02866 get_ue_golomb(&s->gb);
02867 }
02868
02869 if(h->sps.poc_type==0){
02870 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
02871
02872 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
02873 h->delta_poc_bottom= get_se_golomb(&s->gb);
02874 }
02875 }
02876
02877 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
02878 h->delta_poc[0]= get_se_golomb(&s->gb);
02879
02880 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
02881 h->delta_poc[1]= get_se_golomb(&s->gb);
02882 }
02883
02884 init_poc(h);
02885
02886 if(h->pps.redundant_pic_cnt_present){
02887 h->redundant_pic_count= get_ue_golomb(&s->gb);
02888 }
02889
02890
02891 h->ref_count[0]= h->pps.ref_count[0];
02892 h->ref_count[1]= h->pps.ref_count[1];
02893
02894 if(h->slice_type_nos != AV_PICTURE_TYPE_I){
02895 unsigned max= s->picture_structure == PICT_FRAME ? 15 : 31;
02896
02897 if(h->slice_type_nos == AV_PICTURE_TYPE_B){
02898 h->direct_spatial_mv_pred= get_bits1(&s->gb);
02899 }
02900 num_ref_idx_active_override_flag= get_bits1(&s->gb);
02901
02902 if(num_ref_idx_active_override_flag){
02903 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
02904 if(h->slice_type_nos==AV_PICTURE_TYPE_B)
02905 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
02906 }
02907
02908 if (h->ref_count[0]-1 > max || h->ref_count[1]-1 > max){
02909 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
02910 h->ref_count[0] = h->ref_count[1] = 1;
02911 return AVERROR_INVALIDDATA;
02912 }
02913
02914 if(h->slice_type_nos == AV_PICTURE_TYPE_B)
02915 h->list_count= 2;
02916 else
02917 h->list_count= 1;
02918 }else
02919 h->ref_count[1]= h->ref_count[0]= h->list_count= 0;
02920
02921 if(!default_ref_list_done){
02922 ff_h264_fill_default_ref_list(h);
02923 }
02924
02925 if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
02926 h->ref_count[1]= h->ref_count[0]= 0;
02927 return -1;
02928 }
02929
02930 if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
02931 s->last_picture_ptr= &h->ref_list[0][0];
02932 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
02933 }
02934 if(h->slice_type_nos==AV_PICTURE_TYPE_B){
02935 s->next_picture_ptr= &h->ref_list[1][0];
02936 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
02937 }
02938
02939 if( (h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P )
02940 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
02941 pred_weight_table(h);
02942 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02943 implicit_weight_table(h, -1);
02944 }else {
02945 h->use_weight = 0;
02946 for (i = 0; i < 2; i++) {
02947 h->luma_weight_flag[i] = 0;
02948 h->chroma_weight_flag[i] = 0;
02949 }
02950 }
02951
02952 if(h->nal_ref_idc)
02953 ff_h264_decode_ref_pic_marking(h0, &s->gb);
02954
02955 if(FRAME_MBAFF){
02956 ff_h264_fill_mbaff_ref_list(h);
02957
02958 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02959 implicit_weight_table(h, 0);
02960 implicit_weight_table(h, 1);
02961 }
02962 }
02963
02964 if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
02965 ff_h264_direct_dist_scale_factor(h);
02966 ff_h264_direct_ref_list_init(h);
02967
02968 if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
02969 tmp = get_ue_golomb_31(&s->gb);
02970 if(tmp > 2){
02971 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
02972 return -1;
02973 }
02974 h->cabac_init_idc= tmp;
02975 }
02976
02977 h->last_qscale_diff = 0;
02978 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
02979 if(tmp>51+6*(h->sps.bit_depth_luma-8)){
02980 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
02981 return -1;
02982 }
02983 s->qscale= tmp;
02984 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
02985 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
02986
02987 if(h->slice_type == AV_PICTURE_TYPE_SP){
02988 get_bits1(&s->gb);
02989 }
02990 if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
02991 get_se_golomb(&s->gb);
02992 }
02993
02994 h->deblocking_filter = 1;
02995 h->slice_alpha_c0_offset = 52;
02996 h->slice_beta_offset = 52;
02997 if( h->pps.deblocking_filter_parameters_present ) {
02998 tmp= get_ue_golomb_31(&s->gb);
02999 if(tmp > 2){
03000 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
03001 return -1;
03002 }
03003 h->deblocking_filter= tmp;
03004 if(h->deblocking_filter < 2)
03005 h->deblocking_filter^= 1;
03006
03007 if( h->deblocking_filter ) {
03008 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
03009 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
03010 if( h->slice_alpha_c0_offset > 104U
03011 || h->slice_beta_offset > 104U){
03012 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
03013 return -1;
03014 }
03015 }
03016 }
03017
03018 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
03019 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
03020 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == AV_PICTURE_TYPE_B)
03021 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03022 h->deblocking_filter= 0;
03023
03024 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03025 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03026
03027
03028 h->deblocking_filter = 2;
03029 } else {
03030 h0->max_contexts = 1;
03031 if(!h0->single_decode_warning) {
03032 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03033 h0->single_decode_warning = 1;
03034 }
03035 if (h != h0) {
03036 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03037 return 1;
03038 }
03039 }
03040 }
03041 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
03042
03043 #if 0 //FMO
03044 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03045 slice_group_change_cycle= get_bits(&s->gb, ?);
03046 #endif
03047
03048 h0->last_slice_type = slice_type;
03049 h->slice_num = ++h0->current_slice;
03050 if(h->slice_num >= MAX_SLICES){
03051 av_log(s->avctx, AV_LOG_ERROR, "Too many slices (%d >= %d), increase MAX_SLICES and recompile\n", h->slice_num, MAX_SLICES);
03052 }
03053
03054 for(j=0; j<2; j++){
03055 int id_list[16];
03056 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03057 for(i=0; i<16; i++){
03058 id_list[i]= 60;
03059 if(h->ref_list[j][i].data[0]){
03060 int k;
03061 uint8_t *base= h->ref_list[j][i].base[0];
03062 for(k=0; k<h->short_ref_count; k++)
03063 if(h->short_ref[k]->base[0] == base){
03064 id_list[i]= k;
03065 break;
03066 }
03067 for(k=0; k<h->long_ref_count; k++)
03068 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
03069 id_list[i]= h->short_ref_count + k;
03070 break;
03071 }
03072 }
03073 }
03074
03075 ref2frm[0]=
03076 ref2frm[1]= -1;
03077 for(i=0; i<16; i++)
03078 ref2frm[i+2]= 4*id_list[i]
03079 +(h->ref_list[j][i].reference&3);
03080 ref2frm[18+0]=
03081 ref2frm[18+1]= -1;
03082 for(i=16; i<48; i++)
03083 ref2frm[i+4]= 4*id_list[(i-16)>>1]
03084 +(h->ref_list[j][i].reference&3);
03085 }
03086
03087
03088 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03089 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03090
03091 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03092 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03093 h->slice_num,
03094 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03095 first_mb_in_slice,
03096 av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03097 pps_id, h->frame_num,
03098 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03099 h->ref_count[0], h->ref_count[1],
03100 s->qscale,
03101 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03102 h->use_weight,
03103 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03104 h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03105 );
03106 }
03107
03108 return 0;
03109 }
03110
03111 int ff_h264_get_slice_type(const H264Context *h)
03112 {
03113 switch (h->slice_type) {
03114 case AV_PICTURE_TYPE_P: return 0;
03115 case AV_PICTURE_TYPE_B: return 1;
03116 case AV_PICTURE_TYPE_I: return 2;
03117 case AV_PICTURE_TYPE_SP: return 3;
03118 case AV_PICTURE_TYPE_SI: return 4;
03119 default: return -1;
03120 }
03121 }
03122
/**
 * Fill the per-macroblock caches (neighbour indices, non-zero-coefficient
 * counts, motion vectors and reference indices) needed by the deblocking
 * filter for the current MB.
 *
 * @param mb_type the mb_type of the current macroblock
 * @return non-zero if deblocking can be skipped entirely for this MB
 *         (all involved QPs are at or below the slice QP threshold),
 *         0 when the caches have been filled and filtering must run.
 */
static int fill_filter_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int top_xy, left_xy[2];
    int top_type, left_type[2];

    top_xy = mb_xy - (s->mb_stride << MB_FIELD);

    /* Select the left/top neighbour MBs. With MBAFF the effective neighbour
     * depends on whether the current and adjacent MB pairs are field or
     * frame coded, so the indices computed above may need adjusting. */
    left_xy[1] = left_xy[0] = mb_xy-1;
    if(FRAME_MBAFF){
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if(s->mb_y&1){ /* bottom MB of the vertical pair */
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[0] -= s->mb_stride;
            }
        }else{ /* top MB of the vertical pair */
            if(curr_mb_field_flag){
                /* move top_xy down one row unless the MB above is field
                 * coded (bit 7 of mb_type is the interlaced flag) */
                top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    {
        /* Fast-skip check: when the averaged QPs of this MB and each
         * available neighbour are all at or below qp_thresh, the filter
         * would leave every edge untouched, so skip cache filling. */
        int qp_thresh = h->qp_thresh;
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
           && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
            if(!FRAME_MBAFF)
                return 1;
            /* NOTE(review): the guard tests left_xy[0] but indexes
             * left_xy[1]; presumably intentional for the second field
             * neighbour — worth confirming against the spec. */
            if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
              && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
                return 1;
        }
    }

    top_type = s->current_picture.mb_type[top_xy] ;
    left_type[0] = s->current_picture.mb_type[left_xy[0]];
    left_type[1] = s->current_picture.mb_type[left_xy[1]];
    if(h->deblocking_filter == 2){
        /* deblocking_filter_idc==2: do not filter across slice boundaries,
         * so neighbours belonging to other slices count as unavailable */
        if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
    }else{
        /* 0xFFFF in slice_table marks MBs outside the picture */
        if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
    }
    h->top_type = top_type ;
    h->left_type[0]= left_type[0];
    h->left_type[1]= left_type[1];

    /* intra MBs always filter with maximum boundary strength: no need for
     * nnz/mv/ref caches */
    if(IS_INTRA(mb_type))
        return 0;

    /* copy the 4x4 non-zero-coefficient counts of the current MB */
    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);

    h->cbp= h->cbp_table[mb_xy];

    {
        int list;
        for(list=0; list<h->list_count; list++){
            int8_t *ref;
            int y, b_stride;
            int16_t (*mv_dst)[2];
            int16_t (*mv_src)[2];

            if(!USES_LIST(mb_type, list)){
                /* list unused: zero MVs, mark all refs as LIST_NOT_USED */
                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                continue;
            }

            ref = &s->current_picture.ref_index[list][4*mb_xy];
            {
                /* translate ref indices to frame numbers via the per-slice
                 * ref2frm table (offset differs for MBAFF) */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                ref += 2;
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
            }

            /* copy the 4x4 motion vectors of the current MB row by row */
            b_stride = h->b_stride;
            mv_dst   = &h->mv_cache[list][scan8[0]];
            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
            for(y=0; y<4; y++){
                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
            }

        }
    }

    /* fill the cache edges with neighbour data used for edge filtering */

    /* non-zero-coefficient counts of the top neighbour (bottom row) */
    if(top_type){
        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
    }

    /* non-zero-coefficient counts of the left neighbour (right column) */
    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
    }

    /* CAVLC + 8x8 transform stores non-zero flags in the cbp table rather
     * than the nnz arrays, so synthesize cache entries from cbp bits */
    if(!CABAC && h->pps.transform_8x8_mode){
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12;
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12;
        }

        if(IS_8x8DCT(mb_type)){
            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
        }
    }

    /* MV/ref edge rows and columns from the top and left neighbours */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= 4*top_xy + 2;
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
            }else{
                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
            }

            /* left neighbour data is only usable when both MBs share the
             * same field/frame coding (vertical MV scaling would otherwise
             * be needed) */
            if(!IS_INTERLACED(mb_type^left_type[0])){
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= 4*left_xy[0] + 1;
                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                    h->ref_cache[list][scan8[0] - 1 +16 ]=
                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                }else{
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                }
            }
        }
    }

    return 0;
}
03337
/**
 * Run the deblocking filter over macroblock columns [start_x, end_x) of the
 * just-decoded row (both rows of the pair when MBAFF), then restore the
 * slice-level fields this routine temporarily overwrites.
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;   /* saved: overwritten per-MB below */
    const int pixel_shift = h->pixel_shift;

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            /* with MBAFF filter both MBs of the vertical pair */
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                h->slice_num= h->slice_table[mb_xy];
                mb_type= s->current_picture.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                /* compute plane pointers for this MB (pixel_shift accounts
                 * for >8-bit samples, CHROMA444 for full-size chroma) */
                dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);

                if (MB_FIELD) {
                    /* field MB: double stride; for the bottom field move the
                     * base pointer up into the interleaved frame */
                    linesize   = h->mb_linesize   = s->linesize   * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
                        dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                if(fill_filter_caches(h, mb_type))
                    continue;   /* MB can be skipped entirely (low QP) */
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* restore the state clobbered in the loop above */
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03397
03398 static void predict_field_decoding_flag(H264Context *h){
03399 MpegEncContext * const s = &h->s;
03400 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03401 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03402 ? s->current_picture.mb_type[mb_xy-1]
03403 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03404 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
03405 : 0;
03406 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03407 }
03408
/**
 * Called after a macroblock row has been fully decoded: draws the finished
 * horizontal band (excluding the part the deblocker may still touch) and
 * reports decode progress for frame-threaded consumers.
 */
static void decode_finish_row(H264Context *h){
    MpegEncContext * const s = &h->s;
    int top            = 16*(s->mb_y >> FIELD_PICTURE);
    int height         = 16 << FRAME_MBAFF;
    int deblock_border = (16 + 4) << FRAME_MBAFF;
    int pic_height     = 16*s->mb_height >> FIELD_PICTURE;

    if (h->deblocking_filter) {
        /* the filter can still modify deblock_border pixels above this row,
         * so shift the reported band up; at the bottom edge extend it to
         * cover the last rows */
        if((top + height) >= pic_height)
            height += deblock_border;
        top -= deblock_border;
    }

    if (top >= pic_height || (top + height) < h->emu_edge_height)
        return;

    height = FFMIN(height, pic_height - top);
    /* merge the band with the top emulation-edge region */
    if (top < h->emu_edge_height) {
        height = top+height;
        top = 0;
    }

    ff_draw_horiz_band(s, top, height);

    if (s->dropable) return;   /* droppable picture: no one waits on its progress */

    ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
                              s->picture_structure==PICT_BOTTOM_FIELD);
}
03442
/**
 * Decode all macroblocks of one slice (entry point for avctx->execute()).
 *
 * @param arg pointer to a H264Context* (per-slice thread context)
 * @return 0 on success, -1 on decoding error; partial progress is reported
 *         to the error-resilience layer via ff_er_add_slice() either way.
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
    int lf_x_start = s->mb_x;   /* first MB column not yet loop-filtered */

    s->mb_skip_run= -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        /* CABAC entropy coding: byte-align, then hand the remaining
         * bitstream bytes to the arithmetic decoder */
        align_get_bits( &s->gb );

        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               (get_bits_left(&s->gb) + 7)/8);

        ff_h264_init_cabac_states(h);

        for(;;){
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;

            if(ret>=0) ff_h264_hl_decode_mb(h);

            /* with MBAFF also decode the bottom MB of the pair */
            if( ret >= 0 && FRAME_MBAFF ) {
                s->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            /* truncated-stream workaround: accept overreads of up to 2 bytes */
            if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                return 0;
            }
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            if( ++s->mb_x >= s->mb_width ) {
                /* end of MB row: filter it, report it, advance to next row */
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                return 0;
            }
        }

    } else {
        /* CAVLC entropy coding */
        for(;;){
            int ret = ff_h264_decode_mb_cavlc(h);

            if(ret>=0) ff_h264_hl_decode_mb(h);

            /* with MBAFF also decode the bottom MB of the pair */
            if(ret>=0 && FRAME_MBAFF){
                s->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* some trailing bits are tolerated unless error
                     * recognition is aggressive */
                    if(   get_bits_count(&s->gb) == s->gb.size_in_bits
                       || get_bits_count(&s->gb) <  s->gb.size_in_bits && s->avctx->error_recognition < FF_ER_AGGRESSIVE) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            /* bitstream exhausted mid-picture (skip runs may legally extend
             * past the last coded bit) */
            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
    }

/* NOTE(review): dead code below (#if 0); contains garbled tokens
 * ("s->?gb") — harmless since it is never compiled, but a candidate for
 * removal. Kept byte-identical here. */
#if 0
    for(;s->mb_y < s->mb_height; s->mb_y++){
        for(;s->mb_x < s->mb_width; s->mb_x++){
            int ret= decode_mb(h);

            ff_h264_hl_decode_mb(h);

            if(ret<0){
                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                if(++s->mb_y >= s->mb_height){
                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
                if(get_bits_count(s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
        s->mb_x=0;
        ff_draw_horiz_band(s, 16*s->mb_y, 16);
    }
#endif
    return -1;   /* not reached: both entropy loops always return above */
}
03624
/**
 * Run decode_slice() for all queued slice contexts: directly for a single
 * context, or via avctx->execute() for slice threading. No-op when a
 * hwaccel or VDPAU consumes the bitstream instead of the software decoder.
 *
 * @param context_count number of thread contexts with a pending slice
 */
static void execute_decode_slices(H264Context *h, int context_count){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx;
    int i;

    if (s->avctx->hwaccel)
        return;
    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        return;
    if(context_count == 1) {
        decode_slice(avctx, &h);
    } else {
        /* propagate per-frame settings into each worker context */
        for(i = 1; i < context_count; i++) {
            hx = h->thread_context[i];
            hx->s.error_recognition = avctx->error_recognition;
            hx->s.error_count = 0;
            hx->x264_build= h->x264_build;
        }

        avctx->execute(avctx, (void *)decode_slice,
                       h->thread_context, NULL, context_count, sizeof(void*));

        /* pull position/state back from the last context and accumulate
         * the error counts of all workers */
        hx = h->thread_context[context_count - 1];
        s->mb_x = hx->s.mb_x;
        s->mb_y = hx->s.mb_y;
        s->dropable = hx->s.dropable;
        s->picture_structure = hx->s.picture_structure;
        for(i = 1; i < context_count; i++)
            h->s.error_count += h->thread_context[i]->s.error_count;
    }
}
03664
03665
03666 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
03667 MpegEncContext * const s = &h->s;
03668 AVCodecContext * const avctx= s->avctx;
03669 H264Context *hx;
03670 int buf_index;
03671 int context_count;
03672 int next_avc;
03673 int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
03674 int nals_needed=0;
03675 int nal_index;
03676
03677 h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
03678 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
03679 h->current_slice = 0;
03680 if (!s->first_field)
03681 s->current_picture_ptr= NULL;
03682 ff_h264_reset_sei(h);
03683 }
03684
03685 for(;pass <= 1;pass++){
03686 buf_index = 0;
03687 context_count = 0;
03688 next_avc = h->is_avc ? 0 : buf_size;
03689 nal_index = 0;
03690 for(;;){
03691 int consumed;
03692 int dst_length;
03693 int bit_length;
03694 const uint8_t *ptr;
03695 int i, nalsize = 0;
03696 int err;
03697
03698 if(buf_index >= next_avc) {
03699 if(buf_index >= buf_size) break;
03700 nalsize = 0;
03701 for(i = 0; i < h->nal_length_size; i++)
03702 nalsize = (nalsize << 8) | buf[buf_index++];
03703 if(nalsize <= 0 || nalsize > buf_size - buf_index){
03704 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
03705 break;
03706 }
03707 next_avc= buf_index + nalsize;
03708 } else {
03709
03710 for(; buf_index + 3 < next_avc; buf_index++){
03711
03712 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
03713 break;
03714 }
03715
03716 if(buf_index+3 >= buf_size) break;
03717
03718 buf_index+=3;
03719 if(buf_index >= next_avc) continue;
03720 }
03721
03722 hx = h->thread_context[context_count];
03723
03724 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
03725 if (ptr==NULL || dst_length < 0){
03726 return -1;
03727 }
03728 i= buf_index + consumed;
03729 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
03730 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
03731 s->workaround_bugs |= FF_BUG_TRUNCATED;
03732
03733 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
03734 while(dst_length > 0 && ptr[dst_length - 1] == 0)
03735 dst_length--;
03736 }
03737 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
03738
03739 if(s->avctx->debug&FF_DEBUG_STARTCODE){
03740 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length);
03741 }
03742
03743 if (h->is_avc && (nalsize != consumed) && nalsize){
03744 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
03745 }
03746
03747 buf_index += consumed;
03748 nal_index++;
03749
03750 if(pass == 0) {
03751
03752
03753
03754 switch (hx->nal_unit_type) {
03755 case NAL_SPS:
03756 case NAL_PPS:
03757 nals_needed = nal_index;
03758 break;
03759 case NAL_IDR_SLICE:
03760 case NAL_SLICE:
03761 init_get_bits(&hx->s.gb, ptr, bit_length);
03762 if (!get_ue_golomb(&hx->s.gb))
03763 nals_needed = nal_index;
03764 }
03765 continue;
03766 }
03767
03768
03769 if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
03770 continue;
03771
03772 again:
03773 err = 0;
03774 switch(hx->nal_unit_type){
03775 case NAL_IDR_SLICE:
03776 if (h->nal_unit_type != NAL_IDR_SLICE) {
03777 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
03778 return -1;
03779 }
03780 idr(h);
03781 case NAL_SLICE:
03782 init_get_bits(&hx->s.gb, ptr, bit_length);
03783 hx->intra_gb_ptr=
03784 hx->inter_gb_ptr= &hx->s.gb;
03785 hx->s.data_partitioning = 0;
03786
03787 if((err = decode_slice_header(hx, h)))
03788 break;
03789
03790 s->current_picture_ptr->key_frame |=
03791 (hx->nal_unit_type == NAL_IDR_SLICE) ||
03792 (h->sei_recovery_frame_cnt >= 0);
03793
03794 if (h->current_slice == 1) {
03795 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
03796 decode_postinit(h, nal_index >= nals_needed);
03797 }
03798
03799 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
03800 return -1;
03801 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03802 ff_vdpau_h264_picture_start(s);
03803 }
03804
03805 if(hx->redundant_pic_count==0
03806 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03807 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03808 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03809 && avctx->skip_frame < AVDISCARD_ALL){
03810 if(avctx->hwaccel) {
03811 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
03812 return -1;
03813 }else
03814 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
03815 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
03816 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
03817 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
03818 }else
03819 context_count++;
03820 }
03821 break;
03822 case NAL_DPA:
03823 init_get_bits(&hx->s.gb, ptr, bit_length);
03824 hx->intra_gb_ptr=
03825 hx->inter_gb_ptr= NULL;
03826
03827 if ((err = decode_slice_header(hx, h)) < 0)
03828 break;
03829
03830 hx->s.data_partitioning = 1;
03831
03832 break;
03833 case NAL_DPB:
03834 init_get_bits(&hx->intra_gb, ptr, bit_length);
03835 hx->intra_gb_ptr= &hx->intra_gb;
03836 break;
03837 case NAL_DPC:
03838 init_get_bits(&hx->inter_gb, ptr, bit_length);
03839 hx->inter_gb_ptr= &hx->inter_gb;
03840
03841 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
03842 && s->context_initialized
03843 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03844 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03845 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03846 && avctx->skip_frame < AVDISCARD_ALL)
03847 context_count++;
03848 break;
03849 case NAL_SEI:
03850 init_get_bits(&s->gb, ptr, bit_length);
03851 ff_h264_decode_sei(h);
03852 break;
03853 case NAL_SPS:
03854 init_get_bits(&s->gb, ptr, bit_length);
03855 ff_h264_decode_seq_parameter_set(h);
03856
03857 if (s->flags& CODEC_FLAG_LOW_DELAY ||
03858 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
03859 s->low_delay=1;
03860
03861 if(avctx->has_b_frames < 2)
03862 avctx->has_b_frames= !s->low_delay;
03863
03864 if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
03865 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
03866 avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
03867 h->pixel_shift = h->sps.bit_depth_luma > 8;
03868
03869 ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
03870 ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
03871 dsputil_init(&s->dsp, s->avctx);
03872 } else {
03873 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
03874 return -1;
03875 }
03876 }
03877 break;
03878 case NAL_PPS:
03879 init_get_bits(&s->gb, ptr, bit_length);
03880
03881 ff_h264_decode_picture_parameter_set(h, bit_length);
03882
03883 break;
03884 case NAL_AUD:
03885 case NAL_END_SEQUENCE:
03886 case NAL_END_STREAM:
03887 case NAL_FILLER_DATA:
03888 case NAL_SPS_EXT:
03889 case NAL_AUXILIARY_SLICE:
03890 break;
03891 default:
03892 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
03893 }
03894
03895 if(context_count == h->max_contexts) {
03896 execute_decode_slices(h, context_count);
03897 context_count = 0;
03898 }
03899
03900 if (err < 0)
03901 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
03902 else if(err == 1) {
03903
03904
03905
03906
03907 h->nal_unit_type = hx->nal_unit_type;
03908 h->nal_ref_idc = hx->nal_ref_idc;
03909 hx = h;
03910 goto again;
03911 }
03912 }
03913 }
03914 if(context_count)
03915 execute_decode_slices(h, context_count);
03916 return buf_index;
03917 }
03918
03922 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
03923 if(pos==0) pos=1;
03924 if(pos+10>buf_size) pos=buf_size;
03925
03926 return pos;
03927 }
03928
/**
 * Decode one AVPacket worth of H.264 data.
 *
 * A zero-sized packet signals end of stream: the delayed-picture queue is
 * drained and one buffered frame (if any) is returned per call.
 *
 * @param avctx     codec context
 * @param data      output AVFrame (written when a picture is ready)
 * @param data_size set to sizeof(AVFrame) when *data is valid, else 0
 * @param avpkt     input packet; avpkt->size == 0 requests a flush
 * @return number of bytes consumed, or negative on error
 */
static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

    /* NOTE: the NAL_END_SEQUENCE path below jumps back here with
     * buf_size forced to 0, so end-of-sequence reuses the flush logic. */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

        /* Pick the delayed picture with the smallest POC, stopping the
         * scan at the first keyframe/MMCO reset (those must not be
         * reordered across). */
        out = h->delayed_pic[0];
        out_idx = 0;
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        /* Remove the chosen picture by shifting the rest of the queue down. */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return 0;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    /* End of sequence with no pending picture: drain the delayed queue. */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        /* No frame decoded; only an error if we were not told to skip. */
        if (avctx->skip_frame >= AVDISCARD_NONREF)
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    /* Finish the field/frame either in whole-frame mode, or in CHUNKS
     * mode once all macroblock rows of the picture have been decoded. */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* Reordering delay: nothing to output yet. */
            *data_size = 0;

        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);

    return get_consumed_bytes(s, buf_index, buf_size);
}
04009 #if 0
/**
 * Fill h->mb_avail[] with the availability of the macroblocks neighbouring
 * the current one (same slice only).
 *
 * Index layout: 0 = top-left, 1 = top, 2 = top-right, 3 = left,
 * 4 = current (always available), 5 = unused/right (always 0).
 *
 * NOTE(review): this function is inside an "#if 0" block, i.e. currently
 * dead code kept for reference.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        /* A neighbour counts as available only if it belongs to the same
         * slice as the current macroblock. */
        h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        /* First macroblock row: nothing above. */
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; /* the current macroblock itself */
    h->mb_avail[5]= 0;
}
04027 #endif
04028
04029 #ifdef TEST
04030 #undef printf
04031 #undef random
04032 #define COUNT 8000
04033 #define SIZE (COUNT*40)
/**
 * Standalone self-test (built only when TEST is defined): benchmarks and
 * sanity-checks the Exp-Golomb bitstream writer/reader used by the H.264
 * decoder. Prints a "mismatch!" line whenever a decoded value differs from
 * the value that was encoded.
 */
int main(void){
    int i;
    uint8_t temp[SIZE]; /* shared scratch bitstream buffer */
    PutBitContext pb;
    GetBitContext gb;

    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    /* --- unsigned Exp-Golomb: encode 0..COUNT-1, then decode and verify --- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        /* peek the next 24 bits purely for the diagnostic printout */
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);

        }
        STOP_TIMER("get_ue_golomb");
    }

    /* --- signed Exp-Golomb: encode -COUNT/2..COUNT/2-1, decode, verify --- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);

        }
        STOP_TIMER("get_se_golomb");
    }

/* The DCT/quantizer/NAL tests below are disabled: they reference symbols
 * (h, qp, src1_block, encode_nal, ...) that no longer exist in this file
 * and would not compile. Kept for reference only. */
#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;

        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        /* scale coefficients to compensate for the transform's norm */
        for(j=0; j<16; j++){

            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }

        h->h264dsp.h264_idct_add(ref, block, 4);

        /* accumulate squared error of the round trip */
        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        /* random non-zero payload */
        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        /* sprinkle in exactly 'zeros' zero bytes at distinct positions */
        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
04202 #endif
04203
04204
04205 av_cold void ff_h264_free_context(H264Context *h)
04206 {
04207 int i;
04208
04209 free_tables(h, 1);
04210
04211 for(i = 0; i < MAX_SPS_COUNT; i++)
04212 av_freep(h->sps_buffers + i);
04213
04214 for(i = 0; i < MAX_PPS_COUNT; i++)
04215 av_freep(h->pps_buffers + i);
04216 }
04217
04218 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04219 {
04220 H264Context *h = avctx->priv_data;
04221 MpegEncContext *s = &h->s;
04222
04223 ff_h264_free_context(h);
04224
04225 MPV_common_end(s);
04226
04227
04228
04229 return 0;
04230 }
04231
/** Human-readable names for the H.264 profiles this decoder recognizes;
 *  the list is terminated by FF_PROFILE_UNKNOWN. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE, "Baseline" },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" },
    { FF_PROFILE_H264_MAIN, "Main" },
    { FF_PROFILE_H264_EXTENDED, "Extended" },
    { FF_PROFILE_H264_HIGH, "High" },
    { FF_PROFILE_H264_HIGH_10, "High 10" },
    { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" },
    { FF_PROFILE_H264_HIGH_422, "High 4:2:2" },
    { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" },
    { FF_PROFILE_H264_HIGH_444, "High 4:4:4" },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" },
    { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" },
    { FF_PROFILE_UNKNOWN },
};
04248
04249 AVCodec ff_h264_decoder = {
04250 "h264",
04251 AVMEDIA_TYPE_VIDEO,
04252 CODEC_ID_H264,
04253 sizeof(H264Context),
04254 ff_h264_decode_init,
04255 NULL,
04256 ff_h264_decode_end,
04257 decode_frame,
04258 CODEC_CAP_DR1 | CODEC_CAP_DELAY |
04259 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
04260 .flush= flush_dpb,
04261 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
04262 .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
04263 .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
04264 .profiles = NULL_IF_CONFIG_SMALL(profiles),
04265 };
04266
04267 #if CONFIG_H264_VDPAU_DECODER
04268 AVCodec ff_h264_vdpau_decoder = {
04269 "h264_vdpau",
04270 AVMEDIA_TYPE_VIDEO,
04271 CODEC_ID_H264,
04272 sizeof(H264Context),
04273 ff_h264_decode_init,
04274 NULL,
04275 ff_h264_decode_end,
04276 decode_frame,
04277 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
04278 .flush= flush_dpb,
04279 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
04280 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
04281 .profiles = NULL_IF_CONFIG_SMALL(profiles),
04282 };
04283 #endif