Libav 0.7.1
libavcodec/h264.c
Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "dsputil.h"
00031 #include "avcodec.h"
00032 #include "mpegvideo.h"
00033 #include "h264.h"
00034 #include "h264data.h"
00035 #include "h264_mvpred.h"
00036 #include "golomb.h"
00037 #include "mathops.h"
00038 #include "rectangle.h"
00039 #include "thread.h"
00040 #include "vdpau_internal.h"
00041 #include "libavutil/avassert.h"
00042 
00043 #include "cabac.h"
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
/* rem6[q] == q % 6 for q in 0..QP_MAX_NUM; used when splitting a QP into
 * its dequant-table index (remainder) and shift (quotient, see div6). */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
00051 
/* div6[q] == q / 6 for q in 0..QP_MAX_NUM; companion of rem6 above. */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00055 
/* Candidate pixel formats offered for hwaccel/JPEG-range 4:2:0 H.264
 * decoding, in priority order; the list is terminated by PIX_FMT_NONE. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};
00062 
00063 void ff_h264_write_back_intra_pred_mode(H264Context *h){
00064     int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
00065 
00066     AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
00067     mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
00068     mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
00069     mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
00070 }
00071 
/**
 * Check whether the top/left neighbours needed by the cached intra4x4
 * prediction modes are available, and substitute DC variants where they
 * are not.
 * @return 0 on success, -1 if a mode strictly requires a missing neighbour
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* Substitution tables indexed by prediction mode: -1 = mode is invalid
     * without that neighbour, 0 = mode is fine as-is, otherwise the
     * replacement mode to use. */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    /* top row of 4x4 blocks lacks a top neighbour */
    if(!(h->top_samples_available&0x8000)){
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    /* some of the four left-edge 4x4 blocks lack a left neighbour */
    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};  /* availability bit per row */
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
} //FIXME cleanup like ff_h264_check_intra_pred_mode
00110 
/**
 * Check whether the neighbours needed by a 16x16/chroma intra prediction
 * mode are available, substituting DC variants where they are not.
 * @param mode requested prediction mode
 * @return the (possibly substituted) mode, or -1 on error
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* Substitution tables indexed by mode: -1 = invalid without that
     * neighbour, otherwise the replacement mode. */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    /* mode is converted to unsigned for the comparison, so negative
     * values are also rejected here */
    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            /* only one of the two left halves is available */
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00145 
/**
 * Decode a NAL unit: parse the one-byte NAL header into h->nal_ref_idc /
 * h->nal_unit_type, then unescape the RBSP by removing 0x000003
 * emulation-prevention bytes.
 * @param src input bitstream, starting at the NAL header byte
 * @param dst_length set to the number of decoded payload bytes
 * @param consumed set to the number of input bytes used (incl. header)
 * @param length number of input bytes available after src
 * @return pointer to the decoded payload (src+1 when no escapes occurred,
 *         otherwise an internal rbsp buffer), or NULL on allocation failure
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;

    /* First pass: word-at-a-time scan for a possible escape sequence or
     * startcode (any 0x00 byte is a candidate); RS is how far to back up
     * after an over-wide step. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    /* Second pass: copy payload, dropping the 0x03 of each 0x000003 run
     * and stopping at the next startcode. */
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
00231 
00236 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00237     int v= *src;
00238     int r;
00239 
00240     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00241 
00242     for(r=1; r<9; r++){
00243         if(v&1) return r;
00244         v>>=1;
00245     }
00246     return 0;
00247 }
00248 
00249 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00250                                  int y_offset, int list){
00251     int raw_my= h->mv_cache[list][ scan8[n] ][1];
00252     int filter_height= (raw_my&3) ? 2 : 0;
00253     int full_my= (raw_my>>2) + y_offset;
00254     int top = full_my - filter_height, bottom = full_my + height + filter_height;
00255 
00256     return FFMAX(abs(top), bottom);
00257 }
00258 
/**
 * For each list predicting block n, record in refs[list][ref] the lowest
 * luma row that motion compensation will read from that reference, and
 * count newly-seen references in nrefs (used later to wait on decode
 * progress under frame threading).
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                               int y_offset, int list0, int list1, int *nrefs){
    MpegEncContext * const s = &h->s;
    int my;

    y_offset += 16*(s->mb_y >> MB_FIELD);

    if(list0){
        int ref_n = h->ref_cache[0][ scan8[n] ];
        Picture *ref= &h->ref_list[0][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
            if (refs[0][ref_n] < 0) nrefs[0] += 1;  // first use of this reference
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
        }
    }

    if(list1){
        int ref_n = h->ref_cache[1][ scan8[n] ];
        Picture *ref= &h->ref_list[1][ref_n];

        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
            if (refs[1][ref_n] < 0) nrefs[1] += 1;  // first use of this reference
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
        }
    }
}
00293 
/**
 * Wait until all reference rows needed to motion-compensate the current
 * macroblock have been decoded (frame-threading): collect, per reference
 * picture, the lowest row each partition reads, then block on that row's
 * decode progress.
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    int refs[2][48];
    int nrefs[2] = {0};
    int ref, list;

    memset(refs, -1, sizeof(refs));  /* -1 = reference not used */

    /* walk the partition tree and record the lowest row per reference */
    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    /* now wait for the recorded rows, converting frame/field row
     * coordinates as needed */
    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF;  /* MBAFF rows are in pair units */
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00383 
#if 0
/* NOTE(review): disabled reference implementation of the luma DC forward
 * transform; it uses a 'stride' symbol presumably supplied by a macro
 * elsewhere -- confirm before re-enabling. */

static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
00423 
00424 #undef xStride
00425 #undef stride
00426 
#if 0
/* NOTE(review): disabled reference implementation of the 2x2 chroma DC
 * forward transform (butterfly on the four DC coefficients). */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00449 
/**
 * Motion-compensate one partition from one reference list.
 * Luma is interpolated at quarter-pel precision via qpix_op; chroma at
 * eighth-pel via chroma_op (or with qpix_op in 4:4:4 mode). When the
 * motion vector reaches outside the padded picture, the source is first
 * copied through edge_emu_buffer with replicated borders.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
                           int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);  /* fractional part selects the qpel function */
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel interpolation reads extra pixels, so less edge padding is usable */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma444){
        /* 4:4:4: chroma planes have luma geometry, reuse the luma qpel path */
        src_cb = pic->data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
00534 
/**
 * Motion-compensate one partition without weighting: the list0 prediction
 * is written with the put functions, then the list1 prediction (if any)
 * is blended in with the avg functions.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance destinations to the partition's position inside the MB */
    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);

        /* a following list1 prediction must average with what list0 wrote */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);
    }
}
00573 
/**
 * Motion-compensate one partition with weighted prediction.
 * Bidirectional blocks predict both lists into scratch buffers and blend
 * them with the biweight functions (implicit weights when use_weight==2,
 * explicit otherwise); unidirectional blocks predict in place and apply
 * the single-list weights.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;

    /* advance destinations to the partition's position inside the MB */
    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        /* 4:4:4: chroma planes have luma geometry, use the luma weights */
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);

        if(h->use_weight == 2){
            /* implicit weighting: weights sum to 64, denom is 5 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit weighting from the slice header tables */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        /* unidirectional: predict in place, then apply per-list weights */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma444);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00647 
00648 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
00649                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00650                            int x_offset, int y_offset,
00651                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00652                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00653                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00654                            int list0, int list1, int pixel_shift, int chroma444){
00655     if((h->use_weight==2 && list0 && list1
00656         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00657        || h->use_weight==1)
00658         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00659                          x_offset, y_offset, qpix_put, chroma_put,
00660                          weight_op[0], weight_op[3], weight_avg[0],
00661                          weight_avg[3], list0, list1, pixel_shift, chroma444);
00662     else
00663         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00664                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00665                     chroma_avg, list0, list1, pixel_shift, chroma444);
00666 }
00667 
/**
 * Prefetch reference-picture pixels for an upcoming macroblock into the
 * cache, based on the first motion vector of the given list. Skipped when
 * the partition has no reference in this list (refn < 0).
 */
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        if(chroma444){
            /* chroma planes share luma geometry in 4:4:4 */
            s->dsp.prefetch(src[1]+off, s->linesize, 4);
            s->dsp.prefetch(src[2]+off, s->linesize, 4);
        }else{
            off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
        }
    }
}
00688 
/**
 * Motion-compensate one inter-coded macroblock: dispatch mc_part() calls
 * according to the macroblock partitioning (16x16, 16x8, 8x16, or 8x8 with
 * per-8x8 sub-partitions down to 4x4).
 *
 * @param qpix_put   luma qpel "put" MC functions, indexed by log2 block size
 * @param chroma_put chroma "put" MC functions
 * @param qpix_avg   luma qpel "avg" MC functions (bi-prediction)
 * @param chroma_avg chroma "avg" MC functions (bi-prediction)
 * @param weight_op  weighted-prediction functions, per partition size
 * @param weight_avg biweighted-prediction functions, per partition size
 * @param pixel_shift 0 for 8-bit samples, 1 for high-bit-depth (16-bit) samples
 * @param chroma444  nonzero for 4:4:4 streams (chroma planes use luma MC path)
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    /* With frame threading, the reference rows we read from may still be
     * decoding in another thread; wait until they are available. */
    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma444);

    if(IS_16X16(mb_type)){
        /* single 16x16 partition */
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
    }else if(IS_16X8(mb_type)){
        /* two 16x8 partitions, stacked vertically */
        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else if(IS_8X16(mb_type)){
        /* two 8x16 partitions, side by side */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* four 8x8 partitions, each with its own sub-partitioning */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma444);
                }
            }
        }
    }

    /* prefetch reference data for list 1 (list 0 was prefetched above) */
    prefetch_motion(h, 1, pixel_shift, chroma444);
}
00789 
/* Generate a thin wrapper around hl_motion() with the pixel_shift argument
 * fixed at compile time (sh = 0 for 8-bit samples, sh = 1 for 16-bit
 * samples), so the always-inlined body can constant-fold the shift and
 * drop the dead bit-depth branches. */
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                       uint8_t *dest_y, \
                                       uint8_t *dest_cb, uint8_t *dest_cr, \
                                       qpel_mc_func (*qpix_put)[16], \
                                       h264_chroma_mc_func (*chroma_put), \
                                       qpel_mc_func (*qpix_avg)[16], \
                                       h264_chroma_mc_func (*chroma_avg), \
                                       h264_weight_func *weight_op, \
                                       h264_biweight_func *weight_avg, \
                                       int chroma444) \
{ \
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
00807 
/**
 * Free the per-context tables allocated by ff_h264_alloc_tables() and
 * context_init(), including the per-thread buffers of every slice-thread
 * sub-context.
 *
 * @param free_rbsp if nonzero, also free the RBSP de-escaping buffers;
 *                  pass 0 to keep them across a table reallocation
 */
static void free_tables(H264Context *h, int free_rbsp){
    int i;
    H264Context *hx;

    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;   // pointed into slice_table_base, now dangling
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for(i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        if (free_rbsp){
            av_freep(&hx->rbsp_buffer[1]);
            av_freep(&hx->rbsp_buffer[0]);
            hx->rbsp_buffer_size[0] = 0;
            hx->rbsp_buffer_size[1] = 0;
        }
        // slot 0 is the main context itself; only free the clones
        if (i) av_freep(&h->thread_context[i]);
    }
}
00841 
/**
 * Build the 8x8 dequantization tables for all six scaling-list indices.
 * Lists whose PPS scaling matrix equals an earlier one share that table
 * (pointer aliasing) instead of being recomputed.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    /* QP range grows by 6 per extra bit of luma depth */
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                // identical matrix already computed: alias it
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                /* (x>>3)|((x&7)<<3) transposes the 8x8 position */
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00867 
/**
 * Build the 4x4 dequantization tables for all six scaling-list indices,
 * sharing tables between lists with identical PPS scaling matrices
 * (same scheme as init_dequant8_coeff_table()).
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    /* QP range grows by 6 per extra bit of luma depth */
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                // identical matrix already computed: alias it
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for(x=0; x<16; x++)
                /* (x>>2)|((x<<2)&0xF) transposes the 4x4 position */
                h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
00892 
00893 static void init_dequant_tables(H264Context *h){
00894     int i,x;
00895     init_dequant4_coeff_table(h);
00896     if(h->pps.transform_8x8_mode)
00897         init_dequant8_coeff_table(h);
00898     if(h->sps.transform_bypass){
00899         for(i=0; i<6; i++)
00900             for(x=0; x<16; x++)
00901                 h->dequant4_coeff[i][0][x] = 1<<6;
00902         if(h->pps.transform_8x8_mode)
00903             for(i=0; i<6; i++)
00904                 for(x=0; x<64; x++)
00905                     h->dequant8_coeff[i][0][x] = 1<<6;
00906     }
00907 }
00908 
00909 
/**
 * Allocate the per-picture-size tables shared by all decoding threads
 * (prediction modes, nnz counts, slice table, mvd/direct tables, and the
 * mb_xy -> b_xy / br_xy index maps) and initialize the dequant tables on
 * first use.
 *
 * @return 0 on success, -1 on allocation failure (partially allocated
 *         tables are freed via free_tables())
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra mb row as guard for edge-emulated accesses */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* per-thread row tables: 2 mb rows per slice thread */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice"; slice_table is offset so negative mb neighbors
     * of the first row land in valid (guard) memory */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO the br table wraps every two mb rows */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    /* allocated later in ff_h264_frame_start(), once linesize is known */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00953 
/**
 * Point thread sub-context @p dst at the tables of the main context
 * @p src.  Shared tables are aliased directly; per-thread-row tables
 * (intra4x4_pred_mode, mvd_table) get an offset of two mb rows per
 * thread index @p i.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i){
    MpegEncContext * const s = &src->s;
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
    dst->non_zero_count           = src->non_zero_count;
    dst->slice_table              = src->slice_table;
    dst->cbp_table                = src->cbp_table;
    dst->mb2b_xy                  = src->mb2b_xy;
    dst->mb2br_xy                 = src->mb2br_xy;
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
    dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
    dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
    dst->direct_table             = src->direct_table;
    dst->list_counts              = src->list_counts;

    /* per-thread scratchpad is allocated lazily in ff_h264_frame_start() */
    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
}
00974 
/**
 * Allocate buffers that are NOT shared between slice threads and
 * pre-mark the ref_cache positions that can never hold a valid
 * reference as unavailable.
 *
 * @return 0 on success, -1 on allocation failure (free_tables() will
 *         release anything allocated here)
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
00990 
00991 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
00992 
/**
 * One-time initialization shared by decoder init paths: copy dimensions
 * from the AVCodecContext, set up DSP/prediction functions for 8-bit
 * depth (re-done later if the SPS signals a higher depth), and install
 * flat default scaling matrices.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    ff_h264dsp_init(&h->h264dsp, 8);
    ff_h264_pred_init(&h->hpc, s->codec_id, 8);

    h->dequant_coeff_pps= -1;
    s->unrestricted_mv=1;
    s->decode=1; //FIXME

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    /* flat (all 16) scaling matrices until a PPS/SPS provides real ones */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
01012 
01013 int ff_h264_decode_extradata(H264Context *h)
01014 {
01015     AVCodecContext *avctx = h->s.avctx;
01016 
01017     if(avctx->extradata[0] == 1){
01018         int i, cnt, nalsize;
01019         unsigned char *p = avctx->extradata;
01020 
01021         h->is_avc = 1;
01022 
01023         if(avctx->extradata_size < 7) {
01024             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01025             return -1;
01026         }
01027         /* sps and pps in the avcC always have length coded with 2 bytes,
01028            so put a fake nal_length_size = 2 while parsing them */
01029         h->nal_length_size = 2;
01030         // Decode sps from avcC
01031         cnt = *(p+5) & 0x1f; // Number of sps
01032         p += 6;
01033         for (i = 0; i < cnt; i++) {
01034             nalsize = AV_RB16(p) + 2;
01035             if(decode_nal_units(h, p, nalsize) < 0) {
01036                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01037                 return -1;
01038             }
01039             p += nalsize;
01040         }
01041         // Decode pps from avcC
01042         cnt = *(p++); // Number of pps
01043         for (i = 0; i < cnt; i++) {
01044             nalsize = AV_RB16(p) + 2;
01045             if (decode_nal_units(h, p, nalsize) < 0) {
01046                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01047                 return -1;
01048             }
01049             p += nalsize;
01050         }
01051         // Now store right nal length size, that will be use to parse all other nals
01052         h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
01053     } else {
01054         h->is_avc = 0;
01055         if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
01056             return -1;
01057     }
01058     return 0;
01059 }
01060 
/**
 * AVCodec.init callback: set up the H.264 decoder context, parse any
 * extradata, and derive initial delay/low_delay settings.
 *
 * @return 0 on success, -1 if the extradata could not be decoded
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
    s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* assume 8-bit until an SPS says otherwise */
    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 timestamps tick per field; double the time base once */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h))
        return -1;

    /* the SPS may allow us to bound the reorder delay already */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01109 
/* Test whether pointer a lies inside the object of given size at b. */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
/**
 * Rebase an array of Picture pointers from one MpegEncContext to another
 * (used when syncing frame-threading contexts); NULL entries stay NULL.
 */
static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
{
    int i;

    for (i=0; i<count; i++){
        /* every source pointer must be NULL or point into the old context */
        assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
                IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
                !from[i]));
        to[i] = REBASE_PICTURE(from[i], new_base, old_base);
    }
}
01122 
/**
 * Deep-copy an array of heap-allocated parameter sets (SPS/PPS) between
 * thread contexts.  For each slot: free the destination if the source slot
 * is empty, allocate the destination if only the source is set, then copy
 * the payload.
 *
 * If av_malloc() fails, the destination slot stays NULL and the copy is
 * skipped (the original code would memcpy() into the NULL pointer).
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i=0; i<count; i++){
        if (to[i] && !from[i]) av_freep(&to[i]);
        else if (from[i] && !to[i]) to[i] = av_malloc(size);

        /* to[i] may still be NULL here on allocation failure */
        if (from[i] && to[i]) memcpy(to[i], from[i], size);
    }
}
01134 
01135 static int decode_init_thread_copy(AVCodecContext *avctx){
01136     H264Context *h= avctx->priv_data;
01137 
01138     if (!avctx->is_copy) return 0;
01139     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01140     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01141 
01142     return 0;
01143 }
01144 
/* Copy the contiguous member range [start_field, end_field) from one
 * H264Context to another; relies on both contexts having identical layout. */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading callback: synchronize decoder state from the source
 * thread's context (h1/s1) into the destination (h/s) before the
 * destination starts decoding the next frame.
 *
 * @return 0 on success, a negative AVERROR on failure
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    //FIXME handle width/height changing
    if(!inited){
        /* first sync: blindly copy everything after the MpegEncContext,
         * then undo the pointer sharing that copy created */
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        /* rbsp buffers must be per-thread, not shared with h1 */
        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        // frame_start may not be called for the next thread (if it's decoding a bottom field)
        // so this has to be allocated here
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    //extradata/NAL handling
    h->is_avc          = h1->is_avc;

    //SPS/PPS
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps             = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps             = h1->pps;

    //Dequantization matrices
    //FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    /* the coeff pointers alias into the buffers; rebase them onto h's own
     * buffers instead of pointing into h1 */
    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    //POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    //reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list,  intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* Picture pointers must be rebased onto this thread's picture array */
    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    if(!s->dropable) {
        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb     = h->poc_msb;
        h->prev_poc_lsb     = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return 0;
}
01237 
/**
 * Start decoding a new frame: allocate/initialize the current picture via
 * MPV_frame_start(), compute the per-block destination offsets, allocate
 * per-thread scratchpads, and reset the slice table.
 *
 * @return 0 on success, -1 if MPV_frame_start() fails
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* luma offsets: [0..15] frame-coded, [48..63] field-coded (doubled stride) */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    /* chroma offsets, same frame/field split, using uvlinesize */
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < thread_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01298 
/**
 * Run after a frame has been fully decoded: derive interlacing and
 * top_field_first flags (preferring picture-timing SEI over the decoding
 * process), insert the frame into the delayed-picture reorder buffer, and
 * select the next picture to output (h->next_output_pic).
 *
 * @param setup_finished if nonzero, signal the frame-threading setup as
 *                       complete once output selection is done
 */
static void decode_postinit(H264Context *h, int setup_finished){
    MpegEncContext * const s = &h->s;
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;

    if (h->next_output_pic) return;

    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
        //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
        //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
        //to find this yet, so we assume the worst for now.
        //if (setup_finished)
        //    ff_thread_finish_setup(s->avctx);
        return;
    }

    cur->interlaced_frame = 0;
    cur->repeat_pict = 0;

    /* Signal interlacing information externally. */
    /* Prioritize picture timing SEI information over used decoding process if it exists. */

    if(h->sps.pic_struct_present_flag){
        switch (h->sei_pic_struct)
        {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
                cur->interlaced_frame = 1;
            else
                // try to flag soft telecine progressive
                cur->interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            // Signal the possibility of telecined film externally (pic_struct 5,6)
            // From these hints, let the applications decide if they apply deinterlacing.
            cur->repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            // Force progressive here, as doubling interlaced frame is a bad idea.
            cur->repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->repeat_pict = 4;
            break;
        }

        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
    }else{
        /* Derive interlacing flag from used decoding process. */
        cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
    }
    h->prev_interlaced_frame = cur->interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]){
        /* Derive top_field_first from field pocs. */
        cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
    }else{
        if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
            /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->top_field_first = 1;
            else
                cur->top_field_first = 0;
        }else{
            /* Most likely progressive */
            cur->top_field_first = 0;
        }
    }

    //FIXME do something with unavailable reference frames

    /* Sort B-frames into display order */

    if(h->sps.bitstream_restriction_flag
       && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
       && !h->sps.bitstream_restriction_flag){
        /* no reorder bound signalled: assume the worst-case delay */
        s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
        s->low_delay= 0;
    }

    pics = 0;
    while(h->delayed_pic[pics]) pics++;

    assert(pics <= MAX_DELAYED_PIC_COUNT);

    h->delayed_pic[pics++] = cur;
    if(cur->reference == 0)
        cur->reference = DELAYED_PIC_REF;

    /* pick the lowest-poc picture up to (not past) a keyframe/mmco reset */
    out = h->delayed_pic[0];
    out_idx = 0;
    for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
        if(h->delayed_pic[i]->poc < out->poc){
            out = h->delayed_pic[i];
            out_idx = i;
        }
    if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc= INT_MIN;
    out_of_order = out->poc < h->next_outputed_poc;

    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        { }
    else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
       || (s->low_delay &&
        ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
         || cur->pict_type == AV_PICTURE_TYPE_B)))
    {
        /* the stream needs more reorder delay than we assumed: grow it */
        s->low_delay = 0;
        s->avctx->has_b_frames++;
    }

    if(out_of_order || pics > s->avctx->has_b_frames){
        out->reference &= ~DELAYED_PIC_REF;
        out->owner2 = s; // for frame threading, the owner must be the second field's thread
                         // or else the first thread can release the picture and reuse it unsafely
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
    }
    if(!out_of_order && pics > s->avctx->has_b_frames){
        h->next_output_pic = out;
        if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    }else{
        av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
    }

    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
}
01457 
/**
 * Save the bottom edge rows of the current macroblock into h->top_borders
 * so they can serve as the "top neighbor" samples for the macroblock row
 * below (they are later swapped back in by xchg_mb_border(); presumably
 * because deblocking modifies the picture rows in place — see callers).
 *
 * @param src_y/src_cb/src_cr  plane pointers to the current MB's top-left sample
 * @param chroma444            nonzero for 4:4:4 (full-size chroma blocks)
 * @param simple               nonzero for the fast path (no MBAFF/GRAY handling)
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;  // 1 for >8-bit (2 bytes/sample), else 0

    // step back one row: the row above the MB is what gets saved below
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            // bottom MB of a pair
            if(!MB_MBAFF){
                // frame MB inside an MBAFF frame: save its last line into slot 0
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else {
                        // 4:2:0: chroma blocks are 8 lines tall
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            // top field MB of a pair: use border slot 0
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            // 4:4:4: chroma planes share the luma geometry (uvlinesize == linesize here)
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01532 
/**
 * Exchange (xchg != 0) or copy the pixels of the row above the current MB
 * with the unfiltered rows saved in h->top_borders. Callers invoke this with
 * xchg=1 before intra prediction and xchg=0 afterwards, so prediction sees
 * the saved (pre-deblocking) neighbor samples — see hl_decode_mb_internal().
 *
 * @param xchg       1 to swap rows, 0 to copy the border back into the picture
 * @param chroma444  nonzero for 4:4:4 (chroma handled like luma)
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;  // border of the MB to the left (for top-left samples)
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;  // bottom frame MB of a pair: nothing to exchange
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    // deblocking mode 2 filters only within a slice; otherwise any picture
    // edge decides whether top/top-left neighbors were deblocked
    if(h->deblocking_filter == 2) {
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    // point one row up and one sample left of the MB's top-left corner
    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

// swap 16 bytes (8 samples at 16-bit, 16 at 8-bit) or copy a into b when !xchg
#define XCHG(a,b,xchg)\
    if (pixel_shift) {\
        if (xchg) {\
            AV_SWAP64(b+0,a+0);\
            AV_SWAP64(b+8,a+8);\
        } else {\
            AV_COPY128(b,a); \
        }\
    } else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            // top-right samples live in the next MB's border slot
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01617 
01618 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01619     if (high_bit_depth) {
01620         return AV_RN32A(((int32_t*)mb) + index);
01621     } else
01622         return AV_RN16A(mb + index);
01623 }
01624 
01625 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01626     if (high_bit_depth) {
01627         AV_WN32A(((int32_t*)mb) + index, value);
01628     } else
01629         AV_WN16A(mb + index, value);
01630 }
01631 
/**
 * Perform intra prediction for one plane of an intra macroblock and add the
 * residual where the transform is bypassed or trivially foldable.
 *
 * @param p  plane index: 0 = luma; 1/2 select the chroma planes when this is
 *           reused for 4:4:4 (see hl_decode_mb_444_internal), offsetting the
 *           block offsets, coefficients and qscale accordingly
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;  // each plane owns 16 entries of the offset table
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                // 8x8 transform: four 8x8 sub-blocks, predicted then residual-added
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        // High 4:4:4 lossless: fused prediction+residual for V/H modes
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                    (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            // single nonzero DC coefficient gets the cheaper dc_add
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                // 4x4 transform: sixteen 4x4 sub-blocks
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                // replicate the last available top sample across top-right
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        // intra 16x16: one full-MB prediction, then dequantize the DC plane
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    // lossless: scatter the DC coefficients back to each 4x4 block's slot 0
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01734 
/**
 * Add the transform residuals of one plane to the (already predicted or
 * motion-compensated) destination. Handles intra16x16, inter, and the
 * lossless transform-bypass variants; intra4x4 residuals are added in
 * hl_decode_mb_predict_luma() instead.
 *
 * @param p  plane index (0 = luma; 1/2 for extra planes in 4:4:4)
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        // lossless V/H modes: prediction and residual were fused
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            // add residual if the block has coded coefficients or a nonzero DC
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){
                // inter MB with at least one coded luma 8x8 group
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        }else{
            // SVQ3 path
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01784 
/**
 * Reconstruct one macroblock (non-4:4:4 path): intra prediction or motion
 * compensation, residual addition for luma and chroma, and PCM passthrough.
 *
 * @param simple       compile-time flag: 1 selects the fast path with
 *                     MBAFF/GRAY/PCM/lossless handling compiled out
 * @param pixel_shift  1 for >8-bit content (2 bytes/sample), else 0
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);

    // destination pointers for this MB in the current picture
    dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        // field MB: double the strides and, for the bottom field, step the
        // destination back up so field lines interleave correctly
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            // remap cached reference indices to per-field references
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        // PCM MB: raw samples were stored in h->mb by the parser; copy them out
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    // monochrome: fill chroma with mid-grey
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = 1 << (bit_depth - 1);
                        }
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            for (i=0; i<16; i++) {
                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    // monochrome: mid-grey chroma
                    for (i = 0; i < 8; i++) {
                        memset(dest_cb + i*uvlinesize, 128, 8);
                        memset(dest_cr + i*uvlinesize, 128, 8);
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
                        memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
                    }
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            // swap in unfiltered border samples for prediction, swap out after
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        }else if(is_h264){
            // inter MB: motion compensation (separate 8/16-bit instantiations)
            if (pixel_shift) {
                hl_motion_16(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 0);
            } else
                hl_motion_8(h, dest_y, dest_cb, dest_cr,
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 0);
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

        // chroma residuals (cbp bits 4-5 signal coded chroma)
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                        }
                    }
                }
            }else{
                if(is_h264){
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                }else{
                    // SVQ3 chroma path
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                uint8_t * const ptr= dest[j-1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                        }
                    }
                }
            }
        }
    }
    // reset the coefficient buffer for the next macroblock
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
01974 
/**
 * Reconstruct one macroblock for 4:4:4 content, where all three planes share
 * the luma geometry and are decoded by the "luma" helpers with p = 0..2.
 *
 * @param simple       compile-time flag: 1 compiles out MBAFF/GRAY/PCM/lossless
 * @param pixel_shift  1 for >8-bit content (2 bytes/sample), else 0
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    // with GRAY only the luma plane is processed
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        // all planes use the luma stride in 4:4:4
        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        // field MB: doubled stride; bottom field steps back to interleave lines
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            // remap cached reference indices to per-field references
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        // PCM MB: copy raw samples stored in h->mb (16x16 per plane)
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            // swap in unfiltered border samples for prediction, swap out after
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            // inter MB: motion compensation (separate 8/16-bit instantiations)
            if (pixel_shift) {
                hl_motion_16(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
            } else
                hl_motion_8(h, dest[0], dest[1], dest[2],
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 1);
        }

        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    // reset the coefficient buffer for the next macroblock
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02077 
/**
 * Generate the bit-depth-specialized "simple" macroblock decoders
 * hl_decode_mb_simple_8() and hl_decode_mb_simple_16().  Each calls
 * hl_decode_mb_internal() with simple=1 and a fixed pixel shift (sh:
 * 0 for 8-bit samples, 1 for 16-bit storage of >8-bit samples).
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);
02087 
/**
 * Generic macroblock decode path: calls hl_decode_mb_internal() with
 * simple=0 and the pixel shift taken from the context at run time.
 * Used when the specialized "simple" variants do not apply (see
 * ff_h264_hl_decode_mb()).
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02094 
/**
 * Generic 4:4:4 macroblock decode path: hl_decode_mb_444_internal()
 * with simple=0 and the context's run-time pixel shift.
 */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02098 
/**
 * Fast 4:4:4 macroblock decode path: hl_decode_mb_444_internal() with
 * simple=1 and pixel shift hard-coded to 0 (8-bit samples only).
 */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02102 
02103 void ff_h264_hl_decode_mb(H264Context *h){
02104     MpegEncContext * const s = &h->s;
02105     const int mb_xy= h->mb_xy;
02106     const int mb_type= s->current_picture.mb_type[mb_xy];
02107     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02108 
02109     if (CHROMA444) {
02110         if(is_complex || h->pixel_shift)
02111             hl_decode_mb_444_complex(h);
02112         else
02113             hl_decode_mb_444_simple(h);
02114     } else if (is_complex) {
02115         hl_decode_mb_complex(h);
02116     } else if (h->pixel_shift) {
02117         hl_decode_mb_simple_16(h);
02118     } else
02119         hl_decode_mb_simple_8(h);
02120 }
02121 
02122 static int pred_weight_table(H264Context *h){
02123     MpegEncContext * const s = &h->s;
02124     int list, i;
02125     int luma_def, chroma_def;
02126 
02127     h->use_weight= 0;
02128     h->use_weight_chroma= 0;
02129     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02130     if(h->sps.chroma_format_idc)
02131         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02132     luma_def = 1<<h->luma_log2_weight_denom;
02133     chroma_def = 1<<h->chroma_log2_weight_denom;
02134 
02135     for(list=0; list<2; list++){
02136         h->luma_weight_flag[list]   = 0;
02137         h->chroma_weight_flag[list] = 0;
02138         for(i=0; i<h->ref_count[list]; i++){
02139             int luma_weight_flag, chroma_weight_flag;
02140 
02141             luma_weight_flag= get_bits1(&s->gb);
02142             if(luma_weight_flag){
02143                 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
02144                 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
02145                 if(   h->luma_weight[i][list][0] != luma_def
02146                    || h->luma_weight[i][list][1] != 0) {
02147                     h->use_weight= 1;
02148                     h->luma_weight_flag[list]= 1;
02149                 }
02150             }else{
02151                 h->luma_weight[i][list][0]= luma_def;
02152                 h->luma_weight[i][list][1]= 0;
02153             }
02154 
02155             if(h->sps.chroma_format_idc){
02156                 chroma_weight_flag= get_bits1(&s->gb);
02157                 if(chroma_weight_flag){
02158                     int j;
02159                     for(j=0; j<2; j++){
02160                         h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
02161                         h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
02162                         if(   h->chroma_weight[i][list][j][0] != chroma_def
02163                            || h->chroma_weight[i][list][j][1] != 0) {
02164                             h->use_weight_chroma= 1;
02165                             h->chroma_weight_flag[list]= 1;
02166                         }
02167                     }
02168                 }else{
02169                     int j;
02170                     for(j=0; j<2; j++){
02171                         h->chroma_weight[i][list][j][0]= chroma_def;
02172                         h->chroma_weight[i][list][j][1]= 0;
02173                     }
02174                 }
02175             }
02176         }
02177         if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
02178     }
02179     h->use_weight= h->use_weight || h->use_weight_chroma;
02180     return 0;
02181 }
02182 
02188 static void implicit_weight_table(H264Context *h, int field){
02189     MpegEncContext * const s = &h->s;
02190     int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
02191 
02192     for (i = 0; i < 2; i++) {
02193         h->luma_weight_flag[i]   = 0;
02194         h->chroma_weight_flag[i] = 0;
02195     }
02196 
02197     if(field < 0){
02198         cur_poc = s->current_picture_ptr->poc;
02199     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
02200        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
02201         h->use_weight= 0;
02202         h->use_weight_chroma= 0;
02203         return;
02204     }
02205         ref_start= 0;
02206         ref_count0= h->ref_count[0];
02207         ref_count1= h->ref_count[1];
02208     }else{
02209         cur_poc = s->current_picture_ptr->field_poc[field];
02210         ref_start= 16;
02211         ref_count0= 16+2*h->ref_count[0];
02212         ref_count1= 16+2*h->ref_count[1];
02213     }
02214 
02215     h->use_weight= 2;
02216     h->use_weight_chroma= 2;
02217     h->luma_log2_weight_denom= 5;
02218     h->chroma_log2_weight_denom= 5;
02219 
02220     for(ref0=ref_start; ref0 < ref_count0; ref0++){
02221         int poc0 = h->ref_list[0][ref0].poc;
02222         for(ref1=ref_start; ref1 < ref_count1; ref1++){
02223             int w = 32;
02224             if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
02225                 int poc1 = h->ref_list[1][ref1].poc;
02226                 int td = av_clip(poc1 - poc0, -128, 127);
02227                 if(td){
02228                     int tb = av_clip(cur_poc - poc0, -128, 127);
02229                     int tx = (16384 + (FFABS(td) >> 1)) / td;
02230                     int dist_scale_factor = (tb*tx + 32) >> 8;
02231                     if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
02232                         w = 64 - dist_scale_factor;
02233                 }
02234             }
02235             if(field<0){
02236                 h->implicit_weight[ref0][ref1][0]=
02237                 h->implicit_weight[ref0][ref1][1]= w;
02238             }else{
02239                 h->implicit_weight[ref0][ref1][field]=w;
02240             }
02241         }
02242     }
02243 }
02244 
/**
 * Handle an IDR (instantaneous decoder refresh): drop all reference
 * pictures and reset the frame_num/POC prediction state so decoding
 * restarts from a clean slate.
 */
static void idr(H264Context *h){
    ff_h264_remove_all_refs(h);
    h->prev_frame_num= 0;
    h->prev_frame_num_offset= 0;
    h->prev_poc_msb=
    h->prev_poc_lsb= 0;
}
02255 
02256 /* forget old pics after a seek */
02257 static void flush_dpb(AVCodecContext *avctx){
02258     H264Context *h= avctx->priv_data;
02259     int i;
02260     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
02261         if(h->delayed_pic[i])
02262             h->delayed_pic[i]->reference= 0;
02263         h->delayed_pic[i]= NULL;
02264     }
02265     h->outputed_poc=h->next_outputed_poc= INT_MIN;
02266     h->prev_interlaced_frame = 1;
02267     idr(h);
02268     if(h->s.current_picture_ptr)
02269         h->s.current_picture_ptr->reference= 0;
02270     h->s.first_field= 0;
02271     ff_h264_reset_sei(h);
02272     ff_mpeg_flush(avctx);
02273 }
02274 
/**
 * Compute the picture order count (POC) of the current picture/field
 * from the just-parsed slice header values, following the three POC
 * decoding processes of H.264 spec 8.2.1.
 *
 * Reads h->frame_num, h->poc_lsb, h->delta_poc* plus the prev_* state
 * carried over from the previous picture; writes field_poc[] and poc
 * of s->current_picture_ptr.
 *
 * @return 0 (no failure paths)
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* FrameNumOffset grows by max_frame_num each time frame_num wraps. */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        /* Type 0: only the POC LSBs are in the bitstream; reconstruct
         * the MSB part by detecting wrap-around relative to the
         * previous reference picture (8.2.1.1). */
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        /* In frame coding the bottom field may carry an extra delta. */
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* Type 1: POC derived from frame_num plus SPS-signalled
         * expected deltas over a repeating POC cycle (8.2.1.2). */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        /* Non-reference pictures count between the signalled points. */
        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            /* Sum whole cycles, then the partial cycle we are in. */
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* Type 2: POC follows decoding order; non-reference pictures
         * take the value just below their successor (8.2.1.3). */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* Store the POC only for the field(s) actually being decoded; the
     * picture POC is the minimum of the two field POCs. */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02351 
02352 
/**
 * Initialize the per-context coefficient scan tables.
 *
 * The stored tables are the spec scan orders with row and column
 * indices swapped (the T() macros exchange the two halves of each
 * 4-bit or 6-bit position index, i.e. a transpose of the scan).
 */
static void init_scan_tables(H264Context *h){
    int i;
    for(i=0; i<16; i++){
#define T(x) (x>>2) | ((x<<2) & 0xF)
        h->zigzag_scan[i] = T(zigzag_scan[i]);
        h-> field_scan[i] = T( field_scan[i]);
#undef T
    }
    for(i=0; i<64; i++){
#define T(x) (x>>3) | ((x&7)<<3)
        h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
        h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
        h->field_scan8x8[i]        = T(field_scan8x8[i]);
        h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
#undef T
    }
    /* The *_q0 pointers select the tables used when qscale is 0: with
     * transform bypass the untransposed source tables are used,
     * otherwise they just alias the transposed ones above. */
    if(h->sps.transform_bypass){ //FIXME same ugly
        h->zigzag_scan_q0          = zigzag_scan;
        h->zigzag_scan8x8_q0       = ff_zigzag_direct;
        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
        h->field_scan_q0           = field_scan;
        h->field_scan8x8_q0        = field_scan8x8;
        h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
    }else{
        h->zigzag_scan_q0          = h->zigzag_scan;
        h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
        h->field_scan_q0           = h->field_scan;
        h->field_scan8x8_q0        = h->field_scan8x8;
        h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
    }
}
02388 
/**
 * Finish decoding the current field/frame: report completion to
 * waiting frame threads, run reference picture marking, complete any
 * hwaccel/VDPAU frame, run error resilience (frames only) and close
 * the MpegEncContext picture.
 *
 * @param in_setup nonzero when invoked from slice-header setup of the
 *                 next field (see decode_slice_header()) rather than
 *                 at the true end of decoding; the thread progress
 *                 report is skipped in that case.
 */
static void field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    s->mb_y= 0;

    /* Signal the last MB row so frame-threaded consumers can proceed. */
    if (!in_setup && !s->dropable)
        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                 s->picture_structure==PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    /* Without frame threading this state update happens here; with it,
     * only the in_setup call path performs it. */
    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        if(!s->dropable) {
            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;
}
02439 
02443 static void clone_slice(H264Context *dst, H264Context *src)
02444 {
02445     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
02446     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
02447     dst->s.current_picture      = src->s.current_picture;
02448     dst->s.linesize             = src->s.linesize;
02449     dst->s.uvlinesize           = src->s.uvlinesize;
02450     dst->s.first_field          = src->s.first_field;
02451 
02452     dst->prev_poc_msb           = src->prev_poc_msb;
02453     dst->prev_poc_lsb           = src->prev_poc_lsb;
02454     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
02455     dst->prev_frame_num         = src->prev_frame_num;
02456     dst->short_ref_count        = src->short_ref_count;
02457 
02458     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
02459     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
02460     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
02461     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
02462 
02463     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
02464     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
02465 }
02466 
02474 int ff_h264_get_profile(SPS *sps)
02475 {
02476     int profile = sps->profile_idc;
02477 
02478     switch(sps->profile_idc) {
02479     case FF_PROFILE_H264_BASELINE:
02480         // constraint_set1_flag set to 1
02481         profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02482         break;
02483     case FF_PROFILE_H264_HIGH_10:
02484     case FF_PROFILE_H264_HIGH_422:
02485     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02486         // constraint_set3_flag set to 1
02487         profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02488         break;
02489     }
02490 
02491     return profile;
02492 }
02493 
02503 static int decode_slice_header(H264Context *h, H264Context *h0){
02504     MpegEncContext * const s = &h->s;
02505     MpegEncContext * const s0 = &h0->s;
02506     unsigned int first_mb_in_slice;
02507     unsigned int pps_id;
02508     int num_ref_idx_active_override_flag;
02509     unsigned int slice_type, tmp, i, j;
02510     int default_ref_list_done = 0;
02511     int last_pic_structure;
02512 
02513     s->dropable= h->nal_ref_idc == 0;
02514 
02515     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
02516     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02517         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02518         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02519     }else{
02520         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02521         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02522     }
02523 
02524     first_mb_in_slice= get_ue_golomb(&s->gb);
02525 
02526     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
02527         if(h0->current_slice && FIELD_PICTURE){
02528             field_end(h, 1);
02529         }
02530 
02531         h0->current_slice = 0;
02532         if (!s0->first_field)
02533             s->current_picture_ptr= NULL;
02534     }
02535 
02536     slice_type= get_ue_golomb_31(&s->gb);
02537     if(slice_type > 9){
02538         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02539         return -1;
02540     }
02541     if(slice_type > 4){
02542         slice_type -= 5;
02543         h->slice_type_fixed=1;
02544     }else
02545         h->slice_type_fixed=0;
02546 
02547     slice_type= golomb_to_pict_type[ slice_type ];
02548     if (slice_type == AV_PICTURE_TYPE_I
02549         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02550         default_ref_list_done = 1;
02551     }
02552     h->slice_type= slice_type;
02553     h->slice_type_nos= slice_type & 3;
02554 
02555     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
02556 
02557     pps_id= get_ue_golomb(&s->gb);
02558     if(pps_id>=MAX_PPS_COUNT){
02559         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02560         return -1;
02561     }
02562     if(!h0->pps_buffers[pps_id]) {
02563         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02564         return -1;
02565     }
02566     h->pps= *h0->pps_buffers[pps_id];
02567 
02568     if(!h0->sps_buffers[h->pps.sps_id]) {
02569         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02570         return -1;
02571     }
02572     h->sps = *h0->sps_buffers[h->pps.sps_id];
02573 
02574     s->avctx->profile = ff_h264_get_profile(&h->sps);
02575     s->avctx->level   = h->sps.level_idc;
02576     s->avctx->refs    = h->sps.ref_frame_count;
02577 
02578     if(h == h0 && h->dequant_coeff_pps != pps_id){
02579         h->dequant_coeff_pps = pps_id;
02580         init_dequant_tables(h);
02581     }
02582 
02583     s->mb_width= h->sps.mb_width;
02584     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02585 
02586     h->b_stride=  s->mb_width*4;
02587 
02588     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02589     if(h->sps.frame_mbs_only_flag)
02590         s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02591     else
02592         s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02593 
02594     if (s->context_initialized
02595         && (   s->width != s->avctx->width || s->height != s->avctx->height
02596             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02597         if(h != h0) {
02598             av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02599             return -1;   // width / height changed during parallelized decoding
02600         }
02601         free_tables(h, 0);
02602         flush_dpb(s->avctx);
02603         MPV_common_end(s);
02604     }
02605     if (!s->context_initialized) {
02606         if (h != h0) {
02607             av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02608             return -1;
02609         }
02610 
02611         avcodec_set_dimensions(s->avctx, s->width, s->height);
02612         s->avctx->sample_aspect_ratio= h->sps.sar;
02613         av_assert0(s->avctx->sample_aspect_ratio.den);
02614 
02615         h->s.avctx->coded_width = 16*s->mb_width;
02616         h->s.avctx->coded_height = 16*s->mb_height;
02617 
02618         if(h->sps.video_signal_type_present_flag){
02619             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02620             if(h->sps.colour_description_present_flag){
02621                 s->avctx->color_primaries = h->sps.color_primaries;
02622                 s->avctx->color_trc       = h->sps.color_trc;
02623                 s->avctx->colorspace      = h->sps.colorspace;
02624             }
02625         }
02626 
02627         if(h->sps.timing_info_present_flag){
02628             int64_t den= h->sps.time_scale;
02629             if(h->x264_build < 44U)
02630                 den *= 2;
02631             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02632                       h->sps.num_units_in_tick, den, 1<<30);
02633         }
02634 
02635         switch (h->sps.bit_depth_luma) {
02636             case 9 :
02637                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
02638                 break;
02639             case 10 :
02640                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
02641                 break;
02642             default:
02643                 if (CHROMA444){
02644                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02645                 }else{
02646                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02647                                                              s->avctx->codec->pix_fmts ?
02648                                                              s->avctx->codec->pix_fmts :
02649                                                              s->avctx->color_range == AVCOL_RANGE_JPEG ?
02650                                                              hwaccel_pixfmt_list_h264_jpeg_420 :
02651                                                              ff_hwaccel_pixfmt_list_420);
02652                 }
02653         }
02654 
02655         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02656 
02657         if (MPV_common_init(s) < 0) {
02658             av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02659             return -1;
02660         }
02661         s->first_field = 0;
02662         h->prev_interlaced_frame = 1;
02663 
02664         init_scan_tables(h);
02665         if (ff_h264_alloc_tables(h) < 0) {
02666             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02667             return AVERROR(ENOMEM);
02668         }
02669 
02670         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02671             if (context_init(h) < 0) {
02672                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02673                 return -1;
02674             }
02675         } else {
02676             for(i = 1; i < s->avctx->thread_count; i++) {
02677                 H264Context *c;
02678                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02679                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02680                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02681                 c->h264dsp = h->h264dsp;
02682                 c->sps = h->sps;
02683                 c->pps = h->pps;
02684                 c->pixel_shift = h->pixel_shift;
02685                 init_scan_tables(c);
02686                 clone_tables(c, h, i);
02687             }
02688 
02689             for(i = 0; i < s->avctx->thread_count; i++)
02690                 if (context_init(h->thread_context[i]) < 0) {
02691                     av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02692                     return -1;
02693                 }
02694         }
02695     }
02696 
02697     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02698 
02699     h->mb_mbaff = 0;
02700     h->mb_aff_frame = 0;
02701     last_pic_structure = s0->picture_structure;
02702     if(h->sps.frame_mbs_only_flag){
02703         s->picture_structure= PICT_FRAME;
02704     }else{
02705         if(get_bits1(&s->gb)) { //field_pic_flag
02706             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
02707         } else {
02708             s->picture_structure= PICT_FRAME;
02709             h->mb_aff_frame = h->sps.mb_aff;
02710         }
02711     }
02712     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02713 
02714     if(h0->current_slice == 0){
02715         // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away
02716         if(h->frame_num != h->prev_frame_num) {
02717             int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
02718 
02719             if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02720 
02721             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02722                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02723                 if (unwrap_prev_frame_num < 0)
02724                     unwrap_prev_frame_num += max_frame_num;
02725 
02726                 h->prev_frame_num = unwrap_prev_frame_num;
02727             }
02728         }
02729 
02730         while(h->frame_num !=  h->prev_frame_num &&
02731               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
02732             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02733             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02734             if (ff_h264_frame_start(h) < 0)
02735                 return -1;
02736             h->prev_frame_num++;
02737             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02738             s->current_picture_ptr->frame_num= h->prev_frame_num;
02739             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02740             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02741             ff_generate_sliding_window_mmcos(h);
02742             ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
02743             /* Error concealment: if a ref is missing, copy the previous ref in its place.
02744              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
02745              * about there being no actual duplicates.
02746              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
02747              * concealing a lost frame, this probably isn't noticable by comparison, but it should
02748              * be fixed. */
02749             if (h->short_ref_count) {
02750                 if (prev) {
02751                     av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
02752                                   (const uint8_t**)prev->data, prev->linesize,
02753                                   s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
02754                     h->short_ref[0]->poc = prev->poc+2;
02755                 }
02756                 h->short_ref[0]->frame_num = h->prev_frame_num;
02757             }
02758         }
02759 
02760         /* See if we have a decoded first field looking for a pair... */
02761         if (s0->first_field) {
02762             assert(s0->current_picture_ptr);
02763             assert(s0->current_picture_ptr->data[0]);
02764             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
02765 
02766             /* figure out if we have a complementary field pair */
02767             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02768                 /*
02769                  * Previous field is unmatched. Don't display it, but let it
02770                  * remain for reference if marked as such.
02771                  */
02772                 s0->current_picture_ptr = NULL;
02773                 s0->first_field = FIELD_PICTURE;
02774 
02775             } else {
02776                 if (h->nal_ref_idc &&
02777                         s0->current_picture_ptr->reference &&
02778                         s0->current_picture_ptr->frame_num != h->frame_num) {
02779                     /*
02780                      * This and previous field were reference, but had
02781                      * different frame_nums. Consider this field first in
02782                      * pair. Throw away previous field except for reference
02783                      * purposes.
02784                      */
02785                     s0->first_field = 1;
02786                     s0->current_picture_ptr = NULL;
02787 
02788                 } else {
02789                     /* Second field in complementary pair */
02790                     s0->first_field = 0;
02791                 }
02792             }
02793 
02794         } else {
02795             /* Frame or first field in a potentially complementary pair */
02796             assert(!s0->current_picture_ptr);
02797             s0->first_field = FIELD_PICTURE;
02798         }
02799 
02800         if(!FIELD_PICTURE || s0->first_field) {
02801             if (ff_h264_frame_start(h) < 0) {
02802                 s0->first_field = 0;
02803                 return -1;
02804             }
02805         } else {
02806             ff_release_unused_pictures(s, 0);
02807         }
02808     }
02809     if(h != h0)
02810         clone_slice(h, h0);
02811 
02812     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
02813 
02814     assert(s->mb_num == s->mb_width * s->mb_height);
02815     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
02816        first_mb_in_slice                    >= s->mb_num){
02817         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
02818         return -1;
02819     }
02820     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
02821     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
02822     if (s->picture_structure == PICT_BOTTOM_FIELD)
02823         s->resync_mb_y = s->mb_y = s->mb_y + 1;
02824     assert(s->mb_y < s->mb_height);
02825 
02826     if(s->picture_structure==PICT_FRAME){
02827         h->curr_pic_num=   h->frame_num;
02828         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
02829     }else{
02830         h->curr_pic_num= 2*h->frame_num + 1;
02831         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
02832     }
02833 
02834     if(h->nal_unit_type == NAL_IDR_SLICE){
02835         get_ue_golomb(&s->gb); /* idr_pic_id */
02836     }
02837 
02838     if(h->sps.poc_type==0){
02839         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
02840 
02841         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
02842             h->delta_poc_bottom= get_se_golomb(&s->gb);
02843         }
02844     }
02845 
02846     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
02847         h->delta_poc[0]= get_se_golomb(&s->gb);
02848 
02849         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
02850             h->delta_poc[1]= get_se_golomb(&s->gb);
02851     }
02852 
02853     init_poc(h);
02854 
02855     if(h->pps.redundant_pic_cnt_present){
02856         h->redundant_pic_count= get_ue_golomb(&s->gb);
02857     }
02858 
02859     //set defaults, might be overridden a few lines later
02860     h->ref_count[0]= h->pps.ref_count[0];
02861     h->ref_count[1]= h->pps.ref_count[1];
02862 
02863     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
02864         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
02865             h->direct_spatial_mv_pred= get_bits1(&s->gb);
02866         }
02867         num_ref_idx_active_override_flag= get_bits1(&s->gb);
02868 
02869         if(num_ref_idx_active_override_flag){
02870             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
02871             if(h->slice_type_nos==AV_PICTURE_TYPE_B)
02872                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
02873 
02874             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
02875                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
02876                 h->ref_count[0]= h->ref_count[1]= 1;
02877                 return -1;
02878             }
02879         }
02880         if(h->slice_type_nos == AV_PICTURE_TYPE_B)
02881             h->list_count= 2;
02882         else
02883             h->list_count= 1;
02884     }else
02885         h->list_count= 0;
02886 
02887     if(!default_ref_list_done){
02888         ff_h264_fill_default_ref_list(h);
02889     }
02890 
02891     if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0)
02892         return -1;
02893 
02894     if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
02895         s->last_picture_ptr= &h->ref_list[0][0];
02896         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
02897     }
02898     if(h->slice_type_nos==AV_PICTURE_TYPE_B){
02899         s->next_picture_ptr= &h->ref_list[1][0];
02900         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
02901     }
02902 
02903     if(   (h->pps.weighted_pred          && h->slice_type_nos == AV_PICTURE_TYPE_P )
02904        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
02905         pred_weight_table(h);
02906     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02907         implicit_weight_table(h, -1);
02908     }else {
02909         h->use_weight = 0;
02910         for (i = 0; i < 2; i++) {
02911             h->luma_weight_flag[i]   = 0;
02912             h->chroma_weight_flag[i] = 0;
02913         }
02914     }
02915 
02916     if(h->nal_ref_idc)
02917         ff_h264_decode_ref_pic_marking(h0, &s->gb);
02918 
02919     if(FRAME_MBAFF){
02920         ff_h264_fill_mbaff_ref_list(h);
02921 
02922         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02923             implicit_weight_table(h, 0);
02924             implicit_weight_table(h, 1);
02925         }
02926     }
02927 
02928     if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
02929         ff_h264_direct_dist_scale_factor(h);
02930     ff_h264_direct_ref_list_init(h);
02931 
02932     if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
02933         tmp = get_ue_golomb_31(&s->gb);
02934         if(tmp > 2){
02935             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
02936             return -1;
02937         }
02938         h->cabac_init_idc= tmp;
02939     }
02940 
02941     h->last_qscale_diff = 0;
02942     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
02943     if(tmp>51+6*(h->sps.bit_depth_luma-8)){
02944         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
02945         return -1;
02946     }
02947     s->qscale= tmp;
02948     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
02949     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
02950     //FIXME qscale / qp ... stuff
02951     if(h->slice_type == AV_PICTURE_TYPE_SP){
02952         get_bits1(&s->gb); /* sp_for_switch_flag */
02953     }
02954     if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
02955         get_se_golomb(&s->gb); /* slice_qs_delta */
02956     }
02957 
02958     h->deblocking_filter = 1;
02959     h->slice_alpha_c0_offset = 52;
02960     h->slice_beta_offset = 52;
02961     if( h->pps.deblocking_filter_parameters_present ) {
02962         tmp= get_ue_golomb_31(&s->gb);
02963         if(tmp > 2){
02964             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
02965             return -1;
02966         }
02967         h->deblocking_filter= tmp;
02968         if(h->deblocking_filter < 2)
02969             h->deblocking_filter^= 1; // 1<->0
02970 
02971         if( h->deblocking_filter ) {
02972             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
02973             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
02974             if(   h->slice_alpha_c0_offset > 104U
02975                || h->slice_beta_offset     > 104U){
02976                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
02977                 return -1;
02978             }
02979         }
02980     }
02981 
02982     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
02983        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
02984        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B)
02985        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
02986         h->deblocking_filter= 0;
02987 
02988     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
02989         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
02990             /* Cheat slightly for speed:
02991                Do not bother to deblock across slices. */
02992             h->deblocking_filter = 2;
02993         } else {
02994             h0->max_contexts = 1;
02995             if(!h0->single_decode_warning) {
02996                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
02997                 h0->single_decode_warning = 1;
02998             }
02999             if (h != h0) {
03000                 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03001                 return 1;
03002             }
03003         }
03004     }
03005     h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
03006 
03007 #if 0 //FMO
03008     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03009         slice_group_change_cycle= get_bits(&s->gb, ?);
03010 #endif
03011 
03012     h0->last_slice_type = slice_type;
03013     h->slice_num = ++h0->current_slice;
03014     if(h->slice_num >= MAX_SLICES){
03015         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
03016     }
03017 
03018     for(j=0; j<2; j++){
03019         int id_list[16];
03020         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03021         for(i=0; i<16; i++){
03022             id_list[i]= 60;
03023             if(h->ref_list[j][i].data[0]){
03024                 int k;
03025                 uint8_t *base= h->ref_list[j][i].base[0];
03026                 for(k=0; k<h->short_ref_count; k++)
03027                     if(h->short_ref[k]->base[0] == base){
03028                         id_list[i]= k;
03029                         break;
03030                     }
03031                 for(k=0; k<h->long_ref_count; k++)
03032                     if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
03033                         id_list[i]= h->short_ref_count + k;
03034                         break;
03035                     }
03036             }
03037         }
03038 
03039         ref2frm[0]=
03040         ref2frm[1]= -1;
03041         for(i=0; i<16; i++)
03042             ref2frm[i+2]= 4*id_list[i]
03043                           +(h->ref_list[j][i].reference&3);
03044         ref2frm[18+0]=
03045         ref2frm[18+1]= -1;
03046         for(i=16; i<48; i++)
03047             ref2frm[i+4]= 4*id_list[(i-16)>>1]
03048                           +(h->ref_list[j][i].reference&3);
03049     }
03050 
03051     //FIXME: fix draw_edges+PAFF+frame threads
03052     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03053     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03054 
03055     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03056         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03057                h->slice_num,
03058                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03059                first_mb_in_slice,
03060                av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03061                pps_id, h->frame_num,
03062                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03063                h->ref_count[0], h->ref_count[1],
03064                s->qscale,
03065                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03066                h->use_weight,
03067                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03068                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03069                );
03070     }
03071 
03072     return 0;
03073 }
03074 
03075 int ff_h264_get_slice_type(const H264Context *h)
03076 {
03077     switch (h->slice_type) {
03078     case AV_PICTURE_TYPE_P:  return 0;
03079     case AV_PICTURE_TYPE_B:  return 1;
03080     case AV_PICTURE_TYPE_I:  return 2;
03081     case AV_PICTURE_TYPE_SP: return 3;
03082     case AV_PICTURE_TYPE_SI: return 4;
03083     default:         return -1;
03084     }
03085 }
03086 
/**
 * Fill the caches needed by the loop filter for the macroblock at h->mb_xy:
 * neighbour MB coordinates/types, non-zero-count cache, CBP, and the
 * per-list reference-index and motion-vector caches.
 * @param mb_type type of the current macroblock
 * @return non-zero if deblocking can be skipped entirely for this MB
 *         (QP of this MB and its neighbours below the filter threshold),
 *         0 if the caches were filled and the MB must be filtered
 */
static int fill_filter_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int top_xy, left_xy[2];
    int top_type, left_type[2];

    /* Neighbour above; with MB_FIELD set, the vertical neighbour is two
     * rows up in the MB grid. */
    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);

    //FIXME deblocking could skip the intra and nnz parts.

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* Left neighbours: [0] for the top half, [1] for the bottom half of
     * the current MB; they differ only in MBAFF frames when the field
     * flag of the left pair differs from the current pair's. */
    left_xy[1] = left_xy[0] = mb_xy-1;
    if(FRAME_MBAFF){
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        if(s->mb_y&1){
            /* bottom MB of a pair */
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[0] -= s->mb_stride;
            }
        }else{
            /* top MB of a pair: a field MB above a frame pair takes the
             * bottom MB of the pair above as its top neighbour (mask is
             * all-ones when the interlaced bit of mb_type[top_xy] is 0) */
            if(curr_mb_field_flag){
                top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    {
        //for sufficiently low qp, filtering wouldn't do anything
        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
            if(!FRAME_MBAFF)
                return 1;
            /* MBAFF: also check the second left neighbour and the MB two
             * rows above before declaring the filter a no-op.
             * NOTE(review): the guard tests left_xy[0] but indexes
             * left_xy[1] — verify this is intentional. */
            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
                return 1;
        }
    }

    top_type     = s->current_picture.mb_type[top_xy]    ;
    left_type[0] = s->current_picture.mb_type[left_xy[0]];
    left_type[1] = s->current_picture.mb_type[left_xy[1]];
    if(h->deblocking_filter == 2){
        /* filter type 2: do not filter across slice boundaries, so drop
         * neighbours that belong to a different slice */
        if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
    }else{
        /* otherwise only drop neighbours outside the picture
         * (slice_table entry 0xFFFF) */
        if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
    }
    h->top_type    = top_type    ;
    h->left_type[0]= left_type[0];
    h->left_type[1]= left_type[1];

    /* intra MBs always get the strongest filter; no caches needed */
    if(IS_INTRA(mb_type))
        return 0;

    /* copy the current MB's 4x4 luma non-zero counts into the cache */
    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);

    h->cbp= h->cbp_table[mb_xy];

    /* fill the reference-index and motion-vector caches for the current MB */
    {
        int list;
        for(list=0; list<h->list_count; list++){
            int8_t *ref;
            int y, b_stride;
            int16_t (*mv_dst)[2];
            int16_t (*mv_src)[2];

            if(!USES_LIST(mb_type, list)){
                /* list unused: zero MVs and mark all refs LIST_NOT_USED */
                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                continue;
            }

            /* translate per-8x8-block ref indices through ref2frm so the
             * filter compares frame identities, not list positions */
            ref = &s->current_picture.ref_index[list][4*mb_xy];
            {
                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                ref += 2;
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
            }

            /* copy the 4x4 grid of motion vectors, one row at a time */
            b_stride = h->b_stride;
            mv_dst   = &h->mv_cache[list][scan8[0]];
            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
            for(y=0; y<4; y++){
                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
            }

        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    /* non-zero counts of the bottom row of the MB above */
    if(top_type){
        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
    }

    /* non-zero counts of the right column of the MB to the left */
    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
    }

    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
    if(!CABAC && h->pps.transform_8x8_mode){
        /* derive the filter's per-4x4 NNZ from the stored 8x8 CBP bits */
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
        }

        if(IS_8x8DCT(mb_type)){
            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
        }
    }

    /* fill the neighbour rows/columns of the MV and ref caches; note the
     * neighbour's ref2frm table is looked up via its own slice number */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= 4*top_xy + 2;
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
            }else{
                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
            }

            /* the left column is only usable when current and left MB
             * have the same field/frame coding */
            if(!IS_INTERLACED(mb_type^left_type[0])){
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= 4*left_xy[0] + 1;
                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                    h->ref_cache[list][scan8[0] - 1 +16 ]=
                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                }else{
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                }
            }
        }
    }

    return 0;
}
03301 
/**
 * Run the deblocking filter over macroblock columns [start_x, end_x) of the
 * current row (both rows of the pair when FRAME_MBAFF).  Temporarily
 * overwrites per-MB decoder state (slice_num, list_count, mb_x/mb_y,
 * chroma_qp) and restores slice-level values before returning.
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;  // saved: the loop reuses h as scratch state
    const int pixel_shift = h->pixel_shift;

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            /* with MBAFF, filter both MBs of the vertical pair */
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                /* reload the per-MB state that was current when this MB
                 * was decoded */
                h->slice_num= h->slice_table[mb_xy];
                mb_type= s->current_picture.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                /* destination pointers for this MB in the current picture;
                 * chroma MB height is 16 with 4:4:4, 8 otherwise */
                dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                    //FIXME simplify above

                if (MB_FIELD) {
                    /* field MB: double the stride and, for the bottom
                     * field, step back to its first line */
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){ //FIXME move out of this function?
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
                        dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                /* non-zero return: filtering is a no-op for this MB */
                if(fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* restore the slice-level state clobbered above */
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03361 
03362 static void predict_field_decoding_flag(H264Context *h){
03363     MpegEncContext * const s = &h->s;
03364     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03365     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03366                 ? s->current_picture.mb_type[mb_xy-1]
03367                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03368                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
03369                 : 0;
03370     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03371 }
03372 
03376 static void decode_finish_row(H264Context *h){
03377     MpegEncContext * const s = &h->s;
03378     int top = 16*(s->mb_y >> FIELD_PICTURE);
03379     int height = 16 << FRAME_MBAFF;
03380     int deblock_border = (16 + 4) << FRAME_MBAFF;
03381     int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03382 
03383     if (h->deblocking_filter) {
03384         if((top + height) >= pic_height)
03385             height += deblock_border;
03386 
03387         top -= deblock_border;
03388     }
03389 
03390     if (top >= pic_height || (top + height) < h->emu_edge_height)
03391         return;
03392 
03393     height = FFMIN(height, pic_height - top);
03394     if (top < h->emu_edge_height) {
03395         height = top+height;
03396         top = 0;
03397     }
03398 
03399     ff_draw_horiz_band(s, top, height);
03400 
03401     if (s->dropable) return;
03402 
03403     ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03404                              s->picture_structure==PICT_BOTTOM_FIELD);
03405 }
03406 
03407 static int decode_slice(struct AVCodecContext *avctx, void *arg){
03408     H264Context *h = *(void**)arg;
03409     MpegEncContext * const s = &h->s;
03410     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
03411     int lf_x_start = s->mb_x;
03412 
03413     s->mb_skip_run= -1;
03414 
03415     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
03416                     (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
03417 
03418     if( h->pps.cabac ) {
03419         /* realign */
03420         align_get_bits( &s->gb );
03421 
03422         /* init cabac */
03423         ff_init_cabac_states( &h->cabac);
03424         ff_init_cabac_decoder( &h->cabac,
03425                                s->gb.buffer + get_bits_count(&s->gb)/8,
03426                                (get_bits_left(&s->gb) + 7)/8);
03427 
03428         ff_h264_init_cabac_states(h);
03429 
03430         for(;;){
03431 //START_TIMER
03432             int ret = ff_h264_decode_mb_cabac(h);
03433             int eos;
03434 //STOP_TIMER("decode_mb_cabac")
03435 
03436             if(ret>=0) ff_h264_hl_decode_mb(h);
03437 
03438             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
03439                 s->mb_y++;
03440 
03441                 ret = ff_h264_decode_mb_cabac(h);
03442 
03443                 if(ret>=0) ff_h264_hl_decode_mb(h);
03444                 s->mb_y--;
03445             }
03446             eos = get_cabac_terminate( &h->cabac );
03447 
03448             if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
03449                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03450                 if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
03451                 return 0;
03452             }
03453             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
03454                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
03455                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03456                 return -1;
03457             }
03458 
03459             if( ++s->mb_x >= s->mb_width ) {
03460                 loop_filter(h, lf_x_start, s->mb_x);
03461                 s->mb_x = lf_x_start = 0;
03462                 decode_finish_row(h);
03463                 ++s->mb_y;
03464                 if(FIELD_OR_MBAFF_PICTURE) {
03465                     ++s->mb_y;
03466                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
03467                         predict_field_decoding_flag(h);
03468                 }
03469             }
03470 
03471             if( eos || s->mb_y >= s->mb_height ) {
03472                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03473                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03474                 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
03475                 return 0;
03476             }
03477         }
03478 
03479     } else {
03480         for(;;){
03481             int ret = ff_h264_decode_mb_cavlc(h);
03482 
03483             if(ret>=0) ff_h264_hl_decode_mb(h);
03484 
03485             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
03486                 s->mb_y++;
03487                 ret = ff_h264_decode_mb_cavlc(h);
03488 
03489                 if(ret>=0) ff_h264_hl_decode_mb(h);
03490                 s->mb_y--;
03491             }
03492 
03493             if(ret<0){
03494                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
03495                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03496                 return -1;
03497             }
03498 
03499             if(++s->mb_x >= s->mb_width){
03500                 loop_filter(h, lf_x_start, s->mb_x);
03501                 s->mb_x = lf_x_start = 0;
03502                 decode_finish_row(h);
03503                 ++s->mb_y;
03504                 if(FIELD_OR_MBAFF_PICTURE) {
03505                     ++s->mb_y;
03506                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
03507                         predict_field_decoding_flag(h);
03508                 }
03509                 if(s->mb_y >= s->mb_height){
03510                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03511 
03512                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
03513                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03514 
03515                         return 0;
03516                     }else{
03517                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03518 
03519                         return -1;
03520                     }
03521                 }
03522             }
03523 
03524             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
03525                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03526                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
03527                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03528                     if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
03529 
03530                     return 0;
03531                 }else{
03532                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03533 
03534                     return -1;
03535                 }
03536             }
03537         }
03538     }
03539 
03540 #if 0
03541     for(;s->mb_y < s->mb_height; s->mb_y++){
03542         for(;s->mb_x < s->mb_width; s->mb_x++){
03543             int ret= decode_mb(h);
03544 
03545             ff_h264_hl_decode_mb(h);
03546 
03547             if(ret<0){
03548                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
03549                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03550 
03551                 return -1;
03552             }
03553 
03554             if(++s->mb_x >= s->mb_width){
03555                 s->mb_x=0;
03556                 if(++s->mb_y >= s->mb_height){
03557                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
03558                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03559 
03560                         return 0;
03561                     }else{
03562                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03563 
03564                         return -1;
03565                     }
03566                 }
03567             }
03568 
03569             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
03570                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
03571                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03572 
03573                     return 0;
03574                 }else{
03575                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03576 
03577                     return -1;
03578                 }
03579             }
03580         }
03581         s->mb_x=0;
03582         ff_draw_horiz_band(s, 16*s->mb_y, 16);
03583     }
03584 #endif
03585     return -1; //not reached
03586 }
03587 
03594 static void execute_decode_slices(H264Context *h, int context_count){
03595     MpegEncContext * const s = &h->s;
03596     AVCodecContext * const avctx= s->avctx;
03597     H264Context *hx;
03598     int i;
03599 
03600     if (s->avctx->hwaccel)
03601         return;
03602     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03603         return;
03604     if(context_count == 1) {
03605         decode_slice(avctx, &h);
03606     } else {
03607         for(i = 1; i < context_count; i++) {
03608             hx = h->thread_context[i];
03609             hx->s.error_recognition = avctx->error_recognition;
03610             hx->s.error_count = 0;
03611         }
03612 
03613         avctx->execute(avctx, (void *)decode_slice,
03614                        h->thread_context, NULL, context_count, sizeof(void*));
03615 
03616         /* pull back stuff from slices to master context */
03617         hx = h->thread_context[context_count - 1];
03618         s->mb_x = hx->s.mb_x;
03619         s->mb_y = hx->s.mb_y;
03620         s->dropable = hx->s.dropable;
03621         s->picture_structure = hx->s.picture_structure;
03622         for(i = 1; i < context_count; i++)
03623             h->s.error_count += h->thread_context[i]->s.error_count;
03624     }
03625 }
03626 
03627 
03628 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
03629     MpegEncContext * const s = &h->s;
03630     AVCodecContext * const avctx= s->avctx;
03631     H264Context *hx; 
03632     int buf_index;
03633     int context_count;
03634     int next_avc;
03635     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
03636     int nals_needed=0; 
03637     int nal_index;
03638 
03639     h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
03640     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
03641         h->current_slice = 0;
03642         if (!s->first_field)
03643             s->current_picture_ptr= NULL;
03644         ff_h264_reset_sei(h);
03645     }
03646 
03647     for(;pass <= 1;pass++){
03648         buf_index = 0;
03649         context_count = 0;
03650         next_avc = h->is_avc ? 0 : buf_size;
03651         nal_index = 0;
03652     for(;;){
03653         int consumed;
03654         int dst_length;
03655         int bit_length;
03656         const uint8_t *ptr;
03657         int i, nalsize = 0;
03658         int err;
03659 
03660         if(buf_index >= next_avc) {
03661             if(buf_index >= buf_size) break;
03662             nalsize = 0;
03663             for(i = 0; i < h->nal_length_size; i++)
03664                 nalsize = (nalsize << 8) | buf[buf_index++];
03665             if(nalsize <= 0 || nalsize > buf_size - buf_index){
03666                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
03667                 break;
03668             }
03669             next_avc= buf_index + nalsize;
03670         } else {
03671             // start code prefix search
03672             for(; buf_index + 3 < next_avc; buf_index++){
03673                 // This should always succeed in the first iteration.
03674                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
03675                     break;
03676             }
03677 
03678             if(buf_index+3 >= buf_size) break;
03679 
03680             buf_index+=3;
03681             if(buf_index >= next_avc) continue;
03682         }
03683 
03684         hx = h->thread_context[context_count];
03685 
03686         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
03687         if (ptr==NULL || dst_length < 0){
03688             return -1;
03689         }
03690         i= buf_index + consumed;
03691         if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
03692            buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
03693             s->workaround_bugs |= FF_BUG_TRUNCATED;
03694 
03695         if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
03696         while(ptr[dst_length - 1] == 0 && dst_length > 0)
03697             dst_length--;
03698         }
03699         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
03700 
03701         if(s->avctx->debug&FF_DEBUG_STARTCODE){
03702             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
03703         }
03704 
03705         if (h->is_avc && (nalsize != consumed) && nalsize){
03706             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
03707         }
03708 
03709         buf_index += consumed;
03710         nal_index++;
03711 
03712         if(pass == 0) {
03713             // packets can sometimes contain multiple PPS/SPS
03714             // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
03715             // if so, when frame threading we can't start the next thread until we've read all of them
03716             switch (hx->nal_unit_type) {
03717                 case NAL_SPS:
03718                 case NAL_PPS:
03719                 case NAL_IDR_SLICE:
03720                 case NAL_SLICE:
03721                     nals_needed = nal_index;
03722             }
03723             continue;
03724         }
03725 
03726         //FIXME do not discard SEI id
03727         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
03728             continue;
03729 
03730       again:
03731         err = 0;
03732         switch(hx->nal_unit_type){
03733         case NAL_IDR_SLICE:
03734             if (h->nal_unit_type != NAL_IDR_SLICE) {
03735                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
03736                 return -1;
03737             }
03738             idr(h); //FIXME ensure we don't loose some frames if there is reordering
03739         case NAL_SLICE:
03740             init_get_bits(&hx->s.gb, ptr, bit_length);
03741             hx->intra_gb_ptr=
03742             hx->inter_gb_ptr= &hx->s.gb;
03743             hx->s.data_partitioning = 0;
03744 
03745             if((err = decode_slice_header(hx, h)))
03746                break;
03747 
03748             s->current_picture_ptr->key_frame |=
03749                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
03750                     (h->sei_recovery_frame_cnt >= 0);
03751 
03752             if (h->current_slice == 1) {
03753                 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
03754                     decode_postinit(h, nal_index >= nals_needed);
03755                 }
03756 
03757                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
03758                     return -1;
03759                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03760                     ff_vdpau_h264_picture_start(s);
03761             }
03762 
03763             if(hx->redundant_pic_count==0
03764                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03765                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03766                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03767                && avctx->skip_frame < AVDISCARD_ALL){
03768                 if(avctx->hwaccel) {
03769                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
03770                         return -1;
03771                 }else
03772                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
03773                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
03774                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
03775                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
03776                 }else
03777                     context_count++;
03778             }
03779             break;
03780         case NAL_DPA:
03781             init_get_bits(&hx->s.gb, ptr, bit_length);
03782             hx->intra_gb_ptr=
03783             hx->inter_gb_ptr= NULL;
03784 
03785             if ((err = decode_slice_header(hx, h)) < 0)
03786                 break;
03787 
03788             hx->s.data_partitioning = 1;
03789 
03790             break;
03791         case NAL_DPB:
03792             init_get_bits(&hx->intra_gb, ptr, bit_length);
03793             hx->intra_gb_ptr= &hx->intra_gb;
03794             break;
03795         case NAL_DPC:
03796             init_get_bits(&hx->inter_gb, ptr, bit_length);
03797             hx->inter_gb_ptr= &hx->inter_gb;
03798 
03799             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
03800                && s->context_initialized
03801                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03802                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03803                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03804                && avctx->skip_frame < AVDISCARD_ALL)
03805                 context_count++;
03806             break;
03807         case NAL_SEI:
03808             init_get_bits(&s->gb, ptr, bit_length);
03809             ff_h264_decode_sei(h);
03810             break;
03811         case NAL_SPS:
03812             init_get_bits(&s->gb, ptr, bit_length);
03813             ff_h264_decode_seq_parameter_set(h);
03814 
03815             if (s->flags& CODEC_FLAG_LOW_DELAY ||
03816                 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
03817                 s->low_delay=1;
03818 
03819             if(avctx->has_b_frames < 2)
03820                 avctx->has_b_frames= !s->low_delay;
03821 
03822             if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
03823                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
03824                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
03825                     h->pixel_shift = h->sps.bit_depth_luma > 8;
03826 
03827                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
03828                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
03829                     dsputil_init(&s->dsp, s->avctx);
03830                 } else {
03831                     av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
03832                     return -1;
03833                 }
03834             }
03835             break;
03836         case NAL_PPS:
03837             init_get_bits(&s->gb, ptr, bit_length);
03838 
03839             ff_h264_decode_picture_parameter_set(h, bit_length);
03840 
03841             break;
03842         case NAL_AUD:
03843         case NAL_END_SEQUENCE:
03844         case NAL_END_STREAM:
03845         case NAL_FILLER_DATA:
03846         case NAL_SPS_EXT:
03847         case NAL_AUXILIARY_SLICE:
03848             break;
03849         default:
03850             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
03851         }
03852 
03853         if(context_count == h->max_contexts) {
03854             execute_decode_slices(h, context_count);
03855             context_count = 0;
03856         }
03857 
03858         if (err < 0)
03859             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
03860         else if(err == 1) {
03861             /* Slice could not be decoded in parallel mode, copy down
03862              * NAL unit stuff to context 0 and restart. Note that
03863              * rbsp_buffer is not transferred, but since we no longer
03864              * run in parallel mode this should not be an issue. */
03865             h->nal_unit_type = hx->nal_unit_type;
03866             h->nal_ref_idc   = hx->nal_ref_idc;
03867             hx = h;
03868             goto again;
03869         }
03870     }
03871     }
03872     if(context_count)
03873         execute_decode_slices(h, context_count);
03874     return buf_index;
03875 }
03876 
03880 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
03881         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
03882         if(pos+10>buf_size) pos=buf_size; // oops ;)
03883 
03884         return pos;
03885 }
03886 
/**
 * Codec decode callback: decode one AVPacket into at most one output frame.
 *
 * A zero-sized packet signals end of stream and drains the delayed-picture
 * buffer instead of parsing bitstream.
 *
 * @param avctx     codec context (priv_data is the H264Context)
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a frame is output, 0 otherwise
 * @param avpkt     input packet
 * @return bytes consumed from the packet, or -1 on error
 */
static int decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

   /* end of stream, output what is still in the buffers */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

//FIXME factorize this with the output code below
        /* Pick the lowest-POC delayed picture, stopping the scan at the
         * first keyframe / MMCO reset (reorder boundary). */
        out = h->delayed_pic[0];
        out_idx = 0;
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        /* Compact the delayed-picture list over the slot being output. */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return 0;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    /* An end-of-sequence NAL without a current picture: flush the DPB by
     * re-entering the end-of-stream path above. */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF)
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    /* Finish the picture when the packet held a full frame (non-chunked
     * mode) or when chunked decoding has reached the last macroblock row. */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* Wait for second field. */
            *data_size = 0;

        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    return get_consumed_bytes(s, buf_index, buf_size);
}
03967 #if 0
/* NOTE: dead code — this whole function is compiled out by the surrounding
 * #if 0. It appears to fill h->mb_avail[] with neighbor-macroblock
 * availability flags (same-slice neighbors of the current MB); kept only
 * for reference. */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        /* top-left, top, top-right neighbors — available only when in the
         * same slice as the current macroblock */
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        /* first macroblock row: no neighbors above */
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num; /* left neighbor */
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
03985 #endif
03986 
03987 #ifdef TEST
03988 #undef printf
03989 #undef random
03990 #define COUNT 8000
03991 #define SIZE (COUNT*40)
/**
 * Standalone self-test (built only with -DTEST): benchmarks and checks the
 * Exp-Golomb bitstream writer/reader round trip. The #if 0 sections are
 * disabled experiments (DCT and NAL-layer tests) that reference symbols not
 * defined in this build.
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    /* Write COUNT unsigned Exp-Golomb codes ... */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    /* ... then read them back and verify each value round-trips. */
    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    /* Same round trip for signed Exp-Golomb codes, centered around zero. */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h->h264dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
04160 #endif /* TEST */
04161 
04162 
04163 av_cold void ff_h264_free_context(H264Context *h)
04164 {
04165     int i;
04166 
04167     free_tables(h, 1); //FIXME cleanup init stuff perhaps
04168 
04169     for(i = 0; i < MAX_SPS_COUNT; i++)
04170         av_freep(h->sps_buffers + i);
04171 
04172     for(i = 0; i < MAX_PPS_COUNT; i++)
04173         av_freep(h->pps_buffers + i);
04174 }
04175 
04176 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04177 {
04178     H264Context *h = avctx->priv_data;
04179     MpegEncContext *s = &h->s;
04180 
04181     ff_h264_free_context(h);
04182 
04183     MPV_common_end(s);
04184 
04185 //    memset(h, 0, sizeof(H264Context));
04186 
04187     return 0;
04188 }
04189 
/* Human-readable names for the H.264 profiles this decoder reports,
 * terminated by FF_PROFILE_UNKNOWN; exposed through AVCodec.profiles. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
04206 
04207 AVCodec ff_h264_decoder = {
04208     "h264",
04209     AVMEDIA_TYPE_VIDEO,
04210     CODEC_ID_H264,
04211     sizeof(H264Context),
04212     ff_h264_decode_init,
04213     NULL,
04214     ff_h264_decode_end,
04215     decode_frame,
04216     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
04217         CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
04218     .flush= flush_dpb,
04219     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
04220     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
04221     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
04222     .profiles = NULL_IF_CONFIG_SMALL(profiles),
04223 };
04224 
04225 #if CONFIG_H264_VDPAU_DECODER
04226 AVCodec ff_h264_vdpau_decoder = {
04227     "h264_vdpau",
04228     AVMEDIA_TYPE_VIDEO,
04229     CODEC_ID_H264,
04230     sizeof(H264Context),
04231     ff_h264_decode_init,
04232     NULL,
04233     ff_h264_decode_end,
04234     decode_frame,
04235     CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
04236     .flush= flush_dpb,
04237     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
04238     .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
04239     .profiles = NULL_IF_CONFIG_SMALL(profiles),
04240 };
04241 #endif