Libav 0.7.1
|
/*
 * H.26L/H.264/AVC/JVT/14496-10/... decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 core decoder: shared tables and helpers.
 */

#include "libavutil/imgutils.h"
#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264.h"
#include "h264data.h"
#include "h264_mvpred.h"
#include "golomb.h"
#include "mathops.h"
#include "rectangle.h"
#include "thread.h"
#include "vdpau_internal.h"
#include "libavutil/avassert.h"

#include "cabac.h"

//#undef NDEBUG
#include <assert.h>

/* qp % 6 for every QP value; the dequant tables repeat with period 6. */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/* qp / 6 for every QP value; selects the shift applied to the dequant base. */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};

/* Copy the cached intra 4x4 prediction modes of the current macroblock
 * from the decode-time cache back into per-macroblock storage. */
void ff_h264_write_back_intra_pred_mode(H264Context *h){
    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];

    AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
    mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
    mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
    mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
}

/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 * @return 0 on success, -1 if an intra4x4 mode needs unavailable neighbours
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* remap tables: <0 means invalid without that neighbour,
     * >0 is the replacement DC mode, 0 means mode is fine as-is */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    if(!(h->top_samples_available&0x8000)){
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
} //FIXME cleanup like ff_h264_check_intra_pred_mode

/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 * @return the (possibly remapped) prediction mode, or -1 on error
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}

/* Unescape a NAL unit: strip the emulation-prevention 0x03 bytes from
 * src and return a pointer to the unescaped payload (either src itself
 * when no escapes are present, or an internal rbsp buffer). *dst_length
 * receives the payload size, *consumed the number of input bytes used. */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;

    /* Fast scan for a 00 00 0x sequence (escape or next start code).
     * The SIMD-ish variants test 4/8 bytes at a time for any zero byte. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}

/**
 * Identify the exact end of the bitstream.
 * @return the length of the trailing, or 0 if damaged
 */
static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
    int v= *src;
    int r;

    tprintf(h->s.avctx, "rbsp trailing %X\n", v);

    for(r=1; r<9; r++){
        if(v&1) return r;
        v>>=1;
    }
    return 0;
}

/* Lowest luma row in the reference picture that motion compensation for
 * block n of the given list can touch (accounting for the 6-tap filter
 * reaching 2 pixels beyond the block when the mv is fractional). */
static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
                                         int y_offset, int list){
    int raw_my= h->mv_cache[list][ scan8[n] ][1];
    int filter_height= (raw_my&3) ? 2 : 0;
    int full_my= (raw_my>>2) + y_offset;
    int top = full_my - filter_height, bottom = full_my + height + filter_height;

    return FFMAX(abs(top), bottom);
}

/* Record, per reference picture, the lowest row needed by block n; also
 * counts how many distinct references are used in nrefs[list]. */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                                     int y_offset, int list0, int list1, int *nrefs){
    MpegEncContext * const s = &h->s;
    int my;

    y_offset += 16*(s->mb_y >> MB_FIELD);

    if(list0){
        int ref_n = h->ref_cache[0][ scan8[n] ];
        Picture *ref= &h->ref_list[0][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
            if (refs[0][ref_n] < 0) nrefs[0] += 1;
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
        }
    }

    if(list1){
        int ref_n = h->ref_cache[1][ scan8[n] ];
        Picture *ref= &h->ref_list[1][ref_n];

        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
            if (refs[1][ref_n] < 0) nrefs[1] += 1;
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
        }
    }
}

/**
 * Wait until all reference frames are available for MC operations.
 * Walks all partitions of the current macroblock, computes the lowest
 * reference row each needs, and blocks on the decoding threads of the
 * referenced pictures until those rows have been decoded.
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    int refs[2][48];
    int nrefs[2] = {0};
    int ref, list;

    memset(refs, -1, sizeof(refs));

    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF;
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}

#if 0
/**
 * DCT transforms the 16 dc values (disabled, unused reference code;
 * NOTE(review): references an undefined 'stride' and cannot compile).
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif

#undef xStride
#undef stride

#if 0
/* 2x2 chroma DC Hadamard transform (disabled, unused reference code). */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif

/* Motion compensation for one prediction direction (list) of one
 * partition: qpel luma MC plus chroma MC, with edge emulation when the
 * motion vector points (partly) outside the picture. */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
                           int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma444){
        /* 4:4:4 chroma has luma resolution, so reuse the luma qpel MC */
        src_cb = pic->data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}

/* Standard (unweighted) MC for one partition: put from list0, then
 * average in list1 when the partition is bi-predicted. */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);

        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);
    }
}

/* Weighted MC for one partition: bi-prediction uses implicit or
 * explicit biweights into a scratch buffer; uni-prediction applies
 * the explicit per-reference weight in place. */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;

    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);

        if(h->use_weight == 2){
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma444);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}

/* Dispatch one partition to weighted or standard MC. Implicit
 * bi-prediction with weight 32 degenerates to a plain average, so the
 * cheaper standard path is taken in that case. */
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0],
                         weight_avg[3], list0, list1, pixel_shift, chroma444);
    else
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
                    chroma_avg, list0, list1, pixel_shift, chroma444);
}

static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        if(chroma444){
            s->dsp.prefetch(src[1]+off, s->linesize, 4);
            s->dsp.prefetch(src[2]+off, s->linesize, 4);
        }else{
            off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
        }
    }
}

/* Full inter-prediction of one macroblock: waits for the needed
 * reference rows (frame threading), then runs MC per partition shape
 * (16x16, 16x8, 8x16 or 8x8 with sub-partitions). */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma444);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
    }else if(IS_16X8(mb_type)){
        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else if(IS_8X16(mb_type)){
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma444);
                }
            }
        }
    }

    prefetch_motion(h, 1, pixel_shift, chroma444);
}

/* Instantiate hl_motion for 8-bit (pixel_shift 0) and 16-bit
 * (pixel_shift 1) sample storage. */
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                       uint8_t *dest_y, \
                                       uint8_t *dest_cb, uint8_t *dest_cr, \
                                       qpel_mc_func (*qpix_put)[16], \
                                       h264_chroma_mc_func (*chroma_put), \
                                       qpel_mc_func (*qpix_avg)[16], \
                                       h264_chroma_mc_func (*chroma_avg), \
                                       h264_weight_func *weight_op, \
                                       h264_biweight_func *weight_avg, \
                                       int chroma444) \
{ \
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);

/* Free all per-context tables; if free_rbsp is set, the NAL unescape
 * buffers of every thread context are released as well. */
static void free_tables(H264Context *h, int free_rbsp){
    int i;
    H264Context *hx;

    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for(i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        if (free_rbsp){
            av_freep(&hx->rbsp_buffer[1]);
            av_freep(&hx->rbsp_buffer[0]);
            hx->rbsp_buffer_size[0] = 0;
            hx->rbsp_buffer_size[1] = 0;
        }
        if (i) av_freep(&h->thread_context[i]);
    }
}

/* Build the 8x8 dequant tables for all QP values; identical scaling
 * matrices share one table to save memory. */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}

/* Build the 4x4 dequant tables for all QP values; identical scaling
 * matrices share one table to save memory. */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}

/* Initialize the dequant tables; in lossless (transform bypass) mode
 * QP 0 is forced to the identity scale 1<<6. */
static void init_dequant_tables(H264Context *h){
    int i,x;
    init_dequant4_coeff_table(h);
    if(h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);
    if(h->sps.transform_bypass){
        for(i=0; i<6; i++)
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][0][x] = 1<<6;
        if(h->pps.transform_8x8_mode)
            for(i=0; i<6; i++)
                for(x=0; x<64; x++)
                    h->dequant8_coeff[i][0][x] = 1<<6;
    }
}

/* Allocate all tables shared between slice threads.
 * @return 0 on success, -1 on allocation failure (tables freed) */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}

/**
 * Mimic alloc_tables(), but for every context thread:
 * share the big tables, give each thread its own slice of the
 * per-row tables, and reinit the prediction function pointers.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i){
    MpegEncContext * const s = &src->s;
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
    dst->non_zero_count           = src->non_zero_count;
    dst->slice_table              = src->slice_table;
    dst->cbp_table                = src->cbp_table;
    dst->mb2b_xy                  = src->mb2b_xy;
    dst->mb2br_xy                 = src->mb2br_xy;
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
    dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
    dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
    dst->direct_table             = src->direct_table;
    dst->list_counts              = src->list_counts;

    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
}

/**
 * Allocate buffers which are not shared amongst multiple threads.
 * (NOTE(review): continues beyond this chunk of the file.)
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    h->ref_cache[0][scan8[5 ]+1] =
    h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}

static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);

/**
 * One-time context setup shared by all init paths: picture geometry,
 * 8-bit DSP/prediction defaults, and neutral (flat 16) scaling matrices.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    ff_h264dsp_init(&h->h264dsp, 8);
    ff_h264_pred_init(&h->hpc, s->codec_id, 8);

    h->dequant_coeff_pps= -1; // no PPS-derived dequant tables yet
    s->unrestricted_mv=1;
    s->decode=1; //FIXME

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}

/**
 * Parse codec extradata: either an avcC box (first byte == 1) containing
 * length-prefixed SPS/PPS NAL units, or raw Annex-B NAL units.
 * Sets h->is_avc and h->nal_length_size as a side effect.
 *
 * @return 0 on success, -1 on malformed extradata or NAL decode failure
 */
int ff_h264_decode_extradata(H264Context *h)
{
    AVCodecContext *avctx = h->s.avctx;

    if(avctx->extradata[0] == 1){
        int i, cnt, nalsize;
        unsigned char *p = avctx->extradata;

        h->is_avc = 1;

        if(avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            return -1;
        }
        /* sps and pps in the avcC always have length coded with 2 bytes,
           so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p+5) & 0x1f; // Number of sps
        p += 6;
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2; // 2-byte length prefix included
            if(decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Decode pps from avcC
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if (decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Now store right nal length size, that will be use to parse all other nals
        h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
    } else {
        h->is_avc = 0;
        if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
            return -1;
    }
    return 0;
}

/**
 * AVCodec init for the H.264 decoder: sets MPV defaults, 8-bit depth,
 * VLC tables, SEI/POC state, and parses extradata when present.
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
    s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1; // unknown encoder until an SEI says otherwise
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        // H.264 time base is field-based: two ticks per frame
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h))
        return -1;

    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}

#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
/**
 * Rebase an array of Picture pointers from old_base's address space into
 * new_base's (used when copying state between frame-threading contexts).
 * Each source pointer must be NULL or point inside old_base / its picture
 * array (asserted).
 */
static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
{
    int i;

    for (i=0; i<count; i++){
        assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
                IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
                !from[i]));
        to[i] = REBASE_PICTURE(from[i], new_base, old_base);
    }
}

/**
 * Sync an array of heap-allocated parameter sets (SPS/PPS) between two
 * contexts: free entries that disappeared, allocate new ones, copy contents.
 * NOTE(review): av_malloc result is not checked before the memcpy — an OOM
 * here would crash; verify against the callers' error handling.
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i=0; i<count; i++){
        if (to[i] && !from[i]) av_freep(&to[i]);
        else if (from[i] && !to[i]) to[i] = av_malloc(size);

        if (from[i]) memcpy(to[i], from[i], size);
    }
}

/**
 * Frame-threading copy constructor hook: clear the SPS/PPS pointer tables
 * of the freshly memcpy'd context so they are not double-freed; the real
 * contents are copied later by decode_update_thread_context().
 */
static int decode_init_thread_copy(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;

    if (!avctx->is_copy) return 0;
    memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
    memset(h->pps_buffers, 0, sizeof(h->pps_buffers));

    return 0;
}

/* Bulk-copy the contiguous field span [start_field, end_field) between two
 * contexts — relies on the H264Context field layout staying in sync. */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading state transfer: copy decoder state from the source
 * thread's context into this one (first call also performs a full init).
 *
 * @return 0 on success, negative AVERROR on failure
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    //FIXME handle width/height changing
    if(!inited){
        // first update for this thread: take over everything, then
        // reallocate the pointers that must be private to this context
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        // frame_start may not be called for the next thread (if it's decoding a bottom field)
        // so this has to be allocated here
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    //extradata/NAL handling
    h->is_avc = h1->is_avc;

    //SPS/PPS
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps = h1->pps;

    //Dequantization matrices
    //FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    // rebase the dequant pointers into this context's own buffers,
    // preserving the sharing pattern set up in init_dequant*_coeff_table()
    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    //POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    //reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list,  intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    if(!s->dropable) {
        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb     = h->poc_msb;
        h->prev_poc_lsb     = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return 0;
}

/**
 * Per-frame setup: start the MPV frame and error resilience, compute the
 * block_offset tables for this frame's linesizes, and reset per-picture
 * state (key_frame, POCs, slice table).
 *
 * @return 0 on success, -1 if MPV_frame_start() fails
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    // luma block offsets: [0..15] frame, [48..63] field (doubled stride)
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    // chroma block offsets, cb and cr sharing the same table entries
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < thread_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}

/**
 * Post-slice-header setup for the current picture: derive interlacing /
 * repeat flags from SEI picture timing, then insert the picture into the
 * delayed-picture reorder buffer and pick the next picture to output
 * (h->next_output_pic).
 *
 * @param setup_finished notify the frame-threading framework that setup is
 *                       done (ff_thread_finish_setup) when nonzero
 */
static void decode_postinit(H264Context *h, int setup_finished){
    MpegEncContext * const s = &h->s;
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;

    if (h->next_output_pic) return;

    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
        //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
        //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
        //to find this yet, so we assume the worst for now.
        //if (setup_finished)
        //    ff_thread_finish_setup(s->avctx);
        return;
    }

    cur->interlaced_frame = 0;
    cur->repeat_pict = 0;

    /* Signal interlacing information externally. */
    /* Prioritize picture timing SEI information over used decoding process if it exists. */
    /* (continued) Derive interlaced_frame/repeat_pict from the SEI
     * pic_struct when present, otherwise from the decoding process. */
    if(h->sps.pic_struct_present_flag){
        switch (h->sei_pic_struct)
        {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
                cur->interlaced_frame = 1;
            else
                // try to flag soft telecine progressive
                cur->interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            // Signal the possibility of telecined film externally (pic_struct 5,6)
            // From these hints, let the applications decide if they apply deinterlacing.
            cur->repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            // Force progressive here, as doubling interlaced frame is a bad idea.
            cur->repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->repeat_pict = 4;
            break;
        }

        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
    }else{
        /* Derive interlacing flag from used decoding process. */
        cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
    }
    h->prev_interlaced_frame = cur->interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]){
        /* Derive top_field_first from field pocs. */
        cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
    }else{
        if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
            /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->top_field_first = 1;
            else
                cur->top_field_first = 0;
        }else{
            /* Most likely progressive */
            cur->top_field_first = 0;
        }
    }

    //FIXME do something with unavailable reference frames

    /* Sort B-frames into display order */

    if(h->sps.bitstream_restriction_flag
       && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
       && !h->sps.bitstream_restriction_flag){
        // without restriction info, assume the worst-case reorder depth
        s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
        s->low_delay= 0;
    }

    pics = 0;
    while(h->delayed_pic[pics]) pics++;

    assert(pics <= MAX_DELAYED_PIC_COUNT);

    h->delayed_pic[pics++] = cur;
    if(cur->reference == 0)
        cur->reference = DELAYED_PIC_REF; // keep it alive while in the reorder buffer

    // find the lowest-POC delayed picture, stopping at keyframes/MMCO resets
    out = h->delayed_pic[0];
    out_idx = 0;
    for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
        if(h->delayed_pic[i]->poc < out->poc){
            out     = h->delayed_pic[i];
            out_idx = i;
        }
    if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc= INT_MIN;
    out_of_order = out->poc < h->next_outputed_poc;

    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        { }
    else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
       || (s->low_delay &&
        ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
         || cur->pict_type == AV_PICTURE_TYPE_B)))
    {
        // evidence of reordering: grow the delay and leave low-delay mode
        s->low_delay = 0;
        s->avctx->has_b_frames++;
    }

    if(out_of_order || pics > s->avctx->has_b_frames){
        out->reference &= ~DELAYED_PIC_REF;
        out->owner2 = s; // for frame threading, the owner must be the second field's thread
                         // or else the first thread can release the picture and reuse it unsafely
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
    }
    if(!out_of_order && pics > s->avctx->has_b_frames){
        h->next_output_pic = out;
        if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    }else{
        av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
    }

    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
}

/**
 * Save the bottom row(s) of the just-decoded macroblock into
 * h->top_borders so the macroblock below can use them for intra
 * prediction after in-place deblocking has modified the picture.
 * Handles MBAFF pairs, 4:4:4 chroma and high bit depth (pixel_shift).
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;

    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF){
                // bottom MB of a non-MBAFF pair: save the pair's top-MB border too
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else {
                        if (pixel_shift) {
                            AV_COPY128(top_border+32,
                                       src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}

/**
 * Exchange (or copy, depending on xchg) the pixels above the current
 * macroblock with the saved pre-deblocking top border, so intra
 * prediction sees unfiltered neighbors while deblocking works in place.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        // filter only inside slices: neighbors count if in the same slice
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* Swap or copy 8 pixels (16 bytes at high bit depth) between border and picture. */
#define XCHG(a,b,xchg)\
if (pixel_shift) {\
    if (xchg) {\
        AV_SWAP64(b+0,a+0);\
        AV_SWAP64(b+8,a+8);\
    } else {\
        AV_COPY128(b,a); \
    }\
} else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}

/**
 * Read coefficient `index` from the mb array: 32-bit elements at high
 * bit depth, 16-bit otherwise.
 */
static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
    if (high_bit_depth) {
        return AV_RN32A(((int32_t*)mb) + index);
    } else
        return AV_RN16A(mb + index);
}

/**
 * Write coefficient `index` into the mb array, mirroring dctcoef_get().
 */
static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
    if (high_bit_depth) {
        AV_WN32A(((int32_t*)mb) + index, value);
    } else
        AV_WN16A(mb + index, value);
}

/**
 * Intra prediction + residual add for one luma (or 4:4:4 chroma) plane of
 * an intra macroblock: 4x4/8x8 per-block prediction for INTRA4x4, or a
 * whole-plane 16x16 prediction plus DC transform otherwise.
 *
 * @param p plane index: 0 = luma, 1/2 = chroma planes in 4:4:4 mode
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ?
                         s->qscale : h->chroma_qp[p-1]; // plane 0 uses luma QP, others chroma QP
    block_offset += 16*p;
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                // 8x8 transform: four 8x8 blocks, stepping i by 4
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        // lossless V/H prediction fused with residual add
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                               (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            // single nonzero coeff at DC -> cheaper DC-only add
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                // 4x4 transform: sixteen 4x4 blocks
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                // top-right unavailable: replicate the last
                                // available top pixel into a local buffer
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        // INTRA16x16: predict the whole plane, then handle the DC transform
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    // lossless: scatter the DC coefficients back into each
                    // 4x4 block's DC slot in raster-to-scan order
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}

/**
 * Residual (IDCT + add) pass for one luma/4:4:4 plane of a non-INTRA4x4
 * macroblock; the INTRA4x4 case already added its residuals during
 * prediction in hl_decode_mb_predict_luma().
 *
 * @param p plane index: 0 = luma, 1/2 = chroma planes in 4:4:4 mode
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        // lossless V/H 16x16 prediction fused with residual add
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){
                // inter / non-16x16 intra with coded luma residual
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        }else{
            // SVQ3 path
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ?
1 : 0); 01779 } 01780 } 01781 } 01782 } 01783 } 01784 01785 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){ 01786 MpegEncContext * const s = &h->s; 01787 const int mb_x= s->mb_x; 01788 const int mb_y= s->mb_y; 01789 const int mb_xy= h->mb_xy; 01790 const int mb_type= s->current_picture.mb_type[mb_xy]; 01791 uint8_t *dest_y, *dest_cb, *dest_cr; 01792 int linesize, uvlinesize /*dct_offset*/; 01793 int i, j; 01794 int *block_offset = &h->block_offset[0]; 01795 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); 01796 /* is_h264 should always be true if SVQ3 is disabled. */ 01797 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; 01798 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); 01799 01800 dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; 01801 dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; 01802 dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; 01803 01804 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); 01805 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); 01806 01807 h->list_counts[mb_xy]= h->list_count; 01808 01809 if (!simple && MB_FIELD) { 01810 linesize = h->mb_linesize = s->linesize * 2; 01811 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; 01812 block_offset = &h->block_offset[48]; 01813 if(mb_y&1){ //FIXME move out of this function? 
            /* bottom field of a pair: step back to the field's first line */
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            /* MBAFF: re-map cached reference indices to the field-pair numbering
               ((16+ref)^(mb_y&1) — see the matching logic in the 4:4:4 variant) */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* PCM macroblock: raw samples were stored in h->mb, copy them out verbatim */
        if (pixel_shift) {
            /* high bit depth: samples are bit-packed in h->mb, unpack with the bit reader */
            const int bit_depth = h->sps.bit_depth_luma;
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* monochrome stream: fill chroma with mid-grey at this bit depth */
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = 1 << (bit_depth - 1);
                        }
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            /* 8-bit PCM: luma rows are stored 8 DCTELEMs (16 bytes) apart in h->mb */
            for (i=0; i<16; i++) {
                memcpy(dest_y + i* linesize, h->mb + i*8, 16);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    for (i = 0; i < 8; i++) {
                        memset(dest_cb + i*uvlinesize, 128, 8); /* mid-grey */
                        memset(dest_cr + i*uvlinesize, 128, 8);
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8);
                        memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8);
                    }
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* border samples are swapped in/out around prediction when the
               deblocking filter would otherwise have modified them */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        }else if(is_h264){
            /* inter macroblock: motion compensation (bit-depth specific entry points) */
            if (pixel_shift) {
                hl_motion_16(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 0);
            } else
                hl_motion_8(h, dest_y, dest_cb, dest_cr,
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 0);
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

        /* chroma residual, only when chroma CBP bits (0x30) are set */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    /* lossless V/H chroma prediction with fused predict+add */
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(j=1; j<3; j++){ /* j=1: Cb blocks 16..19, j=2: Cr blocks 32..35 */
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                        }
                    }
                }
            }else{
                if(is_h264){
                    /* dequant table row 1/2 for intra, 4/5 for inter */
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                }else{
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                uint8_t * const ptr= dest[j-1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                        }
                    }
                }
            }
        }
    }
    if(h->cbp || IS_INTRA(mb_type))
    {
        /* clear the coefficient buffer for the next macroblock */
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}

/**
 * Decode one macroblock for 4:4:4 streams: all three planes share the luma
 * code path (prediction / motion compensation / residual per plane).
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only the luma plane is processed */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        /* all planes are full resolution in 4:4:4, hence the luma stride for each */
        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            /* bottom field of a pair: step back to the field's first line */
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* MBAFF: re-map cached reference indices to field-pair numbering */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* PCM macroblock: copy raw samples stored in h->mb straight to the picture */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth); /* 3 planes * 256 samples */

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    /* plane rows are stored p*128 + i*8 DCTELEMs into h->mb */
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            /* every plane goes through the luma prediction path (is_h264 forced to 1) */
            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            if (pixel_shift) {
                hl_motion_16(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
            } else
                hl_motion_8(h, dest[0], dest[1], dest[2],
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 1);
        }

        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    if(h->cbp || IS_INTRA(mb_type))
    {
        /* clear the coefficient buffer for the next macroblock */
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}

/**
 * Generate the 8- and 16-bit "simple" (progressive, no gray-flag) decoders.
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);

/**
 * Slow path decoder: handles MBAFF/field macroblocks, gray flag and PCM.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}

static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}

static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}

/**
 * Reconstruct the current macroblock, dispatching to the variant matching
 * the stream (4:4:4 or not), the bit depth (pixel_shift) and whether the
 * simple fast path is usable for this macroblock.
 */
void ff_h264_hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    /* qscale==0 may mean transform bypass, which only the complex path handles */
    int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;

    if (CHROMA444) {
        if(is_complex || h->pixel_shift)
            hl_decode_mb_444_complex(h);
        else
            hl_decode_mb_444_simple(h);
    } else if (is_complex) {
        hl_decode_mb_complex(h);
    } else if (h->pixel_shift) {
        hl_decode_mb_simple_16(h);
    } else
        hl_decode_mb_simple_8(h);
}

/**
 * Parse the explicit weighted-prediction table (pred_weight_table syntax)
 * from the slice header and set h->use_weight / h->use_weight_chroma.
 *
 * @return 0 (no error conditions are reported here)
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int
list, i;
    int luma_def, chroma_def;

    h->use_weight= 0;
    h->use_weight_chroma= 0;
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* chroma denom is only present for streams that have chroma */
    if(h->sps.chroma_format_idc)
        h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    luma_def = 1<<h->luma_log2_weight_denom;   /* default = unity weight */
    chroma_def = 1<<h->chroma_log2_weight_denom;

    for(list=0; list<2; list++){
        h->luma_weight_flag[list]   = 0;
        h->chroma_weight_flag[list] = 0;
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[i][list][0]= get_se_golomb(&s->gb); /* weight */
                h->luma_weight[i][list][1]= get_se_golomb(&s->gb); /* offset */
                /* only mark weighting as used if it differs from the identity */
                if(   h->luma_weight[i][list][0] != luma_def
                   || h->luma_weight[i][list][1] != 0) {
                    h->use_weight= 1;
                    h->luma_weight_flag[list]= 1;
                }
            }else{
                h->luma_weight[i][list][0]= luma_def;
                h->luma_weight[i][list][1]= 0;
            }

            if(h->sps.chroma_format_idc){
                chroma_weight_flag= get_bits1(&s->gb);
                if(chroma_weight_flag){
                    int j;
                    for(j=0; j<2; j++){ /* j = 0: Cb, 1: Cr */
                        h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
                        h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
                        if(   h->chroma_weight[i][list][j][0] != chroma_def
                           || h->chroma_weight[i][list][j][1] != 0) {
                            h->use_weight_chroma= 1;
                            h->chroma_weight_flag[list]= 1;
                        }
                    }
                }else{
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= chroma_def;
                        h->chroma_weight[i][list][j][1]= 0;
                    }
                }
            }
        }
        /* list 1 only exists for B slices */
        if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
    }
    h->use_weight= h->use_weight || h->use_weight_chroma;
    return 0;
}

/**
 * Initialize the implicit-weight table for B-slice bi-prediction
 * (weights derived from POC distances, H.264 spec 8.4.2.3.2).
 *
 * @param field <0 initializes the frame (and the common) weights;
 *              0/1 initializes the weights of one field of an MBAFF pair
 *              (reference indices offset by 16, counts doubled).
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        cur_poc = s->current_picture_ptr->poc;
        /* shortcut: single symmetric reference pair -> no weighting needed */
        if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
           && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
            h->use_weight= 0;
            h->use_weight_chroma= 0;
            return;
        }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    h->use_weight= 2;            /* 2 = implicit weighting */
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;          /* default: equal weighting */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    /* temporal distance scale factor, per the spec's fixed-point formula */
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}

/**
 * Instantaneous decoder refresh: drop all references and reset the
 * frame-number / POC prediction state.
 */
static void idr(H264Context *h){
    ff_h264_remove_all_refs(h);
    h->prev_frame_num= 0;
    h->prev_frame_num_offset= 0;
    h->prev_poc_msb=
    h->prev_poc_lsb= 0;
}

/* forget old pics after a seek */
static void flush_dpb(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    int i;
    for(i=0;
i<MAX_DELAYED_PIC_COUNT; i++) {
        /* un-reference and drop every picture still queued for output */
        if(h->delayed_pic[i])
            h->delayed_pic[i]->reference= 0;
        h->delayed_pic[i]= NULL;
    }
    h->outputed_poc=h->next_outputed_poc= INT_MIN;
    h->prev_interlaced_frame = 1;
    idr(h); /* also clears reference lists and POC/frame_num prediction state */
    if(h->s.current_picture_ptr)
        h->s.current_picture_ptr->reference= 0;
    h->s.first_field= 0;
    ff_h264_reset_sei(h);
    ff_mpeg_flush(avctx);
}

/**
 * Compute the picture order count (POC) of the current picture from the
 * slice header fields, for all three poc_type modes (H.264 spec 8.2.1),
 * and store the field/frame POCs on the current picture.
 *
 * @return 0 (always succeeds)
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)  /* frame_num wrapped */
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        /* derive poc_msb by detecting lsb wrap-around relative to the previous picture */
        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* POC derived from frame_num plus the SPS-signalled offset cycle */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* poc_type == 2: POC follows decoding order (non-reference pics get odd slots) */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* store only the field(s) actually present in this picture */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}

/**
 * Initialize the per-context scan tables: the CAVLC/CABAC zigzag and field
 * scans, transposed for this decoder's coefficient layout, plus the _q0
 * aliases used by the transform-bypass (lossless) path.
 */
static void init_scan_tables(H264Context *h){
    int i;
    for(i=0; i<16; i++){
#define T(x) (x>>2) | ((x<<2) & 0xF)       /* transpose a 4x4 scan position */
        h->zigzag_scan[i] = T(zigzag_scan[i]);
        h-> field_scan[i] = T( field_scan[i]);
#undef T
    }
    for(i=0; i<64; i++){
#define T(x) (x>>3) | ((x&7)<<3)           /* transpose an 8x8 scan position */
        h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
        h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
        h->field_scan8x8[i]        = T(field_scan8x8[i]);
        h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
#undef T
    }
    if(h->sps.transform_bypass){ //FIXME same ugly
        /* bypass path uses the untransposed reference tables */
        h->zigzag_scan_q0          = zigzag_scan;
        h->zigzag_scan8x8_q0       = ff_zigzag_direct;
        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
        h->field_scan_q0           = field_scan;
        h->field_scan8x8_q0        = field_scan8x8;
        h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
    }else{
        /* no transform bypass: the _q0 tables alias the transposed ones */
        h->zigzag_scan_q0          = h->zigzag_scan;
        h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
        h->field_scan_q0           = h->field_scan;
        h->field_scan8x8_q0        = h->field_scan8x8;
        h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
    }
}

/**
 * Finish decoding the current field (or frame): report decode progress to
 * other threads, run reference picture marking, finish hwaccel/VDPAU
 * rendering and close the MPV frame.
 *
 * @param in_setup nonzero when called early from the setup of the next
 *                 picture (frame threading); skips progress reporting and
 *                 defers nothing twice.
 */
static void field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    s->mb_y= 0;

    if (!in_setup && !s->dropable)
        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                  s->picture_structure==PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        if(!s->dropable) {
            /* apply memory-management control operations and latch POC state */
            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;
}

/**
 * Copy the per-picture decoding state from one slice-thread context to
 * another so a new slice can start decoding the same picture.
 */
static void clone_slice(H264Context *dst, H264Context *src)
{
    memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
    dst->s.current_picture_ptr  = src->s.current_picture_ptr;
    dst->s.current_picture      = src->s.current_picture;
    dst->s.linesize             = src->s.linesize;
    dst->s.uvlinesize           = src->s.uvlinesize;
    dst->s.first_field          = src->s.first_field;

    dst->prev_poc_msb           = src->prev_poc_msb;
    dst->prev_poc_lsb           = src->prev_poc_lsb;
    dst->prev_frame_num_offset  = src->prev_frame_num_offset;
    dst->prev_frame_num         = src->prev_frame_num;
    dst->short_ref_count        = src->short_ref_count;

    memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
    memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
    memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));

    memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
    memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
}

/**
 * Compute the AVCodecContext profile value from the SPS: profile_idc
 * combined with the constrained/intra flag bits where applicable.
 */
int ff_h264_get_profile(SPS *sps)
{
    int profile = sps->profile_idc;

    switch(sps->profile_idc) {
    case FF_PROFILE_H264_BASELINE:
        // constraint_set1_flag set to 1
        profile |= (sps->constraint_set_flags & 1<<1) ?
FF_PROFILE_H264_CONSTRAINED : 0; 02482 break; 02483 case FF_PROFILE_H264_HIGH_10: 02484 case FF_PROFILE_H264_HIGH_422: 02485 case FF_PROFILE_H264_HIGH_444_PREDICTIVE: 02486 // constraint_set3_flag set to 1 02487 profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0; 02488 break; 02489 } 02490 02491 return profile; 02492 } 02493 02503 static int decode_slice_header(H264Context *h, H264Context *h0){ 02504 MpegEncContext * const s = &h->s; 02505 MpegEncContext * const s0 = &h0->s; 02506 unsigned int first_mb_in_slice; 02507 unsigned int pps_id; 02508 int num_ref_idx_active_override_flag; 02509 unsigned int slice_type, tmp, i, j; 02510 int default_ref_list_done = 0; 02511 int last_pic_structure; 02512 02513 s->dropable= h->nal_ref_idc == 0; 02514 02515 /* FIXME: 2tap qpel isn't implemented for high bit depth. */ 02516 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){ 02517 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; 02518 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; 02519 }else{ 02520 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; 02521 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; 02522 } 02523 02524 first_mb_in_slice= get_ue_golomb(&s->gb); 02525 02526 if(first_mb_in_slice == 0){ //FIXME better field boundary detection 02527 if(h0->current_slice && FIELD_PICTURE){ 02528 field_end(h, 1); 02529 } 02530 02531 h0->current_slice = 0; 02532 if (!s0->first_field) 02533 s->current_picture_ptr= NULL; 02534 } 02535 02536 slice_type= get_ue_golomb_31(&s->gb); 02537 if(slice_type > 9){ 02538 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); 02539 return -1; 02540 } 02541 if(slice_type > 4){ 02542 slice_type -= 5; 02543 h->slice_type_fixed=1; 02544 }else 02545 h->slice_type_fixed=0; 02546 02547 slice_type= golomb_to_pict_type[ slice_type ]; 02548 if (slice_type == AV_PICTURE_TYPE_I 02549 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) { 
02550 default_ref_list_done = 1; 02551 } 02552 h->slice_type= slice_type; 02553 h->slice_type_nos= slice_type & 3; 02554 02555 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though 02556 02557 pps_id= get_ue_golomb(&s->gb); 02558 if(pps_id>=MAX_PPS_COUNT){ 02559 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); 02560 return -1; 02561 } 02562 if(!h0->pps_buffers[pps_id]) { 02563 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id); 02564 return -1; 02565 } 02566 h->pps= *h0->pps_buffers[pps_id]; 02567 02568 if(!h0->sps_buffers[h->pps.sps_id]) { 02569 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id); 02570 return -1; 02571 } 02572 h->sps = *h0->sps_buffers[h->pps.sps_id]; 02573 02574 s->avctx->profile = ff_h264_get_profile(&h->sps); 02575 s->avctx->level = h->sps.level_idc; 02576 s->avctx->refs = h->sps.ref_frame_count; 02577 02578 if(h == h0 && h->dequant_coeff_pps != pps_id){ 02579 h->dequant_coeff_pps = pps_id; 02580 init_dequant_tables(h); 02581 } 02582 02583 s->mb_width= h->sps.mb_width; 02584 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); 02585 02586 h->b_stride= s->mb_width*4; 02587 02588 s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1); 02589 if(h->sps.frame_mbs_only_flag) 02590 s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); 02591 else 02592 s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); 02593 02594 if (s->context_initialized 02595 && ( s->width != s->avctx->width || s->height != s->avctx->height 02596 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) { 02597 if(h != h0) { 02598 av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0); 02599 return -1; // width / height changed during parallelized decoding 02600 } 02601 free_tables(h, 0); 02602 flush_dpb(s->avctx); 02603 MPV_common_end(s); 02604 } 
02605 if (!s->context_initialized) { 02606 if (h != h0) { 02607 av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n"); 02608 return -1; 02609 } 02610 02611 avcodec_set_dimensions(s->avctx, s->width, s->height); 02612 s->avctx->sample_aspect_ratio= h->sps.sar; 02613 av_assert0(s->avctx->sample_aspect_ratio.den); 02614 02615 h->s.avctx->coded_width = 16*s->mb_width; 02616 h->s.avctx->coded_height = 16*s->mb_height; 02617 02618 if(h->sps.video_signal_type_present_flag){ 02619 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; 02620 if(h->sps.colour_description_present_flag){ 02621 s->avctx->color_primaries = h->sps.color_primaries; 02622 s->avctx->color_trc = h->sps.color_trc; 02623 s->avctx->colorspace = h->sps.colorspace; 02624 } 02625 } 02626 02627 if(h->sps.timing_info_present_flag){ 02628 int64_t den= h->sps.time_scale; 02629 if(h->x264_build < 44U) 02630 den *= 2; 02631 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, 02632 h->sps.num_units_in_tick, den, 1<<30); 02633 } 02634 02635 switch (h->sps.bit_depth_luma) { 02636 case 9 : 02637 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9; 02638 break; 02639 case 10 : 02640 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10; 02641 break; 02642 default: 02643 if (CHROMA444){ 02644 s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P; 02645 }else{ 02646 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, 02647 s->avctx->codec->pix_fmts ? 02648 s->avctx->codec->pix_fmts : 02649 s->avctx->color_range == AVCOL_RANGE_JPEG ? 
02650 hwaccel_pixfmt_list_h264_jpeg_420 : 02651 ff_hwaccel_pixfmt_list_420); 02652 } 02653 } 02654 02655 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt); 02656 02657 if (MPV_common_init(s) < 0) { 02658 av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n"); 02659 return -1; 02660 } 02661 s->first_field = 0; 02662 h->prev_interlaced_frame = 1; 02663 02664 init_scan_tables(h); 02665 if (ff_h264_alloc_tables(h) < 0) { 02666 av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n"); 02667 return AVERROR(ENOMEM); 02668 } 02669 02670 if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) { 02671 if (context_init(h) < 0) { 02672 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n"); 02673 return -1; 02674 } 02675 } else { 02676 for(i = 1; i < s->avctx->thread_count; i++) { 02677 H264Context *c; 02678 c = h->thread_context[i] = av_malloc(sizeof(H264Context)); 02679 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); 02680 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); 02681 c->h264dsp = h->h264dsp; 02682 c->sps = h->sps; 02683 c->pps = h->pps; 02684 c->pixel_shift = h->pixel_shift; 02685 init_scan_tables(c); 02686 clone_tables(c, h, i); 02687 } 02688 02689 for(i = 0; i < s->avctx->thread_count; i++) 02690 if (context_init(h->thread_context[i]) < 0) { 02691 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n"); 02692 return -1; 02693 } 02694 } 02695 } 02696 02697 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); 02698 02699 h->mb_mbaff = 0; 02700 h->mb_aff_frame = 0; 02701 last_pic_structure = s0->picture_structure; 02702 if(h->sps.frame_mbs_only_flag){ 02703 s->picture_structure= PICT_FRAME; 02704 }else{ 02705 if(get_bits1(&s->gb)) { //field_pic_flag 02706 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag 02707 } else { 02708 s->picture_structure= PICT_FRAME; 02709 h->mb_aff_frame = h->sps.mb_aff; 02710 } 02711 } 02712 
h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; 02713 02714 if(h0->current_slice == 0){ 02715 // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away 02716 if(h->frame_num != h->prev_frame_num) { 02717 int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num; 02718 02719 if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num; 02720 02721 if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) { 02722 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1; 02723 if (unwrap_prev_frame_num < 0) 02724 unwrap_prev_frame_num += max_frame_num; 02725 02726 h->prev_frame_num = unwrap_prev_frame_num; 02727 } 02728 } 02729 02730 while(h->frame_num != h->prev_frame_num && 02731 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){ 02732 Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL; 02733 av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num); 02734 if (ff_h264_frame_start(h) < 0) 02735 return -1; 02736 h->prev_frame_num++; 02737 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num; 02738 s->current_picture_ptr->frame_num= h->prev_frame_num; 02739 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0); 02740 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1); 02741 ff_generate_sliding_window_mmcos(h); 02742 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); 02743 /* Error concealment: if a ref is missing, copy the previous ref in its place. 02744 * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions 02745 * about there being no actual duplicates. 02746 * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're 02747 * concealing a lost frame, this probably isn't noticable by comparison, but it should 02748 * be fixed. 
*/ 02749 if (h->short_ref_count) { 02750 if (prev) { 02751 av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize, 02752 (const uint8_t**)prev->data, prev->linesize, 02753 s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16); 02754 h->short_ref[0]->poc = prev->poc+2; 02755 } 02756 h->short_ref[0]->frame_num = h->prev_frame_num; 02757 } 02758 } 02759 02760 /* See if we have a decoded first field looking for a pair... */ 02761 if (s0->first_field) { 02762 assert(s0->current_picture_ptr); 02763 assert(s0->current_picture_ptr->data[0]); 02764 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF); 02765 02766 /* figure out if we have a complementary field pair */ 02767 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { 02768 /* 02769 * Previous field is unmatched. Don't display it, but let it 02770 * remain for reference if marked as such. 02771 */ 02772 s0->current_picture_ptr = NULL; 02773 s0->first_field = FIELD_PICTURE; 02774 02775 } else { 02776 if (h->nal_ref_idc && 02777 s0->current_picture_ptr->reference && 02778 s0->current_picture_ptr->frame_num != h->frame_num) { 02779 /* 02780 * This and previous field were reference, but had 02781 * different frame_nums. Consider this field first in 02782 * pair. Throw away previous field except for reference 02783 * purposes. 
02784 */ 02785 s0->first_field = 1; 02786 s0->current_picture_ptr = NULL; 02787 02788 } else { 02789 /* Second field in complementary pair */ 02790 s0->first_field = 0; 02791 } 02792 } 02793 02794 } else { 02795 /* Frame or first field in a potentially complementary pair */ 02796 assert(!s0->current_picture_ptr); 02797 s0->first_field = FIELD_PICTURE; 02798 } 02799 02800 if(!FIELD_PICTURE || s0->first_field) { 02801 if (ff_h264_frame_start(h) < 0) { 02802 s0->first_field = 0; 02803 return -1; 02804 } 02805 } else { 02806 ff_release_unused_pictures(s, 0); 02807 } 02808 } 02809 if(h != h0) 02810 clone_slice(h, h0); 02811 02812 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup 02813 02814 assert(s->mb_num == s->mb_width * s->mb_height); 02815 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num || 02816 first_mb_in_slice >= s->mb_num){ 02817 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); 02818 return -1; 02819 } 02820 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; 02821 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE; 02822 if (s->picture_structure == PICT_BOTTOM_FIELD) 02823 s->resync_mb_y = s->mb_y = s->mb_y + 1; 02824 assert(s->mb_y < s->mb_height); 02825 02826 if(s->picture_structure==PICT_FRAME){ 02827 h->curr_pic_num= h->frame_num; 02828 h->max_pic_num= 1<< h->sps.log2_max_frame_num; 02829 }else{ 02830 h->curr_pic_num= 2*h->frame_num + 1; 02831 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1); 02832 } 02833 02834 if(h->nal_unit_type == NAL_IDR_SLICE){ 02835 get_ue_golomb(&s->gb); /* idr_pic_id */ 02836 } 02837 02838 if(h->sps.poc_type==0){ 02839 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb); 02840 02841 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){ 02842 h->delta_poc_bottom= get_se_golomb(&s->gb); 02843 } 02844 } 02845 02846 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){ 02847 h->delta_poc[0]= 
get_se_golomb(&s->gb); 02848 02849 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME) 02850 h->delta_poc[1]= get_se_golomb(&s->gb); 02851 } 02852 02853 init_poc(h); 02854 02855 if(h->pps.redundant_pic_cnt_present){ 02856 h->redundant_pic_count= get_ue_golomb(&s->gb); 02857 } 02858 02859 //set defaults, might be overridden a few lines later 02860 h->ref_count[0]= h->pps.ref_count[0]; 02861 h->ref_count[1]= h->pps.ref_count[1]; 02862 02863 if(h->slice_type_nos != AV_PICTURE_TYPE_I){ 02864 if(h->slice_type_nos == AV_PICTURE_TYPE_B){ 02865 h->direct_spatial_mv_pred= get_bits1(&s->gb); 02866 } 02867 num_ref_idx_active_override_flag= get_bits1(&s->gb); 02868 02869 if(num_ref_idx_active_override_flag){ 02870 h->ref_count[0]= get_ue_golomb(&s->gb) + 1; 02871 if(h->slice_type_nos==AV_PICTURE_TYPE_B) 02872 h->ref_count[1]= get_ue_golomb(&s->gb) + 1; 02873 02874 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){ 02875 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); 02876 h->ref_count[0]= h->ref_count[1]= 1; 02877 return -1; 02878 } 02879 } 02880 if(h->slice_type_nos == AV_PICTURE_TYPE_B) 02881 h->list_count= 2; 02882 else 02883 h->list_count= 1; 02884 }else 02885 h->list_count= 0; 02886 02887 if(!default_ref_list_done){ 02888 ff_h264_fill_default_ref_list(h); 02889 } 02890 02891 if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) 02892 return -1; 02893 02894 if(h->slice_type_nos!=AV_PICTURE_TYPE_I){ 02895 s->last_picture_ptr= &h->ref_list[0][0]; 02896 ff_copy_picture(&s->last_picture, s->last_picture_ptr); 02897 } 02898 if(h->slice_type_nos==AV_PICTURE_TYPE_B){ 02899 s->next_picture_ptr= &h->ref_list[1][0]; 02900 ff_copy_picture(&s->next_picture, s->next_picture_ptr); 02901 } 02902 02903 if( (h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P ) 02904 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) ) 02905 pred_weight_table(h); 02906 else 
if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){ 02907 implicit_weight_table(h, -1); 02908 }else { 02909 h->use_weight = 0; 02910 for (i = 0; i < 2; i++) { 02911 h->luma_weight_flag[i] = 0; 02912 h->chroma_weight_flag[i] = 0; 02913 } 02914 } 02915 02916 if(h->nal_ref_idc) 02917 ff_h264_decode_ref_pic_marking(h0, &s->gb); 02918 02919 if(FRAME_MBAFF){ 02920 ff_h264_fill_mbaff_ref_list(h); 02921 02922 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){ 02923 implicit_weight_table(h, 0); 02924 implicit_weight_table(h, 1); 02925 } 02926 } 02927 02928 if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred) 02929 ff_h264_direct_dist_scale_factor(h); 02930 ff_h264_direct_ref_list_init(h); 02931 02932 if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){ 02933 tmp = get_ue_golomb_31(&s->gb); 02934 if(tmp > 2){ 02935 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); 02936 return -1; 02937 } 02938 h->cabac_init_idc= tmp; 02939 } 02940 02941 h->last_qscale_diff = 0; 02942 tmp = h->pps.init_qp + get_se_golomb(&s->gb); 02943 if(tmp>51+6*(h->sps.bit_depth_luma-8)){ 02944 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); 02945 return -1; 02946 } 02947 s->qscale= tmp; 02948 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); 02949 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); 02950 //FIXME qscale / qp ... 
stuff 02951 if(h->slice_type == AV_PICTURE_TYPE_SP){ 02952 get_bits1(&s->gb); /* sp_for_switch_flag */ 02953 } 02954 if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){ 02955 get_se_golomb(&s->gb); /* slice_qs_delta */ 02956 } 02957 02958 h->deblocking_filter = 1; 02959 h->slice_alpha_c0_offset = 52; 02960 h->slice_beta_offset = 52; 02961 if( h->pps.deblocking_filter_parameters_present ) { 02962 tmp= get_ue_golomb_31(&s->gb); 02963 if(tmp > 2){ 02964 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp); 02965 return -1; 02966 } 02967 h->deblocking_filter= tmp; 02968 if(h->deblocking_filter < 2) 02969 h->deblocking_filter^= 1; // 1<->0 02970 02971 if( h->deblocking_filter ) { 02972 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1; 02973 h->slice_beta_offset += get_se_golomb(&s->gb) << 1; 02974 if( h->slice_alpha_c0_offset > 104U 02975 || h->slice_beta_offset > 104U){ 02976 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset); 02977 return -1; 02978 } 02979 } 02980 } 02981 02982 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL 02983 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I) 02984 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == AV_PICTURE_TYPE_B) 02985 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 02986 h->deblocking_filter= 0; 02987 02988 if(h->deblocking_filter == 1 && h0->max_contexts > 1) { 02989 if(s->avctx->flags2 & CODEC_FLAG2_FAST) { 02990 /* Cheat slightly for speed: 02991 Do not bother to deblock across slices. 
*/ 02992 h->deblocking_filter = 2; 02993 } else { 02994 h0->max_contexts = 1; 02995 if(!h0->single_decode_warning) { 02996 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); 02997 h0->single_decode_warning = 1; 02998 } 02999 if (h != h0) { 03000 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n"); 03001 return 1; 03002 } 03003 } 03004 } 03005 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]); 03006 03007 #if 0 //FMO 03008 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) 03009 slice_group_change_cycle= get_bits(&s->gb, ?); 03010 #endif 03011 03012 h0->last_slice_type = slice_type; 03013 h->slice_num = ++h0->current_slice; 03014 if(h->slice_num >= MAX_SLICES){ 03015 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n"); 03016 } 03017 03018 for(j=0; j<2; j++){ 03019 int id_list[16]; 03020 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j]; 03021 for(i=0; i<16; i++){ 03022 id_list[i]= 60; 03023 if(h->ref_list[j][i].data[0]){ 03024 int k; 03025 uint8_t *base= h->ref_list[j][i].base[0]; 03026 for(k=0; k<h->short_ref_count; k++) 03027 if(h->short_ref[k]->base[0] == base){ 03028 id_list[i]= k; 03029 break; 03030 } 03031 for(k=0; k<h->long_ref_count; k++) 03032 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){ 03033 id_list[i]= h->short_ref_count + k; 03034 break; 03035 } 03036 } 03037 } 03038 03039 ref2frm[0]= 03040 ref2frm[1]= -1; 03041 for(i=0; i<16; i++) 03042 ref2frm[i+2]= 4*id_list[i] 03043 +(h->ref_list[j][i].reference&3); 03044 ref2frm[18+0]= 03045 ref2frm[18+1]= -1; 03046 for(i=16; i<48; i++) 03047 ref2frm[i+4]= 4*id_list[(i-16)>>1] 03048 +(h->ref_list[j][i].reference&3); 03049 } 03050 03051 //FIXME: fix draw_edges+PAFF+frame threads 03052 h->emu_edge_width= 
(s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16; 03053 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; 03054 03055 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 03056 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", 03057 h->slice_num, 03058 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), 03059 first_mb_in_slice, 03060 av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", 03061 pps_id, h->frame_num, 03062 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], 03063 h->ref_count[0], h->ref_count[1], 03064 s->qscale, 03065 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26, 03066 h->use_weight, 03067 h->use_weight==1 && h->use_weight_chroma ? "c" : "", 03068 h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "" 03069 ); 03070 } 03071 03072 return 0; 03073 } 03074 03075 int ff_h264_get_slice_type(const H264Context *h) 03076 { 03077 switch (h->slice_type) { 03078 case AV_PICTURE_TYPE_P: return 0; 03079 case AV_PICTURE_TYPE_B: return 1; 03080 case AV_PICTURE_TYPE_I: return 2; 03081 case AV_PICTURE_TYPE_SP: return 3; 03082 case AV_PICTURE_TYPE_SI: return 4; 03083 default: return -1; 03084 } 03085 } 03086 03091 static int fill_filter_caches(H264Context *h, int mb_type){ 03092 MpegEncContext * const s = &h->s; 03093 const int mb_xy= h->mb_xy; 03094 int top_xy, left_xy[2]; 03095 int top_type, left_type[2]; 03096 03097 top_xy = mb_xy - (s->mb_stride << MB_FIELD); 03098 03099 //FIXME deblocking could skip the intra and nnz parts. 03100 03101 /* Wow, what a mess, why didn't they simplify the interlacing & intra 03102 * stuff, I can't imagine that these complex rules are worth it. 
     */

    left_xy[1] = left_xy[0] = mb_xy-1;
    if(FRAME_MBAFF){
        /* In MBAFF frames the usable left/top neighbour addresses depend on
         * whether the current and left MB pairs are field- or frame-coded. */
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if(s->mb_y&1){
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[0] -= s->mb_stride;
            }
        }else{
            if(curr_mb_field_flag){
                /* branch-free: subtract a stride from top_xy only when the
                 * top MB pair is frame-coded (mb_type bit 7 clear) */
                top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy]>>7)&1)-1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    {
        //for sufficiently low qp, filtering wouldn't do anything
        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
           && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
            if(!FRAME_MBAFF)
                return 1;
            /* NOTE(review): this tests left_xy[0] for sign but indexes
             * left_xy[1] — matches upstream; verify intent against libav git. */
            if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
                && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
                return 1;
        }
    }

    /* Fetch neighbour MB types; zero them out when the neighbour must not be
     * filtered against (other slice for deblocking type 2, or unallocated). */
    top_type     = s->current_picture.mb_type[top_xy] ;
    left_type[0] = s->current_picture.mb_type[left_xy[0]];
    left_type[1] = s->current_picture.mb_type[left_xy[1]];
    if(h->deblocking_filter == 2){
        if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
    }else{
        if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
    }
    h->top_type = top_type ;
    h->left_type[0]= left_type[0];
    h->left_type[1]= left_type[1];

    if(IS_INTRA(mb_type))
        return 0;

    /* Copy this MB's non-zero-count values into the 8x5 cache layout. */
    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);

    h->cbp= h->cbp_table[mb_xy];

    {
        int list;
        for(list=0; list<h->list_count; list++){
            int8_t *ref;
            int y, b_stride;
            int16_t (*mv_dst)[2];
            int16_t (*mv_src)[2];

            if(!USES_LIST(mb_type, list)){
                /* List unused by this MB: clear MV cache, mark refs unused. */
                fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                continue;
            }

            /* Translate per-8x8-block reference indices through ref2frm so
             * that indices from different slices are comparable. */
            ref = &s->current_picture.ref_index[list][4*mb_xy];
            {
                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                ref += 2;
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
            }

            /* Copy the 4x4 grid of motion vectors for this MB. */
            b_stride = h->b_stride;
            mv_dst = &h->mv_cache[list][scan8[0]];
            mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
            for(y=0; y<4; y++){
                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
            }

        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
    //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    if(top_type){
        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
    }

    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
    if(!CABAC && h->pps.transform_8x8_mode){
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
        }

        if(IS_8x8DCT(mb_type)){
            h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
            h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
        }
    }

    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            /* Fill the row above scan8[0] from the top neighbour. */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= 4*top_xy + 2;
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
            }else{
                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
            }

            /* Fill the column left of scan8[0] from the left neighbour, but
             * only when both MBs share the same field/frame coding mode. */
            if(!IS_INTERLACED(mb_type^left_type[0])){
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= 4*left_xy[0] + 1;
                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                    h->ref_cache[list][scan8[0] - 1 +16 ]=
                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                }else{
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]=
                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                }
            }
        }
    }

    return 0;
}

/*
 * Apply the in-loop deblocking filter to the MB columns [start_x, end_x)
 * of the current MB row (both rows of an MB pair for MBAFF frames).
 * Restores slice_type, mb_x/mb_y and the chroma QPs afterwards.
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;
    const int pixel_shift = h->pixel_shift;

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                /* each MB may belong to a different slice; reload per-MB state */
                h->slice_num= h->slice_table[mb_xy];
                mb_type= s->current_picture.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                //FIXME simplify above

                if (MB_FIELD) {
                    linesize = h->mb_linesize = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){ //FIXME move out of this function?
1 : 0; 03371 } 03372 03376 static void decode_finish_row(H264Context *h){ 03377 MpegEncContext * const s = &h->s; 03378 int top = 16*(s->mb_y >> FIELD_PICTURE); 03379 int height = 16 << FRAME_MBAFF; 03380 int deblock_border = (16 + 4) << FRAME_MBAFF; 03381 int pic_height = 16*s->mb_height >> FIELD_PICTURE; 03382 03383 if (h->deblocking_filter) { 03384 if((top + height) >= pic_height) 03385 height += deblock_border; 03386 03387 top -= deblock_border; 03388 } 03389 03390 if (top >= pic_height || (top + height) < h->emu_edge_height) 03391 return; 03392 03393 height = FFMIN(height, pic_height - top); 03394 if (top < h->emu_edge_height) { 03395 height = top+height; 03396 top = 0; 03397 } 03398 03399 ff_draw_horiz_band(s, top, height); 03400 03401 if (s->dropable) return; 03402 03403 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1, 03404 s->picture_structure==PICT_BOTTOM_FIELD); 03405 } 03406 03407 static int decode_slice(struct AVCodecContext *avctx, void *arg){ 03408 H264Context *h = *(void**)arg; 03409 MpegEncContext * const s = &h->s; 03410 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; 03411 int lf_x_start = s->mb_x; 03412 03413 s->mb_skip_run= -1; 03414 03415 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 || 03416 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); 03417 03418 if( h->pps.cabac ) { 03419 /* realign */ 03420 align_get_bits( &s->gb ); 03421 03422 /* init cabac */ 03423 ff_init_cabac_states( &h->cabac); 03424 ff_init_cabac_decoder( &h->cabac, 03425 s->gb.buffer + get_bits_count(&s->gb)/8, 03426 (get_bits_left(&s->gb) + 7)/8); 03427 03428 ff_h264_init_cabac_states(h); 03429 03430 for(;;){ 03431 //START_TIMER 03432 int ret = ff_h264_decode_mb_cabac(h); 03433 int eos; 03434 //STOP_TIMER("decode_mb_cabac") 03435 03436 if(ret>=0) ff_h264_hl_decode_mb(h); 03437 03438 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? 
03439 s->mb_y++; 03440 03441 ret = ff_h264_decode_mb_cabac(h); 03442 03443 if(ret>=0) ff_h264_hl_decode_mb(h); 03444 s->mb_y--; 03445 } 03446 eos = get_cabac_terminate( &h->cabac ); 03447 03448 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){ 03449 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03450 if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1); 03451 return 0; 03452 } 03453 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { 03454 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); 03455 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03456 return -1; 03457 } 03458 03459 if( ++s->mb_x >= s->mb_width ) { 03460 loop_filter(h, lf_x_start, s->mb_x); 03461 s->mb_x = lf_x_start = 0; 03462 decode_finish_row(h); 03463 ++s->mb_y; 03464 if(FIELD_OR_MBAFF_PICTURE) { 03465 ++s->mb_y; 03466 if(FRAME_MBAFF && s->mb_y < s->mb_height) 03467 predict_field_decoding_flag(h); 03468 } 03469 } 03470 03471 if( eos || s->mb_y >= s->mb_height ) { 03472 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 03473 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03474 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x); 03475 return 0; 03476 } 03477 } 03478 03479 } else { 03480 for(;;){ 03481 int ret = ff_h264_decode_mb_cavlc(h); 03482 03483 if(ret>=0) ff_h264_hl_decode_mb(h); 03484 03485 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? 
03486 s->mb_y++; 03487 ret = ff_h264_decode_mb_cavlc(h); 03488 03489 if(ret>=0) ff_h264_hl_decode_mb(h); 03490 s->mb_y--; 03491 } 03492 03493 if(ret<0){ 03494 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 03495 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03496 return -1; 03497 } 03498 03499 if(++s->mb_x >= s->mb_width){ 03500 loop_filter(h, lf_x_start, s->mb_x); 03501 s->mb_x = lf_x_start = 0; 03502 decode_finish_row(h); 03503 ++s->mb_y; 03504 if(FIELD_OR_MBAFF_PICTURE) { 03505 ++s->mb_y; 03506 if(FRAME_MBAFF && s->mb_y < s->mb_height) 03507 predict_field_decoding_flag(h); 03508 } 03509 if(s->mb_y >= s->mb_height){ 03510 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 03511 03512 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { 03513 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03514 03515 return 0; 03516 }else{ 03517 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03518 03519 return -1; 03520 } 03521 } 03522 } 03523 03524 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ 03525 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 03526 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ 03527 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03528 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x); 03529 03530 return 0; 03531 }else{ 03532 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03533 03534 return -1; 03535 } 03536 } 03537 } 03538 } 03539 03540 #if 0 03541 for(;s->mb_y < s->mb_height; s->mb_y++){ 03542 for(;s->mb_x < s->mb_width; s->mb_x++){ 03543 int ret= decode_mb(h); 03544 03545 ff_h264_hl_decode_mb(h); 03546 03547 
if(ret<0){ 03548 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 03549 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03550 03551 return -1; 03552 } 03553 03554 if(++s->mb_x >= s->mb_width){ 03555 s->mb_x=0; 03556 if(++s->mb_y >= s->mb_height){ 03557 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 03558 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03559 03560 return 0; 03561 }else{ 03562 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03563 03564 return -1; 03565 } 03566 } 03567 } 03568 03569 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ 03570 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 03571 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03572 03573 return 0; 03574 }else{ 03575 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03576 03577 return -1; 03578 } 03579 } 03580 } 03581 s->mb_x=0; 03582 ff_draw_horiz_band(s, 16*s->mb_y, 16); 03583 } 03584 #endif 03585 return -1; //not reached 03586 } 03587 03594 static void execute_decode_slices(H264Context *h, int context_count){ 03595 MpegEncContext * const s = &h->s; 03596 AVCodecContext * const avctx= s->avctx; 03597 H264Context *hx; 03598 int i; 03599 03600 if (s->avctx->hwaccel) 03601 return; 03602 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 03603 return; 03604 if(context_count == 1) { 03605 decode_slice(avctx, &h); 03606 } else { 03607 for(i = 1; i < context_count; i++) { 03608 hx = h->thread_context[i]; 03609 hx->s.error_recognition = avctx->error_recognition; 03610 hx->s.error_count = 0; 03611 } 03612 03613 avctx->execute(avctx, (void *)decode_slice, 03614 h->thread_context, NULL, context_count, sizeof(void*)); 03615 03616 /* pull back stuff from slices to 
master context */ 03617 hx = h->thread_context[context_count - 1]; 03618 s->mb_x = hx->s.mb_x; 03619 s->mb_y = hx->s.mb_y; 03620 s->dropable = hx->s.dropable; 03621 s->picture_structure = hx->s.picture_structure; 03622 for(i = 1; i < context_count; i++) 03623 h->s.error_count += h->thread_context[i]->s.error_count; 03624 } 03625 } 03626 03627 03628 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ 03629 MpegEncContext * const s = &h->s; 03630 AVCodecContext * const avctx= s->avctx; 03631 H264Context *hx; 03632 int buf_index; 03633 int context_count; 03634 int next_avc; 03635 int pass = !(avctx->active_thread_type & FF_THREAD_FRAME); 03636 int nals_needed=0; 03637 int nal_index; 03638 03639 h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1; 03640 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ 03641 h->current_slice = 0; 03642 if (!s->first_field) 03643 s->current_picture_ptr= NULL; 03644 ff_h264_reset_sei(h); 03645 } 03646 03647 for(;pass <= 1;pass++){ 03648 buf_index = 0; 03649 context_count = 0; 03650 next_avc = h->is_avc ? 0 : buf_size; 03651 nal_index = 0; 03652 for(;;){ 03653 int consumed; 03654 int dst_length; 03655 int bit_length; 03656 const uint8_t *ptr; 03657 int i, nalsize = 0; 03658 int err; 03659 03660 if(buf_index >= next_avc) { 03661 if(buf_index >= buf_size) break; 03662 nalsize = 0; 03663 for(i = 0; i < h->nal_length_size; i++) 03664 nalsize = (nalsize << 8) | buf[buf_index++]; 03665 if(nalsize <= 0 || nalsize > buf_size - buf_index){ 03666 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); 03667 break; 03668 } 03669 next_avc= buf_index + nalsize; 03670 } else { 03671 // start code prefix search 03672 for(; buf_index + 3 < next_avc; buf_index++){ 03673 // This should always succeed in the first iteration. 
03674 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1) 03675 break; 03676 } 03677 03678 if(buf_index+3 >= buf_size) break; 03679 03680 buf_index+=3; 03681 if(buf_index >= next_avc) continue; 03682 } 03683 03684 hx = h->thread_context[context_count]; 03685 03686 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index); 03687 if (ptr==NULL || dst_length < 0){ 03688 return -1; 03689 } 03690 i= buf_index + consumed; 03691 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc && 03692 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0) 03693 s->workaround_bugs |= FF_BUG_TRUNCATED; 03694 03695 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){ 03696 while(ptr[dst_length - 1] == 0 && dst_length > 0) 03697 dst_length--; 03698 } 03699 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1)); 03700 03701 if(s->avctx->debug&FF_DEBUG_STARTCODE){ 03702 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length); 03703 } 03704 03705 if (h->is_avc && (nalsize != consumed) && nalsize){ 03706 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); 03707 } 03708 03709 buf_index += consumed; 03710 nal_index++; 03711 03712 if(pass == 0) { 03713 // packets can sometimes contain multiple PPS/SPS 03714 // e.g. 
two PAFF field pictures in one packet, or a demuxer which splits NALs strangely 03715 // if so, when frame threading we can't start the next thread until we've read all of them 03716 switch (hx->nal_unit_type) { 03717 case NAL_SPS: 03718 case NAL_PPS: 03719 case NAL_IDR_SLICE: 03720 case NAL_SLICE: 03721 nals_needed = nal_index; 03722 } 03723 continue; 03724 } 03725 03726 //FIXME do not discard SEI id 03727 if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0) 03728 continue; 03729 03730 again: 03731 err = 0; 03732 switch(hx->nal_unit_type){ 03733 case NAL_IDR_SLICE: 03734 if (h->nal_unit_type != NAL_IDR_SLICE) { 03735 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices"); 03736 return -1; 03737 } 03738 idr(h); //FIXME ensure we don't loose some frames if there is reordering 03739 case NAL_SLICE: 03740 init_get_bits(&hx->s.gb, ptr, bit_length); 03741 hx->intra_gb_ptr= 03742 hx->inter_gb_ptr= &hx->s.gb; 03743 hx->s.data_partitioning = 0; 03744 03745 if((err = decode_slice_header(hx, h))) 03746 break; 03747 03748 s->current_picture_ptr->key_frame |= 03749 (hx->nal_unit_type == NAL_IDR_SLICE) || 03750 (h->sei_recovery_frame_cnt >= 0); 03751 03752 if (h->current_slice == 1) { 03753 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) { 03754 decode_postinit(h, nal_index >= nals_needed); 03755 } 03756 03757 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) 03758 return -1; 03759 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 03760 ff_vdpau_h264_picture_start(s); 03761 } 03762 03763 if(hx->redundant_pic_count==0 03764 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 03765 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B) 03766 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I) 03767 && avctx->skip_frame < AVDISCARD_ALL){ 03768 if(avctx->hwaccel) { 03769 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - 
consumed], consumed) < 0) 03770 return -1; 03771 }else 03772 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){ 03773 static const uint8_t start_code[] = {0x00, 0x00, 0x01}; 03774 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code)); 03775 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed ); 03776 }else 03777 context_count++; 03778 } 03779 break; 03780 case NAL_DPA: 03781 init_get_bits(&hx->s.gb, ptr, bit_length); 03782 hx->intra_gb_ptr= 03783 hx->inter_gb_ptr= NULL; 03784 03785 if ((err = decode_slice_header(hx, h)) < 0) 03786 break; 03787 03788 hx->s.data_partitioning = 1; 03789 03790 break; 03791 case NAL_DPB: 03792 init_get_bits(&hx->intra_gb, ptr, bit_length); 03793 hx->intra_gb_ptr= &hx->intra_gb; 03794 break; 03795 case NAL_DPC: 03796 init_get_bits(&hx->inter_gb, ptr, bit_length); 03797 hx->inter_gb_ptr= &hx->inter_gb; 03798 03799 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning 03800 && s->context_initialized 03801 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 03802 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B) 03803 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I) 03804 && avctx->skip_frame < AVDISCARD_ALL) 03805 context_count++; 03806 break; 03807 case NAL_SEI: 03808 init_get_bits(&s->gb, ptr, bit_length); 03809 ff_h264_decode_sei(h); 03810 break; 03811 case NAL_SPS: 03812 init_get_bits(&s->gb, ptr, bit_length); 03813 ff_h264_decode_seq_parameter_set(h); 03814 03815 if (s->flags& CODEC_FLAG_LOW_DELAY || 03816 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames)) 03817 s->low_delay=1; 03818 03819 if(avctx->has_b_frames < 2) 03820 avctx->has_b_frames= !s->low_delay; 03821 03822 if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) { 03823 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { 03824 avctx->bits_per_raw_sample = h->sps.bit_depth_luma; 03825 
h->pixel_shift = h->sps.bit_depth_luma > 8; 03826 03827 ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); 03828 ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); 03829 dsputil_init(&s->dsp, s->avctx); 03830 } else { 03831 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); 03832 return -1; 03833 } 03834 } 03835 break; 03836 case NAL_PPS: 03837 init_get_bits(&s->gb, ptr, bit_length); 03838 03839 ff_h264_decode_picture_parameter_set(h, bit_length); 03840 03841 break; 03842 case NAL_AUD: 03843 case NAL_END_SEQUENCE: 03844 case NAL_END_STREAM: 03845 case NAL_FILLER_DATA: 03846 case NAL_SPS_EXT: 03847 case NAL_AUXILIARY_SLICE: 03848 break; 03849 default: 03850 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length); 03851 } 03852 03853 if(context_count == h->max_contexts) { 03854 execute_decode_slices(h, context_count); 03855 context_count = 0; 03856 } 03857 03858 if (err < 0) 03859 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); 03860 else if(err == 1) { 03861 /* Slice could not be decoded in parallel mode, copy down 03862 * NAL unit stuff to context 0 and restart. Note that 03863 * rbsp_buffer is not transferred, but since we no longer 03864 * run in parallel mode this should not be an issue. */ 03865 h->nal_unit_type = hx->nal_unit_type; 03866 h->nal_ref_idc = hx->nal_ref_idc; 03867 hx = h; 03868 goto again; 03869 } 03870 } 03871 } 03872 if(context_count) 03873 execute_decode_slices(h, context_count); 03874 return buf_index; 03875 } 03876 03880 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){ 03881 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...) 
03882 if(pos+10>buf_size) pos=buf_size; // oops ;) 03883 03884 return pos; 03885 } 03886 03887 static int decode_frame(AVCodecContext *avctx, 03888 void *data, int *data_size, 03889 AVPacket *avpkt) 03890 { 03891 const uint8_t *buf = avpkt->data; 03892 int buf_size = avpkt->size; 03893 H264Context *h = avctx->priv_data; 03894 MpegEncContext *s = &h->s; 03895 AVFrame *pict = data; 03896 int buf_index; 03897 03898 s->flags= avctx->flags; 03899 s->flags2= avctx->flags2; 03900 03901 /* end of stream, output what is still in the buffers */ 03902 out: 03903 if (buf_size == 0) { 03904 Picture *out; 03905 int i, out_idx; 03906 03907 s->current_picture_ptr = NULL; 03908 03909 //FIXME factorize this with the output code below 03910 out = h->delayed_pic[0]; 03911 out_idx = 0; 03912 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) 03913 if(h->delayed_pic[i]->poc < out->poc){ 03914 out = h->delayed_pic[i]; 03915 out_idx = i; 03916 } 03917 03918 for(i=out_idx; h->delayed_pic[i]; i++) 03919 h->delayed_pic[i] = h->delayed_pic[i+1]; 03920 03921 if(out){ 03922 *data_size = sizeof(AVFrame); 03923 *pict= *(AVFrame*)out; 03924 } 03925 03926 return 0; 03927 } 03928 03929 buf_index=decode_nal_units(h, buf, buf_size); 03930 if(buf_index < 0) 03931 return -1; 03932 03933 if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) { 03934 buf_size = 0; 03935 goto out; 03936 } 03937 03938 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){ 03939 if (avctx->skip_frame >= AVDISCARD_NONREF) 03940 return 0; 03941 av_log(avctx, AV_LOG_ERROR, "no frame!\n"); 03942 return -1; 03943 } 03944 03945 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ 03946 03947 if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1); 03948 03949 field_end(h, 0); 03950 03951 if (!h->next_output_pic) { 03952 /* Wait for second field. 
*/ 03953 *data_size = 0; 03954 03955 } else { 03956 *data_size = sizeof(AVFrame); 03957 *pict = *(AVFrame*)h->next_output_pic; 03958 } 03959 } 03960 03961 assert(pict->data[0] || !*data_size); 03962 ff_print_debug_info(s, pict); 03963 //printf("out %d\n", (int)pict->data[0]); 03964 03965 return get_consumed_bytes(s, buf_index, buf_size); 03966 } 03967 #if 0 03968 static inline void fill_mb_avail(H264Context *h){ 03969 MpegEncContext * const s = &h->s; 03970 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; 03971 03972 if(s->mb_y){ 03973 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num; 03974 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num; 03975 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num; 03976 }else{ 03977 h->mb_avail[0]= 03978 h->mb_avail[1]= 03979 h->mb_avail[2]= 0; 03980 } 03981 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num; 03982 h->mb_avail[4]= 1; //FIXME move out 03983 h->mb_avail[5]= 0; //FIXME move out 03984 } 03985 #endif 03986 03987 #ifdef TEST 03988 #undef printf 03989 #undef random 03990 #define COUNT 8000 03991 #define SIZE (COUNT*40) 03992 int main(void){ 03993 int i; 03994 uint8_t temp[SIZE]; 03995 PutBitContext pb; 03996 GetBitContext gb; 03997 // int int_temp[10000]; 03998 DSPContext dsp; 03999 AVCodecContext avctx; 04000 04001 dsputil_init(&dsp, &avctx); 04002 04003 init_put_bits(&pb, temp, SIZE); 04004 printf("testing unsigned exp golomb\n"); 04005 for(i=0; i<COUNT; i++){ 04006 START_TIMER 04007 set_ue_golomb(&pb, i); 04008 STOP_TIMER("set_ue_golomb"); 04009 } 04010 flush_put_bits(&pb); 04011 04012 init_get_bits(&gb, temp, 8*SIZE); 04013 for(i=0; i<COUNT; i++){ 04014 int j, s; 04015 04016 s= show_bits(&gb, 24); 04017 04018 START_TIMER 04019 j= get_ue_golomb(&gb); 04020 if(j != i){ 04021 printf("mismatch! 
at %d (%d should be %d) bits:%6X\n", i, j, i, s); 04022 // return -1; 04023 } 04024 STOP_TIMER("get_ue_golomb"); 04025 } 04026 04027 04028 init_put_bits(&pb, temp, SIZE); 04029 printf("testing signed exp golomb\n"); 04030 for(i=0; i<COUNT; i++){ 04031 START_TIMER 04032 set_se_golomb(&pb, i - COUNT/2); 04033 STOP_TIMER("set_se_golomb"); 04034 } 04035 flush_put_bits(&pb); 04036 04037 init_get_bits(&gb, temp, 8*SIZE); 04038 for(i=0; i<COUNT; i++){ 04039 int j, s; 04040 04041 s= show_bits(&gb, 24); 04042 04043 START_TIMER 04044 j= get_se_golomb(&gb); 04045 if(j != i - COUNT/2){ 04046 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s); 04047 // return -1; 04048 } 04049 STOP_TIMER("get_se_golomb"); 04050 } 04051 04052 #if 0 04053 printf("testing 4x4 (I)DCT\n"); 04054 04055 DCTELEM block[16]; 04056 uint8_t src[16], ref[16]; 04057 uint64_t error= 0, max_error=0; 04058 04059 for(i=0; i<COUNT; i++){ 04060 int j; 04061 // printf("%d %d %d\n", r1, r2, (r2-r1)*16); 04062 for(j=0; j<16; j++){ 04063 ref[j]= random()%255; 04064 src[j]= random()%255; 04065 } 04066 04067 h264_diff_dct_c(block, src, ref, 4); 04068 04069 //normalize 04070 for(j=0; j<16; j++){ 04071 // printf("%d ", block[j]); 04072 block[j]= block[j]*4; 04073 if(j&1) block[j]= (block[j]*4 + 2)/5; 04074 if(j&4) block[j]= (block[j]*4 + 2)/5; 04075 } 04076 // printf("\n"); 04077 04078 h->h264dsp.h264_idct_add(ref, block, 4); 04079 /* for(j=0; j<16; j++){ 04080 printf("%d ", ref[j]); 04081 } 04082 printf("\n");*/ 04083 04084 for(j=0; j<16; j++){ 04085 int diff= FFABS(src[j] - ref[j]); 04086 04087 error+= diff*diff; 04088 max_error= FFMAX(max_error, diff); 04089 } 04090 } 04091 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error ); 04092 printf("testing quantizer\n"); 04093 for(qp=0; qp<52; qp++){ 04094 for(i=0; i<16; i++) 04095 src1_block[i]= src2_block[i]= random()%255; 04096 04097 } 04098 printf("Testing NAL layer\n"); 04099 04100 uint8_t bitstream[COUNT]; 04101 uint8_t 
nal[COUNT*2]; 04102 H264Context h; 04103 memset(&h, 0, sizeof(H264Context)); 04104 04105 for(i=0; i<COUNT; i++){ 04106 int zeros= i; 04107 int nal_length; 04108 int consumed; 04109 int out_length; 04110 uint8_t *out; 04111 int j; 04112 04113 for(j=0; j<COUNT; j++){ 04114 bitstream[j]= (random() % 255) + 1; 04115 } 04116 04117 for(j=0; j<zeros; j++){ 04118 int pos= random() % COUNT; 04119 while(bitstream[pos] == 0){ 04120 pos++; 04121 pos %= COUNT; 04122 } 04123 bitstream[pos]=0; 04124 } 04125 04126 START_TIMER 04127 04128 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2); 04129 if(nal_length<0){ 04130 printf("encoding failed\n"); 04131 return -1; 04132 } 04133 04134 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length); 04135 04136 STOP_TIMER("NAL") 04137 04138 if(out_length != COUNT){ 04139 printf("incorrect length %d %d\n", out_length, COUNT); 04140 return -1; 04141 } 04142 04143 if(consumed != nal_length){ 04144 printf("incorrect consumed length %d %d\n", nal_length, consumed); 04145 return -1; 04146 } 04147 04148 if(memcmp(bitstream, out, COUNT)){ 04149 printf("mismatch\n"); 04150 return -1; 04151 } 04152 } 04153 #endif 04154 04155 printf("Testing RBSP\n"); 04156 04157 04158 return 0; 04159 } 04160 #endif /* TEST */ 04161 04162 04163 av_cold void ff_h264_free_context(H264Context *h) 04164 { 04165 int i; 04166 04167 free_tables(h, 1); //FIXME cleanup init stuff perhaps 04168 04169 for(i = 0; i < MAX_SPS_COUNT; i++) 04170 av_freep(h->sps_buffers + i); 04171 04172 for(i = 0; i < MAX_PPS_COUNT; i++) 04173 av_freep(h->pps_buffers + i); 04174 } 04175 04176 av_cold int ff_h264_decode_end(AVCodecContext *avctx) 04177 { 04178 H264Context *h = avctx->priv_data; 04179 MpegEncContext *s = &h->s; 04180 04181 ff_h264_free_context(h); 04182 04183 MPV_common_end(s); 04184 04185 // memset(h, 0, sizeof(H264Context)); 04186 04187 return 0; 04188 } 04189 04190 static const AVProfile profiles[] = { 04191 { FF_PROFILE_H264_BASELINE, "Baseline" }, 04192 { 
FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" }, 04193 { FF_PROFILE_H264_MAIN, "Main" }, 04194 { FF_PROFILE_H264_EXTENDED, "Extended" }, 04195 { FF_PROFILE_H264_HIGH, "High" }, 04196 { FF_PROFILE_H264_HIGH_10, "High 10" }, 04197 { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" }, 04198 { FF_PROFILE_H264_HIGH_422, "High 4:2:2" }, 04199 { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" }, 04200 { FF_PROFILE_H264_HIGH_444, "High 4:4:4" }, 04201 { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" }, 04202 { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" }, 04203 { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" }, 04204 { FF_PROFILE_UNKNOWN }, 04205 }; 04206 04207 AVCodec ff_h264_decoder = { 04208 "h264", 04209 AVMEDIA_TYPE_VIDEO, 04210 CODEC_ID_H264, 04211 sizeof(H264Context), 04212 ff_h264_decode_init, 04213 NULL, 04214 ff_h264_decode_end, 04215 decode_frame, 04216 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY | 04217 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS, 04218 .flush= flush_dpb, 04219 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"), 04220 .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy), 04221 .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context), 04222 .profiles = NULL_IF_CONFIG_SMALL(profiles), 04223 }; 04224 04225 #if CONFIG_H264_VDPAU_DECODER 04226 AVCodec ff_h264_vdpau_decoder = { 04227 "h264_vdpau", 04228 AVMEDIA_TYPE_VIDEO, 04229 CODEC_ID_H264, 04230 sizeof(H264Context), 04231 ff_h264_decode_init, 04232 NULL, 04233 ff_h264_decode_end, 04234 decode_frame, 04235 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU, 04236 .flush= flush_dpb, 04237 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"), 04238 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE}, 04239 .profiles = NULL_IF_CONFIG_SMALL(profiles), 04240 }; 04241 #endif