/* Libav 0.7.1 */
00001 /* 00002 * H.26L/H.264/AVC/JVT/14496-10/... loop filter 00003 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00028 #include "libavutil/intreadwrite.h" 00029 #include "internal.h" 00030 #include "dsputil.h" 00031 #include "avcodec.h" 00032 #include "mpegvideo.h" 00033 #include "h264.h" 00034 #include "mathops.h" 00035 #include "rectangle.h" 00036 00037 //#undef NDEBUG 00038 #include <assert.h> 00039 00040 /* Deblocking filter (p153) */ 00041 static const uint8_t alpha_table[52*3] = { 00042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00046 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00047 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, 00048 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 00049 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, 00050 80, 90,101,113,127,144,162,182,203,226, 00051 255,255, 00052 255,255,255,255,255,255,255,255,255,255,255,255,255, 00053 255,255,255,255,255,255,255,255,255,255,255,255,255, 00054 255,255,255,255,255,255,255,255,255,255,255,255,255, 00055 255,255,255,255,255,255,255,255,255,255,255,255,255, 00056 
}; 00057 static const uint8_t beta_table[52*3] = { 00058 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00059 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00060 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00061 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00062 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00063 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, 00064 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 00065 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 00066 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 00067 18, 18, 00068 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 00069 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 00070 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 00071 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 00072 }; 00073 static const uint8_t tc0_table[52*3][4] = { 00074 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00075 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00076 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00077 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00078 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00079 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00080 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00081 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00082 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00083 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00084 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 00085 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, 00086 {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 
0, 1, 1 }, {-1, 1, 1, 1 }, 00087 {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, 00088 {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 }, 00089 {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, 00090 {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 }, 00091 {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 }, 00092 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00093 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00094 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00095 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00096 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00097 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00098 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00099 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00100 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 00101 }; 00102 00103 static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) { 00104 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); 00105 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; 00106 const int alpha = alpha_table[index_a]; 00107 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; 00108 if (alpha ==0 || beta == 0) return; 00109 00110 if( bS[0] < 4 ) { 00111 int8_t tc[4]; 00112 tc[0] = tc0_table[index_a][bS[0]]; 00113 tc[1] = tc0_table[index_a][bS[1]]; 00114 tc[2] = 
tc0_table[index_a][bS[2]]; 00115 tc[3] = tc0_table[index_a][bS[3]]; 00116 h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); 00117 } else { 00118 h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); 00119 } 00120 } 00121 static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { 00122 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); 00123 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; 00124 const int alpha = alpha_table[index_a]; 00125 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; 00126 if (alpha ==0 || beta == 0) return; 00127 00128 if( bS[0] < 4 ) { 00129 int8_t tc[4]; 00130 tc[0] = tc0_table[index_a][bS[0]]+1; 00131 tc[1] = tc0_table[index_a][bS[1]]+1; 00132 tc[2] = tc0_table[index_a][bS[2]]+1; 00133 tc[3] = tc0_table[index_a][bS[3]]+1; 00134 h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); 00135 } else { 00136 h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); 00137 } 00138 } 00139 00140 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) { 00141 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); 00142 int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; 00143 int alpha = alpha_table[index_a]; 00144 int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; 00145 if (alpha ==0 || beta == 0) return; 00146 00147 if( bS[0] < 4 ) { 00148 int8_t tc[4]; 00149 tc[0] = tc0_table[index_a][bS[0*bsi]]; 00150 tc[1] = tc0_table[index_a][bS[1*bsi]]; 00151 tc[2] = tc0_table[index_a][bS[2*bsi]]; 00152 tc[3] = tc0_table[index_a][bS[3*bsi]]; 00153 h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc); 00154 } else { 00155 h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta); 00156 } 00157 } 00158 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, 
int16_t bS[7], int bsi, int qp ) { 00159 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); 00160 int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; 00161 int alpha = alpha_table[index_a]; 00162 int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; 00163 if (alpha ==0 || beta == 0) return; 00164 00165 if( bS[0] < 4 ) { 00166 int8_t tc[4]; 00167 tc[0] = tc0_table[index_a][bS[0*bsi]] + 1; 00168 tc[1] = tc0_table[index_a][bS[1*bsi]] + 1; 00169 tc[2] = tc0_table[index_a][bS[2*bsi]] + 1; 00170 tc[3] = tc0_table[index_a][bS[3*bsi]] + 1; 00171 h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc); 00172 } else { 00173 h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta); 00174 } 00175 } 00176 00177 static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { 00178 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); 00179 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; 00180 const int alpha = alpha_table[index_a]; 00181 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; 00182 if (alpha ==0 || beta == 0) return; 00183 00184 if( bS[0] < 4 ) { 00185 int8_t tc[4]; 00186 tc[0] = tc0_table[index_a][bS[0]]; 00187 tc[1] = tc0_table[index_a][bS[1]]; 00188 tc[2] = tc0_table[index_a][bS[2]]; 00189 tc[3] = tc0_table[index_a][bS[3]]; 00190 h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); 00191 } else { 00192 h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); 00193 } 00194 } 00195 00196 static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { 00197 const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8); 00198 const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset; 00199 const int alpha = alpha_table[index_a]; 00200 const int beta = beta_table[qp - qp_bd_offset + h->slice_beta_offset]; 
00201 if (alpha ==0 || beta == 0) return; 00202 00203 if( bS[0] < 4 ) { 00204 int8_t tc[4]; 00205 tc[0] = tc0_table[index_a][bS[0]]+1; 00206 tc[1] = tc0_table[index_a][bS[1]]+1; 00207 tc[2] = tc0_table[index_a][bS[2]]+1; 00208 tc[3] = tc0_table[index_a][bS[3]]+1; 00209 h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); 00210 } else { 00211 h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); 00212 } 00213 } 00214 00215 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 00216 MpegEncContext * const s = &h->s; 00217 int mb_xy; 00218 int mb_type, left_type; 00219 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; 00220 int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); 00221 00222 mb_xy = h->mb_xy; 00223 00224 if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) { 00225 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); 00226 return; 00227 } 00228 assert(!FRAME_MBAFF); 00229 left_type= h->left_type[0]; 00230 00231 mb_type = s->current_picture.mb_type[mb_xy]; 00232 qp = s->current_picture.qscale_table[mb_xy]; 00233 qp0 = s->current_picture.qscale_table[mb_xy-1]; 00234 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; 00235 qpc = get_chroma_qp( h, 0, qp ); 00236 qpc0 = get_chroma_qp( h, 0, qp0 ); 00237 qpc1 = get_chroma_qp( h, 0, qp1 ); 00238 qp0 = (qp + qp0 + 1) >> 1; 00239 qp1 = (qp + qp1 + 1) >> 1; 00240 qpc0 = (qpc + qpc0 + 1) >> 1; 00241 qpc1 = (qpc + qpc1 + 1) >> 1; 00242 qp_thresh = 15+52 - h->slice_alpha_c0_offset; 00243 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && 00244 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) 00245 return; 00246 00247 if( IS_INTRA(mb_type) ) { 00248 int16_t bS4[4] = {4,4,4,4}; 00249 int16_t bS3[4] = {3,3,3,3}; 00250 int16_t *bSH = FIELD_PICTURE ? 
bS3 : bS4; 00251 if(left_type) 00252 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); 00253 if( IS_8x8DCT(mb_type) ) { 00254 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); 00255 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); 00256 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); 00257 } else { 00258 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); 00259 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); 00260 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); 00261 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); 00262 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); 00263 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); 00264 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); 00265 } 00266 if(chroma){ 00267 if(left_type){ 00268 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); 00269 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); 00270 } 00271 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); 00272 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); 00273 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); 00274 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); 00275 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); 00276 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); 00277 } 00278 return; 00279 } else { 00280 LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]); 00281 int edges; 00282 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { 00283 edges = 4; 00284 AV_WN64A(bS[0][0], 0x0002000200020002ULL); 00285 AV_WN64A(bS[0][2], 0x0002000200020002ULL); 00286 AV_WN64A(bS[1][0], 0x0002000200020002ULL); 00287 AV_WN64A(bS[1][2], 0x0002000200020002ULL); 00288 } else { 00289 int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 
1 : 0; 00290 int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; 00291 int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1; 00292 edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; 00293 h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, 00294 h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); 00295 } 00296 if( IS_INTRA(left_type) ) 00297 AV_WN64A(bS[0][0], 0x0004000400040004ULL); 00298 if( IS_INTRA(h->top_type) ) 00299 AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL); 00300 00301 #define FILTER(hv,dir,edge)\ 00302 if(AV_RN64A(bS[dir][edge])) { \ 00303 filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ 00304 if(chroma && !(edge&1)) {\ 00305 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ 00306 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? 
qpc : qpc##dir, h );\ 00307 }\ 00308 } 00309 if(left_type) 00310 FILTER(v,0,0); 00311 if( edges == 1 ) { 00312 FILTER(h,1,0); 00313 } else if( IS_8x8DCT(mb_type) ) { 00314 FILTER(v,0,2); 00315 FILTER(h,1,0); 00316 FILTER(h,1,2); 00317 } else { 00318 FILTER(v,0,1); 00319 FILTER(v,0,2); 00320 FILTER(v,0,3); 00321 FILTER(h,1,0); 00322 FILTER(h,1,1); 00323 FILTER(h,1,2); 00324 FILTER(h,1,3); 00325 } 00326 #undef FILTER 00327 } 00328 } 00329 00330 static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){ 00331 int v; 00332 00333 v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx]; 00334 if(!v && h->ref_cache[0][b_idx]!=-1) 00335 v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U | 00336 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit; 00337 00338 if(h->list_count==2){ 00339 if(!v) 00340 v = h->ref_cache[1][b_idx] != h->ref_cache[1][bn_idx] | 00341 h->mv_cache[1][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U | 00342 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit; 00343 00344 if(v){ 00345 if(h->ref_cache[0][b_idx] != h->ref_cache[1][bn_idx] | 00346 h->ref_cache[1][b_idx] != h->ref_cache[0][bn_idx]) 00347 return 1; 00348 return 00349 h->mv_cache[0][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U | 00350 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit | 00351 h->mv_cache[1][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U | 00352 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit; 00353 } 00354 } 00355 00356 return v; 00357 } 00358 00359 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) { 00360 MpegEncContext * const s = &h->s; 00361 int edge; 00362 int chroma_qp_avg[2]; 00363 const int mbm_xy = dir == 0 ? 
mb_xy -1 : h->top_mb_xy; 00364 const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type; 00365 00366 // how often to recheck mv-based bS when iterating between edges 00367 static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, 00368 {0,3,1,1,3,3,3,3}}; 00369 const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7]; 00370 const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4; 00371 00372 // how often to recheck mv-based bS when iterating along each edge 00373 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); 00374 00375 if(mbm_type && !first_vertical_edge_done){ 00376 00377 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) 00378 && IS_INTERLACED(mbm_type&~mb_type) 00379 ) { 00380 // This is a special case in the norm where the filtering must 00381 // be done twice (one each of the field) even if we are in a 00382 // frame macroblock. 00383 // 00384 unsigned int tmp_linesize = 2 * linesize; 00385 unsigned int tmp_uvlinesize = 2 * uvlinesize; 00386 int mbn_xy = mb_xy - 2 * s->mb_stride; 00387 int j; 00388 00389 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ 00390 DECLARE_ALIGNED(8, int16_t, bS)[4]; 00391 int qp; 00392 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { 00393 AV_WN64A(bS, 0x0003000300030003ULL); 00394 } else { 00395 if(!CABAC && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])){ 00396 bS[0]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+0]); 00397 bS[1]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+1]); 00398 bS[2]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+2]); 00399 bS[3]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+3]); 00400 }else{ 00401 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4; 00402 int i; 00403 for( i = 0; i < 4; i++ ) { 00404 bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]); 00405 } 00406 } 00407 } 00408 // Do not use s->qscale as luma quantizer because it has not the same 00409 
// value in IPCM macroblocks. 00410 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; 00411 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); 00412 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 00413 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h ); 00414 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; 00415 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; 00416 if (chroma) { 00417 if (chroma444) { 00418 filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h); 00419 filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h); 00420 } else { 00421 filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h); 00422 filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h); 00423 } 00424 } 00425 } 00426 }else{ 00427 DECLARE_ALIGNED(8, int16_t, bS)[4]; 00428 int qp; 00429 00430 if( IS_INTRA(mb_type|mbm_type)) { 00431 AV_WN64A(bS, 0x0003000300030003ULL); 00432 if ( (!IS_INTERLACED(mb_type|mbm_type)) 00433 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) 00434 ) 00435 AV_WN64A(bS, 0x0004000400040004ULL); 00436 } else { 00437 int i; 00438 int mv_done; 00439 00440 if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) { 00441 AV_WN64A(bS, 0x0001000100010001ULL); 00442 mv_done = 1; 00443 } 00444 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { 00445 int b_idx= 8 + 4; 00446 int bn_idx= b_idx - (dir ? 
8:1); 00447 00448 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit); 00449 mv_done = 1; 00450 } 00451 else 00452 mv_done = 0; 00453 00454 for( i = 0; i < 4; i++ ) { 00455 int x = dir == 0 ? 0 : i; 00456 int y = dir == 0 ? i : 0; 00457 int b_idx= 8 + 4 + x + 8*y; 00458 int bn_idx= b_idx - (dir ? 8:1); 00459 00460 if( h->non_zero_count_cache[b_idx] | 00461 h->non_zero_count_cache[bn_idx] ) { 00462 bS[i] = 2; 00463 } 00464 else if(!mv_done) 00465 { 00466 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit); 00467 } 00468 } 00469 } 00470 00471 /* Filter edge */ 00472 // Do not use s->qscale as luma quantizer because it has not the same 00473 // value in IPCM macroblocks. 00474 if(bS[0]+bS[1]+bS[2]+bS[3]){ 00475 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbm_xy] + 1 ) >> 1; 00476 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); 00477 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); 00478 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 00479 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; 00480 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; 00481 if( dir == 0 ) { 00482 filter_mb_edgev( &img_y[0], linesize, bS, qp, h ); 00483 if (chroma) { 00484 if (chroma444) { 00485 filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); 00486 filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); 00487 } else { 00488 filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); 00489 filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); 00490 } 00491 } 00492 } else { 00493 filter_mb_edgeh( &img_y[0], linesize, bS, qp, 
h ); 00494 if (chroma) { 00495 if (chroma444) { 00496 filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); 00497 filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); 00498 } else { 00499 filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); 00500 filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); 00501 } 00502 } 00503 } 00504 } 00505 } 00506 } 00507 00508 /* Calculate bS */ 00509 for( edge = 1; edge < edges; edge++ ) { 00510 DECLARE_ALIGNED(8, int16_t, bS)[4]; 00511 int qp; 00512 00513 if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) 00514 continue; 00515 00516 if( IS_INTRA(mb_type)) { 00517 AV_WN64A(bS, 0x0003000300030003ULL); 00518 } else { 00519 int i; 00520 int mv_done; 00521 00522 if( edge & mask_edge ) { 00523 AV_ZERO64(bS); 00524 mv_done = 1; 00525 } 00526 else if( mask_par0 ) { 00527 int b_idx= 8 + 4 + edge * (dir ? 8:1); 00528 int bn_idx= b_idx - (dir ? 8:1); 00529 00530 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit); 00531 mv_done = 1; 00532 } 00533 else 00534 mv_done = 0; 00535 00536 for( i = 0; i < 4; i++ ) { 00537 int x = dir == 0 ? edge : i; 00538 int y = dir == 0 ? i : edge; 00539 int b_idx= 8 + 4 + x + 8*y; 00540 int bn_idx= b_idx - (dir ? 8:1); 00541 00542 if( h->non_zero_count_cache[b_idx] | 00543 h->non_zero_count_cache[bn_idx] ) { 00544 bS[i] = 2; 00545 } 00546 else if(!mv_done) 00547 { 00548 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit); 00549 } 00550 } 00551 00552 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) 00553 continue; 00554 } 00555 00556 /* Filter edge */ 00557 // Do not use s->qscale as luma quantizer because it has not the same 00558 // value in IPCM macroblocks. 
00559 qp = s->current_picture.qscale_table[mb_xy]; 00560 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); 00561 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); 00562 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 00563 if( dir == 0 ) { 00564 filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h ); 00565 if (chroma) { 00566 if (chroma444) { 00567 filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h); 00568 filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h); 00569 } else if( (edge&1) == 0 ) { 00570 filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h); 00571 filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h); 00572 } 00573 } 00574 } else { 00575 filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); 00576 if (chroma) { 00577 if (chroma444) { 00578 filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); 00579 filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); 00580 } else if( (edge&1) == 0 ) { 00581 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); 00582 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); 00583 } 00584 } 00585 } 00586 } 00587 } 00588 00589 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 00590 MpegEncContext * const s = &h->s; 00591 const int mb_xy= mb_x + mb_y*s->mb_stride; 00592 const int mb_type = s->current_picture.mb_type[mb_xy]; 00593 const int mvy_limit = IS_INTERLACED(mb_type) ? 
2 : 4; 00594 int first_vertical_edge_done = 0; 00595 av_unused int dir; 00596 int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); 00597 00598 if (FRAME_MBAFF 00599 // and current and left pair do not have the same interlaced type 00600 && IS_INTERLACED(mb_type^h->left_type[0]) 00601 // and left mb is in available to us 00602 && h->left_type[0]) { 00603 /* First vertical edge is different in MBAFF frames 00604 * There are 8 different bS to compute and 2 different Qp 00605 */ 00606 DECLARE_ALIGNED(8, int16_t, bS)[8]; 00607 int qp[2]; 00608 int bqp[2]; 00609 int rqp[2]; 00610 int mb_qp, mbn0_qp, mbn1_qp; 00611 int i; 00612 first_vertical_edge_done = 1; 00613 00614 if( IS_INTRA(mb_type) ) { 00615 AV_WN64A(&bS[0], 0x0004000400040004ULL); 00616 AV_WN64A(&bS[4], 0x0004000400040004ULL); 00617 } else { 00618 static const uint8_t offset[2][2][8]={ 00619 { 00620 {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1}, 00621 {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3}, 00622 },{ 00623 {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3}, 00624 {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3}, 00625 } 00626 }; 00627 const uint8_t *off= offset[MB_FIELD][mb_y&1]; 00628 for( i = 0; i < 8; i++ ) { 00629 int j= MB_FIELD ? i>>2 : i&1; 00630 int mbn_xy = h->left_mb_xy[j]; 00631 int mbn_type= h->left_type[j]; 00632 00633 if( IS_INTRA( mbn_type ) ) 00634 bS[i] = 4; 00635 else{ 00636 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] | 00637 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ? 00638 (h->cbp_table[mbn_xy] & (((MB_FIELD ? (i&2) : (mb_y&1)) ? 
8 : 2) << 12)) 00639 : 00640 h->non_zero_count[mbn_xy][ off[i] ])); 00641 } 00642 } 00643 } 00644 00645 mb_qp = s->current_picture.qscale_table[mb_xy]; 00646 mbn0_qp = s->current_picture.qscale_table[h->left_mb_xy[0]]; 00647 mbn1_qp = s->current_picture.qscale_table[h->left_mb_xy[1]]; 00648 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; 00649 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) + 00650 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1; 00651 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) + 00652 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1; 00653 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; 00654 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) + 00655 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1; 00656 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) + 00657 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; 00658 00659 /* Filter edge */ 00660 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); 00661 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 00662 if(MB_FIELD){ 00663 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] ); 00664 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] ); 00665 if (chroma){ 00666 if (CHROMA444) { 00667 filter_mb_mbaff_edgev ( h, img_cb, uvlinesize, bS , 1, bqp[0] ); 00668 filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); 00669 filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0] ); 00670 filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); 00671 }else{ 00672 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); 00673 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); 00674 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); 00675 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); 00676 } 00677 } 00678 }else{ 00679 filter_mb_mbaff_edgev ( h, 
img_y , 2* linesize, bS , 2, qp [0] ); 00680 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] ); 00681 if (chroma){ 00682 if (CHROMA444) { 00683 filter_mb_mbaff_edgev ( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); 00684 filter_mb_mbaff_edgev ( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); 00685 filter_mb_mbaff_edgev ( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); 00686 filter_mb_mbaff_edgev ( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); 00687 }else{ 00688 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); 00689 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); 00690 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); 00691 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); 00692 } 00693 } 00694 } 00695 } 00696 00697 #if CONFIG_SMALL 00698 for( dir = 0; dir < 2; dir++ ) 00699 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir); 00700 #else 00701 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0); 00702 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1); 00703 #endif 00704 }