Libav 0.7.1
libavcodec/vp8.c
Go to the documentation of this file.
00001 
00025 #include "libavutil/imgutils.h"
00026 #include "avcodec.h"
00027 #include "vp8.h"
00028 #include "vp8data.h"
00029 #include "rectangle.h"
00030 #include "thread.h"
00031 
00032 #if ARCH_ARM
00033 #   include "arm/vp8.h"
00034 #endif
00035 
00036 static void vp8_decode_flush(AVCodecContext *avctx)
00037 {
00038     VP8Context *s = avctx->priv_data;
00039     int i;
00040 
00041     if (!avctx->is_copy) {
00042         for (i = 0; i < 5; i++)
00043             if (s->frames[i].data[0])
00044                 ff_thread_release_buffer(avctx, &s->frames[i]);
00045     }
00046     memset(s->framep, 0, sizeof(s->framep));
00047 
00048     av_freep(&s->macroblocks_base);
00049     av_freep(&s->filter_strength);
00050     av_freep(&s->intra4x4_pred_mode_top);
00051     av_freep(&s->top_nnz);
00052     av_freep(&s->edge_emu_buffer);
00053     av_freep(&s->top_border);
00054     av_freep(&s->segmentation_map);
00055 
00056     s->macroblocks        = NULL;
00057 }
00058 
00059 static int update_dimensions(VP8Context *s, int width, int height)
00060 {
00061     if (width  != s->avctx->width ||
00062         height != s->avctx->height) {
00063         if (av_image_check_size(width, height, 0, s->avctx))
00064             return AVERROR_INVALIDDATA;
00065 
00066         vp8_decode_flush(s->avctx);
00067 
00068         avcodec_set_dimensions(s->avctx, width, height);
00069     }
00070 
00071     s->mb_width  = (s->avctx->coded_width +15) / 16;
00072     s->mb_height = (s->avctx->coded_height+15) / 16;
00073 
00074     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00075     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
00076     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
00077     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00078     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00079     s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);
00080 
00081     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
00082         !s->top_nnz || !s->top_border || !s->segmentation_map)
00083         return AVERROR(ENOMEM);
00084 
00085     s->macroblocks        = s->macroblocks_base + 1;
00086 
00087     return 0;
00088 }
00089 
00090 static void parse_segment_info(VP8Context *s)
00091 {
00092     VP56RangeCoder *c = &s->c;
00093     int i;
00094 
00095     s->segmentation.update_map = vp8_rac_get(c);
00096 
00097     if (vp8_rac_get(c)) { // update segment feature data
00098         s->segmentation.absolute_vals = vp8_rac_get(c);
00099 
00100         for (i = 0; i < 4; i++)
00101             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
00102 
00103         for (i = 0; i < 4; i++)
00104             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00105     }
00106     if (s->segmentation.update_map)
00107         for (i = 0; i < 3; i++)
00108             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00109 }
00110 
00111 static void update_lf_deltas(VP8Context *s)
00112 {
00113     VP56RangeCoder *c = &s->c;
00114     int i;
00115 
00116     for (i = 0; i < 4; i++)
00117         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
00118 
00119     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
00120         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
00121 }
00122 
00123 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00124 {
00125     const uint8_t *sizes = buf;
00126     int i;
00127 
00128     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00129 
00130     buf      += 3*(s->num_coeff_partitions-1);
00131     buf_size -= 3*(s->num_coeff_partitions-1);
00132     if (buf_size < 0)
00133         return -1;
00134 
00135     for (i = 0; i < s->num_coeff_partitions-1; i++) {
00136         int size = AV_RL24(sizes + 3*i);
00137         if (buf_size - size < 0)
00138             return -1;
00139 
00140         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00141         buf      += size;
00142         buf_size -= size;
00143     }
00144     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00145 
00146     return 0;
00147 }
00148 
00149 static void get_quants(VP8Context *s)
00150 {
00151     VP56RangeCoder *c = &s->c;
00152     int i, base_qi;
00153 
00154     int yac_qi     = vp8_rac_get_uint(c, 7);
00155     int ydc_delta  = vp8_rac_get_sint(c, 4);
00156     int y2dc_delta = vp8_rac_get_sint(c, 4);
00157     int y2ac_delta = vp8_rac_get_sint(c, 4);
00158     int uvdc_delta = vp8_rac_get_sint(c, 4);
00159     int uvac_delta = vp8_rac_get_sint(c, 4);
00160 
00161     for (i = 0; i < 4; i++) {
00162         if (s->segmentation.enabled) {
00163             base_qi = s->segmentation.base_quant[i];
00164             if (!s->segmentation.absolute_vals)
00165                 base_qi += yac_qi;
00166         } else
00167             base_qi = yac_qi;
00168 
00169         s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00170         s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
00171         s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00172         s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
00173         s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00174         s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00175 
00176         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00177         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00178     }
00179 }
00180 
00194 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00195 {
00196     VP56RangeCoder *c = &s->c;
00197 
00198     if (update)
00199         return VP56_FRAME_CURRENT;
00200 
00201     switch (vp8_rac_get_uint(c, 2)) {
00202     case 1:
00203         return VP56_FRAME_PREVIOUS;
00204     case 2:
00205         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00206     }
00207     return VP56_FRAME_NONE;
00208 }
00209 
00210 static void update_refs(VP8Context *s)
00211 {
00212     VP56RangeCoder *c = &s->c;
00213 
00214     int update_golden = vp8_rac_get(c);
00215     int update_altref = vp8_rac_get(c);
00216 
00217     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00218     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00219 }
00220 
00221 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00222 {
00223     VP56RangeCoder *c = &s->c;
00224     int header_size, hscale, vscale, i, j, k, l, m, ret;
00225     int width  = s->avctx->width;
00226     int height = s->avctx->height;
00227 
00228     s->keyframe  = !(buf[0] & 1);
00229     s->profile   =  (buf[0]>>1) & 7;
00230     s->invisible = !(buf[0] & 0x10);
00231     header_size  = AV_RL24(buf) >> 5;
00232     buf      += 3;
00233     buf_size -= 3;
00234 
00235     if (s->profile > 3)
00236         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00237 
00238     if (!s->profile)
00239         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00240     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
00241         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00242 
00243     if (header_size > buf_size - 7*s->keyframe) {
00244         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00245         return AVERROR_INVALIDDATA;
00246     }
00247 
00248     if (s->keyframe) {
00249         if (AV_RL24(buf) != 0x2a019d) {
00250             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00251             return AVERROR_INVALIDDATA;
00252         }
00253         width  = AV_RL16(buf+3) & 0x3fff;
00254         height = AV_RL16(buf+5) & 0x3fff;
00255         hscale = buf[4] >> 6;
00256         vscale = buf[6] >> 6;
00257         buf      += 7;
00258         buf_size -= 7;
00259 
00260         if (hscale || vscale)
00261             av_log_missing_feature(s->avctx, "Upscaling", 1);
00262 
00263         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00264         for (i = 0; i < 4; i++)
00265             for (j = 0; j < 16; j++)
00266                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00267                        sizeof(s->prob->token[i][j]));
00268         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00269         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00270         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
00271         memset(&s->segmentation, 0, sizeof(s->segmentation));
00272     }
00273 
00274     if (!s->macroblocks_base || /* first frame */
00275         width != s->avctx->width || height != s->avctx->height) {
00276         if ((ret = update_dimensions(s, width, height) < 0))
00277             return ret;
00278     }
00279 
00280     ff_vp56_init_range_decoder(c, buf, header_size);
00281     buf      += header_size;
00282     buf_size -= header_size;
00283 
00284     if (s->keyframe) {
00285         if (vp8_rac_get(c))
00286             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00287         vp8_rac_get(c); // whether we can skip clamping in dsp functions
00288     }
00289 
00290     if ((s->segmentation.enabled = vp8_rac_get(c)))
00291         parse_segment_info(s);
00292     else
00293         s->segmentation.update_map = 0; // FIXME: move this to some init function?
00294 
00295     s->filter.simple    = vp8_rac_get(c);
00296     s->filter.level     = vp8_rac_get_uint(c, 6);
00297     s->filter.sharpness = vp8_rac_get_uint(c, 3);
00298 
00299     if ((s->lf_delta.enabled = vp8_rac_get(c)))
00300         if (vp8_rac_get(c))
00301             update_lf_deltas(s);
00302 
00303     if (setup_partitions(s, buf, buf_size)) {
00304         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00305         return AVERROR_INVALIDDATA;
00306     }
00307 
00308     get_quants(s);
00309 
00310     if (!s->keyframe) {
00311         update_refs(s);
00312         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
00313         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
00314     }
00315 
00316     // if we aren't saving this frame's probabilities for future frames,
00317     // make a copy of the current probabilities
00318     if (!(s->update_probabilities = vp8_rac_get(c)))
00319         s->prob[1] = s->prob[0];
00320 
00321     s->update_last = s->keyframe || vp8_rac_get(c);
00322 
00323     for (i = 0; i < 4; i++)
00324         for (j = 0; j < 8; j++)
00325             for (k = 0; k < 3; k++)
00326                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00327                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00328                         int prob = vp8_rac_get_uint(c, 8);
00329                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00330                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00331                     }
00332 
00333     if ((s->mbskip_enabled = vp8_rac_get(c)))
00334         s->prob->mbskip = vp8_rac_get_uint(c, 8);
00335 
00336     if (!s->keyframe) {
00337         s->prob->intra  = vp8_rac_get_uint(c, 8);
00338         s->prob->last   = vp8_rac_get_uint(c, 8);
00339         s->prob->golden = vp8_rac_get_uint(c, 8);
00340 
00341         if (vp8_rac_get(c))
00342             for (i = 0; i < 4; i++)
00343                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00344         if (vp8_rac_get(c))
00345             for (i = 0; i < 3; i++)
00346                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
00347 
00348         // 17.2 MV probability update
00349         for (i = 0; i < 2; i++)
00350             for (j = 0; j < 19; j++)
00351                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00352                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00353     }
00354 
00355     return 0;
00356 }
00357 
00358 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00359 {
00360     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00361     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00362 }
00363 
00367 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00368 {
00369     int bit, x = 0;
00370 
00371     if (vp56_rac_get_prob_branchy(c, p[0])) {
00372         int i;
00373 
00374         for (i = 0; i < 3; i++)
00375             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00376         for (i = 9; i > 3; i--)
00377             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00378         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00379             x += 8;
00380     } else {
00381         // small_mvtree
00382         const uint8_t *ps = p+2;
00383         bit = vp56_rac_get_prob(c, *ps);
00384         ps += 1 + 3*bit;
00385         x  += 4*bit;
00386         bit = vp56_rac_get_prob(c, *ps);
00387         ps += 1 + bit;
00388         x  += 2*bit;
00389         x  += vp56_rac_get_prob(c, *ps);
00390     }
00391 
00392     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00393 }
00394 
00395 static av_always_inline
00396 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00397 {
00398     if (left == top)
00399         return vp8_submv_prob[4-!!left];
00400     if (!top)
00401         return vp8_submv_prob[2];
00402     return vp8_submv_prob[1-!!left];
00403 }
00404 
00409 static av_always_inline
00410 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
00411 {
00412     int part_idx;
00413     int n, num;
00414     VP8Macroblock *top_mb  = &mb[2];
00415     VP8Macroblock *left_mb = &mb[-1];
00416     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00417                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
00418                   *mbsplits_cur, *firstidx;
00419     VP56mv *top_mv  = top_mb->bmv;
00420     VP56mv *left_mv = left_mb->bmv;
00421     VP56mv *cur_mv  = mb->bmv;
00422 
00423     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00424         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00425             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00426         } else {
00427             part_idx = VP8_SPLITMVMODE_8x8;
00428         }
00429     } else {
00430         part_idx = VP8_SPLITMVMODE_4x4;
00431     }
00432 
00433     num = vp8_mbsplit_count[part_idx];
00434     mbsplits_cur = vp8_mbsplits[part_idx],
00435     firstidx = vp8_mbfirstidx[part_idx];
00436     mb->partitioning = part_idx;
00437 
00438     for (n = 0; n < num; n++) {
00439         int k = firstidx[n];
00440         uint32_t left, above;
00441         const uint8_t *submv_prob;
00442 
00443         if (!(k & 3))
00444             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00445         else
00446             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
00447         if (k <= 3)
00448             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00449         else
00450             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00451 
00452         submv_prob = get_submv_prob(left, above);
00453 
00454         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00455             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00456                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00457                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00458                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00459                 } else {
00460                     AV_ZERO32(&mb->bmv[n]);
00461                 }
00462             } else {
00463                 AV_WN32A(&mb->bmv[n], above);
00464             }
00465         } else {
00466             AV_WN32A(&mb->bmv[n], left);
00467         }
00468     }
00469 
00470     return num;
00471 }
00472 
/*
 * Decode the inter prediction mode and motion vector(s) of one macroblock.
 *
 * The motion vectors of the top, left and top-left neighbours are collected
 * into near_mv[] together with weights in cnt[]; those weights select the
 * probabilities used to read the mode. The result is stored in mb->mode,
 * mb->mv, mb->bmv[] and mb->partitioning.
 *
 * mb->ref_frame must already be set by the caller (it is read for the sign
 * bias). mb_x and mb_y are not used by this function body.
 */
00473 static av_always_inline
00474 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
00475 {
00476     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
00477                                   mb - 1 /* left */,
00478                                   mb + 1 /* top-left */ };
          // indices into cnt[] / near_mv[]
00479     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
          // indices into mb_edge[]
00480     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
          // index of the last near_mv[] entry written; 0 is the zero vector
00481     int idx = CNT_ZERO;
00482     int cur_sign_bias = s->sign_bias[mb->ref_frame];
00483     int8_t *sign_bias = s->sign_bias;
00484     VP56mv near_mv[4];
00485     uint8_t cnt[4] = { 0 };
00486     VP56RangeCoder *c = &s->c;
00487 
00488     AV_ZERO32(&near_mv[0]);
00489     AV_ZERO32(&near_mv[1]);
00490 
00491     /* Process MB on top, left and top-left */
          /*
           * For neighbour n, skipped when its ref_frame is VP56_FRAME_CURRENT:
           *  - a zero MV adds to cnt[CNT_ZERO];
           *  - a non-zero MV is negated (both 16-bit halves at once) when the
           *    neighbour's sign bias differs from the current one, appended to
           *    near_mv[] if n is 0 or it differs from the last entry, and its
           *    weight is added to cnt[idx].
           * The weight is 2 for top and left (n != 2) and 1 for top-left.
           */
00492     #define MV_EDGE_CHECK(n)\
00493     {\
00494         VP8Macroblock *edge = mb_edge[n];\
00495         int edge_ref = edge->ref_frame;\
00496         if (edge_ref != VP56_FRAME_CURRENT) {\
00497             uint32_t mv = AV_RN32A(&edge->mv);\
00498             if (mv) {\
00499                 if (cur_sign_bias != sign_bias[edge_ref]) {\
00500                     /* SWAR negate of the values in mv. */\
00501                     mv = ~mv;\
00502                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00503                 }\
00504                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00505                     AV_WN32A(&near_mv[++idx], mv);\
00506                 cnt[idx]      += 1 + (n != 2);\
00507             } else\
00508                 cnt[CNT_ZERO] += 1 + (n != 2);\
00509         }\
00510     }
00511 
00512     MV_EDGE_CHECK(0)
00513     MV_EDGE_CHECK(1)
00514     MV_EDGE_CHECK(2)
00515 
00516     mb->partitioning = VP8_SPLITMVMODE_NONE;
          // first decision: anything other than the zero MV?
00517     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00518         mb->mode = VP8_MVMODE_MV;
00519 
00520         /* If we have three distinct MVs, merge first and last if they're the same */
00521         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00522             cnt[CNT_NEAREST] += 1;
00523 
00524         /* Swap near and nearest if necessary */
00525         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00526             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
00527             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00528         }
00529 
              // second decision: something other than the nearest MV?
00530         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
                  // third decision: something other than the near MV?
00531             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00532 
00533                 /* Choose the best mv out of 0,0 and the nearest mv */
00534                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                      // cnt[CNT_SPLITMV] is reused here as the context for the
                      // split decision: left/top split neighbours weigh 2,
                      // top-left weighs 1
00535                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
00536                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
00537                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00538 
00539                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00540                     mb->mode = VP8_MVMODE_SPLIT;
                          // the MB-level MV becomes the last partition's MV
00541                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
00542                 } else {
                          // new MV: a delta added to the clamped best MV, y first
00543                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00544                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00545                     mb->bmv[0] = mb->mv;
00546                 }
00547             } else {
00548                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00549                 mb->bmv[0] = mb->mv;
00550             }
00551         } else {
00552             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00553             mb->bmv[0] = mb->mv;
00554         }
00555     } else {
00556         mb->mode = VP8_MVMODE_ZERO;
00557         AV_ZERO32(&mb->mv);
00558         mb->bmv[0] = mb->mv;
00559     }
00560 }
00561 
00562 static av_always_inline
00563 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
00564                            int mb_x, int keyframe)
00565 {
00566     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00567     if (keyframe) {
00568         int x, y;
00569         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
00570         uint8_t* const left = s->intra4x4_pred_mode_left;
00571         for (y = 0; y < 4; y++) {
00572             for (x = 0; x < 4; x++) {
00573                 const uint8_t *ctx;
00574                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00575                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00576                 left[y] = top[x] = *intra4x4;
00577                 intra4x4++;
00578             }
00579         }
00580     } else {
00581         int i;
00582         for (i = 0; i < 16; i++)
00583             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00584     }
00585 }
00586 
00587 static av_always_inline
00588 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
00589 {
00590     VP56RangeCoder *c = &s->c;
00591 
00592     if (s->segmentation.update_map)
00593         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
00594     else
00595         *segment = ref ? *ref : *segment;
00596     s->segment = *segment;
00597 
00598     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00599 
00600     if (s->keyframe) {
00601         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00602 
00603         if (mb->mode == MODE_I4x4) {
00604             decode_intra4x4_modes(s, c, mb_x, 1);
00605         } else {
00606             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00607             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00608             AV_WN32A(s->intra4x4_pred_mode_left, modes);
00609         }
00610 
00611         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00612         mb->ref_frame = VP56_FRAME_CURRENT;
00613     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
00614         // inter MB, 16.2
00615         if (vp56_rac_get_prob_branchy(c, s->prob->last))
00616             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00617                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
00618         else
00619             mb->ref_frame = VP56_FRAME_PREVIOUS;
00620         s->ref_count[mb->ref_frame-1]++;
00621 
00622         // motion vectors, 16.3
00623         decode_mvs(s, mb, mb_x, mb_y);
00624     } else {
00625         // intra MB, 16.1
00626         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00627 
00628         if (mb->mode == MODE_I4x4)
00629             decode_intra4x4_modes(s, c, mb_x, 0);
00630 
00631         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00632         mb->ref_frame = VP56_FRAME_CURRENT;
00633         mb->partitioning = VP8_SPLITMVMODE_NONE;
00634         AV_ZERO32(&mb->bmv[0]);
00635     }
00636 }
00637 
00638 #ifndef decode_block_coeffs_internal
00639 
/*
 * Decode the DCT coefficient tokens of one block, starting at index i.
 * Only compiled when decode_block_coeffs_internal is not already a macro.
 *
 * The function is entered at skip_eob: the end-of-block test for the first
 * token is not repeated here (decode_block_coeffs() performs it before
 * calling). Each decoded value gets a sign bit, is multiplied by qmul[0] for
 * index 0 or qmul[1] otherwise, and is stored at block[zigzag_scan[i]].
 *
 * c          range coder to read from
 * block      destination coefficients
 * probs      token probabilities, indexed [position][context][token]
 * i          index of the first coefficient to decode
 * token_prob probabilities to use for the first token
 * qmul       dequantization factors
 *
 * Returns the value of i at which decoding stopped (16 if the block ran to
 * its end).
 */
00648 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
00649                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00650                                         int i, uint8_t *token_prob, int16_t qmul[2])
00651 {
          // jump into the loop body past the EOB test
00652     goto skip_eob;
00653     do {
00654         int coeff;
00655         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00656             return i;
00657 
00658 skip_eob:
00659         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
00660             if (++i == 16)
00661                 return i; // invalid input; blocks should end with EOB
                  // after a zero the next token uses context 0 and is read
                  // without an EOB test
00662             token_prob = probs[i][0];
00663             goto skip_eob;
00664         }
00665 
00666         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
00667             coeff = 1;
                  // a value of one selects context 1 for the next position
00668             token_prob = probs[i+1][1];
00669         } else {
00670             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
00671                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
00672                 if (coeff)
00673                     coeff += vp56_rac_get_prob(c, token_prob[5]);
00674                 coeff += 2;
00675             } else {
00676                 // DCT_CAT*
00677                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
00678                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
00679                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
00680                     } else {                                    // DCT_CAT2
00681                         coeff  = 7;
00682                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
00683                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
00684                     }
00685                 } else {    // DCT_CAT3 and up
                          // two bits pick the category (0..3); the base value
                          // is 3 + (8 << cat), extra bits come from the table
00686                     int a = vp56_rac_get_prob(c, token_prob[8]);
00687                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
00688                     int cat = (a<<1) + b;
00689                     coeff  = 3 + (8<<cat);
00690                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
00691                 }
00692             }
                  // a value above one selects context 2 for the next position
00693             token_prob = probs[i+1][2];
00694         }
              // sign bit, then dequantize: qmul[0] for index 0, qmul[1] otherwise
00695         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
00696     } while (++i < 16);
00697 
00698     return i;
00699 }
00700 #endif
00701 
00713 static av_always_inline
00714 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00715                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00716                         int i, int zero_nhood, int16_t qmul[2])
00717 {
00718     uint8_t *token_prob = probs[i][zero_nhood];
00719     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00720         return 0;
00721     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00722 }
00723 
00724 static av_always_inline
00725 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00726                       uint8_t t_nnz[9], uint8_t l_nnz[9])
00727 {
00728     int i, x, y, luma_start = 0, luma_ctx = 3;
00729     int nnz_pred, nnz, nnz_total = 0;
00730     int segment = s->segment;
00731     int block_dc = 0;
00732 
00733     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00734         nnz_pred = t_nnz[8] + l_nnz[8];
00735 
00736         // decode DC values and do hadamard
00737         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
00738                                   s->qmat[segment].luma_dc_qmul);
00739         l_nnz[8] = t_nnz[8] = !!nnz;
00740         if (nnz) {
00741             nnz_total += nnz;
00742             block_dc = 1;
00743             if (nnz == 1)
00744                 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
00745             else
00746                 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
00747         }
00748         luma_start = 1;
00749         luma_ctx = 0;
00750     }
00751 
00752     // luma blocks
00753     for (y = 0; y < 4; y++)
00754         for (x = 0; x < 4; x++) {
00755             nnz_pred = l_nnz[y] + t_nnz[x];
00756             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
00757                                       nnz_pred, s->qmat[segment].luma_qmul);
00758             // nnz+block_dc may be one more than the actual last index, but we don't care
00759             s->non_zero_count_cache[y][x] = nnz + block_dc;
00760             t_nnz[x] = l_nnz[y] = !!nnz;
00761             nnz_total += nnz;
00762         }
00763 
00764     // chroma blocks
00765     // TODO: what to do about dimensions? 2nd dim for luma is x,
00766     // but for chroma it's (y<<1)|x
00767     for (i = 4; i < 6; i++)
00768         for (y = 0; y < 2; y++)
00769             for (x = 0; x < 2; x++) {
00770                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00771                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
00772                                           nnz_pred, s->qmat[segment].chroma_qmul);
00773                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
00774                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00775                 nnz_total += nnz;
00776             }
00777 
00778     // if there were no coded coeffs despite the macroblock not being marked skip,
00779     // we MUST not do the inner loop filter and should not do IDCT
00780     // Since skip isn't used for bitstream prediction, just manually set it.
00781     if (!nnz_total)
00782         mb->skip = 1;
00783 }
00784 
00785 static av_always_inline
00786 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00787                       int linesize, int uvlinesize, int simple)
00788 {
00789     AV_COPY128(top_border, src_y + 15*linesize);
00790     if (!simple) {
00791         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00792         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00793     }
00794 }
00795 
00796 static av_always_inline
00797 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00798                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00799                     int simple, int xchg)
00800 {
00801     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
00802     src_y  -=   linesize;
00803     src_cb -= uvlinesize;
00804     src_cr -= uvlinesize;
00805 
00806 #define XCHG(a,b,xchg) do {                     \
00807         if (xchg) AV_SWAP64(b,a);               \
00808         else      AV_COPY64(b,a);               \
00809     } while (0)
00810 
00811     XCHG(top_border_m1+8, src_y-8, xchg);
00812     XCHG(top_border,      src_y,   xchg);
00813     XCHG(top_border+8,    src_y+8, 1);
00814     if (mb_x < mb_width-1)
00815         XCHG(top_border+32, src_y+16, 1);
00816 
00817     // only copy chroma for normal loop filter
00818     // or to initialize the top row to 127
00819     if (!simple || !mb_y) {
00820         XCHG(top_border_m1+16, src_cb-8, xchg);
00821         XCHG(top_border_m1+24, src_cr-8, xchg);
00822         XCHG(top_border+16,    src_cb, 1);
00823         XCHG(top_border+24,    src_cr, 1);
00824     }
00825 }
00826 
00827 static av_always_inline
00828 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00829 {
00830     if (!mb_x) {
00831         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00832     } else {
00833         return mb_y ? mode : LEFT_DC_PRED8x8;
00834     }
00835 }
00836 
00837 static av_always_inline
00838 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00839 {
00840     if (!mb_x) {
00841         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00842     } else {
00843         return mb_y ? mode : HOR_PRED8x8;
00844     }
00845 }
00846 
00847 static av_always_inline
00848 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00849 {
00850     if (mode == DC_PRED8x8) {
00851         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00852     } else {
00853         return mode;
00854     }
00855 }
00856 
00857 static av_always_inline
00858 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00859 {
00860     switch (mode) {
00861     case DC_PRED8x8:
00862         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00863     case VERT_PRED8x8:
00864         return !mb_y ? DC_127_PRED8x8 : mode;
00865     case HOR_PRED8x8:
00866         return !mb_x ? DC_129_PRED8x8 : mode;
00867     case PLANE_PRED8x8 /*TM*/:
00868         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00869     }
00870     return mode;
00871 }
00872 
00873 static av_always_inline
00874 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00875 {
00876     if (!mb_x) {
00877         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00878     } else {
00879         return mb_y ? mode : HOR_VP8_PRED;
00880     }
00881 }
00882 
00883 static av_always_inline
00884 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
00885 {
00886     switch (mode) {
00887     case VERT_PRED:
00888         if (!mb_x && mb_y) {
00889             *copy_buf = 1;
00890             return mode;
00891         }
00892         /* fall-through */
00893     case DIAG_DOWN_LEFT_PRED:
00894     case VERT_LEFT_PRED:
00895         return !mb_y ? DC_127_PRED : mode;
00896     case HOR_PRED:
00897         if (!mb_y) {
00898             *copy_buf = 1;
00899             return mode;
00900         }
00901         /* fall-through */
00902     case HOR_UP_PRED:
00903         return !mb_x ? DC_129_PRED : mode;
00904     case TM_VP8_PRED:
00905         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
00906     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
00907     case DIAG_DOWN_RIGHT_PRED:
00908     case VERT_RIGHT_PRED:
00909     case HOR_DOWN_PRED:
00910         if (!mb_y || !mb_x)
00911             *copy_buf = 1;
00912         return mode;
00913     }
00914     return mode;
00915 }
00916 
00917 static av_always_inline
00918 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
00919                    int mb_x, int mb_y)
00920 {
00921     AVCodecContext *avctx = s->avctx;
00922     int x, y, mode, nnz, tr;
00923 
00924     // for the first row, we need to run xchg_mb_border to init the top edge to 127
00925     // otherwise, skip it if we aren't going to deblock
00926     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
00927         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
00928                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
00929                        s->filter.simple, 1);
00930 
00931     if (mb->mode < MODE_I4x4) {
00932         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
00933             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
00934         } else {
00935             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
00936         }
00937         s->hpc.pred16x16[mode](dst[0], s->linesize);
00938     } else {
00939         uint8_t *ptr = dst[0];
00940         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00941         uint8_t tr_top[4] = { 127, 127, 127, 127 };
00942 
00943         // all blocks on the right edge of the macroblock use bottom edge
00944         // the top macroblock for their topright edge
00945         uint8_t *tr_right = ptr - s->linesize + 16;
00946 
00947         // if we're on the right edge of the frame, said edge is extended
00948         // from the top macroblock
00949         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
00950             mb_x == s->mb_width-1) {
00951             tr = tr_right[-1]*0x01010101;
00952             tr_right = (uint8_t *)&tr;
00953         }
00954 
00955         if (mb->skip)
00956             AV_ZERO128(s->non_zero_count_cache);
00957 
00958         for (y = 0; y < 4; y++) {
00959             uint8_t *topright = ptr + 4 - s->linesize;
00960             for (x = 0; x < 4; x++) {
00961                 int copy = 0, linesize = s->linesize;
00962                 uint8_t *dst = ptr+4*x;
00963                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
00964 
00965                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
00966                     topright = tr_top;
00967                 } else if (x == 3)
00968                     topright = tr_right;
00969 
00970                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
00971                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
00972                     if (copy) {
00973                         dst = copy_dst + 12;
00974                         linesize = 8;
00975                         if (!(mb_y + y)) {
00976                             copy_dst[3] = 127U;
00977                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
00978                         } else {
00979                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
00980                             if (!(mb_x + x)) {
00981                                 copy_dst[3] = 129U;
00982                             } else {
00983                                 copy_dst[3] = ptr[4*x-s->linesize-1];
00984                             }
00985                         }
00986                         if (!(mb_x + x)) {
00987                             copy_dst[11] =
00988                             copy_dst[19] =
00989                             copy_dst[27] =
00990                             copy_dst[35] = 129U;
00991                         } else {
00992                             copy_dst[11] = ptr[4*x              -1];
00993                             copy_dst[19] = ptr[4*x+s->linesize  -1];
00994                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
00995                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
00996                         }
00997                     }
00998                 } else {
00999                     mode = intra4x4[x];
01000                 }
01001                 s->hpc.pred4x4[mode](dst, topright, linesize);
01002                 if (copy) {
01003                     AV_COPY32(ptr+4*x              , copy_dst+12);
01004                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
01005                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01006                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01007                 }
01008 
01009                 nnz = s->non_zero_count_cache[y][x];
01010                 if (nnz) {
01011                     if (nnz == 1)
01012                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
01013                     else
01014                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
01015                 }
01016                 topright += 4;
01017             }
01018 
01019             ptr   += 4*s->linesize;
01020             intra4x4 += 4;
01021         }
01022     }
01023 
01024     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01025         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
01026     } else {
01027         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
01028     }
01029     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01030     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01031 
01032     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
01033         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01034                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01035                        s->filter.simple, 0);
01036 }
01037 
/* Per-fraction lookup used by the motion compensation code; the column is the
 * sub-pixel position (0..7) of a motion vector component. */
static const uint8_t subpel_idx[3][8] = {
    /* [0]: number of extra pixels needed on the left (or top) side;
     *      doubles as the index into the MC function pointer tables */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1]: total number of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2]: number of extra pixels needed on the right (or bottom) side */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
01044 
01061 static av_always_inline
01062 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
01063                  int x_off, int y_off, int block_w, int block_h,
01064                  int width, int height, int linesize,
01065                  vp8_mc_func mc_func[3][3])
01066 {
01067     uint8_t *src = ref->data[0];
01068 
01069     if (AV_RN32A(mv)) {
01070 
01071         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01072         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01073 
01074         x_off += mv->x >> 2;
01075         y_off += mv->y >> 2;
01076 
01077         // edge emulation
01078         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01079         src += y_off * linesize + x_off;
01080         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01081             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01082             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01083                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01084                                     x_off - mx_idx, y_off - my_idx, width, height);
01085             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01086         }
01087         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01088     } else {
01089         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01090         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01091     }
01092 }
01093 
01111 static av_always_inline
01112 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
01113                    const VP56mv *mv, int x_off, int y_off,
01114                    int block_w, int block_h, int width, int height, int linesize,
01115                    vp8_mc_func mc_func[3][3])
01116 {
01117     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01118 
01119     if (AV_RN32A(mv)) {
01120         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01121         int my = mv->y&7, my_idx = subpel_idx[0][my];
01122 
01123         x_off += mv->x >> 3;
01124         y_off += mv->y >> 3;
01125 
01126         // edge emulation
01127         src1 += y_off * linesize + x_off;
01128         src2 += y_off * linesize + x_off;
01129         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01130         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01131             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01132             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01133                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01134                                     x_off - mx_idx, y_off - my_idx, width, height);
01135             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01136             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01137 
01138             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01139                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01140                                     x_off - mx_idx, y_off - my_idx, width, height);
01141             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01142             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01143         } else {
01144             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01145             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01146         }
01147     } else {
01148         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01149         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01150         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01151     }
01152 }
01153 
01154 static av_always_inline
01155 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
01156                  AVFrame *ref_frame, int x_off, int y_off,
01157                  int bx_off, int by_off,
01158                  int block_w, int block_h,
01159                  int width, int height, VP56mv *mv)
01160 {
01161     VP56mv uvmv = *mv;
01162 
01163     /* Y */
01164     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
01165                 ref_frame, mv, x_off + bx_off, y_off + by_off,
01166                 block_w, block_h, width, height, s->linesize,
01167                 s->put_pixels_tab[block_w == 8]);
01168 
01169     /* U/V */
01170     if (s->profile == 3) {
01171         uvmv.x &= ~7;
01172         uvmv.y &= ~7;
01173     }
01174     x_off   >>= 1; y_off   >>= 1;
01175     bx_off  >>= 1; by_off  >>= 1;
01176     width   >>= 1; height  >>= 1;
01177     block_w >>= 1; block_h >>= 1;
01178     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
01179                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01180                   &uvmv, x_off + bx_off, y_off + by_off,
01181                   block_w, block_h, width, height, s->uvlinesize,
01182                   s->put_pixels_tab[1 + (block_w == 4)]);
01183 }
01184 
01185 /* Fetch pixels for estimated mv 4 macroblocks ahead.
01186  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
01187 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01188 {
01189     /* Don't prefetch refs that haven't been used very often this frame. */
01190     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01191         int x_off = mb_x << 4, y_off = mb_y << 4;
01192         int mx = (mb->mv.x>>2) + x_off + 8;
01193         int my = (mb->mv.y>>2) + y_off;
01194         uint8_t **src= s->framep[ref]->data;
01195         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01196         /* For threading, a ff_thread_await_progress here might be useful, but
01197          * it actually slows down the decoder. Since a bad prefetch doesn't
01198          * generate bad decoder output, we don't run it here. */
01199         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01200         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01201         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01202     }
01203 }
01204 
01208 static av_always_inline
01209 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
01210                    int mb_x, int mb_y)
01211 {
01212     int x_off = mb_x << 4, y_off = mb_y << 4;
01213     int width = 16*s->mb_width, height = 16*s->mb_height;
01214     AVFrame *ref = s->framep[mb->ref_frame];
01215     VP56mv *bmv = mb->bmv;
01216 
01217     switch (mb->partitioning) {
01218     case VP8_SPLITMVMODE_NONE:
01219         vp8_mc_part(s, dst, ref, x_off, y_off,
01220                     0, 0, 16, 16, width, height, &mb->mv);
01221         break;
01222     case VP8_SPLITMVMODE_4x4: {
01223         int x, y;
01224         VP56mv uvmv;
01225 
01226         /* Y */
01227         for (y = 0; y < 4; y++) {
01228             for (x = 0; x < 4; x++) {
01229                 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
01230                             ref, &bmv[4*y + x],
01231                             4*x + x_off, 4*y + y_off, 4, 4,
01232                             width, height, s->linesize,
01233                             s->put_pixels_tab[2]);
01234             }
01235         }
01236 
01237         /* U/V */
01238         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01239         for (y = 0; y < 2; y++) {
01240             for (x = 0; x < 2; x++) {
01241                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
01242                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
01243                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
01244                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01245                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
01246                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
01247                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
01248                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
01249                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01250                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
01251                 if (s->profile == 3) {
01252                     uvmv.x &= ~7;
01253                     uvmv.y &= ~7;
01254                 }
01255                 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
01256                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01257                               4*x + x_off, 4*y + y_off, 4, 4,
01258                               width, height, s->uvlinesize,
01259                               s->put_pixels_tab[2]);
01260             }
01261         }
01262         break;
01263     }
01264     case VP8_SPLITMVMODE_16x8:
01265         vp8_mc_part(s, dst, ref, x_off, y_off,
01266                     0, 0, 16, 8, width, height, &bmv[0]);
01267         vp8_mc_part(s, dst, ref, x_off, y_off,
01268                     0, 8, 16, 8, width, height, &bmv[1]);
01269         break;
01270     case VP8_SPLITMVMODE_8x16:
01271         vp8_mc_part(s, dst, ref, x_off, y_off,
01272                     0, 0, 8, 16, width, height, &bmv[0]);
01273         vp8_mc_part(s, dst, ref, x_off, y_off,
01274                     8, 0, 8, 16, width, height, &bmv[1]);
01275         break;
01276     case VP8_SPLITMVMODE_8x8:
01277         vp8_mc_part(s, dst, ref, x_off, y_off,
01278                     0, 0, 8, 8, width, height, &bmv[0]);
01279         vp8_mc_part(s, dst, ref, x_off, y_off,
01280                     8, 0, 8, 8, width, height, &bmv[1]);
01281         vp8_mc_part(s, dst, ref, x_off, y_off,
01282                     0, 8, 8, 8, width, height, &bmv[2]);
01283         vp8_mc_part(s, dst, ref, x_off, y_off,
01284                     8, 8, 8, 8, width, height, &bmv[3]);
01285         break;
01286     }
01287 }
01288 
01289 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
01290 {
01291     int x, y, ch;
01292 
01293     if (mb->mode != MODE_I4x4) {
01294         uint8_t *y_dst = dst[0];
01295         for (y = 0; y < 4; y++) {
01296             uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
01297             if (nnz4) {
01298                 if (nnz4&~0x01010101) {
01299                     for (x = 0; x < 4; x++) {
01300                         if ((uint8_t)nnz4 == 1)
01301                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
01302                         else if((uint8_t)nnz4 > 1)
01303                             s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
01304                         nnz4 >>= 8;
01305                         if (!nnz4)
01306                             break;
01307                     }
01308                 } else {
01309                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
01310                 }
01311             }
01312             y_dst += 4*s->linesize;
01313         }
01314     }
01315 
01316     for (ch = 0; ch < 2; ch++) {
01317         uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
01318         if (nnz4) {
01319             uint8_t *ch_dst = dst[1+ch];
01320             if (nnz4&~0x01010101) {
01321                 for (y = 0; y < 2; y++) {
01322                     for (x = 0; x < 2; x++) {
01323                         if ((uint8_t)nnz4 == 1)
01324                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01325                         else if((uint8_t)nnz4 > 1)
01326                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01327                         nnz4 >>= 8;
01328                         if (!nnz4)
01329                             goto chroma_idct_end;
01330                     }
01331                     ch_dst += 4*s->uvlinesize;
01332                 }
01333             } else {
01334                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
01335             }
01336         }
01337 chroma_idct_end: ;
01338     }
01339 }
01340 
01341 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01342 {
01343     int interior_limit, filter_level;
01344 
01345     if (s->segmentation.enabled) {
01346         filter_level = s->segmentation.filter_level[s->segment];
01347         if (!s->segmentation.absolute_vals)
01348             filter_level += s->filter.level;
01349     } else
01350         filter_level = s->filter.level;
01351 
01352     if (s->lf_delta.enabled) {
01353         filter_level += s->lf_delta.ref[mb->ref_frame];
01354         filter_level += s->lf_delta.mode[mb->mode];
01355     }
01356 
01357     filter_level = av_clip_uintp2(filter_level, 6);
01358 
01359     interior_limit = filter_level;
01360     if (s->filter.sharpness) {
01361         interior_limit >>= (s->filter.sharpness + 3) >> 2;
01362         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01363     }
01364     interior_limit = FFMAX(interior_limit, 1);
01365 
01366     f->filter_level = filter_level;
01367     f->inner_limit = interior_limit;
01368     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01369 }
01370 
01371 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01372 {
01373     int mbedge_lim, bedge_lim, hev_thresh;
01374     int filter_level = f->filter_level;
01375     int inner_limit = f->inner_limit;
01376     int inner_filter = f->inner_filter;
01377     int linesize = s->linesize;
01378     int uvlinesize = s->uvlinesize;
01379     static const uint8_t hev_thresh_lut[2][64] = {
01380         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01381           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01382           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01383           3, 3, 3, 3 },
01384         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01385           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01386           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01387           2, 2, 2, 2 }
01388     };
01389 
01390     if (!filter_level)
01391         return;
01392 
01393      bedge_lim = 2*filter_level + inner_limit;
01394     mbedge_lim = bedge_lim + 4;
01395 
01396     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01397 
01398     if (mb_x) {
01399         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
01400                                        mbedge_lim, inner_limit, hev_thresh);
01401         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01402                                        mbedge_lim, inner_limit, hev_thresh);
01403     }
01404 
01405     if (inner_filter) {
01406         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01407                                              inner_limit, hev_thresh);
01408         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01409                                              inner_limit, hev_thresh);
01410         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01411                                              inner_limit, hev_thresh);
01412         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01413                                              uvlinesize,  bedge_lim,
01414                                              inner_limit, hev_thresh);
01415     }
01416 
01417     if (mb_y) {
01418         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
01419                                        mbedge_lim, inner_limit, hev_thresh);
01420         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01421                                        mbedge_lim, inner_limit, hev_thresh);
01422     }
01423 
01424     if (inner_filter) {
01425         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01426                                              linesize,    bedge_lim,
01427                                              inner_limit, hev_thresh);
01428         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01429                                              linesize,    bedge_lim,
01430                                              inner_limit, hev_thresh);
01431         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01432                                              linesize,    bedge_lim,
01433                                              inner_limit, hev_thresh);
01434         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01435                                              dst[2] + 4 * uvlinesize,
01436                                              uvlinesize,  bedge_lim,
01437                                              inner_limit, hev_thresh);
01438     }
01439 }
01440 
01441 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01442 {
01443     int mbedge_lim, bedge_lim;
01444     int filter_level = f->filter_level;
01445     int inner_limit = f->inner_limit;
01446     int inner_filter = f->inner_filter;
01447     int linesize = s->linesize;
01448 
01449     if (!filter_level)
01450         return;
01451 
01452      bedge_lim = 2*filter_level + inner_limit;
01453     mbedge_lim = bedge_lim + 4;
01454 
01455     if (mb_x)
01456         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01457     if (inner_filter) {
01458         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01459         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01460         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01461     }
01462 
01463     if (mb_y)
01464         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01465     if (inner_filter) {
01466         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01467         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01468         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01469     }
01470 }
01471 
01472 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
01473 {
01474     VP8FilterStrength *f = s->filter_strength;
01475     uint8_t *dst[3] = {
01476         curframe->data[0] + 16*mb_y*s->linesize,
01477         curframe->data[1] +  8*mb_y*s->uvlinesize,
01478         curframe->data[2] +  8*mb_y*s->uvlinesize
01479     };
01480     int mb_x;
01481 
01482     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01483         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01484         filter_mb(s, dst, f++, mb_x, mb_y);
01485         dst[0] += 16;
01486         dst[1] += 8;
01487         dst[2] += 8;
01488     }
01489 }
01490 
01491 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
01492 {
01493     VP8FilterStrength *f = s->filter_strength;
01494     uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
01495     int mb_x;
01496 
01497     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01498         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
01499         filter_mb_simple(s, dst, f++, mb_x, mb_y);
01500         dst += 16;
01501     }
01502 }
01503 
01504 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01505                             AVPacket *avpkt)
01506 {
01507     VP8Context *s = avctx->priv_data;
01508     int ret, mb_x, mb_y, i, y, referenced;
01509     enum AVDiscard skip_thresh;
01510     AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
01511 
01512     if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01513         return ret;
01514 
01515     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01516                                 || s->update_altref == VP56_FRAME_CURRENT;
01517 
01518     skip_thresh = !referenced ? AVDISCARD_NONREF :
01519                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01520 
01521     if (avctx->skip_frame >= skip_thresh) {
01522         s->invisible = 1;
01523         goto skip_decode;
01524     }
01525     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01526 
01527     // release no longer referenced frames
01528     for (i = 0; i < 5; i++)
01529         if (s->frames[i].data[0] &&
01530             &s->frames[i] != prev_frame &&
01531             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01532             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01533             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01534             ff_thread_release_buffer(avctx, &s->frames[i]);
01535 
01536     // find a free buffer
01537     for (i = 0; i < 5; i++)
01538         if (&s->frames[i] != prev_frame &&
01539             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01540             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01541             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01542             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01543             break;
01544         }
01545     if (i == 5) {
01546         av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01547         abort();
01548     }
01549     if (curframe->data[0])
01550         ff_thread_release_buffer(avctx, curframe);
01551 
01552     curframe->key_frame = s->keyframe;
01553     curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01554     curframe->reference = referenced ? 3 : 0;
01555     curframe->ref_index[0] = s->segmentation_map;
01556     if ((ret = ff_thread_get_buffer(avctx, curframe))) {
01557         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01558         return ret;
01559     }
01560 
01561     // check if golden and altref are swapped
01562     if (s->update_altref != VP56_FRAME_NONE) {
01563         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
01564     } else {
01565         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
01566     }
01567     if (s->update_golden != VP56_FRAME_NONE) {
01568         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
01569     } else {
01570         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
01571     }
01572     if (s->update_last) {
01573         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01574     } else {
01575         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01576     }
01577     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
01578 
01579     ff_thread_finish_setup(avctx);
01580 
01581     // Given that arithmetic probabilities are updated every frame, it's quite likely
01582     // that the values we have on a random interframe are complete junk if we didn't
01583     // start decode on a keyframe. So just don't display anything rather than junk.
01584     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01585                          !s->framep[VP56_FRAME_GOLDEN] ||
01586                          !s->framep[VP56_FRAME_GOLDEN2])) {
01587         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01588         return AVERROR_INVALIDDATA;
01589     }
01590 
01591     s->linesize   = curframe->linesize[0];
01592     s->uvlinesize = curframe->linesize[1];
01593 
01594     if (!s->edge_emu_buffer)
01595         s->edge_emu_buffer = av_malloc(21*s->linesize);
01596 
01597     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01598 
01599     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
01600     memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01601 
01602     // top edge of 127 for intra prediction
01603     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01604         s->top_border[0][15] = s->top_border[0][23] = 127;
01605         memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01606     }
01607     memset(s->ref_count, 0, sizeof(s->ref_count));
01608     if (s->keyframe)
01609         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01610 
01611 #define MARGIN (16 << 2)
01612     s->mv_min.y = -MARGIN;
01613     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01614 
01615     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01616         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01617         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01618         int mb_xy = mb_y*s->mb_width;
01619         uint8_t *dst[3] = {
01620             curframe->data[0] + 16*mb_y*s->linesize,
01621             curframe->data[1] +  8*mb_y*s->uvlinesize,
01622             curframe->data[2] +  8*mb_y*s->uvlinesize
01623         };
01624 
01625         memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
01626         memset(s->left_nnz, 0, sizeof(s->left_nnz));
01627         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01628 
01629         // left edge of 129 for intra prediction
01630         if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01631             for (i = 0; i < 3; i++)
01632                 for (y = 0; y < 16>>!!i; y++)
01633                     dst[i][y*curframe->linesize[i]-1] = 129;
01634             if (mb_y == 1) // top left edge is also 129
01635                 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01636         }
01637 
01638         s->mv_min.x = -MARGIN;
01639         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
01640         if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01641             ff_thread_await_progress(prev_frame, mb_y, 0);
01642 
01643         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01644             /* Prefetch the current frame, 4 MBs ahead */
01645             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01646             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01647 
01648             decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
01649                            prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
01650 
01651             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01652 
01653             if (!mb->skip)
01654                 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
01655 
01656             if (mb->mode <= MODE_I4x4)
01657                 intra_predict(s, dst, mb, mb_x, mb_y);
01658             else
01659                 inter_predict(s, dst, mb, mb_x, mb_y);
01660 
01661             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01662 
01663             if (!mb->skip) {
01664                 idct_mb(s, dst, mb);
01665             } else {
01666                 AV_ZERO64(s->left_nnz);
01667                 AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
01668 
01669                 // Reset DC block predictors if they would exist if the mb had coefficients
01670                 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01671                     s->left_nnz[8]      = 0;
01672                     s->top_nnz[mb_x][8] = 0;
01673                 }
01674             }
01675 
01676             if (s->deblock_filter)
01677                 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
01678 
01679             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01680 
01681             dst[0] += 16;
01682             dst[1] += 8;
01683             dst[2] += 8;
01684             s->mv_min.x -= 64;
01685             s->mv_max.x -= 64;
01686         }
01687         if (s->deblock_filter) {
01688             if (s->filter.simple)
01689                 filter_mb_row_simple(s, curframe, mb_y);
01690             else
01691                 filter_mb_row(s, curframe, mb_y);
01692         }
01693         s->mv_min.y -= 64;
01694         s->mv_max.y -= 64;
01695 
01696         ff_thread_report_progress(curframe, mb_y, 0);
01697     }
01698 
01699     ff_thread_report_progress(curframe, INT_MAX, 0);
01700 skip_decode:
01701     // if future frames don't use the updated probabilities,
01702     // reset them to the values we saved
01703     if (!s->update_probabilities)
01704         s->prob[0] = s->prob[1];
01705 
01706     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01707 
01708     if (!s->invisible) {
01709         *(AVFrame*)data = *curframe;
01710         *data_size = sizeof(AVFrame);
01711     }
01712 
01713     return avpkt->size;
01714 }
01715 
01716 static av_cold int vp8_decode_init(AVCodecContext *avctx)
01717 {
01718     VP8Context *s = avctx->priv_data;
01719 
01720     s->avctx = avctx;
01721     avctx->pix_fmt = PIX_FMT_YUV420P;
01722 
01723     dsputil_init(&s->dsp, avctx);
01724     ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
01725     ff_vp8dsp_init(&s->vp8dsp);
01726 
01727     return 0;
01728 }
01729 
01730 static av_cold int vp8_decode_free(AVCodecContext *avctx)
01731 {
01732     vp8_decode_flush(avctx);
01733     return 0;
01734 }
01735 
01736 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
01737 {
01738     VP8Context *s = avctx->priv_data;
01739 
01740     s->avctx = avctx;
01741 
01742     return 0;
01743 }
01744 
/*
 * Translate a frame pointer that points into s_src->frames[] into the pointer
 * to the same slot of s->frames[]; a NULL pointer stays NULL.
 *
 * Relies on variables named `s` and `s_src` being in scope at the point of
 * use. The argument is evaluated more than once, so it must not have side
 * effects. Both the argument and the whole expansion are parenthesized so the
 * macro can be used safely inside larger expressions.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
01747 
01748 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
01749 {
01750     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
01751 
01752     s->prob[0] = s_src->prob[!s_src->update_probabilities];
01753     s->segmentation = s_src->segmentation;
01754     s->lf_delta = s_src->lf_delta;
01755     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
01756 
01757     memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
01758     s->framep[0] = REBASE(s_src->next_framep[0]);
01759     s->framep[1] = REBASE(s_src->next_framep[1]);
01760     s->framep[2] = REBASE(s_src->next_framep[2]);
01761     s->framep[3] = REBASE(s_src->next_framep[3]);
01762 
01763     return 0;
01764 }
01765 
01766 AVCodec ff_vp8_decoder = {
01767     "vp8",
01768     AVMEDIA_TYPE_VIDEO,
01769     CODEC_ID_VP8,
01770     sizeof(VP8Context),
01771     vp8_decode_init,
01772     NULL,
01773     vp8_decode_free,
01774     vp8_decode_frame,
01775     CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
01776     .flush = vp8_decode_flush,
01777     .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
01778     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
01779     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
01780 };