00026 #include "libavutil/imgutils.h"
00027 #include "avcodec.h"
00028 #include "internal.h"
00029 #include "vp8.h"
00030 #include "vp8data.h"
00031 #include "rectangle.h"
00032 #include "thread.h"
00033
00034 #if ARCH_ARM
00035 # include "arm/vp8.h"
00036 #endif
00037
00038 static void free_buffers(VP8Context *s)
00039 {
00040 int i;
00041 if (s->thread_data)
00042 for (i = 0; i < MAX_THREADS; i++) {
00043 av_freep(&s->thread_data[i].filter_strength);
00044 av_freep(&s->thread_data[i].edge_emu_buffer);
00045 }
00046 av_freep(&s->thread_data);
00047 av_freep(&s->macroblocks_base);
00048 av_freep(&s->intra4x4_pred_mode_top);
00049 av_freep(&s->top_nnz);
00050 av_freep(&s->top_border);
00051
00052 s->macroblocks = NULL;
00053 }
00054
00055 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
00056 {
00057 int ret;
00058 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
00059 return ret;
00060 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
00061 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
00062 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
00063 ff_thread_release_buffer(s->avctx, f);
00064 return AVERROR(ENOMEM);
00065 }
00066 return 0;
00067 }
00068
00069 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
00070 {
00071 if (f->ref_index[0]) {
00072 if (prefer_delayed_free) {
            /* A map queued here may still be in use by other decoding threads
             * after a size change, so it is freed later rather than right away.
             * When direct freeing is allowed, keep at most one map queued for
             * reuse and release the rest immediately. */
00077 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
00078 if (s->num_maps_to_be_freed < max_queued_maps) {
00079 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
00080 } else if (can_direct_free) {
00081 av_free(f->ref_index[0]);
00082 }
00083 f->ref_index[0] = NULL;
00084 } else {
00085 av_free(f->ref_index[0]);
00086 }
00087 }
00088 ff_thread_release_buffer(s->avctx, f);
00089 }
00090
00091 static void vp8_decode_flush_impl(AVCodecContext *avctx,
00092 int prefer_delayed_free, int can_direct_free, int free_mem)
00093 {
00094 VP8Context *s = avctx->priv_data;
00095 int i;
00096
00097 if (!avctx->internal->is_copy) {
00098 for (i = 0; i < 5; i++)
00099 if (s->frames[i].data[0])
00100 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
00101 }
00102 memset(s->framep, 0, sizeof(s->framep));
00103
00104 if (free_mem) {
00105 free_buffers(s);
00106 s->maps_are_invalid = 1;
00107 }
00108 }
00109
00110 static void vp8_decode_flush(AVCodecContext *avctx)
00111 {
00112 vp8_decode_flush_impl(avctx, 1, 1, 0);
00113 }
00114
00115 static int update_dimensions(VP8Context *s, int width, int height)
00116 {
00117 AVCodecContext *avctx = s->avctx;
00118 int i;
00119
00120 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
00121 height != s->avctx->height) {
00122 if (av_image_check_size(width, height, 0, s->avctx))
00123 return AVERROR_INVALIDDATA;
00124
00125 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
00126
00127 avcodec_set_dimensions(s->avctx, width, height);
00128 }
00129
00130 s->mb_width = (s->avctx->coded_width +15) / 16;
00131 s->mb_height = (s->avctx->coded_height+15) / 16;
00132
00133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
00134 if (!s->mb_layout) {
00135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
00137 }
00138 else
00139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
00140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
00143
00144 for (i = 0; i < MAX_THREADS; i++) {
00145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
00146 #if HAVE_THREADS
00147 pthread_mutex_init(&s->thread_data[i].lock, NULL);
00148 pthread_cond_init(&s->thread_data[i].cond, NULL);
00149 #endif
00150 }
00151
00152 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
00153 (!s->intra4x4_pred_mode_top && !s->mb_layout))
00154 return AVERROR(ENOMEM);
00155
00156 s->macroblocks = s->macroblocks_base + 1;
00157
00158 return 0;
00159 }
00160
00161 static void parse_segment_info(VP8Context *s)
00162 {
00163 VP56RangeCoder *c = &s->c;
00164 int i;
00165
00166 s->segmentation.update_map = vp8_rac_get(c);
00167
00168 if (vp8_rac_get(c)) {
00169 s->segmentation.absolute_vals = vp8_rac_get(c);
00170
00171 for (i = 0; i < 4; i++)
00172 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
00173
00174 for (i = 0; i < 4; i++)
00175 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00176 }
00177 if (s->segmentation.update_map)
00178 for (i = 0; i < 3; i++)
00179 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00180 }
00181
00182 static void update_lf_deltas(VP8Context *s)
00183 {
00184 VP56RangeCoder *c = &s->c;
00185 int i;
00186
00187 for (i = 0; i < 4; i++) {
00188 if (vp8_rac_get(c)) {
00189 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
00190
00191 if (vp8_rac_get(c))
00192 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
00193 }
00194 }
00195
00196 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
00197 if (vp8_rac_get(c)) {
00198 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
00199
00200 if (vp8_rac_get(c))
00201 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
00202 }
00203 }
00204 }
00205
00206 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00207 {
00208 const uint8_t *sizes = buf;
00209 int i;
00210
00211 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00212
00213 buf += 3*(s->num_coeff_partitions-1);
00214 buf_size -= 3*(s->num_coeff_partitions-1);
00215 if (buf_size < 0)
00216 return -1;
00217
00218 for (i = 0; i < s->num_coeff_partitions-1; i++) {
00219 int size = AV_RL24(sizes + 3*i);
00220 if (buf_size - size < 0)
00221 return -1;
00222
00223 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00224 buf += size;
00225 buf_size -= size;
00226 }
00227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00228
00229 return 0;
00230 }
00231
00232 static void get_quants(VP8Context *s)
00233 {
00234 VP56RangeCoder *c = &s->c;
00235 int i, base_qi;
00236
00237 int yac_qi = vp8_rac_get_uint(c, 7);
00238 int ydc_delta = vp8_rac_get_sint(c, 4);
00239 int y2dc_delta = vp8_rac_get_sint(c, 4);
00240 int y2ac_delta = vp8_rac_get_sint(c, 4);
00241 int uvdc_delta = vp8_rac_get_sint(c, 4);
00242 int uvac_delta = vp8_rac_get_sint(c, 4);
00243
00244 for (i = 0; i < 4; i++) {
00245 if (s->segmentation.enabled) {
00246 base_qi = s->segmentation.base_quant[i];
00247 if (!s->segmentation.absolute_vals)
00248 base_qi += yac_qi;
00249 } else
00250 base_qi = yac_qi;
00251
00252 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00253 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
00254 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00255
00256 s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
00257 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00258 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00259
00260 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00261 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00262 }
00263 }
00264
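/**
 * Determine which buffer, if any, the golden or altref reference should be
 * updated with after decoding the current frame: the current frame itself,
 * the previous frame, or the other of the golden/altref pair.
 */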
00278 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00279 {
00280 VP56RangeCoder *c = &s->c;
00281
00282 if (update)
00283 return VP56_FRAME_CURRENT;
00284
00285 switch (vp8_rac_get_uint(c, 2)) {
00286 case 1:
00287 return VP56_FRAME_PREVIOUS;
00288 case 2:
00289 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00290 }
00291 return VP56_FRAME_NONE;
00292 }
00293
00294 static void update_refs(VP8Context *s)
00295 {
00296 VP56RangeCoder *c = &s->c;
00297
00298 int update_golden = vp8_rac_get(c);
00299 int update_altref = vp8_rac_get(c);
00300
00301 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00302 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00303 }
00304
00305 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00306 {
00307 VP56RangeCoder *c = &s->c;
00308 int header_size, hscale, vscale, i, j, k, l, m, ret;
00309 int width = s->avctx->width;
00310 int height = s->avctx->height;
00311
00312 s->keyframe = !(buf[0] & 1);
00313 s->profile = (buf[0]>>1) & 7;
00314 s->invisible = !(buf[0] & 0x10);
00315 header_size = AV_RL24(buf) >> 5;
00316 buf += 3;
00317 buf_size -= 3;
00318
00319 if (s->profile > 3)
00320 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00321
00322 if (!s->profile)
00323 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00324 else
00325 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00326
00327 if (header_size > buf_size - 7*s->keyframe) {
00328 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00329 return AVERROR_INVALIDDATA;
00330 }
00331
00332 if (s->keyframe) {
00333 if (AV_RL24(buf) != 0x2a019d) {
00334 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00335 return AVERROR_INVALIDDATA;
00336 }
00337 width = AV_RL16(buf+3) & 0x3fff;
00338 height = AV_RL16(buf+5) & 0x3fff;
00339 hscale = buf[4] >> 6;
00340 vscale = buf[6] >> 6;
00341 buf += 7;
00342 buf_size -= 7;
00343
00344 if (hscale || vscale)
00345 av_log_missing_feature(s->avctx, "Upscaling", 1);
00346
00347 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00348 for (i = 0; i < 4; i++)
00349 for (j = 0; j < 16; j++)
00350 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00351 sizeof(s->prob->token[i][j]));
00352 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00353 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00354 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
00355 memset(&s->segmentation, 0, sizeof(s->segmentation));
00356 }
00357
00358 ff_vp56_init_range_decoder(c, buf, header_size);
00359 buf += header_size;
00360 buf_size -= header_size;
00361
00362 if (s->keyframe) {
00363 if (vp8_rac_get(c))
00364 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00365 vp8_rac_get(c);
00366 }
00367
00368 if ((s->segmentation.enabled = vp8_rac_get(c)))
00369 parse_segment_info(s);
00370 else
00371 s->segmentation.update_map = 0;
00372
00373 s->filter.simple = vp8_rac_get(c);
00374 s->filter.level = vp8_rac_get_uint(c, 6);
00375 s->filter.sharpness = vp8_rac_get_uint(c, 3);
00376
00377 if ((s->lf_delta.enabled = vp8_rac_get(c)))
00378 if (vp8_rac_get(c))
00379 update_lf_deltas(s);
00380
00381 if (setup_partitions(s, buf, buf_size)) {
00382 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00383 return AVERROR_INVALIDDATA;
00384 }
00385
00386 if (!s->macroblocks_base ||
00387 width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
00388 if ((ret = update_dimensions(s, width, height)) < 0)
00389 return ret;
00390 }
00391
00392 get_quants(s);
00393
00394 if (!s->keyframe) {
00395 update_refs(s);
00396 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
00397 s->sign_bias[VP56_FRAME_GOLDEN2 ] = vp8_rac_get(c);
00398 }
00399
    /* if the probabilities decoded in this frame are not meant to be kept for
     * following frames, save a copy of the current set so it can be restored
     * after this frame is decoded */
00402 if (!(s->update_probabilities = vp8_rac_get(c)))
00403 s->prob[1] = s->prob[0];
00404
00405 s->update_last = s->keyframe || vp8_rac_get(c);
00406
00407 for (i = 0; i < 4; i++)
00408 for (j = 0; j < 8; j++)
00409 for (k = 0; k < 3; k++)
00410 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00411 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00412 int prob = vp8_rac_get_uint(c, 8);
00413 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00414 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00415 }
00416
00417 if ((s->mbskip_enabled = vp8_rac_get(c)))
00418 s->prob->mbskip = vp8_rac_get_uint(c, 8);
00419
00420 if (!s->keyframe) {
00421 s->prob->intra = vp8_rac_get_uint(c, 8);
00422 s->prob->last = vp8_rac_get_uint(c, 8);
00423 s->prob->golden = vp8_rac_get_uint(c, 8);
00424
00425 if (vp8_rac_get(c))
00426 for (i = 0; i < 4; i++)
00427 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00428 if (vp8_rac_get(c))
00429 for (i = 0; i < 3; i++)
00430 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
00431
        /* motion vector probability updates, one set per component */
00433 for (i = 0; i < 2; i++)
00434 for (j = 0; j < 19; j++)
00435 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00436 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00437 }
00438
00439 return 0;
00440 }
00441
00442 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00443 {
00444 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00445 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00446 }
00447
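/**
 * Motion vector coding, 17.1: read one MV component, either as a short value
 * from a small tree or as a long value coded bit by bit, followed by its sign.
 */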
00451 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00452 {
00453 int bit, x = 0;
00454
00455 if (vp56_rac_get_prob_branchy(c, p[0])) {
00456 int i;
00457
00458 for (i = 0; i < 3; i++)
00459 x += vp56_rac_get_prob(c, p[9 + i]) << i;
00460 for (i = 9; i > 3; i--)
00461 x += vp56_rac_get_prob(c, p[9 + i]) << i;
00462 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00463 x += 8;
00464 } else {
        /* small motion vector: read it from the short mv tree */
00466 const uint8_t *ps = p+2;
00467 bit = vp56_rac_get_prob(c, *ps);
00468 ps += 1 + 3*bit;
00469 x += 4*bit;
00470 bit = vp56_rac_get_prob(c, *ps);
00471 ps += 1 + bit;
00472 x += 2*bit;
00473 x += vp56_rac_get_prob(c, *ps);
00474 }
00475
00476 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00477 }
00478
00479 static av_always_inline
00480 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00481 {
00482 if (left == top)
00483 return vp8_submv_prob[4-!!left];
00484 if (!top)
00485 return vp8_submv_prob[2];
00486 return vp8_submv_prob[1-!!left];
00487 }
00488
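/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */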
00493 static av_always_inline
00494 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
00495 {
00496 int part_idx;
00497 int n, num;
00498 VP8Macroblock *top_mb;
00499 VP8Macroblock *left_mb = &mb[-1];
00500 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00501 *mbsplits_top,
00502 *mbsplits_cur, *firstidx;
00503 VP56mv *top_mv;
00504 VP56mv *left_mv = left_mb->bmv;
00505 VP56mv *cur_mv = mb->bmv;
00506
00507 if (!layout)
00508 top_mb = &mb[2];
00509 else
00510 top_mb = &mb[-s->mb_width-1];
00511 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
00512 top_mv = top_mb->bmv;
00513
00514 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00515 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00516 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00517 } else {
00518 part_idx = VP8_SPLITMVMODE_8x8;
00519 }
00520 } else {
00521 part_idx = VP8_SPLITMVMODE_4x4;
00522 }
00523
00524 num = vp8_mbsplit_count[part_idx];
00525 mbsplits_cur = vp8_mbsplits[part_idx],
00526 firstidx = vp8_mbfirstidx[part_idx];
00527 mb->partitioning = part_idx;
00528
00529 for (n = 0; n < num; n++) {
00530 int k = firstidx[n];
00531 uint32_t left, above;
00532 const uint8_t *submv_prob;
00533
00534 if (!(k & 3))
00535 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00536 else
00537 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
00538 if (k <= 3)
00539 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00540 else
00541 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00542
00543 submv_prob = get_submv_prob(left, above);
00544
00545 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00546 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00547 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00548 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00549 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00550 } else {
00551 AV_ZERO32(&mb->bmv[n]);
00552 }
00553 } else {
00554 AV_WN32A(&mb->bmv[n], above);
00555 }
00556 } else {
00557 AV_WN32A(&mb->bmv[n], left);
00558 }
00559 }
00560
00561 return num;
00562 }
00563
00564 static av_always_inline
00565 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
00566 {
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
00570 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
00571 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
00572 int idx = CNT_ZERO;
00573 int cur_sign_bias = s->sign_bias[mb->ref_frame];
00574 int8_t *sign_bias = s->sign_bias;
00575 VP56mv near_mv[4];
00576 uint8_t cnt[4] = { 0 };
00577 VP56RangeCoder *c = &s->c;
00578
00579 if (!layout) {
00580 mb_edge[0] = mb + 2;
00581 mb_edge[2] = mb + 1;
00582 }
00583 else {
00584 mb_edge[0] = mb - s->mb_width-1;
00585 mb_edge[2] = mb - s->mb_width-2;
00586 }
00587
00588 AV_ZERO32(&near_mv[0]);
00589 AV_ZERO32(&near_mv[1]);
00590 AV_ZERO32(&near_mv[2]);
00591
    /* Process MB on top, left and top-left */
00593 #define MV_EDGE_CHECK(n)\
00594 {\
00595 VP8Macroblock *edge = mb_edge[n];\
00596 int edge_ref = edge->ref_frame;\
00597 if (edge_ref != VP56_FRAME_CURRENT) {\
00598 uint32_t mv = AV_RN32A(&edge->mv);\
00599 if (mv) {\
00600 if (cur_sign_bias != sign_bias[edge_ref]) {\
                /* SWAR negate of the cached MVs */\
00602 mv = ~mv;\
00603 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00604 }\
00605 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00606 AV_WN32A(&near_mv[++idx], mv);\
00607 cnt[idx] += 1 + (n != 2);\
00608 } else\
00609 cnt[CNT_ZERO] += 1 + (n != 2);\
00610 }\
00611 }
00612
00613 MV_EDGE_CHECK(0)
00614 MV_EDGE_CHECK(1)
00615 MV_EDGE_CHECK(2)
00616
00617 mb->partitioning = VP8_SPLITMVMODE_NONE;
00618 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00619 mb->mode = VP8_MVMODE_MV;
00620
        /* If we have three distinct MVs ... */
00622 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00623 cnt[CNT_NEAREST] += 1;
00624
        /* Swap near and nearest if necessary */
00626 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00627 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
00628 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00629 }
00630
00631 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
00632 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00633
                /* Choose the best mv out of 0,0 and the nearest mv */
00635 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
00636 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
00637 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
00638 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00639
00640 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00641 mb->mode = VP8_MVMODE_SPLIT;
00642 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
00643 } else {
00644 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00645 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00646 mb->bmv[0] = mb->mv;
00647 }
00648 } else {
00649 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00650 mb->bmv[0] = mb->mv;
00651 }
00652 } else {
00653 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00654 mb->bmv[0] = mb->mv;
00655 }
00656 } else {
00657 mb->mode = VP8_MVMODE_ZERO;
00658 AV_ZERO32(&mb->mv);
00659 mb->bmv[0] = mb->mv;
00660 }
00661 }
00662
00663 static av_always_inline
00664 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00665 int mb_x, int keyframe, int layout)
00666 {
00667 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
00668
00669 if (layout == 1) {
00670 VP8Macroblock *mb_top = mb - s->mb_width - 1;
00671 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
00672 }
00673 if (keyframe) {
00674 int x, y;
00675 uint8_t* top;
00676 uint8_t* const left = s->intra4x4_pred_mode_left;
00677 if (layout == 1)
00678 top = mb->intra4x4_pred_mode_top;
00679 else
00680 top = s->intra4x4_pred_mode_top + 4 * mb_x;
00681 for (y = 0; y < 4; y++) {
00682 for (x = 0; x < 4; x++) {
00683 const uint8_t *ctx;
00684 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00685 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00686 left[y] = top[x] = *intra4x4;
00687 intra4x4++;
00688 }
00689 }
00690 } else {
00691 int i;
00692 for (i = 0; i < 16; i++)
00693 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00694 }
00695 }
00696
00697 static av_always_inline
00698 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
00699 uint8_t *segment, uint8_t *ref, int layout)
00700 {
00701 VP56RangeCoder *c = &s->c;
00702
00703 if (s->segmentation.update_map) {
00704 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
00705 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
00706 } else if (s->segmentation.enabled)
00707 *segment = ref ? *ref : *segment;
00708 mb->segment = *segment;
00709
00710 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00711
00712 if (s->keyframe) {
00713 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00714
00715 if (mb->mode == MODE_I4x4) {
00716 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
00717 } else {
00718 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00719 if (s->mb_layout == 1)
00720 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
00721 else
00722 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00723 AV_WN32A( s->intra4x4_pred_mode_left, modes);
00724 }
00725
00726 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00727 mb->ref_frame = VP56_FRAME_CURRENT;
00728 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        /* inter MB, 16.2 */
00730 if (vp56_rac_get_prob_branchy(c, s->prob->last))
00731 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00732 VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00733 else
00734 mb->ref_frame = VP56_FRAME_PREVIOUS;
00735 s->ref_count[mb->ref_frame-1]++;
00736
        /* motion vectors, 16.3 */
00738 decode_mvs(s, mb, mb_x, mb_y, layout);
00739 } else {
        /* intra MB, 16.1 */
00741 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00742
00743 if (mb->mode == MODE_I4x4)
00744 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
00745
00746 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00747 mb->ref_frame = VP56_FRAME_CURRENT;
00748 mb->partitioning = VP8_SPLITMVMODE_NONE;
00749 AV_ZERO32(&mb->bmv[0]);
00750 }
00751 }
00752
00753 #ifndef decode_block_coeffs_internal
00754
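/**
 * @param r          arithmetic decoder context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob pre-looked-up probabilities for the current context
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded, otherwise the index of the last coeff
 *         decoded plus one
 */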
00763 static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
00764 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00765 int i, uint8_t *token_prob, int16_t qmul[2])
00766 {
00767 VP56RangeCoder c = *r;
00768 goto skip_eob;
00769 do {
00770 int coeff;
00771 if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))
00772 break;
00773
00774 skip_eob:
00775 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) {
00776 if (++i == 16)
00777 break;
00778 token_prob = probs[i][0];
00779 goto skip_eob;
00780 }
00781
00782 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) {
00783 coeff = 1;
00784 token_prob = probs[i+1][1];
00785 } else {
00786 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) {
00787 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
00788 if (coeff)
00789 coeff += vp56_rac_get_prob(&c, token_prob[5]);
00790 coeff += 2;
00791 } else {
00792
00793 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
00794 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) {
00795 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
00796 } else {
00797 coeff = 7;
00798 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
00799 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
00800 }
00801 } else {
00802 int a = vp56_rac_get_prob(&c, token_prob[8]);
00803 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
00804 int cat = (a<<1) + b;
00805 coeff = 3 + (8<<cat);
00806 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
00807 }
00808 }
00809 token_prob = probs[i+1][2];
00810 }
00811 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
00812 } while (++i < 16);
00813
00814 *r = c;
00815 return i;
00816 }
00817 #endif
00818
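/**
 * @param c          arithmetic decoder context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded, otherwise the index of the last coeff
 *         decoded plus one
 */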
00830 static av_always_inline
00831 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00832 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00833 int i, int zero_nhood, int16_t qmul[2])
00834 {
00835 uint8_t *token_prob = probs[i][zero_nhood];
00836 if (!vp56_rac_get_prob_branchy(c, token_prob[0]))
00837 return 0;
00838 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00839 }
00840
00841 static av_always_inline
00842 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
00843 uint8_t t_nnz[9], uint8_t l_nnz[9])
00844 {
00845 int i, x, y, luma_start = 0, luma_ctx = 3;
00846 int nnz_pred, nnz, nnz_total = 0;
00847 int segment = mb->segment;
00848 int block_dc = 0;
00849
00850 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00851 nnz_pred = t_nnz[8] + l_nnz[8];
00852
        /* decode DC values and do hadamard */
00854 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
00855 s->qmat[segment].luma_dc_qmul);
00856 l_nnz[8] = t_nnz[8] = !!nnz;
00857 if (nnz) {
00858 nnz_total += nnz;
00859 block_dc = 1;
00860 if (nnz == 1)
00861 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
00862 else
00863 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
00864 }
00865 luma_start = 1;
00866 luma_ctx = 0;
00867 }
00868
    /* luma blocks */
00870 for (y = 0; y < 4; y++)
00871 for (x = 0; x < 4; x++) {
00872 nnz_pred = l_nnz[y] + t_nnz[x];
00873 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
00874 nnz_pred, s->qmat[segment].luma_qmul);
00875
00876 td->non_zero_count_cache[y][x] = nnz + block_dc;
00877 t_nnz[x] = l_nnz[y] = !!nnz;
00878 nnz_total += nnz;
00879 }
00880
00881
00882
    /* chroma blocks */
00884 for (i = 4; i < 6; i++)
00885 for (y = 0; y < 2; y++)
00886 for (x = 0; x < 2; x++) {
00887 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00888 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
00889 nnz_pred, s->qmat[segment].chroma_qmul);
00890 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
00891 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00892 nnz_total += nnz;
00893 }
00894
00895
00896
    /* if no coefficients were coded at all, the macroblock can be treated as skipped */
00898 if (!nnz_total)
00899 mb->skip = 1;
00900 }
00901
00902 static av_always_inline
00903 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00904 int linesize, int uvlinesize, int simple)
00905 {
00906 AV_COPY128(top_border, src_y + 15*linesize);
00907 if (!simple) {
00908 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00909 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00910 }
00911 }
00912
00913 static av_always_inline
00914 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00915 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00916 int simple, int xchg)
00917 {
00918 uint8_t *top_border_m1 = top_border-32;
00919 src_y -= linesize;
00920 src_cb -= uvlinesize;
00921 src_cr -= uvlinesize;
00922
00923 #define XCHG(a,b,xchg) do { \
00924 if (xchg) AV_SWAP64(b,a); \
00925 else AV_COPY64(b,a); \
00926 } while (0)
00927
00928 XCHG(top_border_m1+8, src_y-8, xchg);
00929 XCHG(top_border, src_y, xchg);
00930 XCHG(top_border+8, src_y+8, 1);
00931 if (mb_x < mb_width-1)
00932 XCHG(top_border+32, src_y+16, 1);
00933
00934
    /* chroma borders are only exchanged for the normal loop filter,
     * or on the first row to initialize the top edge */
00936 if (!simple || !mb_y) {
00937 XCHG(top_border_m1+16, src_cb-8, xchg);
00938 XCHG(top_border_m1+24, src_cr-8, xchg);
00939 XCHG(top_border+16, src_cb, 1);
00940 XCHG(top_border+24, src_cr, 1);
00941 }
00942 }
00943
00944 static av_always_inline
00945 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00946 {
00947 if (!mb_x) {
00948 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00949 } else {
00950 return mb_y ? mode : LEFT_DC_PRED8x8;
00951 }
00952 }
00953
00954 static av_always_inline
00955 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00956 {
00957 if (!mb_x) {
00958 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00959 } else {
00960 return mb_y ? mode : HOR_PRED8x8;
00961 }
00962 }
00963
00964 static av_always_inline
00965 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00966 {
00967 if (mode == DC_PRED8x8) {
00968 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00969 } else {
00970 return mode;
00971 }
00972 }
00973
00974 static av_always_inline
00975 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00976 {
00977 switch (mode) {
00978 case DC_PRED8x8:
00979 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00980 case VERT_PRED8x8:
00981 return !mb_y ? DC_127_PRED8x8 : mode;
00982 case HOR_PRED8x8:
00983 return !mb_x ? DC_129_PRED8x8 : mode;
00984 case PLANE_PRED8x8 :
00985 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00986 }
00987 return mode;
00988 }
00989
00990 static av_always_inline
00991 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00992 {
00993 if (!mb_x) {
00994 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00995 } else {
00996 return mb_y ? mode : HOR_VP8_PRED;
00997 }
00998 }
00999
01000 static av_always_inline
01001 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
01002 {
01003 switch (mode) {
01004 case VERT_PRED:
01005 if (!mb_x && mb_y) {
01006 *copy_buf = 1;
01007 return mode;
01008 }
        /* fall-through */
01010 case DIAG_DOWN_LEFT_PRED:
01011 case VERT_LEFT_PRED:
01012 return !mb_y ? DC_127_PRED : mode;
01013 case HOR_PRED:
01014 if (!mb_y) {
01015 *copy_buf = 1;
01016 return mode;
01017 }
        /* fall-through */
01019 case HOR_UP_PRED:
01020 return !mb_x ? DC_129_PRED : mode;
01021 case TM_VP8_PRED:
01022 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
01023 case DC_PRED:
01024 case DIAG_DOWN_RIGHT_PRED:
01025 case VERT_RIGHT_PRED:
01026 case HOR_DOWN_PRED:
01027 if (!mb_y || !mb_x)
01028 *copy_buf = 1;
01029 return mode;
01030 }
01031 return mode;
01032 }
01033
01034 static av_always_inline
01035 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
01036 VP8Macroblock *mb, int mb_x, int mb_y)
01037 {
01038 AVCodecContext *avctx = s->avctx;
01039 int x, y, mode, nnz;
01040 uint32_t tr;
01041
01042
01043
01044 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
01045 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01046 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01047 s->filter.simple, 1);
01048
01049 if (mb->mode < MODE_I4x4) {
01050 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01051 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
01052 } else {
01053 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
01054 }
01055 s->hpc.pred16x16[mode](dst[0], s->linesize);
01056 } else {
01057 uint8_t *ptr = dst[0];
01058 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
01059 uint8_t tr_top[4] = { 127, 127, 127, 127 };
01060
        /* all blocks on the right edge of the macroblock use the bottom edge
         * of the macroblock above for their top-right prediction */
01063 uint8_t *tr_right = ptr - s->linesize + 16;
01064
        /* on the right edge of the frame, extend the last available top pixel
         * for the top-right prediction */
01067 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
01068 mb_x == s->mb_width-1) {
01069 tr = tr_right[-1]*0x01010101u;
01070 tr_right = (uint8_t *)&tr;
01071 }
01072
01073 if (mb->skip)
01074 AV_ZERO128(td->non_zero_count_cache);
01075
01076 for (y = 0; y < 4; y++) {
01077 uint8_t *topright = ptr + 4 - s->linesize;
01078 for (x = 0; x < 4; x++) {
01079 int copy = 0, linesize = s->linesize;
01080 uint8_t *dst = ptr+4*x;
01081 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
01082
01083 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
01084 topright = tr_top;
01085 } else if (x == 3)
01086 topright = tr_right;
01087
01088 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
01090 if (copy) {
01091 dst = copy_dst + 12;
01092 linesize = 8;
01093 if (!(mb_y + y)) {
01094 copy_dst[3] = 127U;
01095 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
01096 } else {
01097 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
01098 if (!(mb_x + x)) {
01099 copy_dst[3] = 129U;
01100 } else {
01101 copy_dst[3] = ptr[4*x-s->linesize-1];
01102 }
01103 }
01104 if (!(mb_x + x)) {
01105 copy_dst[11] =
01106 copy_dst[19] =
01107 copy_dst[27] =
01108 copy_dst[35] = 129U;
01109 } else {
01110 copy_dst[11] = ptr[4*x -1];
01111 copy_dst[19] = ptr[4*x+s->linesize -1];
01112 copy_dst[27] = ptr[4*x+s->linesize*2-1];
01113 copy_dst[35] = ptr[4*x+s->linesize*3-1];
01114 }
01115 }
01116 } else {
01117 mode = intra4x4[x];
01118 }
01119 s->hpc.pred4x4[mode](dst, topright, linesize);
01120 if (copy) {
01121 AV_COPY32(ptr+4*x , copy_dst+12);
01122 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
01123 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01124 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01125 }
01126
01127 nnz = td->non_zero_count_cache[y][x];
01128 if (nnz) {
01129 if (nnz == 1)
01130 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
01131 else
01132 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
01133 }
01134 topright += 4;
01135 }
01136
01137 ptr += 4*s->linesize;
01138 intra4x4 += 4;
01139 }
01140 }
01141
01142 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01143 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
01144 } else {
01145 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
01146 }
01147 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01148 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01149
01150 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
01151 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01152 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01153 s->filter.simple, 0);
01154 }
01155
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // i.e. versus the first vertical fullpel pixel
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
01162
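/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param td       thread data
 * @param dst      target buffer for block in new frame
 * @param ref      reference frame
 * @param mv       motion vector (relative to block position) to use for block
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */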
01179 static av_always_inline
01180 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
01181 AVFrame *ref, const VP56mv *mv,
01182 int x_off, int y_off, int block_w, int block_h,
01183 int width, int height, int linesize,
01184 vp8_mc_func mc_func[3][3])
01185 {
01186 uint8_t *src = ref->data[0];
01187
01188 if (AV_RN32A(mv)) {
01189
01190 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01191 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01192
01193 x_off += mv->x >> 2;
01194 y_off += mv->y >> 2;
01195
01196
01197 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01198 src += y_off * linesize + x_off;
01199 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
01200 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01201 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01202 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01203 x_off - mx_idx, y_off - my_idx, width, height);
01204 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
01205 }
01206 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01207 } else {
01208 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01209 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01210 }
01211 }
01212
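/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param td       thread data
 * @param dst1     target buffer for block in new frame (U plane)
 * @param dst2     target buffer for block in new frame (V plane)
 * @param ref      reference frame
 * @param mv       motion vector (relative to block position) to use for block
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */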
01230 static av_always_inline
01231 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
01232 AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
01233 int block_w, int block_h, int width, int height, int linesize,
01234 vp8_mc_func mc_func[3][3])
01235 {
01236 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01237
01238 if (AV_RN32A(mv)) {
01239 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01240 int my = mv->y&7, my_idx = subpel_idx[0][my];
01241
01242 x_off += mv->x >> 3;
01243 y_off += mv->y >> 3;
01244
01245
01246 src1 += y_off * linesize + x_off;
01247 src2 += y_off * linesize + x_off;
01248 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01249 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
01250 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01251 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01252 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01253 x_off - mx_idx, y_off - my_idx, width, height);
01254 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
01255 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01256
01257 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01258 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01259 x_off - mx_idx, y_off - my_idx, width, height);
01260 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
01261 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01262 } else {
01263 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01264 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01265 }
01266 } else {
01267 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01268 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01269 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01270 }
01271 }
01272
01273 static av_always_inline
01274 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
01275 AVFrame *ref_frame, int x_off, int y_off,
01276 int bx_off, int by_off,
01277 int block_w, int block_h,
01278 int width, int height, VP56mv *mv)
01279 {
01280 VP56mv uvmv = *mv;
01281
    /* Y */
01283 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
01284 ref_frame, mv, x_off + bx_off, y_off + by_off,
01285 block_w, block_h, width, height, s->linesize,
01286 s->put_pixels_tab[block_w == 8]);
01287
    /* U/V */
01289 if (s->profile == 3) {
01290 uvmv.x &= ~7;
01291 uvmv.y &= ~7;
01292 }
01293 x_off >>= 1; y_off >>= 1;
01294 bx_off >>= 1; by_off >>= 1;
01295 width >>= 1; height >>= 1;
01296 block_w >>= 1; block_h >>= 1;
01297 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
01298 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01299 &uvmv, x_off + bx_off, y_off + by_off,
01300 block_w, block_h, width, height, s->uvlinesize,
01301 s->put_pixels_tab[1 + (block_w == 4)]);
01302 }
01303
01304
/* Prefetch the reference pixels that upcoming motion compensation is likely
 * to read, to hide memory latency. */
01306 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01307 {
01308
01309 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01310 int x_off = mb_x << 4, y_off = mb_y << 4;
01311 int mx = (mb->mv.x>>2) + x_off + 8;
01312 int my = (mb->mv.y>>2) + y_off;
01313 uint8_t **src= s->framep[ref]->data;
01314 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01315
01316
01317
01318 s->dsp.prefetch(src[0]+off, s->linesize, 4);
01319 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01320 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01321 }
01322 }
01323
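/**
 * Apply the motion vectors of the macroblock to the prediction buffer (inter
 * prediction, chapter 18).
 */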
01327 static av_always_inline
01328 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
01329 VP8Macroblock *mb, int mb_x, int mb_y)
01330 {
01331 int x_off = mb_x << 4, y_off = mb_y << 4;
01332 int width = 16*s->mb_width, height = 16*s->mb_height;
01333 AVFrame *ref = s->framep[mb->ref_frame];
01334 VP56mv *bmv = mb->bmv;
01335
01336 switch (mb->partitioning) {
01337 case VP8_SPLITMVMODE_NONE:
01338 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01339 0, 0, 16, 16, width, height, &mb->mv);
01340 break;
01341 case VP8_SPLITMVMODE_4x4: {
01342 int x, y;
01343 VP56mv uvmv;
01344
        /* luma */
01346 for (y = 0; y < 4; y++) {
01347 for (x = 0; x < 4; x++) {
01348 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
01349 ref, &bmv[4*y + x],
01350 4*x + x_off, 4*y + y_off, 4, 4,
01351 width, height, s->linesize,
01352 s->put_pixels_tab[2]);
01353 }
01354 }
01355
        /* chroma */
01357 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01358 for (y = 0; y < 2; y++) {
01359 for (x = 0; x < 2; x++) {
01360 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
01361 mb->bmv[ 2*y * 4 + 2*x+1].x +
01362 mb->bmv[(2*y+1) * 4 + 2*x ].x +
01363 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01364 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
01365 mb->bmv[ 2*y * 4 + 2*x+1].y +
01366 mb->bmv[(2*y+1) * 4 + 2*x ].y +
01367 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
01368 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01369 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
01370 if (s->profile == 3) {
01371 uvmv.x &= ~7;
01372 uvmv.y &= ~7;
01373 }
01374 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
01375 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01376 4*x + x_off, 4*y + y_off, 4, 4,
01377 width, height, s->uvlinesize,
01378 s->put_pixels_tab[2]);
01379 }
01380 }
01381 break;
01382 }
01383 case VP8_SPLITMVMODE_16x8:
01384 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01385 0, 0, 16, 8, width, height, &bmv[0]);
01386 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01387 0, 8, 16, 8, width, height, &bmv[1]);
01388 break;
01389 case VP8_SPLITMVMODE_8x16:
01390 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01391 0, 0, 8, 16, width, height, &bmv[0]);
01392 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01393 8, 0, 8, 16, width, height, &bmv[1]);
01394 break;
01395 case VP8_SPLITMVMODE_8x8:
01396 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01397 0, 0, 8, 8, width, height, &bmv[0]);
01398 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01399 8, 0, 8, 8, width, height, &bmv[1]);
01400 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01401 0, 8, 8, 8, width, height, &bmv[2]);
01402 vp8_mc_part(s, td, dst, ref, x_off, y_off,
01403 8, 8, 8, 8, width, height, &bmv[3]);
01404 break;
01405 }
01406 }
01407
01408 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
01409 uint8_t *dst[3], VP8Macroblock *mb)
01410 {
01411 int x, y, ch;
01412
01413 if (mb->mode != MODE_I4x4) {
01414 uint8_t *y_dst = dst[0];
01415 for (y = 0; y < 4; y++) {
01416 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
01417 if (nnz4) {
01418 if (nnz4&~0x01010101) {
01419 for (x = 0; x < 4; x++) {
01420 if ((uint8_t)nnz4 == 1)
01421 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
01422 else if((uint8_t)nnz4 > 1)
01423 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
01424 nnz4 >>= 8;
01425 if (!nnz4)
01426 break;
01427 }
01428 } else {
01429 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
01430 }
01431 }
01432 y_dst += 4*s->linesize;
01433 }
01434 }
01435
01436 for (ch = 0; ch < 2; ch++) {
01437 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
01438 if (nnz4) {
01439 uint8_t *ch_dst = dst[1+ch];
01440 if (nnz4&~0x01010101) {
01441 for (y = 0; y < 2; y++) {
01442 for (x = 0; x < 2; x++) {
01443 if ((uint8_t)nnz4 == 1)
01444 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
01445 else if((uint8_t)nnz4 > 1)
01446 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
01447 nnz4 >>= 8;
01448 if (!nnz4)
01449 goto chroma_idct_end;
01450 }
01451 ch_dst += 4*s->uvlinesize;
01452 }
01453 } else {
01454 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
01455 }
01456 }
01457 chroma_idct_end: ;
01458 }
01459 }
01460
01461 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01462 {
01463 int interior_limit, filter_level;
01464
01465 if (s->segmentation.enabled) {
01466 filter_level = s->segmentation.filter_level[mb->segment];
01467 if (!s->segmentation.absolute_vals)
01468 filter_level += s->filter.level;
01469 } else
01470 filter_level = s->filter.level;
01471
01472 if (s->lf_delta.enabled) {
01473 filter_level += s->lf_delta.ref[mb->ref_frame];
01474 filter_level += s->lf_delta.mode[mb->mode];
01475 }
01476
01477 filter_level = av_clip_uintp2(filter_level, 6);
01478
01479 interior_limit = filter_level;
01480 if (s->filter.sharpness) {
01481 interior_limit >>= (s->filter.sharpness + 3) >> 2;
01482 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01483 }
01484 interior_limit = FFMAX(interior_limit, 1);
01485
01486 f->filter_level = filter_level;
01487 f->inner_limit = interior_limit;
01488 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01489 }
01490
01491 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01492 {
01493 int mbedge_lim, bedge_lim, hev_thresh;
01494 int filter_level = f->filter_level;
01495 int inner_limit = f->inner_limit;
01496 int inner_filter = f->inner_filter;
01497 int linesize = s->linesize;
01498 int uvlinesize = s->uvlinesize;
01499 static const uint8_t hev_thresh_lut[2][64] = {
01500 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01501 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01502 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01503 3, 3, 3, 3 },
01504 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01505 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01506 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01507 2, 2, 2, 2 }
01508 };
01509
01510 if (!filter_level)
01511 return;
01512
01513 bedge_lim = 2*filter_level + inner_limit;
01514 mbedge_lim = bedge_lim + 4;
01515
01516 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01517
01518 if (mb_x) {
01519 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
01520 mbedge_lim, inner_limit, hev_thresh);
01521 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
01522 mbedge_lim, inner_limit, hev_thresh);
01523 }
01524
01525 if (inner_filter) {
01526 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01527 inner_limit, hev_thresh);
01528 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01529 inner_limit, hev_thresh);
01530 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01531 inner_limit, hev_thresh);
01532 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01533 uvlinesize, bedge_lim,
01534 inner_limit, hev_thresh);
01535 }
01536
01537 if (mb_y) {
01538 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
01539 mbedge_lim, inner_limit, hev_thresh);
01540 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
01541 mbedge_lim, inner_limit, hev_thresh);
01542 }
01543
01544 if (inner_filter) {
01545 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01546 linesize, bedge_lim,
01547 inner_limit, hev_thresh);
01548 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01549 linesize, bedge_lim,
01550 inner_limit, hev_thresh);
01551 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01552 linesize, bedge_lim,
01553 inner_limit, hev_thresh);
01554 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01555 dst[2] + 4 * uvlinesize,
01556 uvlinesize, bedge_lim,
01557 inner_limit, hev_thresh);
01558 }
01559 }
01560
01561 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01562 {
01563 int mbedge_lim, bedge_lim;
01564 int filter_level = f->filter_level;
01565 int inner_limit = f->inner_limit;
01566 int inner_filter = f->inner_filter;
01567 int linesize = s->linesize;
01568
01569 if (!filter_level)
01570 return;
01571
01572 bedge_lim = 2*filter_level + inner_limit;
01573 mbedge_lim = bedge_lim + 4;
01574
01575 if (mb_x)
01576 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01577 if (inner_filter) {
01578 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01579 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01580 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01581 }
01582
01583 if (mb_y)
01584 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01585 if (inner_filter) {
01586 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01587 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01588 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01589 }
01590 }
01591
01592 static void release_queued_segmaps(VP8Context *s, int is_close)
01593 {
01594 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
01595 while (s->num_maps_to_be_freed > leave_behind)
01596 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
01597 s->maps_are_invalid = 0;
01598 }
01599
01600 #define MARGIN (16 << 2)
01601 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
01602 AVFrame *prev_frame)
01603 {
01604 VP8Context *s = avctx->priv_data;
01605 int mb_x, mb_y;
01606
01607 s->mv_min.y = -MARGIN;
01608 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01609 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01610 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
01611 int mb_xy = mb_y*s->mb_width;
01612
01613 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01614
01615 s->mv_min.x = -MARGIN;
01616 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
01617 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01618 if (mb_y == 0)
01619 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
01620 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
01621 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
01622 s->mv_min.x -= 64;
01623 s->mv_max.x -= 64;
01624 }
01625 s->mv_min.y -= 64;
01626 s->mv_max.y -= 64;
01627 }
01628 }
01629
01630 #if HAVE_THREADS
01631 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
01632 do {\
01633 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
01634 if (otd->thread_mb_pos < tmp) {\
01635 pthread_mutex_lock(&otd->lock);\
01636 td->wait_mb_pos = tmp;\
01637 do {\
01638 if (otd->thread_mb_pos >= tmp)\
01639 break;\
01640 pthread_cond_wait(&otd->cond, &otd->lock);\
01641 } while (1);\
01642 td->wait_mb_pos = INT_MAX;\
01643 pthread_mutex_unlock(&otd->lock);\
01644 }\
01645 } while(0);
01646
01647 #define update_pos(td, mb_y, mb_x)\
01648 do {\
01649 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
01650 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
01651 int is_null = (next_td == NULL) || (prev_td == NULL);\
01652 int pos_check = (is_null) ? 1 :\
01653 (next_td != td && pos >= next_td->wait_mb_pos) ||\
01654 (prev_td != td && pos >= prev_td->wait_mb_pos);\
01655 td->thread_mb_pos = pos;\
01656 if (sliced_threading && pos_check) {\
01657 pthread_mutex_lock(&td->lock);\
01658 pthread_cond_broadcast(&td->cond);\
01659 pthread_mutex_unlock(&td->lock);\
01660 }\
01661 } while(0);
01662 #else
01663 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
01664 #define update_pos(td, mb_y, mb_x)
01665 #endif
01666
01667 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
01668 int jobnr, int threadnr)
01669 {
01670 VP8Context *s = avctx->priv_data;
01671 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
01672 int mb_y = td->thread_mb_pos>>16;
01673 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
01674 int num_jobs = s->num_jobs;
01675 AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
01676 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01677 VP8Macroblock *mb;
01678 uint8_t *dst[3] = {
01679 curframe->data[0] + 16*mb_y*s->linesize,
01680 curframe->data[1] + 8*mb_y*s->uvlinesize,
01681 curframe->data[2] + 8*mb_y*s->uvlinesize
01682 };
01683 if (mb_y == 0) prev_td = td;
01684 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
01685 if (mb_y == s->mb_height-1) next_td = td;
01686 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
01687 if (s->mb_layout == 1)
01688 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
01689 else {
01690 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01691 memset(mb - 1, 0, sizeof(*mb));
01692 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01693 }
01694
01695 memset(td->left_nnz, 0, sizeof(td->left_nnz));
01696
01697 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01698 for (i = 0; i < 3; i++)
01699 for (y = 0; y < 16>>!!i; y++)
01700 dst[i][y*curframe->linesize[i]-1] = 129;
01701 if (mb_y == 1) {
01702 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01703 }
01704 }
01705
01706 s->mv_min.x = -MARGIN;
01707 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
01708
01709 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01710
01711 if (prev_td != td) {
01712 if (threadnr != 0) {
01713 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
01714 } else {
01715 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
01716 }
01717 }
01718
01719 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01720 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01721
01722 if (!s->mb_layout)
01723 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
01724 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
01725
01726 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01727
01728 if (!mb->skip)
01729 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
01730
01731 if (mb->mode <= MODE_I4x4)
01732 intra_predict(s, td, dst, mb, mb_x, mb_y);
01733 else
01734 inter_predict(s, td, dst, mb, mb_x, mb_y);
01735
01736 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01737
01738 if (!mb->skip) {
01739 idct_mb(s, td, dst, mb);
01740 } else {
01741 AV_ZERO64(td->left_nnz);
01742 AV_WN64(s->top_nnz[mb_x], 0);
01743
            /* Reset DC block predictors if they would exist if the mb had coefficients */
01745 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01746 td->left_nnz[8] = 0;
01747 s->top_nnz[mb_x][8] = 0;
01748 }
01749 }
01750
01751 if (s->deblock_filter)
01752 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
01753
01754 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
01755 if (s->filter.simple)
01756 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
01757 else
01758 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01759 }
01760
01761 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01762
01763 dst[0] += 16;
01764 dst[1] += 8;
01765 dst[2] += 8;
01766 s->mv_min.x -= 64;
01767 s->mv_max.x -= 64;
01768
01769 if (mb_x == s->mb_width+1) {
01770 update_pos(td, mb_y, s->mb_width+3);
01771 } else {
01772 update_pos(td, mb_y, mb_x);
01773 }
01774 }
01775 }
01776
01777 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
01778 int jobnr, int threadnr)
01779 {
01780 VP8Context *s = avctx->priv_data;
01781 VP8ThreadData *td = &s->thread_data[threadnr];
01782 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
01783 AVFrame *curframe = s->curframe;
01784 VP8Macroblock *mb;
01785 VP8ThreadData *prev_td, *next_td;
01786 uint8_t *dst[3] = {
01787 curframe->data[0] + 16*mb_y*s->linesize,
01788 curframe->data[1] + 8*mb_y*s->uvlinesize,
01789 curframe->data[2] + 8*mb_y*s->uvlinesize
01790 };
01791
01792 if (s->mb_layout == 1)
01793 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
01794 else
01795 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01796
01797 if (mb_y == 0) prev_td = td;
01798 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
01799 if (mb_y == s->mb_height-1) next_td = td;
01800 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
01801
01802 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
01803 VP8FilterStrength *f = &td->filter_strength[mb_x];
01804 if (prev_td != td) {
01805 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
01806 }
01807 if (next_td != td)
01808 if (next_td != &s->thread_data[0]) {
01809 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
01810 }
01811
01812 if (num_jobs == 1) {
01813 if (s->filter.simple)
01814 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
01815 else
01816 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01817 }
01818
01819 if (s->filter.simple)
01820 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
01821 else
01822 filter_mb(s, dst, f, mb_x, mb_y);
01823 dst[0] += 16;
01824 dst[1] += 8;
01825 dst[2] += 8;
01826
01827 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
01828 }
01829 }
01830
01831 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
01832 int jobnr, int threadnr)
01833 {
01834 VP8Context *s = avctx->priv_data;
01835 VP8ThreadData *td = &s->thread_data[jobnr];
01836 VP8ThreadData *next_td = NULL, *prev_td = NULL;
01837 AVFrame *curframe = s->curframe;
01838 int mb_y, num_jobs = s->num_jobs;
01839 td->thread_nr = threadnr;
01840 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
01841 if (mb_y >= s->mb_height) break;
01842 td->thread_mb_pos = mb_y<<16;
01843 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
01844 if (s->deblock_filter)
01845 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
01846 update_pos(td, mb_y, INT_MAX & 0xFFFF); /* mark the whole row as finished (maximum 16-bit mb_x) */
01847
01848 s->mv_min.y -= 64;
01849 s->mv_max.y -= 64;
01850
01851 if (avctx->active_thread_type == FF_THREAD_FRAME)
01852 ff_thread_report_progress(curframe, mb_y, 0);
01853 }
01854
01855 return 0;
01856 }
01857
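/* Main decode entry point: parse the frame header, pick a free frame buffer,
 * rotate the previous/golden/altref reference pointers according to the header
 * flags, run the row decode/filter jobs, and return the finished picture
 * unless the frame is invisible or was skipped. */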
01858 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01859 AVPacket *avpkt)
01860 {
01861 VP8Context *s = avctx->priv_data;
01862 int ret, i, referenced, num_jobs;
01863 enum AVDiscard skip_thresh;
01864 AVFrame *av_uninit(curframe), *prev_frame;
01865
01866 release_queued_segmaps(s, 0);
01867
01868 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01869 goto err;
01870
01871 prev_frame = s->framep[VP56_FRAME_CURRENT];
01872
01873 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01874 || s->update_altref == VP56_FRAME_CURRENT;
01875
01876 skip_thresh = !referenced ? AVDISCARD_NONREF :
01877 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01878
01879 if (avctx->skip_frame >= skip_thresh) {
01880 s->invisible = 1;
01881 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
01882 goto skip_decode;
01883 }
01884 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01885
01886 /* release frames that are no longer referenced */
01887 for (i = 0; i < 5; i++)
01888 if (s->frames[i].data[0] &&
01889 &s->frames[i] != prev_frame &&
01890 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01891 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01892 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01893 vp8_release_frame(s, &s->frames[i], 1, 0);
01894
01895 /* find a free buffer for the new frame */
01896 for (i = 0; i < 5; i++)
01897 if (&s->frames[i] != prev_frame &&
01898 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01899 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01900 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01901 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01902 break;
01903 }
01904 if (i == 5) {
01905 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01906 abort();
01907 }
01908 if (curframe->data[0])
01909 vp8_release_frame(s, curframe, 1, 0);
01910
01911 /* Given that arithmetic probabilities are updated every frame, it's quite likely
01912  * that the values we have on a random interframe are complete junk if we didn't
01913  * start decode on a keyframe. So just don't display anything rather than junk. */
01914 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01915 !s->framep[VP56_FRAME_GOLDEN] ||
01916 !s->framep[VP56_FRAME_GOLDEN2])) {
01917 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01918 ret = AVERROR_INVALIDDATA;
01919 goto err;
01920 }
01921
01922 curframe->key_frame = s->keyframe;
01923 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01924 curframe->reference = referenced ? 3 : 0;
01925 if ((ret = vp8_alloc_frame(s, curframe))) {
01926 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01927 goto err;
01928 }
01929
01930 /* update the golden/altref pointers; either may be replaced by the current frame or by another reference */
01931 if (s->update_altref != VP56_FRAME_NONE) {
01932 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
01933 } else {
01934 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
01935 }
01936 if (s->update_golden != VP56_FRAME_NONE) {
01937 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
01938 } else {
01939 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
01940 }
01941 if (s->update_last) {
01942 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01943 } else {
01944 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01945 }
01946 s->next_framep[VP56_FRAME_CURRENT] = curframe;
01947
01948 ff_thread_finish_setup(avctx);
01949
01950 s->linesize = curframe->linesize[0];
01951 s->uvlinesize = curframe->linesize[1];
01952
01953 if (!s->thread_data[0].edge_emu_buffer)
01954 for (i = 0; i < MAX_THREADS; i++)
01955 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
01956
01957 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01958 /* reset the prediction state of the (virtual) macroblock row above the frame */
01959 if (!s->mb_layout)
01960 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01961 if (!s->mb_layout && s->keyframe)
01962 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01963
01964 /* top edge of 127 for intra prediction */
01965 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01966 s->top_border[0][15] = s->top_border[0][23] = 127;
01967 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01968 }
01969 memset(s->ref_count, 0, sizeof(s->ref_count));
01970
01971 /* If this frame reuses the previous frame's segmentation map (segmentation
01972  * enabled but the map not updated), make sure the previous frame has
01973  * finished reading that map before we start using it. */
01974 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01975 ff_thread_await_progress(prev_frame, 1, 0);
01976
01977 if (s->mb_layout == 1)
01978 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
01979
01980 if (avctx->active_thread_type == FF_THREAD_FRAME)
01981 num_jobs = 1;
01982 else
01983 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
01984 s->num_jobs = num_jobs;
01985 s->curframe = curframe;
01986 s->prev_frame = prev_frame;
01987 s->mv_min.y = -MARGIN;
01988 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01989 for (i = 0; i < MAX_THREADS; i++) {
01990 s->thread_data[i].thread_mb_pos = 0; /* progress so far, encoded as (mb_y << 16) | mb_x */
01991 s->thread_data[i].wait_mb_pos = INT_MAX; /* position this job is waiting on; INT_MAX = not waiting */
01992 }
01993 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
01994
01995 ff_thread_report_progress(curframe, INT_MAX, 0);
01996 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01997
01998 skip_decode:
01999 /* if future frames don't use the updated probabilities,
02000  * reset them to the values we saved */
02001 if (!s->update_probabilities)
02002 s->prob[0] = s->prob[1];
02003
02004 if (!s->invisible) {
02005 *(AVFrame*)data = *curframe;
02006 *data_size = sizeof(AVFrame);
02007 }
02008
02009 return avpkt->size;
02010 err:
02011 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
02012 return ret;
02013 }
02014
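/* One-time decoder setup: VP8 is always 8-bit 4:2:0, so the pixel format and
 * the DSP, intra-prediction and VP8 DSP helpers can be initialized here once. */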
02015 static av_cold int vp8_decode_init(AVCodecContext *avctx)
02016 {
02017 VP8Context *s = avctx->priv_data;
02018
02019 s->avctx = avctx;
02020 avctx->pix_fmt = PIX_FMT_YUV420P;
02021
02022 ff_dsputil_init(&s->dsp, avctx);
02023 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
02024 ff_vp8dsp_init(&s->vp8dsp);
02025
02026 return 0;
02027 }
02028
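/* Decoder teardown: release all frames and per-thread buffers, and free any
 * segmentation maps still queued for delayed freeing. */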
02029 static av_cold int vp8_decode_free(AVCodecContext *avctx)
02030 {
02031 vp8_decode_flush_impl(avctx, 0, 1, 1);
02032 release_queued_segmaps(avctx->priv_data, 1);
02033 return 0;
02034 }
02035
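/* Frame-threading worker init: the per-thread context starts as a copy of the
 * main one, so only the back-pointer to its own AVCodecContext is fixed up. */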
02036 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
02037 {
02038 VP8Context *s = avctx->priv_data;
02039
02040 s->avctx = avctx;
02041
02042 return 0;
02043 }
02044
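/* Translate a frame pointer into the source context's frames[] array into the
 * pointer to the same slot in this context's frames[] array; NULL stays NULL. */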
02045 #define REBASE(pic) \
02046 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
02047
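/* Frame-threading state hand-off: copy the probability tables, segmentation
 * and loop-filter deltas, sign biases and reference-frame pointers from the
 * context that just finished its setup (src) into the next worker's context
 * (dst). If the frame size changed, the per-thread buffers are freed here so
 * they can be reallocated for the new dimensions on the next decode call. */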
02048 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
02049 {
02050 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
02051
02052 if (s->macroblocks_base &&
02053 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
02054 free_buffers(s);
02055 s->maps_are_invalid = 1;
02056 s->mb_width = s_src->mb_width;
02057 s->mb_height = s_src->mb_height;
02058 }
02059
02060 s->prob[0] = s_src->prob[!s_src->update_probabilities];
02061 s->segmentation = s_src->segmentation;
02062 s->lf_delta = s_src->lf_delta;
02063 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
02064
02065 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
02066 s->framep[0] = REBASE(s_src->next_framep[0]);
02067 s->framep[1] = REBASE(s_src->next_framep[1]);
02068 s->framep[2] = REBASE(s_src->next_framep[2]);
02069 s->framep[3] = REBASE(s_src->next_framep[3]);
02070
02071 return 0;
02072 }
02073
02074 AVCodec ff_vp8_decoder = {
02075 .name = "vp8",
02076 .type = AVMEDIA_TYPE_VIDEO,
02077 .id = AV_CODEC_ID_VP8,
02078 .priv_data_size = sizeof(VP8Context),
02079 .init = vp8_decode_init,
02080 .close = vp8_decode_free,
02081 .decode = vp8_decode_frame,
02082 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
02083 .flush = vp8_decode_flush,
02084 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
02085 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
02086 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
02087 };