24 #include "config_components.h"
/* 24-bit constant 0x49 0x83 0x42.
 * NOTE(review): presumably the frame sync code that the header parser
 * compares against the bitstream; the use site is not visible in this
 * excerpt, so confirm before relying on that. */
49 #define VP9_SYNCCODE 0x498342
102 f->segmentation_map =
NULL;
114 sz = 64 *
s->sb_cols *
s->sb_rows;
115 if (sz !=
s->frame_extradata_pool_size) {
119 if (!
s->frame_extradata_pool) {
120 s->frame_extradata_pool_size = 0;
124 s->frame_extradata_pool_size = sz;
132 f->segmentation_map =
f->extradata;
157 src->hwaccel_picture_private);
/* Compile-time sum of the per-backend VP9 hwaccel configuration macros.
 * Every backend contributes its CONFIG_* value once, except D3D11VA, whose
 * value is multiplied by 2.
 * NOTE(review): presumably each CONFIG_* expands to 0 or 1, making this an
 * upper bound on the number of hwaccel pixel-format entries (with D3D11VA
 * accounting for two entries); confirm against the array this macro sizes,
 * which is not visible in this excerpt. */
162 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
163 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
164 CONFIG_VP9_D3D12VA_HWACCEL + \
165 CONFIG_VP9_NVDEC_HWACCEL + \
166 CONFIG_VP9_VAAPI_HWACCEL + \
167 CONFIG_VP9_VDPAU_HWACCEL + \
168 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
172 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
177 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
181 switch (
s->pix_fmt) {
184 #if CONFIG_VP9_DXVA2_HWACCEL
187 #if CONFIG_VP9_D3D11VA_HWACCEL
191 #if CONFIG_VP9_D3D12VA_HWACCEL
194 #if CONFIG_VP9_NVDEC_HWACCEL
197 #if CONFIG_VP9_VAAPI_HWACCEL
200 #if CONFIG_VP9_VDPAU_HWACCEL
203 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
208 #if CONFIG_VP9_NVDEC_HWACCEL
211 #if CONFIG_VP9_VAAPI_HWACCEL
214 #if CONFIG_VP9_VDPAU_HWACCEL
221 #if CONFIG_VP9_VAAPI_HWACCEL
228 #if CONFIG_VP9_VAAPI_HWACCEL
234 *fmtp++ =
s->pix_fmt;
242 s->gf_fmt =
s->pix_fmt;
250 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
253 s->last_fmt =
s->pix_fmt;
254 s->sb_cols = (
w + 63) >> 6;
255 s->sb_rows = (
h + 63) >> 6;
256 s->cols = (
w + 7) >> 3;
257 s->rows = (
h + 7) >> 3;
/* Carve the next sub-buffer out of a running cursor `p`:
 *   1. point `var` at the current cursor, cast to `type`;
 *   2. advance `p` by s->sb_cols * n * sizeof(*var).
 * Relies on `p` and `s` being in scope at the expansion site.
 * Expands to TWO statements (not wrapped in do { } while (0)), so it must
 * not be used as the unbraced body of an if/for/while.
 * NOTE(review): the advance is in units of p's pointee type; the sizeof()
 * factor suggests `p` is a byte pointer - confirm at the declaration. */
260 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
264 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
265 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
268 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
269 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
270 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
271 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
272 assign(
s->above_mode_ctx, uint8_t *, 16);
274 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
275 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
276 assign(
s->above_partition_ctx, uint8_t *, 8);
277 assign(
s->above_skip_ctx, uint8_t *, 8);
278 assign(
s->above_txfm_ctx, uint8_t *, 8);
279 assign(
s->above_segpred_ctx, uint8_t *, 8);
280 assign(
s->above_intra_ctx, uint8_t *, 8);
281 assign(
s->above_comp_ctx, uint8_t *, 8);
282 assign(
s->above_ref_ctx, uint8_t *, 8);
283 assign(
s->above_filter_ctx, uint8_t *, 8);
288 for (
i = 0;
i <
s->active_tile_cols;
i++)
292 if (
s->s.h.bpp !=
s->last_bpp) {
295 s->last_bpp =
s->s.h.bpp;
305 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
308 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
312 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
313 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
315 int sbs =
s->sb_cols *
s->sb_rows;
318 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
319 16 * 16 + 2 * chroma_eobs) * sbs);
320 if (!
td->b_base || !
td->block_base)
322 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
323 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
324 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
325 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
326 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
330 if (!
td->block_structure)
334 for (
i = 1;
i <
s->active_tile_cols;
i++)
337 for (
i = 0;
i <
s->active_tile_cols;
i++) {
339 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
340 16 * 16 + 2 * chroma_eobs);
341 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
343 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
344 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
345 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
346 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
347 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
351 if (!
s->td[
i].block_structure)
356 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
373 return m - ((v + 1) >> 1);
380 static const uint8_t inv_map_table[255] = {
381 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
382 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
383 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
384 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
385 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
386 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
387 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
388 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
389 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
390 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
391 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
392 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
393 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
394 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
395 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
396 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
397 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
398 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
446 s->s.h.bpp = 8 +
bits * 2;
447 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
453 s->ss_h =
s->ss_v = 0;
467 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
479 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
490 s->ss_h =
s->ss_v = 1;
491 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
502 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
504 const uint8_t *data2;
528 s->last_keyframe =
s->s.h.keyframe;
531 last_invisible =
s->s.h.invisible;
534 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
536 if (
s->s.h.keyframe) {
544 s->s.h.refreshrefmask = 0xff;
550 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
551 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
552 if (
s->s.h.intraonly) {
561 s->ss_h =
s->ss_v = 1;
564 s->bytesperpixel = 1;
577 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
579 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
581 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
582 if (!
s->s.refs[
s->s.h.refidx[0]].f ||
583 !
s->s.refs[
s->s.h.refidx[1]].f ||
584 !
s->s.refs[
s->s.h.refidx[2]].f) {
589 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
590 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
592 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
593 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
595 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
596 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
604 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f &&
612 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
613 s->s.h.signbias[0] !=
s->s.h.signbias[2];
614 if (
s->s.h.allowcompinter) {
615 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
616 s->s.h.fixcompref = 2;
617 s->s.h.varcompref[0] = 0;
618 s->s.h.varcompref[1] = 1;
619 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
620 s->s.h.fixcompref = 1;
621 s->s.h.varcompref[0] = 0;
622 s->s.h.varcompref[1] = 2;
624 s->s.h.fixcompref = 0;
625 s->s.h.varcompref[0] = 1;
626 s->s.h.varcompref[1] = 2;
631 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
632 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
634 if (
s->s.h.keyframe ||
s->s.h.intraonly)
635 s->s.h.framectxid = 0;
638 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
640 s->s.h.lf_delta.ref[0] = 1;
641 s->s.h.lf_delta.ref[1] = 0;
642 s->s.h.lf_delta.ref[2] = -1;
643 s->s.h.lf_delta.ref[3] = -1;
644 s->s.h.lf_delta.mode[0] = 0;
645 s->s.h.lf_delta.mode[1] = 0;
646 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
652 if (
s->s.h.filter.sharpness != sharp) {
653 for (
i = 1;
i <= 63;
i++) {
657 limit >>= (sharp + 3) >> 2;
662 s->filter_lut.lim_lut[
i] =
limit;
663 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
666 s->s.h.filter.sharpness = sharp;
667 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
668 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
669 for (
i = 0;
i < 4;
i++)
672 for (
i = 0;
i < 2;
i++)
683 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
684 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
689 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
690 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
691 for (
i = 0;
i < 7;
i++)
694 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
695 for (
i = 0;
i < 3;
i++)
701 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
702 for (
i = 0;
i < 8;
i++) {
703 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
705 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
707 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
708 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
709 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
715 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
716 int qyac, qydc, quvac, quvdc, lflvl, sh;
718 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
719 if (
s->s.h.segmentation.absolute_vals)
724 qyac =
s->s.h.yac_qi;
736 sh =
s->s.h.filter.level >= 32;
737 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
738 if (
s->s.h.segmentation.absolute_vals)
741 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
743 lflvl =
s->s.h.filter.level;
745 if (
s->s.h.lf_delta.enabled) {
746 s->s.h.segmentation.feat[
i].lflvl[0][0] =
747 s->s.h.segmentation.feat[
i].lflvl[0][1] =
749 for (j = 1; j < 4; j++) {
750 s->s.h.segmentation.feat[
i].lflvl[j][0] =
752 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
753 s->s.h.segmentation.feat[
i].lflvl[j][1] =
755 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
758 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
759 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
769 for (
s->s.h.tiling.log2_tile_cols = 0;
770 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
771 s->s.h.tiling.log2_tile_cols++) ;
772 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
774 while (
max >
s->s.h.tiling.log2_tile_cols) {
776 s->s.h.tiling.log2_tile_cols++;
781 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
782 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
787 for (
i = 0;
i <
s->active_tile_cols;
i++)
792 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
794 s->s.h.tiling.tile_cols : 1;
799 n_range_coders =
s->s.h.tiling.tile_cols;
806 for (
i = 0;
i <
s->active_tile_cols;
i++) {
809 rc += n_range_coders;
814 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
815 int valid_ref_frame = 0;
816 for (
i = 0;
i < 3;
i++) {
818 int refw =
ref->width, refh =
ref->height;
822 "Ref pixfmt (%s) did not match current frame (%s)",
826 }
else if (refw ==
w && refh ==
h) {
827 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
831 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
833 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
838 s->mvscale[
i][0] = (refw << 14) /
w;
839 s->mvscale[
i][1] = (refh << 14) /
h;
840 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
841 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
845 if (!valid_ref_frame) {
846 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
851 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
852 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
862 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
869 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
873 if (size2 >
size - (data2 -
data)) {
886 for (
i = 0;
i <
s->active_tile_cols;
i++) {
887 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
888 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
889 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
891 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
893 s->td[
i].nb_block_structure = 0;
899 s->prob.p =
s->prob_ctx[
c].p;
902 if (
s->s.h.lossless) {
906 if (
s->s.h.txfmmode == 3)
910 for (
i = 0;
i < 2;
i++)
913 for (
i = 0;
i < 2;
i++)
914 for (j = 0; j < 2; j++)
916 s->prob.p.tx16p[
i][j] =
918 for (
i = 0;
i < 2;
i++)
919 for (j = 0; j < 3; j++)
921 s->prob.p.tx32p[
i][j] =
927 for (
i = 0;
i < 4;
i++) {
928 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
930 for (j = 0; j < 2; j++)
931 for (k = 0; k < 2; k++)
932 for (l = 0; l < 6; l++)
933 for (m = 0; m < 6; m++) {
934 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
935 uint8_t *
r =
ref[j][k][l][m];
936 if (m >= 3 && l == 0)
938 for (n = 0; n < 3; n++) {
947 for (j = 0; j < 2; j++)
948 for (k = 0; k < 2; k++)
949 for (l = 0; l < 6; l++)
950 for (m = 0; m < 6; m++) {
951 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
952 uint8_t *
r =
ref[j][k][l][m];
959 if (
s->s.h.txfmmode ==
i)
964 for (
i = 0;
i < 3;
i++)
967 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
968 for (
i = 0;
i < 7;
i++)
969 for (j = 0; j < 3; j++)
971 s->prob.p.mv_mode[
i][j] =
975 for (
i = 0;
i < 4;
i++)
976 for (j = 0; j < 2; j++)
978 s->prob.p.filter[
i][j] =
981 for (
i = 0;
i < 4;
i++)
985 if (
s->s.h.allowcompinter) {
987 if (
s->s.h.comppredmode)
990 for (
i = 0;
i < 5;
i++)
999 for (
i = 0;
i < 5;
i++) {
1001 s->prob.p.single_ref[
i][0] =
1004 s->prob.p.single_ref[
i][1] =
1010 for (
i = 0;
i < 5;
i++)
1012 s->prob.p.comp_ref[
i] =
1016 for (
i = 0;
i < 4;
i++)
1017 for (j = 0; j < 9; j++)
1019 s->prob.p.y_mode[
i][j] =
1022 for (
i = 0;
i < 4;
i++)
1023 for (j = 0; j < 4; j++)
1024 for (k = 0; k < 3; k++)
1026 s->prob.p.partition[3 -
i][j][k] =
1028 s->prob.p.partition[3 -
i][j][k]);
1031 for (
i = 0;
i < 3;
i++)
1035 for (
i = 0;
i < 2;
i++) {
1037 s->prob.p.mv_comp[
i].sign =
1040 for (j = 0; j < 10; j++)
1042 s->prob.p.mv_comp[
i].classes[j] =
1046 s->prob.p.mv_comp[
i].class0 =
1049 for (j = 0; j < 10; j++)
1051 s->prob.p.mv_comp[
i].bits[j] =
1055 for (
i = 0;
i < 2;
i++) {
1056 for (j = 0; j < 2; j++)
1057 for (k = 0; k < 3; k++)
1059 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1062 for (j = 0; j < 3; j++)
1064 s->prob.p.mv_comp[
i].fp[j] =
1068 if (
s->s.h.highprecisionmvs) {
1069 for (
i = 0;
i < 2;
i++) {
1071 s->prob.p.mv_comp[
i].class0_hp =
1075 s->prob.p.mv_comp[
i].hp =
1081 return (data2 -
data) + size2;
1085 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1088 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1089 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1091 s->prob.p.partition[bl][
c];
1093 ptrdiff_t hbs = 4 >> bl;
1095 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1096 int bytesperpixel =
s->bytesperpixel;
1101 }
else if (col + hbs < s->cols) {
1102 if (row + hbs < s->rows) {
1110 yoff += hbs * 8 * y_stride;
1111 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1116 yoff += hbs * 8 * bytesperpixel;
1117 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1121 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1123 yoff + 8 * hbs * bytesperpixel,
1124 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1125 yoff += hbs * 8 * y_stride;
1126 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1127 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1129 yoff + 8 * hbs * bytesperpixel,
1130 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1137 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1139 yoff + 8 * hbs * bytesperpixel,
1140 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1145 }
else if (row + hbs < s->rows) {
1148 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1149 yoff += hbs * 8 * y_stride;
1150 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1151 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1158 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1160 td->counts.partition[bl][
c][bp]++;
1164 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1168 ptrdiff_t hbs = 4 >> bl;
1170 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1171 int bytesperpixel =
s->bytesperpixel;
1176 }
else if (
td->b->bl == bl) {
1179 yoff += hbs * 8 * y_stride;
1180 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1182 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1183 yoff += hbs * 8 * bytesperpixel;
1184 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1189 if (col + hbs < s->cols) {
1190 if (row + hbs < s->rows) {
1191 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1192 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1193 yoff += hbs * 8 * y_stride;
1194 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1197 yoff + 8 * hbs * bytesperpixel,
1198 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1200 yoff += hbs * 8 * bytesperpixel;
1201 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1204 }
else if (row + hbs < s->rows) {
1205 yoff += hbs * 8 * y_stride;
1206 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1214 int sb_start = ( idx * n) >> log2_n;
1215 int sb_end = ((idx + 1) * n) >> log2_n;
1216 *start =
FFMIN(sb_start, n) << 3;
1217 *end =
FFMIN(sb_end, n) << 3;
1225 for (
i = 0;
i <
s->active_tile_cols;
i++)
1234 for (
int i = 0;
i < 3;
i++)
1237 for (
i = 0;
i < 8;
i++) {
1256 int row, col, tile_row, tile_col,
ret;
1258 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1260 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1263 ls_y =
f->linesize[0];
1264 ls_uv =
f->linesize[1];
1265 bytesperpixel =
s->bytesperpixel;
1268 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1270 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1272 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1275 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1276 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1283 if (tile_size >
size)
1294 for (row = tile_row_start; row < tile_row_end;
1295 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1297 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1299 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1301 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1302 td->tile_col_start = tile_col_start;
1304 memset(
td->left_partition_ctx, 0, 8);
1305 memset(
td->left_skip_ctx, 0, 8);
1306 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1311 memset(
td->left_y_nnz_ctx, 0, 16);
1312 memset(
td->left_uv_nnz_ctx, 0, 32);
1313 memset(
td->left_segpred_ctx, 0, 8);
1315 td->c = &
td->c_b[tile_col];
1318 for (col = tile_col_start;
1320 col += 8, yoff2 += 64 * bytesperpixel,
1321 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1325 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1346 if (row + 8 <
s->rows) {
1347 memcpy(
s->intra_pred_data[0],
1348 f->data[0] + yoff + 63 * ls_y,
1349 8 *
s->cols * bytesperpixel);
1350 memcpy(
s->intra_pred_data[1],
1351 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1352 8 *
s->cols * bytesperpixel >>
s->ss_h);
1353 memcpy(
s->intra_pred_data[2],
1354 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1355 8 *
s->cols * bytesperpixel >>
s->ss_h);
1359 if (
s->s.h.filter.level) {
1362 lflvl_ptr =
s->lflvl;
1363 for (col = 0; col <
s->cols;
1364 col += 8, yoff2 += 64 * bytesperpixel,
1365 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1382 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1387 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1388 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1389 unsigned tile_cols_len;
1390 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1395 ls_y =
f->linesize[0];
1396 ls_uv =
f->linesize[1];
1399 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1400 td->tile_col_start = tile_col_start;
1401 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1402 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1403 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1405 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1407 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1409 td->c = &
td->c_b[tile_row];
1410 for (row = tile_row_start; row < tile_row_end;
1411 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1412 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1413 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1415 memset(
td->left_partition_ctx, 0, 8);
1416 memset(
td->left_skip_ctx, 0, 8);
1417 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1422 memset(
td->left_y_nnz_ctx, 0, 16);
1423 memset(
td->left_uv_nnz_ctx, 0, 32);
1424 memset(
td->left_segpred_ctx, 0, 8);
1426 for (col = tile_col_start;
1428 col += 8, yoff2 += 64 * bytesperpixel,
1429 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1432 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1439 tile_cols_len = tile_col_end - tile_col_start;
1440 if (row + 8 <
s->rows) {
1441 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1442 f->data[0] + yoff + 63 * ls_y,
1443 8 * tile_cols_len * bytesperpixel);
1444 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1445 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1446 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1447 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1448 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1449 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1452 vp9_report_tile_progress(
s, row >> 3, 1);
1462 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1464 int bytesperpixel =
s->bytesperpixel, col,
i;
1468 ls_y =
f->linesize[0];
1469 ls_uv =
f->linesize[1];
1471 for (
i = 0;
i <
s->sb_rows;
i++) {
1472 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1474 if (
s->s.h.filter.level) {
1475 yoff = (ls_y * 64)*
i;
1476 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1477 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1478 for (col = 0; col <
s->cols;
1479 col += 8, yoff += 64 * bytesperpixel,
1480 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1493 unsigned int tile, nb_blocks = 0;
1495 if (
s->s.h.segmentation.enabled) {
1496 for (tile = 0; tile <
s->active_tile_cols; tile++)
1497 nb_blocks +=
s->td[tile].nb_block_structure;
1505 par->
qp =
s->s.h.yac_qi;
1506 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1507 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1508 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1509 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1510 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1513 unsigned int block = 0;
1514 unsigned int tile, block_tile;
1516 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1519 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1521 unsigned int row =
td->block_structure[block_tile].row;
1522 unsigned int col =
td->block_structure[block_tile].col;
1523 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1527 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1528 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1530 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1531 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1532 if (
s->s.h.segmentation.absolute_vals)
1533 b->delta_qp -= par->
qp;
1550 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1556 }
else if (
ret == 0) {
1557 if (!
s->s.refs[
ref].f) {
1561 for (
int i = 0;
i < 8;
i++)
1576 src = !
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres ?
1578 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly)
1585 if (
s->s.h.keyframe)
1599 for (
i = 0;
i < 8;
i++) {
1601 s->s.h.refreshrefmask & (1 <<
i) ?
1620 memset(
s->above_partition_ctx, 0,
s->cols);
1621 memset(
s->above_skip_ctx, 0,
s->cols);
1622 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1623 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1627 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1628 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1629 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1630 memset(
s->above_segpred_ctx, 0,
s->cols);
1635 "Failed to allocate block buffers\n");
1638 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1641 for (
i = 0;
i < 4;
i++) {
1642 for (j = 0; j < 2; j++)
1643 for (k = 0; k < 2; k++)
1644 for (l = 0; l < 6; l++)
1645 for (m = 0; m < 6; m++)
1646 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1647 s->prob.coef[
i][j][k][l][m], 3);
1648 if (
s->s.h.txfmmode ==
i)
1651 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1653 }
else if (!
s->s.h.refreshctx) {
1659 for (
i = 0;
i <
s->sb_rows;
i++)
1665 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1666 s->td[
i].b =
s->td[
i].b_base;
1667 s->td[
i].block =
s->td[
i].block_base;
1668 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1669 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1670 s->td[
i].eob =
s->td[
i].eob_base;
1671 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1672 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1673 s->td[
i].error_info = 0;
1678 int tile_row, tile_col;
1682 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1683 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1686 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1687 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1694 if (tile_size >
size)
1717 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1718 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1719 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1721 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1725 }
while (
s->pass++ == 1);
1727 if (
s->td->error_info < 0) {
1729 s->td->error_info = 0;
1742 for (
int i = 0;
i < 8;
i++)
1745 if (!
s->s.h.invisible) {
1762 for (
i = 0;
i < 3;
i++)
1764 for (
i = 0;
i < 8;
i++)
1777 s->s.h.filter.sharpness = -1;
1795 for (
int i = 0;
i < 3;
i++)
1797 for (
int i = 0;
i < 8;
i++)
1800 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1802 s->s.h.invisible = ssrc->s.h.invisible;
1803 s->s.h.keyframe = ssrc->s.h.keyframe;
1804 s->s.h.intraonly = ssrc->s.h.intraonly;
1805 s->ss_v = ssrc->ss_v;
1806 s->ss_h = ssrc->ss_h;
1807 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1808 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1809 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1810 s->bytesperpixel = ssrc->bytesperpixel;
1811 s->gf_fmt = ssrc->gf_fmt;
1814 s->s.h.bpp = ssrc->s.h.bpp;
1815 s->bpp_index = ssrc->bpp_index;
1816 s->pix_fmt = ssrc->pix_fmt;
1817 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1818 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1819 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1820 sizeof(
s->s.h.segmentation.feat));
1842 .bsfs =
"vp9_superframe_split",
1844 #if CONFIG_VP9_DXVA2_HWACCEL
1847 #if CONFIG_VP9_D3D11VA_HWACCEL
1850 #if CONFIG_VP9_D3D11VA2_HWACCEL
1853 #if CONFIG_VP9_D3D12VA_HWACCEL
1856 #if CONFIG_VP9_NVDEC_HWACCEL
1859 #if CONFIG_VP9_VAAPI_HWACCEL
1862 #if CONFIG_VP9_VDPAU_HWACCEL
1865 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL