24 #include "config_components.h"
49 #define VP9_SYNCCODE 0x498342
102 f->segmentation_map =
NULL;
114 sz = 64 *
s->sb_cols *
s->sb_rows;
115 if (sz !=
s->frame_extradata_pool_size) {
119 if (!
s->frame_extradata_pool) {
120 s->frame_extradata_pool_size = 0;
124 s->frame_extradata_pool_size = sz;
132 f->segmentation_map =
f->extradata;
152 dst->segmentation_map =
src->segmentation_map;
154 dst->uses_2pass =
src->uses_2pass;
157 src->hwaccel_picture_private);
162 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
163 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
164 CONFIG_VP9_D3D12VA_HWACCEL + \
165 CONFIG_VP9_NVDEC_HWACCEL + \
166 CONFIG_VP9_VAAPI_HWACCEL + \
167 CONFIG_VP9_VDPAU_HWACCEL + \
168 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
172 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
177 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
181 switch (
s->pix_fmt) {
184 #if CONFIG_VP9_DXVA2_HWACCEL
187 #if CONFIG_VP9_D3D11VA_HWACCEL
191 #if CONFIG_VP9_D3D12VA_HWACCEL
194 #if CONFIG_VP9_NVDEC_HWACCEL
197 #if CONFIG_VP9_VAAPI_HWACCEL
200 #if CONFIG_VP9_VDPAU_HWACCEL
203 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
208 #if CONFIG_VP9_NVDEC_HWACCEL
211 #if CONFIG_VP9_VAAPI_HWACCEL
214 #if CONFIG_VP9_VDPAU_HWACCEL
221 #if CONFIG_VP9_VAAPI_HWACCEL
228 #if CONFIG_VP9_VAAPI_HWACCEL
234 *fmtp++ =
s->pix_fmt;
242 s->gf_fmt =
s->pix_fmt;
250 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
253 s->last_fmt =
s->pix_fmt;
254 s->sb_cols = (
w + 63) >> 6;
255 s->sb_rows = (
h + 63) >> 6;
256 s->cols = (
w + 7) >> 3;
257 s->rows = (
h + 7) >> 3;
260 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
264 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
265 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
268 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
269 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
270 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
271 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
272 assign(
s->above_mode_ctx, uint8_t *, 16);
274 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
275 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
276 assign(
s->above_partition_ctx, uint8_t *, 8);
277 assign(
s->above_skip_ctx, uint8_t *, 8);
278 assign(
s->above_txfm_ctx, uint8_t *, 8);
279 assign(
s->above_segpred_ctx, uint8_t *, 8);
280 assign(
s->above_intra_ctx, uint8_t *, 8);
281 assign(
s->above_comp_ctx, uint8_t *, 8);
282 assign(
s->above_ref_ctx, uint8_t *, 8);
283 assign(
s->above_filter_ctx, uint8_t *, 8);
288 for (
i = 0;
i <
s->active_tile_cols;
i++)
292 if (
s->s.h.bpp !=
s->last_bpp) {
295 s->last_bpp =
s->s.h.bpp;
305 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
312 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
313 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
315 int sbs =
s->sb_cols *
s->sb_rows;
319 16 * 16 + 2 * chroma_eobs) * sbs);
334 for (
i = 1;
i <
s->active_tile_cols;
i++)
337 for (
i = 0;
i <
s->active_tile_cols;
i++) {
339 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
340 16 * 16 + 2 * chroma_eobs);
341 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
343 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
344 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
345 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
346 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
347 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
351 if (!
s->td[
i].block_structure)
356 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
373 return m - ((v + 1) >> 1);
380 static const uint8_t inv_map_table[255] = {
381 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
382 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
383 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
384 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
385 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
386 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
387 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
388 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
389 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
390 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
391 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
392 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
393 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
394 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
395 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
396 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
397 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
398 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
446 s->s.h.bpp = 8 +
bits * 2;
447 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
453 s->ss_h =
s->ss_v = 0;
467 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
479 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
490 s->ss_h =
s->ss_v = 1;
491 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
502 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
504 const uint8_t *data2;
528 s->last_keyframe =
s->s.h.keyframe;
531 last_invisible =
s->s.h.invisible;
534 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
536 if (
s->s.h.keyframe) {
544 s->s.h.refreshrefmask = 0xff;
550 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
551 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
552 if (
s->s.h.intraonly) {
561 s->ss_h =
s->ss_v = 1;
564 s->bytesperpixel = 1;
577 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
579 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
581 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
582 if (!
s->s.refs[
s->s.h.refidx[0]].f ||
583 !
s->s.refs[
s->s.h.refidx[1]].f ||
584 !
s->s.refs[
s->s.h.refidx[2]].f) {
589 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
590 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
592 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
593 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
595 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
596 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
604 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f &&
612 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
613 s->s.h.signbias[0] !=
s->s.h.signbias[2];
614 if (
s->s.h.allowcompinter) {
615 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
616 s->s.h.fixcompref = 2;
617 s->s.h.varcompref[0] = 0;
618 s->s.h.varcompref[1] = 1;
619 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
620 s->s.h.fixcompref = 1;
621 s->s.h.varcompref[0] = 0;
622 s->s.h.varcompref[1] = 2;
624 s->s.h.fixcompref = 0;
625 s->s.h.varcompref[0] = 1;
626 s->s.h.varcompref[1] = 2;
631 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
632 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
634 if (
s->s.h.keyframe ||
s->s.h.intraonly)
635 s->s.h.framectxid = 0;
638 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
640 s->s.h.lf_delta.ref[0] = 1;
641 s->s.h.lf_delta.ref[1] = 0;
642 s->s.h.lf_delta.ref[2] = -1;
643 s->s.h.lf_delta.ref[3] = -1;
644 s->s.h.lf_delta.mode[0] = 0;
645 s->s.h.lf_delta.mode[1] = 0;
646 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
652 if (
s->s.h.filter.sharpness != sharp) {
653 for (
i = 1;
i <= 63;
i++) {
657 limit >>= (sharp + 3) >> 2;
662 s->filter_lut.lim_lut[
i] =
limit;
663 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
666 s->s.h.filter.sharpness = sharp;
667 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
668 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
669 for (
i = 0;
i < 4;
i++)
672 for (
i = 0;
i < 2;
i++)
683 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
684 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
685 #if FF_API_CODEC_PROPS
693 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
694 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
695 for (
i = 0;
i < 7;
i++)
698 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
699 for (
i = 0;
i < 3;
i++)
705 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
706 for (
i = 0;
i < 8;
i++) {
707 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
709 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
711 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
712 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
713 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
720 s->s.h.segmentation.temporal = 0;
721 s->s.h.segmentation.update_map = 0;
725 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
726 int qyac, qydc, quvac, quvdc, lflvl, sh;
728 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
729 if (
s->s.h.segmentation.absolute_vals)
734 qyac =
s->s.h.yac_qi;
746 sh =
s->s.h.filter.level >= 32;
747 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
748 if (
s->s.h.segmentation.absolute_vals)
751 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
753 lflvl =
s->s.h.filter.level;
755 if (
s->s.h.lf_delta.enabled) {
756 s->s.h.segmentation.feat[
i].lflvl[0][0] =
757 s->s.h.segmentation.feat[
i].lflvl[0][1] =
759 for (j = 1; j < 4; j++) {
760 s->s.h.segmentation.feat[
i].lflvl[j][0] =
762 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
763 s->s.h.segmentation.feat[
i].lflvl[j][1] =
765 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
768 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
769 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
779 for (
s->s.h.tiling.log2_tile_cols = 0;
780 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
781 s->s.h.tiling.log2_tile_cols++) ;
782 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
784 while (
max >
s->s.h.tiling.log2_tile_cols) {
786 s->s.h.tiling.log2_tile_cols++;
791 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
792 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
797 for (
i = 0;
i <
s->active_tile_cols;
i++)
802 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
804 s->s.h.tiling.tile_cols : 1;
809 n_range_coders =
s->s.h.tiling.tile_cols;
816 for (
i = 0;
i <
s->active_tile_cols;
i++) {
819 rc += n_range_coders;
824 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
825 int valid_ref_frame = 0;
826 for (
i = 0;
i < 3;
i++) {
828 int refw =
ref->width, refh =
ref->height;
832 "Ref pixfmt (%s) did not match current frame (%s)",
836 }
else if (refw ==
w && refh ==
h) {
837 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
841 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
843 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
848 s->mvscale[
i][0] = (refw << 14) /
w;
849 s->mvscale[
i][1] = (refh << 14) /
h;
850 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
851 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
855 if (!valid_ref_frame) {
856 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
861 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
862 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
872 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
879 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
883 if (size2 >
size - (data2 -
data)) {
896 for (
i = 0;
i <
s->active_tile_cols;
i++) {
897 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
898 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
899 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
901 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
903 s->td[
i].nb_block_structure = 0;
909 s->prob.p =
s->prob_ctx[
c].p;
912 if (
s->s.h.lossless) {
916 if (
s->s.h.txfmmode == 3)
920 for (
i = 0;
i < 2;
i++)
923 for (
i = 0;
i < 2;
i++)
924 for (j = 0; j < 2; j++)
926 s->prob.p.tx16p[
i][j] =
928 for (
i = 0;
i < 2;
i++)
929 for (j = 0; j < 3; j++)
931 s->prob.p.tx32p[
i][j] =
937 for (
i = 0;
i < 4;
i++) {
938 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
940 for (j = 0; j < 2; j++)
941 for (k = 0; k < 2; k++)
942 for (l = 0; l < 6; l++)
943 for (m = 0; m < 6; m++) {
944 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
945 uint8_t *
r =
ref[j][k][l][m];
946 if (m >= 3 && l == 0)
948 for (n = 0; n < 3; n++) {
957 for (j = 0; j < 2; j++)
958 for (k = 0; k < 2; k++)
959 for (l = 0; l < 6; l++)
960 for (m = 0; m < 6; m++) {
961 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
962 uint8_t *
r =
ref[j][k][l][m];
969 if (
s->s.h.txfmmode ==
i)
974 for (
i = 0;
i < 3;
i++)
977 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
978 for (
i = 0;
i < 7;
i++)
979 for (j = 0; j < 3; j++)
981 s->prob.p.mv_mode[
i][j] =
985 for (
i = 0;
i < 4;
i++)
986 for (j = 0; j < 2; j++)
988 s->prob.p.filter[
i][j] =
991 for (
i = 0;
i < 4;
i++)
995 if (
s->s.h.allowcompinter) {
997 if (
s->s.h.comppredmode)
1000 for (
i = 0;
i < 5;
i++)
1009 for (
i = 0;
i < 5;
i++) {
1011 s->prob.p.single_ref[
i][0] =
1014 s->prob.p.single_ref[
i][1] =
1020 for (
i = 0;
i < 5;
i++)
1022 s->prob.p.comp_ref[
i] =
1026 for (
i = 0;
i < 4;
i++)
1027 for (j = 0; j < 9; j++)
1029 s->prob.p.y_mode[
i][j] =
1032 for (
i = 0;
i < 4;
i++)
1033 for (j = 0; j < 4; j++)
1034 for (k = 0; k < 3; k++)
1036 s->prob.p.partition[3 -
i][j][k] =
1038 s->prob.p.partition[3 -
i][j][k]);
1041 for (
i = 0;
i < 3;
i++)
1045 for (
i = 0;
i < 2;
i++) {
1047 s->prob.p.mv_comp[
i].sign =
1050 for (j = 0; j < 10; j++)
1052 s->prob.p.mv_comp[
i].classes[j] =
1056 s->prob.p.mv_comp[
i].class0 =
1059 for (j = 0; j < 10; j++)
1061 s->prob.p.mv_comp[
i].bits[j] =
1065 for (
i = 0;
i < 2;
i++) {
1066 for (j = 0; j < 2; j++)
1067 for (k = 0; k < 3; k++)
1069 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1072 for (j = 0; j < 3; j++)
1074 s->prob.p.mv_comp[
i].fp[j] =
1078 if (
s->s.h.highprecisionmvs) {
1079 for (
i = 0;
i < 2;
i++) {
1081 s->prob.p.mv_comp[
i].class0_hp =
1085 s->prob.p.mv_comp[
i].hp =
1091 return (data2 -
data) + size2;
1095 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1098 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1101 s->prob.p.partition[bl][
c];
1103 ptrdiff_t hbs = 4 >> bl;
1105 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1106 int bytesperpixel =
s->bytesperpixel;
1111 }
else if (col + hbs < s->cols) {
1112 if (row + hbs < s->rows) {
1120 yoff += hbs * 8 * y_stride;
1121 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1126 yoff += hbs * 8 * bytesperpixel;
1127 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1131 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1133 yoff + 8 * hbs * bytesperpixel,
1134 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1135 yoff += hbs * 8 * y_stride;
1136 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1137 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1138 decode_sb(td, row + hbs, col + hbs, lflvl,
1139 yoff + 8 * hbs * bytesperpixel,
1140 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1144 "the four PARTITION_* terminal codes");
1148 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1150 yoff + 8 * hbs * bytesperpixel,
1151 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1156 }
else if (row + hbs < s->rows) {
1159 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1160 yoff += hbs * 8 * y_stride;
1161 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1162 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1169 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1175 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1179 ptrdiff_t hbs = 4 >> bl;
1181 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1182 int bytesperpixel =
s->bytesperpixel;
1187 }
else if (td->
b->
bl == bl) {
1190 yoff += hbs * 8 * y_stride;
1191 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1193 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1194 yoff += hbs * 8 * bytesperpixel;
1195 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1200 if (col + hbs < s->cols) {
1201 if (row + hbs < s->rows) {
1202 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1203 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1204 yoff += hbs * 8 * y_stride;
1205 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1206 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1208 yoff + 8 * hbs * bytesperpixel,
1209 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1211 yoff += hbs * 8 * bytesperpixel;
1212 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1213 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1215 }
else if (row + hbs < s->rows) {
1216 yoff += hbs * 8 * y_stride;
1217 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1218 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1225 int sb_start = ( idx * n) >> log2_n;
1226 int sb_end = ((idx + 1) * n) >> log2_n;
1227 *start =
FFMIN(sb_start, n) << 3;
1228 *end =
FFMIN(sb_end, n) << 3;
1236 for (
i = 0;
i <
s->active_tile_cols;
i++)
1245 for (
int i = 0;
i < 3;
i++)
1248 for (
i = 0;
i < 8;
i++) {
1267 int row, col, tile_row, tile_col,
ret;
1269 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1271 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1274 ls_y =
f->linesize[0];
1275 ls_uv =
f->linesize[1];
1276 bytesperpixel =
s->bytesperpixel;
1279 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1281 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1283 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1286 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1287 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1294 if (tile_size >
size)
1305 for (row = tile_row_start; row < tile_row_end;
1306 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1308 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1310 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1312 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1317 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1326 td->
c = &td->
c_b[tile_col];
1329 for (col = tile_col_start;
1331 col += 8, yoff2 += 64 * bytesperpixel,
1332 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1336 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1357 if (row + 8 <
s->rows) {
1358 memcpy(
s->intra_pred_data[0],
1359 f->data[0] + yoff + 63 * ls_y,
1360 8 *
s->cols * bytesperpixel);
1361 memcpy(
s->intra_pred_data[1],
1362 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1363 8 *
s->cols * bytesperpixel >>
s->ss_h);
1364 memcpy(
s->intra_pred_data[2],
1365 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1366 8 *
s->cols * bytesperpixel >>
s->ss_h);
1370 if (
s->s.h.filter.level) {
1373 lflvl_ptr =
s->lflvl;
1374 for (col = 0; col <
s->cols;
1375 col += 8, yoff2 += 64 * bytesperpixel,
1376 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1393 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1398 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1399 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1400 unsigned tile_cols_len;
1401 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1406 ls_y =
f->linesize[0];
1407 ls_uv =
f->linesize[1];
1410 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1412 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1413 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1414 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1416 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1418 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1420 td->
c = &td->
c_b[tile_row];
1421 for (row = tile_row_start; row < tile_row_end;
1422 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1423 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1424 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1428 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1437 for (col = tile_col_start;
1439 col += 8, yoff2 += 64 * bytesperpixel,
1440 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1443 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1450 tile_cols_len = tile_col_end - tile_col_start;
1451 if (row + 8 <
s->rows) {
1452 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1453 f->data[0] + yoff + 63 * ls_y,
1454 8 * tile_cols_len * bytesperpixel);
1455 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1456 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1457 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1458 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1459 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1460 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1463 vp9_report_tile_progress(
s, row >> 3, 1);
1473 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1475 int bytesperpixel =
s->bytesperpixel, col,
i;
1479 ls_y =
f->linesize[0];
1480 ls_uv =
f->linesize[1];
1482 for (
i = 0;
i <
s->sb_rows;
i++) {
1483 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1485 if (
s->s.h.filter.level) {
1486 yoff = (ls_y * 64)*
i;
1487 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1488 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1489 for (col = 0; col <
s->cols;
1490 col += 8, yoff += 64 * bytesperpixel,
1491 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1504 unsigned int tile, nb_blocks = 0;
1506 if (
s->s.h.segmentation.enabled) {
1508 nb_blocks +=
s->td[
tile].nb_block_structure;
1516 par->
qp =
s->s.h.yac_qi;
1517 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1518 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1519 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1520 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1521 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1524 unsigned int block = 0;
1525 unsigned int tile, block_tile;
1534 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1541 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1542 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1543 if (
s->s.h.segmentation.absolute_vals)
1544 b->delta_qp -= par->
qp;
1561 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1567 }
else if (
ret == 0) {
1568 if (!
s->s.refs[
ref].f) {
1572 for (
int i = 0;
i < 8;
i++)
1587 src = !
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres ?
1589 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly)
1596 if (
s->s.h.keyframe)
1600 if (
s->s.h.lossless)
1614 for (
i = 0;
i < 8;
i++) {
1616 s->s.h.refreshrefmask & (1 <<
i) ?
1635 memset(
s->above_partition_ctx, 0,
s->cols);
1636 memset(
s->above_skip_ctx, 0,
s->cols);
1637 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1638 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1642 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1643 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1644 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1645 memset(
s->above_segpred_ctx, 0,
s->cols);
1650 "Failed to allocate block buffers\n");
1653 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1656 for (
i = 0;
i < 4;
i++) {
1657 for (j = 0; j < 2; j++)
1658 for (k = 0; k < 2; k++)
1659 for (l = 0; l < 6; l++)
1660 for (m = 0; m < 6; m++)
1661 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1662 s->prob.coef[
i][j][k][l][m], 3);
1663 if (
s->s.h.txfmmode ==
i)
1666 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1668 }
else if (!
s->s.h.refreshctx) {
1674 for (
i = 0;
i <
s->sb_rows;
i++)
1680 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1681 s->td[
i].b =
s->td[
i].b_base;
1682 s->td[
i].block =
s->td[
i].block_base;
1683 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1684 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1685 s->td[
i].eob =
s->td[
i].eob_base;
1686 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1687 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1688 s->td[
i].error_info = 0;
1693 int tile_row, tile_col;
1697 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1698 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1701 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1702 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1709 if (tile_size >
size)
1732 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1733 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1734 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1736 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1740 }
while (
s->pass++ == 1);
1742 if (
s->td->error_info < 0) {
1744 s->td->error_info = 0;
1757 for (
int i = 0;
i < 8;
i++)
1760 if (!
s->s.h.invisible) {
1777 for (
i = 0;
i < 3;
i++)
1779 for (
i = 0;
i < 8;
i++)
1792 s->s.h.filter.sharpness = -1;
1810 for (
int i = 0;
i < 3;
i++)
1812 for (
int i = 0;
i < 8;
i++)
1815 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1817 s->s.h.invisible = ssrc->s.h.invisible;
1818 s->s.h.keyframe = ssrc->s.h.keyframe;
1819 s->s.h.intraonly = ssrc->s.h.intraonly;
1820 s->ss_v = ssrc->ss_v;
1821 s->ss_h = ssrc->ss_h;
1822 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1823 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1824 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1825 s->bytesperpixel = ssrc->bytesperpixel;
1826 s->gf_fmt = ssrc->gf_fmt;
1829 s->s.h.bpp = ssrc->s.h.bpp;
1830 s->bpp_index = ssrc->bpp_index;
1831 s->pix_fmt = ssrc->pix_fmt;
1832 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1833 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1834 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1835 sizeof(
s->s.h.segmentation.feat));
1857 .bsfs =
"vp9_superframe_split",
1859 #if CONFIG_VP9_DXVA2_HWACCEL
1862 #if CONFIG_VP9_D3D11VA_HWACCEL
1865 #if CONFIG_VP9_D3D11VA2_HWACCEL
1868 #if CONFIG_VP9_D3D12VA_HWACCEL
1871 #if CONFIG_VP9_NVDEC_HWACCEL
1874 #if CONFIG_VP9_VAAPI_HWACCEL
1877 #if CONFIG_VP9_VDPAU_HWACCEL
1880 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL