24 #include "config_components.h"
/* Three-byte constant (0x49, 0x83, 0x42) named as the VP9 sync code.
 * NOTE(review): presumably compared against 24 bits read from the frame
 * header; the comparison itself is not visible here -- confirm at the use site. */
50 #define VP9_SYNCCODE 0x498342
104 f->segmentation_map =
NULL;
116 sz = 64 *
s->sb_cols *
s->sb_rows;
117 if (sz !=
s->frame_extradata_pool_size) {
121 if (!
s->frame_extradata_pool) {
122 s->frame_extradata_pool_size = 0;
126 s->frame_extradata_pool_size = sz;
134 f->segmentation_map =
f->extradata;
151 dst->frame_header =
src->frame_header;
157 dst->segmentation_map =
src->segmentation_map;
159 dst->uses_2pass =
src->uses_2pass;
162 src->hwaccel_picture_private);
/* Sum of the per-backend CONFIG_VP9_*_HWACCEL build flags, with the D3D11VA
 * flag weighted by two.  The result is a compile-time integer expression.
 * Assumes each CONFIG_* flag expands to 0 or 1 (they are also tested with
 * plain #if elsewhere in this file).
 * NOTE(review): presumably an upper bound on the number of hardware pixel
 * formats appended to the candidate list before the software format, and
 * D3D11VA presumably counts twice because it contributes two formats --
 * the array declaration that uses this macro is not visible here; confirm. */
167 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
168 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
169 CONFIG_VP9_D3D12VA_HWACCEL + \
170 CONFIG_VP9_NVDEC_HWACCEL + \
171 CONFIG_VP9_VAAPI_HWACCEL + \
172 CONFIG_VP9_VDPAU_HWACCEL + \
173 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL + \
174 CONFIG_VP9_VULKAN_HWACCEL)
178 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
184 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
189 switch (
s->pix_fmt) {
192 #if CONFIG_VP9_DXVA2_HWACCEL
195 #if CONFIG_VP9_D3D11VA_HWACCEL
199 #if CONFIG_VP9_D3D12VA_HWACCEL
202 #if CONFIG_VP9_NVDEC_HWACCEL
205 #if CONFIG_VP9_VAAPI_HWACCEL
208 #if CONFIG_VP9_VDPAU_HWACCEL
211 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
214 #if CONFIG_VP9_VULKAN_HWACCEL
219 #if CONFIG_VP9_NVDEC_HWACCEL
222 #if CONFIG_VP9_VAAPI_HWACCEL
225 #if CONFIG_VP9_VDPAU_HWACCEL
228 #if CONFIG_VP9_VULKAN_HWACCEL
235 #if CONFIG_VP9_VAAPI_HWACCEL
238 #if CONFIG_VP9_VULKAN_HWACCEL
245 #if CONFIG_VP9_VAAPI_HWACCEL
248 #if CONFIG_VP9_VULKAN_HWACCEL
254 *fmtp++ =
s->pix_fmt;
264 s->gf_fmt =
s->pix_fmt;
272 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
275 s->last_fmt =
s->pix_fmt;
276 s->sb_cols = (
w + 63) >> 6;
277 s->sb_rows = (
h + 63) >> 6;
278 s->cols = (
w + 7) >> 3;
279 s->rows = (
h + 7) >> 3;
/* Helper for slicing one allocation into consecutive per-superblock-column
 * arrays: stores the current cursor `p`, cast to `type`, into `var`, then
 * advances `p` by s->sb_cols * (n) * sizeof(*var).
 * Relies on `p` and `s` being in scope at the point of use.
 * NOTE(review): expands to two separate statements with no do { } while (0)
 * wrapper, so it must only be used as a full statement inside a braced
 * block -- never as the unbraced body of an if/for. */
282 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
286 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
287 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
290 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
291 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
292 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
293 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
294 assign(
s->above_mode_ctx, uint8_t *, 16);
296 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
297 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
298 assign(
s->above_partition_ctx, uint8_t *, 8);
299 assign(
s->above_skip_ctx, uint8_t *, 8);
300 assign(
s->above_txfm_ctx, uint8_t *, 8);
301 assign(
s->above_segpred_ctx, uint8_t *, 8);
302 assign(
s->above_intra_ctx, uint8_t *, 8);
303 assign(
s->above_comp_ctx, uint8_t *, 8);
304 assign(
s->above_ref_ctx, uint8_t *, 8);
305 assign(
s->above_filter_ctx, uint8_t *, 8);
310 for (
i = 0;
i <
s->active_tile_cols;
i++)
314 if (
s->s.h.bpp !=
s->last_bpp) {
317 s->last_bpp =
s->s.h.bpp;
328 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
335 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
336 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
338 int sbs =
s->sb_cols *
s->sb_rows;
342 16 * 16 + 2 * chroma_eobs) * sbs);
357 for (
i = 1;
i <
s->active_tile_cols;
i++)
360 for (
i = 0;
i <
s->active_tile_cols;
i++) {
362 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
363 16 * 16 + 2 * chroma_eobs);
364 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
366 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
367 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
368 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
369 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
370 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
374 if (!
s->td[
i].block_structure)
379 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
396 return m - ((v + 1) >> 1);
403 static const uint8_t inv_map_table[255] = {
404 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
405 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
406 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
407 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
408 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
409 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
410 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
411 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
412 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
413 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
414 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
415 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
416 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
417 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
418 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
419 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
420 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
421 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
469 s->s.h.bpp = 8 +
bits * 2;
470 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
476 s->ss_h =
s->ss_v = 0;
490 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
502 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
513 s->ss_h =
s->ss_v = 1;
514 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
525 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
527 const uint8_t *data2;
552 s->last_keyframe =
s->s.h.keyframe;
555 last_invisible =
s->s.h.invisible;
558 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
560 if (
s->s.h.keyframe) {
568 s->s.h.refreshrefmask = 0xff;
574 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
575 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
576 if (
s->s.h.intraonly) {
585 s->ss_h =
s->ss_v = 1;
588 s->bytesperpixel = 1;
601 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
603 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
605 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
606 if (!
s->s.refs[
s->s.h.refidx[0]].f ||
607 !
s->s.refs[
s->s.h.refidx[1]].f ||
608 !
s->s.refs[
s->s.h.refidx[2]].f) {
613 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
614 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
616 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
617 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
619 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
620 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
628 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f &&
636 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
637 s->s.h.signbias[0] !=
s->s.h.signbias[2];
638 if (
s->s.h.allowcompinter) {
639 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
640 s->s.h.fixcompref = 2;
641 s->s.h.varcompref[0] = 0;
642 s->s.h.varcompref[1] = 1;
643 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
644 s->s.h.fixcompref = 1;
645 s->s.h.varcompref[0] = 0;
646 s->s.h.varcompref[1] = 2;
648 s->s.h.fixcompref = 0;
649 s->s.h.varcompref[0] = 1;
650 s->s.h.varcompref[1] = 2;
655 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
656 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
658 if (
s->s.h.keyframe ||
s->s.h.intraonly)
659 s->s.h.framectxid = 0;
662 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
664 s->s.h.lf_delta.ref[0] = 1;
665 s->s.h.lf_delta.ref[1] = 0;
666 s->s.h.lf_delta.ref[2] = -1;
667 s->s.h.lf_delta.ref[3] = -1;
668 s->s.h.lf_delta.mode[0] = 0;
669 s->s.h.lf_delta.mode[1] = 0;
670 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
676 if (
s->s.h.filter.sharpness != sharp) {
677 for (
i = 1;
i <= 63;
i++) {
681 limit >>= (sharp + 3) >> 2;
686 s->filter_lut.lim_lut[
i] =
limit;
687 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
690 s->s.h.filter.sharpness = sharp;
691 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
692 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
693 for (
i = 0;
i < 4;
i++)
696 for (
i = 0;
i < 2;
i++)
707 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
708 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
711 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
712 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
713 for (
i = 0;
i < 7;
i++)
716 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
717 for (
i = 0;
i < 3;
i++)
723 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
724 for (
i = 0;
i < 8;
i++) {
725 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
727 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
729 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
730 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
731 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
738 s->s.h.segmentation.temporal = 0;
739 s->s.h.segmentation.update_map = 0;
743 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
744 int qyac, qydc, quvac, quvdc, lflvl, sh;
746 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
747 if (
s->s.h.segmentation.absolute_vals)
752 qyac =
s->s.h.yac_qi;
764 sh =
s->s.h.filter.level >= 32;
765 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
766 if (
s->s.h.segmentation.absolute_vals)
769 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
771 lflvl =
s->s.h.filter.level;
773 if (
s->s.h.lf_delta.enabled) {
774 s->s.h.segmentation.feat[
i].lflvl[0][0] =
775 s->s.h.segmentation.feat[
i].lflvl[0][1] =
777 for (j = 1; j < 4; j++) {
778 s->s.h.segmentation.feat[
i].lflvl[j][0] =
780 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
781 s->s.h.segmentation.feat[
i].lflvl[j][1] =
783 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
786 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
787 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
797 for (
s->s.h.tiling.log2_tile_cols = 0;
798 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
799 s->s.h.tiling.log2_tile_cols++) ;
800 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
802 while (
max >
s->s.h.tiling.log2_tile_cols) {
804 s->s.h.tiling.log2_tile_cols++;
809 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
810 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols) || changed) {
815 for (
i = 0;
i <
s->active_tile_cols;
i++)
820 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
822 s->s.h.tiling.tile_cols : 1;
827 n_range_coders =
s->s.h.tiling.tile_cols;
834 for (
i = 0;
i <
s->active_tile_cols;
i++) {
837 rc += n_range_coders;
842 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
843 int valid_ref_frame = 0;
844 for (
i = 0;
i < 3;
i++) {
846 int refw =
ref->width, refh =
ref->height;
850 "Ref pixfmt (%s) did not match current frame (%s)",
854 }
else if (refw ==
w && refh ==
h) {
855 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
859 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
861 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
866 s->mvscale[
i][0] = (refw << 14) /
w;
867 s->mvscale[
i][1] = (refh << 14) /
h;
868 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
869 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
873 if (!valid_ref_frame) {
874 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
879 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
880 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
890 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
897 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
901 if (size2 >
size - (data2 -
data)) {
914 for (
i = 0;
i <
s->active_tile_cols;
i++) {
915 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
916 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
917 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
919 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
921 s->td[
i].nb_block_structure = 0;
927 s->prob.p =
s->prob_ctx[
c].p;
930 if (
s->s.h.lossless) {
934 if (
s->s.h.txfmmode == 3)
938 for (
i = 0;
i < 2;
i++)
941 for (
i = 0;
i < 2;
i++)
942 for (j = 0; j < 2; j++)
944 s->prob.p.tx16p[
i][j] =
946 for (
i = 0;
i < 2;
i++)
947 for (j = 0; j < 3; j++)
949 s->prob.p.tx32p[
i][j] =
955 for (
i = 0;
i < 4;
i++) {
956 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
958 for (j = 0; j < 2; j++)
959 for (k = 0; k < 2; k++)
960 for (l = 0; l < 6; l++)
961 for (m = 0; m < 6; m++) {
962 uint8_t *
p =
s->prob.coef[
i][j][k][l][m];
963 uint8_t *
r =
ref[j][k][l][m];
964 if (m >= 3 && l == 0)
966 for (n = 0; n < 3; n++) {
975 for (j = 0; j < 2; j++)
976 for (k = 0; k < 2; k++)
977 for (l = 0; l < 6; l++)
978 for (m = 0; m < 6; m++) {
979 uint8_t *
p =
s->prob.coef[
i][j][k][l][m];
980 uint8_t *
r =
ref[j][k][l][m];
987 if (
s->s.h.txfmmode ==
i)
992 for (
i = 0;
i < 3;
i++)
995 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
996 for (
i = 0;
i < 7;
i++)
997 for (j = 0; j < 3; j++)
999 s->prob.p.mv_mode[
i][j] =
1003 for (
i = 0;
i < 4;
i++)
1004 for (j = 0; j < 2; j++)
1006 s->prob.p.filter[
i][j] =
1009 for (
i = 0;
i < 4;
i++)
1013 if (
s->s.h.allowcompinter) {
1015 if (
s->s.h.comppredmode)
1018 for (
i = 0;
i < 5;
i++)
1027 for (
i = 0;
i < 5;
i++) {
1029 s->prob.p.single_ref[
i][0] =
1032 s->prob.p.single_ref[
i][1] =
1038 for (
i = 0;
i < 5;
i++)
1040 s->prob.p.comp_ref[
i] =
1044 for (
i = 0;
i < 4;
i++)
1045 for (j = 0; j < 9; j++)
1047 s->prob.p.y_mode[
i][j] =
1050 for (
i = 0;
i < 4;
i++)
1051 for (j = 0; j < 4; j++)
1052 for (k = 0; k < 3; k++)
1054 s->prob.p.partition[3 -
i][j][k] =
1056 s->prob.p.partition[3 -
i][j][k]);
1059 for (
i = 0;
i < 3;
i++)
1063 for (
i = 0;
i < 2;
i++) {
1065 s->prob.p.mv_comp[
i].sign =
1068 for (j = 0; j < 10; j++)
1070 s->prob.p.mv_comp[
i].classes[j] =
1074 s->prob.p.mv_comp[
i].class0 =
1077 for (j = 0; j < 10; j++)
1079 s->prob.p.mv_comp[
i].bits[j] =
1083 for (
i = 0;
i < 2;
i++) {
1084 for (j = 0; j < 2; j++)
1085 for (k = 0; k < 3; k++)
1087 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1090 for (j = 0; j < 3; j++)
1092 s->prob.p.mv_comp[
i].fp[j] =
1096 if (
s->s.h.highprecisionmvs) {
1097 for (
i = 0;
i < 2;
i++) {
1099 s->prob.p.mv_comp[
i].class0_hp =
1103 s->prob.p.mv_comp[
i].hp =
1109 return (data2 -
data) + size2;
1113 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1116 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1119 s->prob.p.partition[bl][
c];
1121 ptrdiff_t hbs = 4 >> bl;
1123 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1124 int bytesperpixel =
s->bytesperpixel;
1129 }
else if (col + hbs < s->cols) {
1130 if (row + hbs < s->rows) {
1138 yoff += hbs * 8 * y_stride;
1139 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1144 yoff += hbs * 8 * bytesperpixel;
1145 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1149 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1151 yoff + 8 * hbs * bytesperpixel,
1152 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1153 yoff += hbs * 8 * y_stride;
1154 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1155 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1156 decode_sb(td, row + hbs, col + hbs, lflvl,
1157 yoff + 8 * hbs * bytesperpixel,
1158 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1162 "the four PARTITION_* terminal codes");
1166 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1168 yoff + 8 * hbs * bytesperpixel,
1169 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1174 }
else if (row + hbs < s->rows) {
1177 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1178 yoff += hbs * 8 * y_stride;
1179 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1180 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1187 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1193 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1197 ptrdiff_t hbs = 4 >> bl;
1199 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1200 int bytesperpixel =
s->bytesperpixel;
1205 }
else if (td->
b->
bl == bl) {
1208 yoff += hbs * 8 * y_stride;
1209 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1211 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1212 yoff += hbs * 8 * bytesperpixel;
1213 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1218 if (col + hbs < s->cols) {
1219 if (row + hbs < s->rows) {
1220 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1221 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1222 yoff += hbs * 8 * y_stride;
1223 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1224 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1226 yoff + 8 * hbs * bytesperpixel,
1227 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1229 yoff += hbs * 8 * bytesperpixel;
1230 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1231 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1233 }
else if (row + hbs < s->rows) {
1234 yoff += hbs * 8 * y_stride;
1235 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1236 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1243 int sb_start = ( idx * n) >> log2_n;
1244 int sb_end = ((idx + 1) * n) >> log2_n;
1245 *start =
FFMIN(sb_start, n) << 3;
1246 *end =
FFMIN(sb_end, n) << 3;
1254 for (
i = 0;
i <
s->active_tile_cols;
i++)
1263 for (
int i = 0;
i < 3;
i++)
1266 for (
i = 0;
i < 8;
i++) {
1279 ff_cbs_fragment_free(&
s->current_frag);
1280 ff_cbs_close(&
s->cbc);
1291 int row, col, tile_row, tile_col,
ret;
1293 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1295 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1298 ls_y =
f->linesize[0];
1299 ls_uv =
f->linesize[1];
1300 bytesperpixel =
s->bytesperpixel;
1303 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1305 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1307 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1310 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1311 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1318 if (tile_size >
size)
1329 for (row = tile_row_start; row < tile_row_end;
1330 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1332 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1334 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1336 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1341 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1350 td->
c = &td->
c_b[tile_col];
1353 for (col = tile_col_start;
1355 col += 8, yoff2 += 64 * bytesperpixel,
1356 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1360 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1381 if (row + 8 <
s->rows) {
1382 memcpy(
s->intra_pred_data[0],
1383 f->data[0] + yoff + 63 * ls_y,
1384 8 *
s->cols * bytesperpixel);
1385 memcpy(
s->intra_pred_data[1],
1386 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1387 8 *
s->cols * bytesperpixel >>
s->ss_h);
1388 memcpy(
s->intra_pred_data[2],
1389 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1390 8 *
s->cols * bytesperpixel >>
s->ss_h);
1394 if (
s->s.h.filter.level) {
1397 lflvl_ptr =
s->lflvl;
1398 for (col = 0; col <
s->cols;
1399 col += 8, yoff2 += 64 * bytesperpixel,
1400 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1417 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1422 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1423 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1424 unsigned tile_cols_len;
1425 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1430 ls_y =
f->linesize[0];
1431 ls_uv =
f->linesize[1];
1434 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1436 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1437 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1438 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1440 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1442 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1444 td->
c = &td->
c_b[tile_row];
1445 for (row = tile_row_start; row < tile_row_end;
1446 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1447 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1448 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1452 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1461 for (col = tile_col_start;
1463 col += 8, yoff2 += 64 * bytesperpixel,
1464 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1467 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1474 tile_cols_len = tile_col_end - tile_col_start;
1475 if (row + 8 <
s->rows) {
1476 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1477 f->data[0] + yoff + 63 * ls_y,
1478 8 * tile_cols_len * bytesperpixel);
1479 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1480 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1481 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1482 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1483 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1484 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1487 vp9_report_tile_progress(
s, row >> 3, 1);
1497 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1499 int bytesperpixel =
s->bytesperpixel, col,
i;
1503 ls_y =
f->linesize[0];
1504 ls_uv =
f->linesize[1];
1506 for (
i = 0;
i <
s->sb_rows;
i++) {
1507 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1509 if (
s->s.h.filter.level) {
1510 yoff = (ls_y * 64)*
i;
1511 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1512 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1513 for (col = 0; col <
s->cols;
1514 col += 8, yoff += 64 * bytesperpixel,
1515 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1528 unsigned int tile, nb_blocks = 0;
1530 if (
s->s.h.segmentation.enabled) {
1532 nb_blocks +=
s->td[
tile].nb_block_structure;
1540 par->
qp =
s->s.h.yac_qi;
1541 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1542 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1543 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1544 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1545 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1548 unsigned int block = 0;
1549 unsigned int tile, block_tile;
1558 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1565 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1566 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1567 if (
s->s.h.segmentation.absolute_vals)
1568 b->delta_qp -= par->
qp;
1588 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1592 ret = ff_cbs_read_packet(
s->cbc, &
s->current_frag,
pkt);
1594 ff_cbs_fragment_reset(&
s->current_frag);
1599 unit = &
s->current_frag.units[0];
1603 s->frame_header = &rf->
header;
1606 ff_cbs_fragment_reset(&
s->current_frag);
1608 }
else if (
ret == 0) {
1609 if (!
s->s.refs[
ref].f) {
1611 ff_cbs_fragment_reset(&
s->current_frag);
1614 for (
int i = 0;
i < 8;
i++)
1618 ff_cbs_fragment_reset(&
s->current_frag);
1630 src = !
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres ?
1632 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly)
1637 ff_cbs_fragment_reset(&
s->current_frag);
1642 s->s.frames[
CUR_FRAME].frame_header =
s->frame_header;
1645 if (
s->s.h.keyframe)
1649 if (
s->s.h.lossless)
1663 for (
i = 0;
i < 8;
i++) {
1665 s->s.h.refreshrefmask & (1 <<
i) ?
1681 for (
i = 0;
i < 8;
i++) {
1683 s->s.h.refreshrefmask & (1 <<
i) ?
1691 memset(
s->above_partition_ctx, 0,
s->cols);
1692 memset(
s->above_skip_ctx, 0,
s->cols);
1693 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1694 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1698 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1699 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1700 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1701 memset(
s->above_segpred_ctx, 0,
s->cols);
1706 "Failed to allocate block buffers\n");
1709 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1712 for (
i = 0;
i < 4;
i++) {
1713 for (j = 0; j < 2; j++)
1714 for (k = 0; k < 2; k++)
1715 for (l = 0; l < 6; l++)
1716 for (m = 0; m < 6; m++)
1717 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1718 s->prob.coef[
i][j][k][l][m], 3);
1719 if (
s->s.h.txfmmode ==
i)
1722 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1724 }
else if (!
s->s.h.refreshctx) {
1730 for (
i = 0;
i <
s->sb_rows;
i++)
1736 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1737 s->td[
i].b =
s->td[
i].b_base;
1738 s->td[
i].block =
s->td[
i].block_base;
1739 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1740 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1741 s->td[
i].eob =
s->td[
i].eob_base;
1742 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1743 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1744 s->td[
i].error_info = 0;
1749 int tile_row, tile_col;
1753 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1754 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1757 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1758 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1765 if (tile_size >
size)
1788 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1789 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1790 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1792 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1796 }
while (
s->pass++ == 1);
1798 if (
s->td->error_info < 0) {
1800 s->td->error_info = 0;
1811 ff_cbs_fragment_reset(&
s->current_frag);
1815 for (
int i = 0;
i < 8;
i++)
1818 if (!
s->s.h.invisible) {
1826 ff_cbs_fragment_reset(&
s->current_frag);
1836 for (
i = 0;
i < 3;
i++)
1839 for (
i = 0;
i < 8;
i++) {
1844 ff_cbs_fragment_reset(&
s->current_frag);
1845 ff_cbs_flush(
s->cbc);
1857 s->s.h.filter.sharpness = -1;
1879 for (
int i = 0;
i < 3;
i++)
1881 for (
int i = 0;
i < 8;
i++)
1884 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1887 for (
int i = 0;
i < 8;
i++)
1890 s->frame_header = ssrc->frame_header;
1893 s->s.h.invisible = ssrc->s.h.invisible;
1894 s->s.h.keyframe = ssrc->s.h.keyframe;
1895 s->s.h.intraonly = ssrc->s.h.intraonly;
1896 s->ss_v = ssrc->ss_v;
1897 s->ss_h = ssrc->ss_h;
1898 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1899 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1900 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1901 s->bytesperpixel = ssrc->bytesperpixel;
1902 s->gf_fmt = ssrc->gf_fmt;
1905 s->s.h.bpp = ssrc->s.h.bpp;
1906 s->bpp_index = ssrc->bpp_index;
1907 s->pix_fmt = ssrc->pix_fmt;
1908 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1909 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1910 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1911 sizeof(
s->s.h.segmentation.feat));
1933 .bsfs =
"vp9_superframe_split",
1935 #if CONFIG_VP9_DXVA2_HWACCEL
1938 #if CONFIG_VP9_D3D11VA_HWACCEL
1941 #if CONFIG_VP9_D3D11VA2_HWACCEL
1944 #if CONFIG_VP9_D3D12VA_HWACCEL
1947 #if CONFIG_VP9_NVDEC_HWACCEL
1950 #if CONFIG_VP9_VAAPI_HWACCEL
1953 #if CONFIG_VP9_VDPAU_HWACCEL
1956 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
1959 #if CONFIG_VP9_VULKAN_HWACCEL