41 .queue_flags = VK_QUEUE_COMPUTE_BIT,
114 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
117 GLSLC(1, u8buf slice_data; );
118 GLSLC(1, u8buf slice_state; );
119 GLSLC(1, u8buf scratch_data; );
121 GLSLC(1, uvec2 img_size; );
122 GLSLC(1, uvec2 chroma_shift; );
124 GLSLC(1, uint plane_state_size; );
125 GLSLC(1, uint32_t crcref; );
127 GLSLC(1, uint8_t bits_per_raw_sample; );
128 GLSLC(1, uint8_t quant_table_count; );
130 GLSLC(1, uint8_t micro_version; );
131 GLSLC(1, uint8_t key_frame; );
133 GLSLC(1, uint8_t codec_planes; );
134 GLSLC(1, uint8_t color_planes; );
135 GLSLC(1, uint8_t transparency; );
136 GLSLC(1, uint8_t colorspace; );
137 GLSLC(1, uint8_t ec; );
138 GLSLC(1, uint8_t golomb; );
139 GLSLC(1, uint8_t check_crc; );
140 GLSLC(1, uint8_t padding[3]; );
143 VK_SHADER_STAGE_COMPUTE_BIT);
168 for (
int i = 0;
i <
f->quant_table_count;
i++)
190 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
191 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
200 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
201 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
203 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
216 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
217 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
219 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
226 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
227 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
228 NULL, 2*
f->max_slice_count*
sizeof(uint32_t),
229 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
230 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
237 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
238 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
239 NULL,
f->max_slice_count*
sizeof(uint32_t),
240 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
241 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
318 int bits =
f->avctx->bits_per_raw_sample > 0 ?
f->avctx->bits_per_raw_sample : 8;
338 VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->
view.
out;
340 VkImageMemoryBarrier2 img_bar[37];
342 VkBufferMemoryBarrier2 buf_bar[8];
350 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
351 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
360 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
361 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
372 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
386 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
387 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
388 .srcStageMask = slice_state->stage,
389 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
390 .srcAccessMask = slice_state->access,
391 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
392 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
393 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
394 .buffer = slice_state->buf,
396 .size = VK_WHOLE_SIZE,
400 vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
401 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
402 .pBufferMemoryBarriers = buf_bar,
403 .bufferMemoryBarrierCount = nb_buf_bar,
406 slice_state->stage = buf_bar[1].dstStageMask;
407 slice_state->access = buf_bar[1].dstAccessMask;
416 VK_FORMAT_UNDEFINED);
420 0, 2*
f->slice_count*
sizeof(uint32_t),
421 VK_FORMAT_UNDEFINED);
425 0,
f->slice_count*
sizeof(uint32_t),
426 VK_FORMAT_UNDEFINED);
434 .scratch_data = tmp_data->
address,
436 .img_size[0] =
f->picture.f->width,
437 .img_size[1] =
f->picture.f->height,
438 .chroma_shift[0] =
f->chroma_h_shift,
439 .chroma_shift[1] =
f->chroma_v_shift,
444 .bits_per_raw_sample =
bits,
445 .quant_table_count =
f->quant_table_count,
446 .version =
f->version,
447 .micro_version =
f->micro_version,
450 .codec_planes =
f->plane_count,
451 .color_planes = color_planes,
452 .transparency =
f->transparency,
453 .colorspace =
f->colorspace,
462 VK_SHADER_STAGE_COMPUTE_BIT,
465 vk->CmdDispatch(exec->buf,
f->num_h_slices,
f->num_v_slices, 1);
473 VK_FORMAT_UNDEFINED);
481 .codec_planes =
f->plane_count,
483 .version =
f->version,
484 .micro_version =
f->micro_version,
487 VK_SHADER_STAGE_COMPUTE_BIT,
488 0,
sizeof(pd_reset), &pd_reset);
491 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
492 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
493 .srcStageMask = slice_state->stage,
494 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
495 .srcAccessMask = slice_state->access,
496 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
497 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
498 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
499 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
500 .buffer = slice_state->buf,
504 vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
505 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
506 .pBufferMemoryBarriers = buf_bar,
507 .bufferMemoryBarrierCount = nb_buf_bar,
509 slice_state->stage = buf_bar[0].dstStageMask;
510 slice_state->access = buf_bar[0].dstAccessMask;
513 vk->CmdDispatch(exec->buf,
f->num_h_slices,
f->num_v_slices,
522 VK_FORMAT_UNDEFINED);
524 decode_dst, decode_dst_view,
526 VK_IMAGE_LAYOUT_GENERAL,
531 VK_SHADER_STAGE_COMPUTE_BIT,
535 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
536 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
537 .srcStageMask = slice_state->stage,
538 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
539 .srcAccessMask = slice_state->access,
540 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
541 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
542 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
543 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
544 .buffer = slice_state->buf,
551 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
552 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
553 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
554 VK_IMAGE_LAYOUT_GENERAL,
555 VK_QUEUE_FAMILY_IGNORED);
557 vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
558 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
559 .pImageMemoryBarriers = img_bar,
560 .imageMemoryBarrierCount = nb_img_bar,
561 .pBufferMemoryBarriers = buf_bar,
562 .bufferMemoryBarrierCount = nb_buf_bar,
564 slice_state->stage = buf_bar[0].dstStageMask;
565 slice_state->access = buf_bar[0].dstAccessMask;
569 vk->CmdDispatch(exec->buf,
f->num_h_slices,
f->num_v_slices, 1);
580 VK_FORMAT_UNDEFINED);
582 decode_dst, decode_dst_view,
584 VK_IMAGE_LAYOUT_GENERAL,
589 VK_IMAGE_LAYOUT_GENERAL,
599 .color_planes = color_planes,
600 .transparency =
f->transparency,
606 memcpy(pd_rct.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
608 memcpy(pd_rct.
fmt_lut, (
int [4]) { 0, 2, 1, 3 }, 4*
sizeof(
int));
613 VK_SHADER_STAGE_COMPUTE_BIT,
614 0,
sizeof(pd_rct), &pd_rct);
617 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
618 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
619 VK_ACCESS_SHADER_READ_BIT,
620 VK_IMAGE_LAYOUT_GENERAL,
621 VK_QUEUE_FAMILY_IGNORED);
623 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
624 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
625 VK_ACCESS_SHADER_WRITE_BIT,
626 VK_IMAGE_LAYOUT_GENERAL,
627 VK_QUEUE_FAMILY_IGNORED);
629 vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
630 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
631 .pImageMemoryBarriers = img_bar,
632 .imageMemoryBarrierCount = nb_img_bar,
636 vk->CmdDispatch(exec->buf,
f->num_h_slices,
f->num_v_slices, 1);
652 int smp_bits = use32bit ? 32 : 16;
659 GLSLF(0, #define
TYPE int%i_t ,smp_bits);
660 GLSLF(0, #define VTYPE2
i%ivec2 ,smp_bits);
661 GLSLF(0, #define VTYPE3
i%ivec3 ,smp_bits);
675 void *spv_opaque =
NULL;
678 VK_SHADER_STAGE_COMPUTE_BIT,
679 (
const char *[]) {
"GL_EXT_buffer_reference",
680 "GL_EXT_buffer_reference2" }, 2,
695 .
name =
"rangecoder_static_buf",
696 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
697 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
698 .mem_layout =
"scalar",
699 .buf_content =
"uint8_t zero_one_state[512];",
702 .name =
"crc_ieee_buf",
703 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
704 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
705 .mem_layout =
"scalar",
706 .buf_content =
"uint32_t crc_ieee[256];",
710 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
711 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
712 .mem_layout =
"scalar",
713 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
714 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
724 .
name =
"slice_data_buf",
725 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
726 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
727 .buf_content =
"SliceContext slice_ctx",
728 .buf_elems =
f->max_slice_count,
731 .name =
"slice_offsets_buf",
732 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
733 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
734 .mem_quali =
"readonly",
735 .buf_content =
"uint32_t slice_offsets",
736 .buf_elems = 2*
f->max_slice_count,
739 .name =
"slice_status_buf",
740 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
741 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
742 .mem_quali =
"writeonly",
743 .buf_content =
"uint32_t slice_crc_mismatch",
744 .buf_elems =
f->max_slice_count,
773 void *spv_opaque =
NULL;
774 int wg_dim =
FFMIN(
s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
777 VK_SHADER_STAGE_COMPUTE_BIT,
778 (
const char *[]) {
"GL_EXT_buffer_reference",
779 "GL_EXT_buffer_reference2" }, 2,
789 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
790 GLSLC(1, u8buf slice_state; );
791 GLSLC(1, uint plane_state_size; );
792 GLSLC(1, uint context_count; );
793 GLSLC(1, uint8_t codec_planes; );
794 GLSLC(1, uint8_t key_frame; );
796 GLSLC(1, uint8_t micro_version; );
797 GLSLC(1, uint8_t padding[1]; );
800 VK_SHADER_STAGE_COMPUTE_BIT);
808 .
name =
"rangecoder_static_buf",
809 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
810 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
811 .mem_layout =
"scalar",
812 .buf_content =
"uint8_t zero_one_state[512];",
816 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
817 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
818 .mem_layout =
"scalar",
819 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
820 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
831 .
name =
"slice_data_buf",
832 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
833 .mem_quali =
"readonly",
834 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
835 .buf_content =
"SliceContext slice_ctx",
836 .buf_elems =
f->max_slice_count,
859 int use32bit,
int ac,
int rgb)
866 void *spv_opaque =
NULL;
869 VK_SHADER_STAGE_COMPUTE_BIT,
870 (
const char *[]) {
"GL_EXT_buffer_reference",
871 "GL_EXT_buffer_reference2" }, 2,
892 .
name =
"rangecoder_static_buf",
893 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
894 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
895 .mem_layout =
"scalar",
896 .buf_content =
"uint8_t zero_one_state[512];",
900 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
901 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
902 .mem_layout =
"scalar",
903 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
904 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
916 .
name =
"slice_data_buf",
917 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
918 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
919 .buf_content =
"SliceContext slice_ctx",
920 .buf_elems =
f->max_slice_count,
924 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
929 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
959 void *spv_opaque =
NULL;
960 int wg_count = sqrt(
s->props.properties.limits.maxComputeWorkGroupInvocations);
963 VK_SHADER_STAGE_COMPUTE_BIT,
964 (
const char *[]) {
"GL_EXT_buffer_reference",
965 "GL_EXT_buffer_reference2" }, 2,
966 wg_count, wg_count, 1,
972 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
973 GLSLC(1, ivec4 fmt_lut; );
976 GLSLC(1, uint8_t planar_rgb; );
977 GLSLC(1, uint8_t color_planes; );
978 GLSLC(1, uint8_t transparency; );
980 GLSLC(1, uint8_t micro_version; );
981 GLSLC(1, uint8_t padding[2]; );
984 VK_SHADER_STAGE_COMPUTE_BIT);
992 .
name =
"rangecoder_static_buf",
993 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
994 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
995 .mem_layout =
"scalar",
996 .buf_content =
"uint8_t zero_one_state[512];",
1000 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1001 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1002 .mem_layout =
"scalar",
1003 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
1004 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
1013 .
name =
"slice_data_buf",
1014 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1015 .mem_quali =
"readonly",
1016 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1017 .buf_content =
"SliceContext slice_ctx",
1018 .buf_elems =
f->max_slice_count,
1022 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1026 .mem_quali =
"readonly",
1028 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1032 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1036 .mem_quali =
"writeonly",
1038 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1075 vk_frames = frames_ctx->
hwctx;
1076 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
1077 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT;
1078 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
1097 for (
int i = 0;
i < 2;
i++)
1100 for (
int i = 0;
i < 2;
i++)
1103 for (
int i = 0;
i < 2;
i++)
1104 for (
int j = 0; j < 2; j++)
1105 for (
int k = 0; k < 2; k++)
1108 for (
int i = 0;
i < 2;
i++)
1130 if (
f->version < 3 ||
1131 (
f->version == 4 &&
f->micro_version > 3))
1134 spv = ff_vk_spirv_init();
1145 switch (
ctx->s.driver_props.driverID) {
1146 case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS:
1147 case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
1150 "Intel's drivers are unsupported, use -strict -1 to enable acceleration.\n");
1154 "Enabling acceleration on Intel's drivers.\n");
1168 for (
int i = 0;
i < 2;
i++) {
1181 for (
int i = 0;
i < 2;
i++) {
1189 for (
int i = 0;
i < 2;
i++) {
1190 for (
int j = 0; j < 2; j++) {
1191 for (
int k = 0; k < 2; k++) {
1208 for (
int i = 0;
i < 2;
i++) {
1210 spv, &fv->
rct[
i],
i,
1240 &fv->
setup, 0, 0, 0,
1243 VK_FORMAT_UNDEFINED));
1245 &fv->
setup, 0, 1, 0,
1248 VK_FORMAT_UNDEFINED));
1251 for (
int i = 0;
i < 2;
i++) {
1252 for (
int j = 0; j < 2; j++) {
1253 for (
int k = 0; k < 2; k++) {
1255 &fv->
decode[
i][j][k], 0, 0, 0,
1258 VK_FORMAT_UNDEFINED));
1260 &fv->
decode[
i][j][k], 0, 1, 0,
1263 VK_FORMAT_UNDEFINED));
1300 .
p.
name =
"ffv1_vulkan",