24 #include "../ops_internal.h"
25 #include "../swscale_internal.h"
29 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
37 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
39 s->spvc->uninit(&
s->spvc);
57 if (
s->vkctx.device_ref &&
s->vkctx.device_ref->data != dev_ref->
data) {
60 }
else if (
s->vkctx.device_ref &&
s->vkctx.device_ref->data == dev_ref->
data) {
74 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
76 s->spvc = ff_vk_spirv_init();
/*
 * Compile-time constant with the value 4.
 * NOTE(review): presumably the capacity of the per-pass dither buffer array
 * (the one indexed as dither_buf[i] and counted by nb_dither_buf) -- the
 * declaration that uses this macro is not visible here; confirm.
 */
85 #define MAX_DITHER_BUFS 4
108 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
109 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
111 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
112 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
120 0, 0, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
122 0, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
125 VkImageMemoryBarrier2 img_bar[8];
127 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
128 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
129 VK_ACCESS_SHADER_READ_BIT,
130 VK_IMAGE_LAYOUT_GENERAL,
131 VK_QUEUE_FAMILY_IGNORED);
133 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
134 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
135 VK_ACCESS_SHADER_WRITE_BIT,
136 VK_IMAGE_LAYOUT_GENERAL,
137 VK_QUEUE_FAMILY_IGNORED);
138 vk->CmdPipelineBarrier2(ec->buf, &(VkDependencyInfo) {
139 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
140 .pImageMemoryBarriers = img_bar,
141 .imageMemoryBarrierCount = nb_img_bar,
146 vk->CmdDispatch(ec->buf,
160 for (
int i = 0;
i <
p->nb_dither_buf;
i++)
169 p->nb_dither_buf = 0;
170 for (
int n = 0; n < ops->
num_ops; n++) {
176 int size = (1 <<
op->dither.size_log2);
177 int idx =
p->nb_dither_buf;
180 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
181 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
182 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
189 (uint8_t **)&dither_data, 0);
193 for (
int i = 0;
i <
size;
i++) {
194 for (
int j = 0; j <
size; j++) {
206 for (
int i = 0;
i <
p->nb_dither_buf;
i++)
211 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
222 typedef struct SPIRVIDs {
255 int linear_deco_off[16];
256 int linear_deco_ops[16];
260 int dither_ptr_elem_id;
264 int out_img_array_id;
280 SPICtx *spi, SPIRVIDs *
id)
299 id->in_vars, 3 +
id->nb_dither_bufs);
305 SpvBuiltInGlobalInvocationId);
317 for (
int i = 0;
i <
id->nb_dither_bufs;
i++) {
321 id->dither[
i].size*
sizeof(
float));
329 for (
int n = 0; n < ops->
num_ops; n++) {
336 for (
int j = 0; j < 4; j++) {
337 nb_ops += !!
op->lin.m[j][0].num;
338 nb_ops +=
op->lin.m[j][0].num &&
op->lin.m[j][4].num;
339 for (
int i = 1;
i < 4;
i++) {
340 nb_ops += !!
op->lin.m[j][
i].num;
341 nb_ops +=
op->lin.m[j][
i].num &&
342 (
op->lin.m[j][0].num ||
op->lin.m[j][4].num);
346 id->linear_deco_off[
id->nb_linear_ops] =
spi_reserve(spi, nb_ops*4*3);
347 id->linear_deco_ops[
id->nb_linear_ops] = nb_ops;
353 static void define_shader_consts(
SwsOpList *ops,
SPICtx *spi, SPIRVIDs *
id)
359 id->u32_type = spi_OpTypeInt(spi, 32, 0);
360 id->i32_type = spi_OpTypeInt(spi, 32, 1);
362 id->f32_type = spi_OpTypeFloat(spi, 32);
366 id->bvec2_type = spi_OpTypeVector(spi,
id->b_type, 2);
367 id->u32vec2_type = spi_OpTypeVector(spi, u32_type, 2);
368 id->i32vec2_type = spi_OpTypeVector(spi,
id->i32_type, 2);
370 id->u32vec3_type = spi_OpTypeVector(spi, u32_type, 3);
372 id->u32vec4_type = spi_OpTypeVector(spi, u32_type, 4);
373 id->f32vec4_type = spi_OpTypeVector(spi, f32_type, 4);
379 for (
int i = 0;
i < 5;
i++)
383 id->nb_const_ids = 0;
384 for (
int n = 0; n < ops->
num_ops; n++) {
396 id->const_ids[
id->nb_const_ids++] =
tmp;
400 for (
int i = 0;
i < 4;
i++) {
404 id->const_ids[
id->nb_const_ids++] =
406 }
else if (
op->clear.value[
i].den) {
408 id->const_ids[
id->nb_const_ids++] =
418 id->const_ids[
id->nb_const_ids++] =
tmp;
424 float q =
op->scale.factor.num/(
float)
op->scale.factor.den;
434 id->const_ids[
id->nb_const_ids++] =
tmp;
439 for (
int i = 0;
i < 4;
i++) {
442 if (!
op->clamp.limit[
i].den) {
451 id->const_ids[
id->nb_const_ids++] =
tmp;
455 for (
int i = 0;
i < 4;
i++) {
456 if (
op->dither.y_offset[
i] < 0)
459 id->const_ids[
id->nb_const_ids++] =
tmp;
463 for (
int i = 0;
i < 4;
i++) {
465 if (
op->lin.m[
i][0].num) {
467 id->const_ids[
id->nb_const_ids++] =
470 if (
op->lin.m[
i][4].num) {
472 id->const_ids[
id->nb_const_ids++] =
475 for (
int j = 1; j < 4; j++) {
476 if (!
op->lin.m[
i][j].num)
479 id->const_ids[
id->nb_const_ids++] =
492 static void define_shader_bindings(
SwsOpList *ops,
SPICtx *spi, SPIRVIDs *
id,
493 int in_img_count,
int out_img_count)
498 struct DitherData *
dither =
id->dither;
499 for (
int i = 0;
i <
id->nb_dither_bufs;
i++) {
508 SpvStorageClassUniform, 0);
517 id->f32_type :
id->u32_type,
518 2, 0, 0, 0, 2, SpvImageFormatUnknown);
520 id->u32_cid[out_img_count]);
523 id->in_img_array_id = 0;
529 id->in_img_type = match ?
id->out_img_type :
532 id->f32_type :
id->u32_type,
533 2, 0, 0, 0, 2, SpvImageFormatUnknown);
535 id->u32_cid[in_img_count]);
542 id->out_img_array_id);
550 id->in_img_array_id);
557 SpvStorageClassInput, 0);
560 SpvStorageClassUniformConstant, 0);
563 SpvStorageClassUniformConstant, 0);
569 uint8_t spvbuf[1024*16];
570 SPICtx spi_context = { 0 }, *spi = &spi_context;
571 SPIRVIDs spid_data = { 0 }, *
id = &spid_data;
572 spi_init(spi, spvbuf,
sizeof(spvbuf));
579 (uint32_t []) { 32, 32, 1 }, 0);
594 .
type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
595 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
599 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
600 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
617 id->nb_dither_bufs = 0;
618 for (
int n = 0; n < ops->
num_ops; n++) {
623 id->
dither[
id->nb_dither_bufs].size = 1 <<
op->dither.size_log2;
624 id->dither[
id->nb_dither_bufs].arr_1d_id =
spi_get_id(spi);
625 id->dither[
id->nb_dither_bufs].arr_2d_id =
spi_get_id(spi);
626 id->dither[
id->nb_dither_bufs].struct_id =
spi_get_id(spi);
627 id->dither[
id->nb_dither_bufs].id =
spi_get_id(spi);
628 id->in_vars[3 +
id->nb_dither_bufs] =
id->dither[
id->nb_dither_bufs].id;
631 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
632 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
635 if (
id->nb_dither_bufs)
637 id->nb_dither_bufs, 1, 0);
640 define_shader_header(shd, ops, spi,
id);
641 define_shader_consts(ops, spi,
id);
642 define_shader_bindings(ops, spi,
id, in_img_count, out_img_count);
649 int in_img[4] = { 0 };
650 for (
int i = 0;
i < in_img_count;
i++) {
653 id->in_vars[1],
id->u32_cid[
i]);
655 SpvMemoryAccessMaskNone, 0);
660 for (
int i = 0;
i < out_img_count;
i++) {
662 id->in_vars[2],
id->u32_cid[
i]);
664 SpvMemoryAccessMaskNone, 0);
669 SpvMemoryAccessMaskNone, 0);
673 int gi2 = spi_OpBitcast(spi,
id->i32vec2_type, gid);
676 int img1_s = spi_OpImageQuerySize(spi,
id->i32vec2_type, out_img[0]);
677 int scmp = spi_OpSGreaterThanEqual(spi,
id->bvec2_type, gi2, img1_s);
678 scmp = spi_OpAny(spi,
id->b_type, scmp);
693 id->f32_p,
id->f32_p,
694 id->f32_p,
id->f32_p);
697 id->u32_p,
id->u32_p,
698 id->u32_p,
id->u32_p);
701 int nb_const_ids = 0;
702 int nb_dither_bufs = 0;
703 int nb_linear_ops = 0;
706 for (
int n = 0; n < ops->
num_ops; n++) {
709 op->convert.to :
op->type;
711 id->f32vec4_type :
id->u32vec4_type;
713 id->f32_type :
id->u32_type;
715 id->f32_p :
id->u32_p;
719 if (
op->rw.frac ||
op->rw.filter) {
721 }
else if (
op->rw.packed) {
723 gid, SpvImageOperandsMaskNone);
726 for (
int i = 0;
i <
op->rw.elems;
i++) {
729 SpvImageOperandsMaskNone);
737 if (
op->rw.frac ||
op->rw.filter) {
739 }
else if (
op->rw.packed) {
741 SpvImageOperandsMaskNone);
743 for (
int i = 0;
i <
op->rw.elems;
i++) {
747 SpvImageOperandsMaskNone);
752 for (
int i = 0;
i < 4;
i++) {
753 if (!
op->clear.value[
i].den)
756 id->const_ids[nb_const_ids++],
769 data = spi_OpIMul(spi, type_v,
data,
id->const_ids[nb_const_ids++]);
771 data = spi_OpConvertFToU(spi, type_v,
data);
773 data = spi_OpConvertUToF(spi, type_v,
data);
776 data = spi_OpShiftLeftLogical(spi, type_v,
data,
777 id->const_ids[nb_const_ids++]);
780 data = spi_OpShiftRightLogical(spi, type_v,
data,
781 id->const_ids[nb_const_ids++]);
785 data = spi_OpFMul(spi, type_v,
data,
786 id->const_ids[nb_const_ids++]);
788 data = spi_OpIMul(spi, type_v,
data,
789 id->const_ids[nb_const_ids++]);
794 op->op ==
SWS_OP_MIN ? GLSLstd450FMin : GLSLstd450FMax :
796 for (
int i = 0;
i < 4;
i++) {
797 if (!
op->clamp.limit[
i].den)
801 tmp,
id->const_ids[nb_const_ids++]);
807 int did = nb_dither_bufs++;
810 x_id = spi_OpBitwiseAnd(spi,
id->u32_type, x_id,
811 id->dither[did].mask_id);
812 for (
int i = 0;
i < 4;
i++) {
813 if (
op->dither.y_offset[
i] < 0)
816 int y_id = spi_OpIAdd(spi,
id->u32_type, y_pos,
817 id->const_ids[nb_const_ids++]);
818 y_id = spi_OpBitwiseAnd(spi,
id->u32_type, y_id,
819 id->dither[did].mask_id);
822 id->dither[did].id,
id->u32_cid[0],
825 SpvMemoryAccessMaskNone, 0);
841 spi->
off =
id->linear_deco_off[nb_linear_ops];
842 for (
int i = 0;
i <
id->linear_deco_ops[nb_linear_ops];
i++)
847 for (
int j = 0; j < 4; j++) {
849 if (
op->lin.m[j][0].num)
850 res[j] = spi_OpFMul(spi, type_s,
tmp[0],
851 id->const_ids[nb_const_ids++]);
853 if (
op->lin.m[j][0].num &&
op->lin.m[j][4].num)
854 res[j] = spi_OpFAdd(spi, type_s,
855 id->const_ids[nb_const_ids++], res[j]);
856 else if (
op->lin.m[j][4].num)
857 res[j] =
id->const_ids[nb_const_ids++];
859 for (
int i = 1;
i < 4;
i++) {
860 if (!
op->lin.m[j][
i].num)
863 int v = spi_OpFMul(spi, type_s,
tmp[
i],
864 id->const_ids[nb_const_ids++]);
865 if (
op->lin.m[j][0].num ||
op->lin.m[j][4].num)
866 res[j] = spi_OpFAdd(spi, type_s, res[j], v);
872 res[0], res[1], res[2], res[3]);
893 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
905 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
906 .mem_layout = img_type,
910 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
/*
 * Helper pair for formatting a rational value with printf-style functions.
 *
 * QSTR is a format fragment: two %i conversions and one %s conversion,
 * wrapped in parentheses, i.e. "(<num>/<den><suffix>)".
 *
 * QTYPE(Q) expands to the three matching arguments, in order:
 *   (Q).num, (Q).den, and a suffix string that is ".0f" when cur_type equals
 *   SWS_PIXEL_F32 and "" otherwise.
 *
 * Caveats:
 *  - QTYPE reads a variable named cur_type that is NOT a macro parameter, so
 *    one must be in scope wherever the macro is expanded.
 *  - Q is expanded twice; do not pass an expression with side effects.
 *  - NOTE(review): the ".0f" suffix presumably turns the denominator into a
 *    float literal so the emitted quotient is evaluated in floating point --
 *    confirm against the shader text generator.
 */
916 #define QSTR "(%i/%i%s)"
917 #define QTYPE(Q) (Q).num, (Q).den, cur_type == SWS_PIXEL_F32 ? ".0f" : ""
925 void *spv_opaque =
NULL;
932 VK_SHADER_STAGE_COMPUTE_BIT,
933 NULL, 0, 32, 32, 1, 0);
943 add_desc_read_write(&buf_desc[nb_desc++], &
p->src_rep,
read);
944 add_desc_read_write(&buf_desc[nb_desc++], &
p->dst_rep, write);
954 for (
int n = 0; n < ops->
num_ops; n++) {
958 int size = (1 <<
op->dither.size_log2);
960 snprintf(dither_buf_name[nb_desc], 64,
"dither_buf%i", n);
961 snprintf(dither_mat_name[nb_desc], 64,
"float dither_mat%i[%i][%i];",
964 .
name = dither_buf_name[nb_desc],
965 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
966 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
967 .mem_layout =
"scalar",
968 .buf_content = dither_mat_name[nb_desc],
978 GLSLC(1, ivec2
pos = ivec2(gl_GlobalInvocationID.xy); );
979 GLSLC(1, ivec2
size = imageSize(src_img[0]); );
983 GLSLC(1, u8vec4 u8; );
984 GLSLC(1, u16vec4 u16; );
985 GLSLC(1, u32vec4 u32; );
986 GLSLC(1, precise f32vec4 f32; );
990 for (
int n = 0; n < ops->
num_ops; n++) {
1005 if (
op->rw.frac ||
op->rw.filter) {
1007 }
else if (
op->rw.packed) {
1011 for (
int i = 0;
i <
op->rw.elems;
i++)
1018 if (
op->rw.frac ||
op->rw.filter) {
1020 }
else if (
op->rw.packed) {
1024 for (
int i = 0;
i <
op->rw.elems;
i++)
1032 for (
int i = 0;
i < 4;
i++)
1038 for (
int i = 0;
i < 4;
i++) {
1039 if (!
op->clear.value[
i].den)
1042 "xyzw"[
i], type_s, QTYPE(
op->clear.value[
i]));
1048 type_name, type_name, QTYPE(
op->scale.factor));
1052 for (
int i = 0;
i < 4;
i++) {
1053 if (!
op->clamp.limit[
i].den)
1056 type_name,
"xyzw"[
i],
1058 type_name,
"xyzw"[
i], QTYPE(
op->clamp.limit[
i]));
1078 int size = (1 <<
op->dither.size_log2);
1079 for (
int i = 0;
i < 4;
i++) {
1080 if (
op->dither.y_offset[
i] < 0)
1082 av_bprintf(&shd->
src,
" %s.%c += dither_mat%i[(pos.y + %i) & %i]"
1084 type_name,
"xyzw"[
i], n,
1085 op->dither.y_offset[
i],
size - 1,
1091 for (
int i = 0;
i < 4;
i++) {
1092 if (
op->lin.m[
i][4].num)
1094 QTYPE(
op->lin.m[
i][4]));
1097 for (
int j = 0; j < 4; j++) {
1098 if (!
op->lin.m[
i][j].num)
1101 "xyzw"[
i],
"xyzw"[j], QTYPE(
op->lin.m[
i][j]));
1113 err =
s->spvc->compile_shader(&
s->vkctx,
s->spvc, shd,
1114 &spv_data, &spv_len,
"main",
1122 s->spvc->free_shader(
s->spvc, &spv_opaque);
1153 VkFormatProperties2 prop = {
1154 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
1157 vk->GetPhysicalDeviceFormatProperties2(
s->vkctx.hwctx->phys_dev,
1158 VK_FORMAT_B8G8R8A8_UNORM,
1160 if (!(prop.formatProperties.optimalTilingFeatures &
1161 VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT)) {
1169 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1170 err = add_ops_glsl(
p,
s, ops, &
p->shd);
1174 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1175 err = add_ops_spirv(
p,
s, ops, &
p->shd);
1185 for (
int i = 0;
i <
p->nb_dither_buf;
i++)
1187 1,
i, 0, &
p->dither_buf[
i],
1188 0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED);
1204 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1212 .compile = compile_spirv,
1217 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1225 .compile = compile_glsl,