FFmpeg
ops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2026 Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/refstruct.h"
23 
24 #include "../ops_internal.h"
25 #include "../swscale_internal.h"
26 
27 #include "ops.h"
28 
29 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
30 #include "spvasm.h"
31 #endif
32 
33 static void ff_sws_vk_uninit(AVRefStructOpaque opaque, void *obj)
34 {
35  FFVulkanOpsCtx *s = obj;
36 
37 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
38  if (s->spvc)
39  s->spvc->uninit(&s->spvc);
40 #endif
41  ff_vk_uninit(&s->vkctx);
42 }
43 
45 {
46  int err;
47  SwsInternal *c = sws_internal(sws);
48 
49  if (!c->hw_priv) {
50  c->hw_priv = av_refstruct_alloc_ext(sizeof(FFVulkanOpsCtx), 0, NULL,
52  if (!c->hw_priv)
53  return AVERROR(ENOMEM);
54  }
55 
56  FFVulkanOpsCtx *s = c->hw_priv;
57  if (s->vkctx.device_ref && s->vkctx.device_ref->data != dev_ref->data) {
58  /* Reinitialize with new context */
59  ff_vk_uninit(&s->vkctx);
60  } else if (s->vkctx.device_ref && s->vkctx.device_ref->data == dev_ref->data) {
61  return 0;
62  }
63 
64  err = ff_vk_init(&s->vkctx, sws, dev_ref, NULL);
65  if (err < 0)
66  return err;
67 
68  s->qf = ff_vk_qf_find(&s->vkctx, VK_QUEUE_COMPUTE_BIT, 0);
69  if (!s->qf) {
70  av_log(sws, AV_LOG_ERROR, "Device has no compute queues\n");
71  return AVERROR(ENOTSUP);
72  }
73 
74 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
75  if (!s->spvc) {
76  s->spvc = ff_vk_spirv_init();
77  if (!s->spvc)
78  return AVERROR(ENOMEM);
79  }
80 #endif
81 
82  return 0;
83 }
84 
86 {
87  SwsInternal *c = sws_internal(sws);
88  FFVulkanOpsCtx *s = c->hw_priv;
89  return s ? s->vkctx.device_ref : NULL;
90 }
91 
92 #define MAX_DITHER_BUFS 4
93 #define MAX_FILT_BUFS 4
94 #define MAX_DATA_BUFS (MAX_DITHER_BUFS + MAX_FILT_BUFS*4)
95 
96 typedef struct VulkanPriv {
104 } VulkanPriv;
105 
106 static void process(const SwsFrame *dst, const SwsFrame *src, int y, int h,
107  const SwsPass *pass)
108 {
109  VulkanPriv *p = (VulkanPriv *) pass->priv;
110  FFVkExecContext *ec = ff_vk_exec_get(&p->s->vkctx, &p->e);
111  FFVulkanFunctions *vk = &p->s->vkctx.vkfn;
112  ff_vk_exec_start(&p->s->vkctx, ec);
113 
114  AVFrame *src_f = (AVFrame *) src->avframe;
115  AVFrame *dst_f = (AVFrame *) dst->avframe;
116  ff_vk_exec_add_dep_frame(&p->s->vkctx, ec, src_f,
117  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
118  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
119  ff_vk_exec_add_dep_frame(&p->s->vkctx, ec, dst_f,
120  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
121  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
122 
123  VkImageView src_views[AV_NUM_DATA_POINTERS];
124  VkImageView dst_views[AV_NUM_DATA_POINTERS];
125  ff_vk_create_imageviews(&p->s->vkctx, ec, src_views, src_f, p->src_rep);
126  ff_vk_create_imageviews(&p->s->vkctx, ec, dst_views, dst_f, p->dst_rep);
127 
128  ff_vk_shader_update_img_array(&p->s->vkctx, ec, &p->shd, src_f, src_views,
129  0, 0, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
130  ff_vk_shader_update_img_array(&p->s->vkctx, ec, &p->shd, dst_f, dst_views,
131  0, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
132 
133  int nb_img_bar = 0;
134  VkImageMemoryBarrier2 img_bar[8];
135  ff_vk_frame_barrier(&p->s->vkctx, ec, src_f, img_bar, &nb_img_bar,
136  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
137  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
138  VK_ACCESS_SHADER_READ_BIT,
139  VK_IMAGE_LAYOUT_GENERAL,
140  VK_QUEUE_FAMILY_IGNORED);
141  ff_vk_frame_barrier(&p->s->vkctx, ec, dst_f, img_bar, &nb_img_bar,
142  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
143  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
144  VK_ACCESS_SHADER_WRITE_BIT,
145  VK_IMAGE_LAYOUT_GENERAL,
146  VK_QUEUE_FAMILY_IGNORED);
147  vk->CmdPipelineBarrier2(ec->buf, &(VkDependencyInfo) {
148  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
149  .pImageMemoryBarriers = img_bar,
150  .imageMemoryBarrierCount = nb_img_bar,
151  });
152 
153  ff_vk_exec_bind_shader(&p->s->vkctx, ec, &p->shd);
154 
155  vk->CmdDispatch(ec->buf,
156  FFALIGN(dst_f->width, p->shd.lg_size[0])/p->shd.lg_size[0],
157  FFALIGN(dst_f->height, p->shd.lg_size[1])/p->shd.lg_size[1],
158  1);
159 
160  ff_vk_exec_submit(&p->s->vkctx, ec);
161  ff_vk_exec_wait(&p->s->vkctx, ec);
162 }
163 
164 static void free_fn(void *priv)
165 {
166  VulkanPriv *p = priv;
167  ff_vk_exec_pool_free(&p->s->vkctx, &p->e);
168  ff_vk_shader_free(&p->s->vkctx, &p->shd);
169  for (int i = 0; i < p->nb_data_bufs; i++)
170  ff_vk_free_buf(&p->s->vkctx, &p->data_bufs[i]);
171  av_refstruct_unref(&p->s);
172  av_free(priv);
173 }
174 
176  const SwsFilterWeights *wd, FFVkBuffer *buf)
177 {
178  int err;
179 
180  /* Weights */
181  err = ff_vk_create_buf(&s->vkctx, buf,
182  wd->num_weights*sizeof(float) +
183  wd->dst_size*sizeof(int32_t), NULL, NULL,
184  VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
185  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
186  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
187  if (err < 0)
188  goto fail;
189 
190  float *weights_data;
191  err = ff_vk_map_buffer(&s->vkctx, buf,
192  (uint8_t **)&weights_data, 0);
193  if (err < 0)
194  goto fail;
195  for (int i = 0; i < wd->num_weights; i++)
196  weights_data[i] = (float) wd->weights[i] / SWS_FILTER_SCALE;
197 
198  memcpy(weights_data + wd->num_weights,
199  wd->offsets, wd->dst_size*sizeof(int32_t));
200 
201  ff_vk_unmap_buffer(&s->vkctx, buf, 1);
202 
203  return 0;
204 
205 fail:
206  ff_vk_free_buf(&p->s->vkctx, buf);
207  return 0;
208 }
209 
211  const SwsDitherOp *dd, FFVkBuffer *buf)
212 {
213  int err;
214 
215  int size = (1 << dd->size_log2);
216  err = ff_vk_create_buf(&s->vkctx, buf,
217  size*size*sizeof(float), NULL, NULL,
218  VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
219  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
220  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
221  if (err < 0)
222  return err;
223 
224  float *dither_data;
225  err = ff_vk_map_buffer(&s->vkctx, buf, (uint8_t **)&dither_data, 0);
226  if (err < 0)
227  goto fail;
228 
229  for (int i = 0; i < size; i++) {
230  for (int j = 0; j < size; j++) {
231  const AVRational r = dd->matrix[i*size + j];
232  dither_data[i*size + j] = r.num/(float)r.den;
233  }
234  }
235 
236  ff_vk_unmap_buffer(&s->vkctx, buf, 1);
237 
238  return 0;
239 
240 fail:
241  ff_vk_free_buf(&p->s->vkctx, buf);
242  return err;
243 }
244 
246 {
247  int err;
248  p->nb_data_bufs = 0;
249  for (int n = 0; n < ops->num_ops; n++) {
250  const SwsOp *op = &ops->ops[n];
251  if (op->op == SWS_OP_DITHER) {
252  av_assert0(p->nb_data_bufs + 1 <= FF_ARRAY_ELEMS(p->data_bufs));
253  err = create_dither_buf(s, p, &op->dither,
254  &p->data_bufs[p->nb_data_bufs]);
255  if (err < 0)
256  goto fail;
257  p->nb_data_bufs++;
258  } else if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V) {
259  av_assert0(p->nb_data_bufs + 1 <= FF_ARRAY_ELEMS(p->data_bufs));
260  err = create_filter_buf(s, p, op->filter.kernel,
261  &p->data_bufs[p->nb_data_bufs]);
262  if (err < 0)
263  goto fail;
264  p->nb_data_bufs++;
265  } else if ((op->op == SWS_OP_READ ||
266  op->op == SWS_OP_WRITE) && op->rw.filter) {
267  av_assert0(p->nb_data_bufs + 1 <= FF_ARRAY_ELEMS(p->data_bufs));
268  err = create_filter_buf(s, p, op->rw.kernel,
269  &p->data_bufs[p->nb_data_bufs]);
270  if (err < 0)
271  goto fail;
272  p->nb_data_bufs++;
273  }
274  }
275 
276  return 0;
277 
278 fail:
279  for (int i = 0; i < p->nb_data_bufs; i++)
280  ff_vk_free_buf(&p->s->vkctx, &p->data_bufs[i]);
281  return err;
282 }
283 
284 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
/**
 * SPIR-V result IDs describing one dither matrix uniform buffer.
 */
struct DitherData {
    int size;                 /* matrix side length, 1 << size_log2 */
    int arr_1d_id, arr_2d_id; /* float[size] row type and the array of rows */
    int struct_id;            /* block struct wrapping the 2D array */
    int struct_ptr_id;        /* Uniform-class pointer to struct_id */
    int id;                   /* the buffer variable itself */
    int mask_id;              /* uint constant equal to size - 1 */
};
294 
295 typedef struct SPIRVIDs {
296  int in_vars[3 + MAX_DATA_BUFS];
297 
298  int glfn;
299  int ep;
300 
301  /* Types */
302  int void_type;
303  int b_type;
304  int u32_type;
305  int i32_type;
306  int f32_type;
307  int void_fn_type;
308 
309  /* Define vector types */
310  int bvec2_type;
311  int u32vec2_type;
312  int i32vec2_type;
313 
314  int u32vec3_type;
315 
316  int u32vec4_type;
317  int f32vec4_type;
318  int f32mat4_type;
319 
320  /* Constants */
321  int u32_p;
322  int f32_p;
323  int f32_0;
324  int u32_cid[5];
325 
326  int const_ids[128];
327  int nb_const_ids;
328 
329  int linear_deco_off[16];
330  int linear_deco_ops[16];
331  int nb_linear_ops;
332 
333  struct DitherData dither[MAX_DITHER_BUFS];
334  int dither_ptr_elem_id;
335  int nb_dither_bufs;
336 
337  int out_img_type;
338  int out_img_array_id;
339 
340  int in_img_type;
341  int in_img_array_id;
342 
343  /* Pointer types for images */
344  int u32vec3_tptr;
345  int out_img_tptr;
346  int out_img_sptr;
347 
348  int in_img_tptr;
349  int in_img_sptr;
350 } SPIRVIDs;
351 
352 /* Section 1: Function to define all shader header data, and decorations */
353 static void define_shader_header(SwsContext *sws, FFVulkanShader *shd, SwsOpList *ops,
354  SPICtx *spi, SPIRVIDs *id)
355 {
356  spi_OpCapability(spi, SpvCapabilityShader); /* Shader type */
357 
358  /* Declare required capabilities */
359  spi_OpCapability(spi, SpvCapabilityInt16);
360  spi_OpCapability(spi, SpvCapabilityInt8);
361  spi_OpCapability(spi, SpvCapabilityImageQuery);
362  spi_OpCapability(spi, SpvCapabilityStorageImageReadWithoutFormat);
363  spi_OpCapability(spi, SpvCapabilityStorageImageWriteWithoutFormat);
364  spi_OpCapability(spi, SpvCapabilityStorageBuffer8BitAccess);
365  /* Import the GLSL set of functions (used for min/max) */
366  id->glfn = spi_OpExtInstImport(spi, "GLSL.std.450");
367 
368  /* Next section starts here */
369  spi_OpMemoryModel(spi, SpvAddressingModelLogical, SpvMemoryModelGLSL450);
370 
371  /* Entrypoint */
372  id->ep = spi_OpEntryPoint(spi, SpvExecutionModelGLCompute, "main",
373  id->in_vars, 3 + id->nb_dither_bufs);
374  spi_OpExecutionMode(spi, id->ep, SpvExecutionModeLocalSize,
375  shd->lg_size, 3);
376 
377  /* gl_GlobalInvocationID descriptor decorations */
378  spi_OpDecorate(spi, id->in_vars[0], SpvDecorationBuiltIn,
379  SpvBuiltInGlobalInvocationId);
380 
381  /* Input image descriptor decorations */
382  spi_OpDecorate(spi, id->in_vars[1], SpvDecorationNonWritable);
383  spi_OpDecorate(spi, id->in_vars[1], SpvDecorationDescriptorSet, 0);
384  spi_OpDecorate(spi, id->in_vars[1], SpvDecorationBinding, 0);
385 
386  /* Output image descriptor decorations */
387  spi_OpDecorate(spi, id->in_vars[2], SpvDecorationNonReadable);
388  spi_OpDecorate(spi, id->in_vars[2], SpvDecorationDescriptorSet, 0);
389  spi_OpDecorate(spi, id->in_vars[2], SpvDecorationBinding, 1);
390 
391  for (int i = 0; i < id->nb_dither_bufs; i++) {
392  spi_OpDecorate(spi, id->dither[i].arr_1d_id, SpvDecorationArrayStride,
393  sizeof(float));
394  spi_OpDecorate(spi, id->dither[i].arr_2d_id, SpvDecorationArrayStride,
395  id->dither[i].size*sizeof(float));
396  spi_OpDecorate(spi, id->dither[i].struct_id, SpvDecorationBlock);
397  spi_OpMemberDecorate(spi, id->dither[i].struct_id, 0, SpvDecorationOffset, 0);
398  spi_OpDecorate(spi, id->dither[i].id, SpvDecorationDescriptorSet, 1);
399  spi_OpDecorate(spi, id->dither[i].id, SpvDecorationBinding, i);
400  }
401 
402  if (!(sws->flags & SWS_BITEXACT))
403  return;
404 
405  /* All linear arithmetic ops must be decorated with NoContraction */
406  for (int n = 0; n < ops->num_ops; n++) {
407  const SwsOp *op = &ops->ops[n];
408  if (op->op != SWS_OP_LINEAR)
409  continue;
410  av_assert0((id->nb_linear_ops + 1) <= FF_ARRAY_ELEMS(id->linear_deco_off));
411 
412  int nb_ops = 0;
413  for (int j = 0; j < 4; j++) {
414  nb_ops += !!op->lin.m[j][0].num;
415  nb_ops += op->lin.m[j][0].num && op->lin.m[j][4].num;
416  for (int i = 1; i < 4; i++) {
417  nb_ops += !!op->lin.m[j][i].num;
418  nb_ops += op->lin.m[j][i].num &&
419  (op->lin.m[j][0].num || op->lin.m[j][4].num);
420  }
421  }
422 
423  id->linear_deco_off[id->nb_linear_ops] = spi_reserve(spi, nb_ops*4*3);
424  id->linear_deco_ops[id->nb_linear_ops] = nb_ops;
425  id->nb_linear_ops++;
426  }
427 }
428 
429 /* Section 2: Define all types and constants */
430 static void define_shader_consts(SwsContext *sws, SwsOpList *ops, SPICtx *spi, SPIRVIDs *id)
431 {
432  /* Define scalar types */
433  id->void_type = spi_OpTypeVoid(spi);
434  id->b_type = spi_OpTypeBool(spi);
435  int u32_type =
436  id->u32_type = spi_OpTypeInt(spi, 32, 0);
437  id->i32_type = spi_OpTypeInt(spi, 32, 1);
438  int f32_type =
439  id->f32_type = spi_OpTypeFloat(spi, 32);
440  id->void_fn_type = spi_OpTypeFunction(spi, id->void_type, NULL, 0);
441 
442  /* Define vector types */
443  id->bvec2_type = spi_OpTypeVector(spi, id->b_type, 2);
444  id->u32vec2_type = spi_OpTypeVector(spi, u32_type, 2);
445  id->i32vec2_type = spi_OpTypeVector(spi, id->i32_type, 2);
446 
447  id->u32vec3_type = spi_OpTypeVector(spi, u32_type, 3);
448 
449  id->u32vec4_type = spi_OpTypeVector(spi, u32_type, 4);
450  id->f32vec4_type = spi_OpTypeVector(spi, f32_type, 4);
451  id->f32mat4_type = spi_OpTypeMatrix(spi, id->f32vec4_type, 4);
452 
453  /* Constants */
454  id->u32_p = spi_OpUndef(spi, u32_type);
455  id->f32_p = spi_OpUndef(spi, f32_type);
456  id->f32_0 = spi_OpConstantFloat(spi, f32_type, 0);
457  for (int i = 0; i < 5; i++)
458  id->u32_cid[i] = spi_OpConstantUInt(spi, u32_type, i);
459 
460  /* Operation constants */
461  id->nb_const_ids = 0;
462  for (int n = 0; n < ops->num_ops; n++) {
463  /* Make sure there's always enough space for the maximum number of
464  * constants a single operation needs (currently linear, 31 consts). */
465  av_assert0((id->nb_const_ids + 31) <= FF_ARRAY_ELEMS(id->const_ids));
466  const SwsOp *op = &ops->ops[n];
467  switch (op->op) {
468  case SWS_OP_CONVERT:
469  if (ff_sws_pixel_type_is_int(op->convert.to) && op->convert.expand) {
470  AVRational m = ff_sws_pixel_expand(op->type, op->convert.to);
471  int tmp = spi_OpConstantUInt(spi, id->u32_type, m.num);
472  tmp = spi_OpConstantComposite(spi, id->u32vec4_type,
473  tmp, tmp, tmp, tmp);
474  id->const_ids[id->nb_const_ids++] = tmp;
475  }
476  break;
477  case SWS_OP_CLEAR:
478  for (int i = 0; i < 4; i++) {
479  if (!SWS_COMP_TEST(op->clear.mask, i))
480  continue;
481  AVRational cv = op->clear.value[i];
482  if (op->type == SWS_PIXEL_F32) {
483  float q = (float)cv.num/cv.den;
484  id->const_ids[id->nb_const_ids++] =
485  spi_OpConstantFloat(spi, f32_type, q);
486  } else {
487  av_assert0(cv.den == 1);
488  id->const_ids[id->nb_const_ids++] =
489  spi_OpConstantUInt(spi, u32_type, cv.num);
490  }
491  }
492  break;
493  case SWS_OP_LSHIFT:
494  case SWS_OP_RSHIFT: {
495  int tmp = spi_OpConstantUInt(spi, u32_type, op->shift.amount);
496  tmp = spi_OpConstantComposite(spi, id->u32vec4_type,
497  tmp, tmp, tmp, tmp);
498  id->const_ids[id->nb_const_ids++] = tmp;
499  break;
500  }
501  case SWS_OP_SCALE: {
502  int tmp;
503  if (op->type == SWS_PIXEL_F32) {
504  float q = op->scale.factor.num/(float)op->scale.factor.den;
505  tmp = spi_OpConstantFloat(spi, f32_type, q);
506  tmp = spi_OpConstantComposite(spi, id->f32vec4_type,
507  tmp, tmp, tmp, tmp);
508  } else {
509  av_assert0(op->scale.factor.den == 1);
510  tmp = spi_OpConstantUInt(spi, u32_type, op->scale.factor.num);
511  tmp = spi_OpConstantComposite(spi, id->u32vec4_type,
512  tmp, tmp, tmp, tmp);
513  }
514  id->const_ids[id->nb_const_ids++] = tmp;
515  break;
516  }
517  case SWS_OP_MIN:
518  case SWS_OP_MAX:
519  for (int i = 0; i < 4; i++) {
520  int tmp;
521  AVRational cl = op->clamp.limit[i];
522  if (!op->clamp.limit[i].den) {
523  continue;
524  } else if (op->type == SWS_PIXEL_F32) {
525  float q = (float)cl.num/((float)cl.den);
526  tmp = spi_OpConstantFloat(spi, f32_type, q);
527  } else {
528  av_assert0(cl.den == 1);
529  tmp = spi_OpConstantUInt(spi, u32_type, cl.num);
530  }
531  id->const_ids[id->nb_const_ids++] = tmp;
532  }
533  break;
534  case SWS_OP_DITHER:
535  for (int i = 0; i < 4; i++) {
536  if (op->dither.y_offset[i] < 0)
537  continue;
538  int tmp = spi_OpConstantUInt(spi, u32_type, op->dither.y_offset[i]);
539  id->const_ids[id->nb_const_ids++] = tmp;
540  }
541  break;
542  case SWS_OP_LINEAR: {
543  int tmp;
544  float val;
545  for (int i = 0; i < 4; i++) {
546  for (int j = 0; j < 4; j++) {
547  int k = sws->flags & SWS_BITEXACT ? i : j;
548  int l = sws->flags & SWS_BITEXACT ? j : i;
549  val = op->lin.m[k][l].num/(float)op->lin.m[k][l].den;
550  id->const_ids[id->nb_const_ids++] =
551  spi_OpConstantFloat(spi, f32_type, val);
552  }
553  tmp = spi_OpConstantComposite(spi, id->f32vec4_type,
554  id->const_ids[id->nb_const_ids - 4],
555  id->const_ids[id->nb_const_ids - 3],
556  id->const_ids[id->nb_const_ids - 2],
557  id->const_ids[id->nb_const_ids - 1]);
558  id->const_ids[id->nb_const_ids++] = tmp;
559  }
560 
561  tmp = spi_OpConstantComposite(spi, id->f32mat4_type,
562  id->const_ids[id->nb_const_ids - 5*4 + 4],
563  id->const_ids[id->nb_const_ids - 5*3 + 4],
564  id->const_ids[id->nb_const_ids - 5*2 + 4],
565  id->const_ids[id->nb_const_ids - 5*1 + 4]);
566  id->const_ids[id->nb_const_ids++] = tmp;
567 
568  for (int i = 0; i < 4; i++) {
569  val = op->lin.m[i][4].num/(float)op->lin.m[i][4].den;
570  id->const_ids[id->nb_const_ids++] =
571  spi_OpConstantFloat(spi, f32_type, val);
572  }
573 
574  tmp = spi_OpConstantComposite(spi, id->f32vec4_type,
575  id->const_ids[id->nb_const_ids - 4],
576  id->const_ids[id->nb_const_ids - 3],
577  id->const_ids[id->nb_const_ids - 2],
578  id->const_ids[id->nb_const_ids - 1]);
579  id->const_ids[id->nb_const_ids++] = tmp;
580  break;
581  }
582  default:
583  break;
584  }
585  }
586 }
587 
588 /* Section 3: Define bindings */
589 static void define_shader_bindings(SwsOpList *ops, SPICtx *spi, SPIRVIDs *id,
590  int in_img_count, int out_img_count)
591 {
592  id->dither_ptr_elem_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
593  id->f32_type);
594 
595  struct DitherData *dither = id->dither;
596  for (int i = 0; i < id->nb_dither_bufs; i++) {
597  int size_id = spi_OpConstantUInt(spi, id->u32_type, dither[i].size);
598  dither[i].mask_id = spi_OpConstantUInt(spi, id->u32_type, dither[i].size - 1);
599  spi_OpTypeArray(spi, id->f32_type, dither[i].arr_1d_id, size_id);
600  spi_OpTypeArray(spi, dither[i].arr_1d_id, dither[i].arr_2d_id, size_id);
601  spi_OpTypeStruct(spi, dither[i].struct_id, dither[i].arr_2d_id);
602  dither[i].struct_ptr_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
603  dither[i].struct_id);
604  dither[i].id = spi_OpVariable(spi, dither[i].id, dither[i].struct_ptr_id,
605  SpvStorageClassUniform, 0);
606  }
607 
608  const SwsOp *op_w = ff_sws_op_list_output(ops);
609  const SwsOp *op_r = ff_sws_op_list_input(ops);
610 
611  /* Define image types for descriptors */
612  id->out_img_type = spi_OpTypeImage(spi,
613  op_w->type == SWS_PIXEL_F32 ?
614  id->f32_type : id->u32_type,
615  2, 0, 0, 0, 2, SpvImageFormatUnknown);
616  id->out_img_array_id = spi_OpTypeArray(spi, id->out_img_type, spi_get_id(spi),
617  id->u32_cid[out_img_count]);
618 
619  id->in_img_type = 0;
620  id->in_img_array_id = 0;
621  if (op_r) {
622  /* If the formats match, we have to reuse the types due to SPIR-V not
623  * allowing redundant type defines */
624  int match = ((op_w->type == SWS_PIXEL_F32) ==
625  (op_r->type == SWS_PIXEL_F32));
626  id->in_img_type = match ? id->out_img_type :
627  spi_OpTypeImage(spi,
628  op_r->type == SWS_PIXEL_F32 ?
629  id->f32_type : id->u32_type,
630  2, 0, 0, 0, 2, SpvImageFormatUnknown);
631  id->in_img_array_id = spi_OpTypeArray(spi, id->in_img_type, spi_get_id(spi),
632  id->u32_cid[in_img_count]);
633  }
634 
635  /* Pointer types for images */
636  id->u32vec3_tptr = spi_OpTypePointer(spi, SpvStorageClassInput,
637  id->u32vec3_type);
638  id->out_img_tptr = spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
639  id->out_img_array_id);
640  id->out_img_sptr = spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
641  id->out_img_type);
642 
643  id->in_img_tptr = 0;
644  id->in_img_sptr = 0;
645  if (op_r) {
646  id->in_img_tptr= spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
647  id->in_img_array_id);
648  id->in_img_sptr= spi_OpTypePointer(spi, SpvStorageClassUniformConstant,
649  id->in_img_type);
650  }
651 
652  /* Define inputs */
653  spi_OpVariable(spi, id->in_vars[0], id->u32vec3_tptr,
654  SpvStorageClassInput, 0);
655  if (op_r) {
656  spi_OpVariable(spi, id->in_vars[1], id->in_img_tptr,
657  SpvStorageClassUniformConstant, 0);
658  }
659  spi_OpVariable(spi, id->in_vars[2], id->out_img_tptr,
660  SpvStorageClassUniformConstant, 0);
661 }
662 
663 static int insert_vmat_linear(const SwsOp *op, SPICtx *spi, SPIRVIDs *id,
664  int data, int const_off)
665 {
666  data = spi_OpMatrixTimesVector(spi, id->f32vec4_type,
667  id->const_ids[const_off + 4*5],
668  data);
669  return spi_OpFAdd(spi, id->f32vec4_type,
670  id->const_ids[const_off + 4*5 + 1 + 4], data);
671 }
672 
673 static int insert_bitexact_linear(const SwsOp *op, SPICtx *spi, SPIRVIDs *id,
674  int data, int linear_ops_idx, int const_off)
675 {
676  int type_s = op->type == SWS_PIXEL_F32 ? id->f32_type : id->u32_type;
677  int type_v = op->type == SWS_PIXEL_F32 ? id->f32vec4_type : id->u32vec4_type;
678 
679  int tmp[4];
680  tmp[0] = spi_OpCompositeExtract(spi, type_s, data, 0);
681  tmp[1] = spi_OpCompositeExtract(spi, type_s, data, 1);
682  tmp[2] = spi_OpCompositeExtract(spi, type_s, data, 2);
683  tmp[3] = spi_OpCompositeExtract(spi, type_s, data, 3);
684 
685  int off = spi_reserve(spi, 0); /* Current offset */
686  spi->off = id->linear_deco_off[linear_ops_idx];
687  for (int i = 0; i < id->linear_deco_ops[linear_ops_idx]; i++)
688  spi_OpDecorate(spi, spi->id + i, SpvDecorationNoContraction);
689  spi->off = off;
690 
691  int res[4];
692  for (int j = 0; j < 4; j++) {
693  res[j] = op->type == SWS_PIXEL_F32 ? id->f32_0 : id->u32_cid[0];
694  if (op->lin.m[j][0].num)
695  res[j] = spi_OpFMul(spi, type_s, tmp[0],
696  id->const_ids[const_off + j*5 + 0]);
697 
698  if (op->lin.m[j][0].num && op->lin.m[j][4].num)
699  res[j] = spi_OpFAdd(spi, type_s,
700  id->const_ids[const_off + 4*5 + 1 + j], res[j]);
701  else if (op->lin.m[j][4].num)
702  res[j] = id->const_ids[const_off + 4*5 + 1 + j];
703 
704  for (int i = 1; i < 4; i++) {
705  if (!op->lin.m[j][i].num)
706  continue;
707 
708  int v = spi_OpFMul(spi, type_s, tmp[i],
709  id->const_ids[const_off + j*5 + i]);
710  if (op->lin.m[j][0].num || op->lin.m[j][4].num)
711  res[j] = spi_OpFAdd(spi, type_s, res[j], v);
712  else
713  res[j] = v;
714  }
715  }
716 
717  return spi_OpCompositeConstruct(spi, type_v,
718  res[0], res[1], res[2], res[3]);
719 }
720 
721 static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, FFVulkanOpsCtx *s,
722  SwsOpList *ops, FFVulkanShader *shd)
723 {
724  uint8_t spvbuf[1024*16];
725  SPICtx spi_context = { 0 }, *spi = &spi_context;
726  SPIRVIDs spid_data = { 0 }, *id = &spid_data;
727  spi_init(spi, spvbuf, sizeof(spvbuf));
728 
729  /* Interlaced formats are not currently supported */
730  if (ops->src.interlaced || ops->dst.interlaced)
731  return AVERROR(ENOTSUP);
732 
733  ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
734  (uint32_t []) { 32, 32, 1 }, 0);
735  shd->precompiled = 0;
736 
737  /* Image ops, to determine types */
738  const SwsOp *op_w = ff_sws_op_list_output(ops);
739  int out_img_count = op_w->rw.packed ? 1 : op_w->rw.elems;
740  p->dst_rep = op_w->type == SWS_PIXEL_F32 ? FF_VK_REP_FLOAT : FF_VK_REP_UINT;
741 
742  const SwsOp *op_r = ff_sws_op_list_input(ops);
743  int in_img_count = op_r ? op_r->rw.packed ? 1 : op_r->rw.elems : 0;
744  if (op_r)
745  p->src_rep = op_r->type == SWS_PIXEL_F32 ? FF_VK_REP_FLOAT : FF_VK_REP_UINT;
746 
748  {
749  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
750  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
751  .elems = 4,
752  },
753  {
754  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
755  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
756  .elems = 4,
757  },
758  };
759  ff_vk_shader_add_descriptor_set(&s->vkctx, shd, desc_set, 2, 0, 0);
760 
761  /* Create dither buffers */
762  int err = create_bufs(s, p, ops);
763  if (err < 0)
764  return err;
765 
766  /* Entrypoint inputs: gl_GlobalInvocationID, input and output images, dither */
767  id->in_vars[0] = spi_get_id(spi);
768  id->in_vars[1] = spi_get_id(spi);
769  id->in_vars[2] = spi_get_id(spi);
770 
771  /* Create dither buffer descriptor set */
772  id->nb_dither_bufs = 0;
773  for (int n = 0; n < ops->num_ops; n++) {
774  const SwsOp *op = &ops->ops[n];
775  if (op->op != SWS_OP_DITHER)
776  continue;
777 
778  id->dither[id->nb_dither_bufs].size = 1 << op->dither.size_log2;
779  id->dither[id->nb_dither_bufs].arr_1d_id = spi_get_id(spi);
780  id->dither[id->nb_dither_bufs].arr_2d_id = spi_get_id(spi);
781  id->dither[id->nb_dither_bufs].struct_id = spi_get_id(spi);
782  id->dither[id->nb_dither_bufs].id = spi_get_id(spi);
783  id->in_vars[3 + id->nb_dither_bufs] = id->dither[id->nb_dither_bufs].id;
784 
785  desc_set[id->nb_dither_bufs++] = (FFVulkanDescriptorSetBinding) {
786  .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
787  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
788  };
789  }
790  if (id->nb_dither_bufs)
791  ff_vk_shader_add_descriptor_set(&s->vkctx, shd, desc_set,
792  id->nb_dither_bufs, 1, 0);
793 
794  /* Define shader header sections */
795  define_shader_header(sws, shd, ops, spi, id);
796  define_shader_consts(sws, ops, spi, id);
797  define_shader_bindings(ops, spi, id, in_img_count, out_img_count);
798 
799  /* Main function starts here */
800  spi_OpFunction(spi, id->ep, id->void_type, 0, id->void_fn_type);
801  spi_OpLabel(spi, spi_get_id(spi));
802 
803  /* Load input image handles */
804  int in_img[4] = { 0 };
805  for (int i = 0; i < in_img_count; i++) {
806  /* Deref array and then the pointer */
807  int img = spi_OpAccessChain(spi, id->in_img_sptr,
808  id->in_vars[1], id->u32_cid[i]);
809  in_img[i] = spi_OpLoad(spi, id->in_img_type, img,
810  SpvMemoryAccessMaskNone, 0);
811  }
812 
813  /* Load output image handles */
814  int out_img[4];
815  for (int i = 0; i < out_img_count; i++) {
816  int img = spi_OpAccessChain(spi, id->out_img_sptr,
817  id->in_vars[2], id->u32_cid[i]);
818  out_img[i] = spi_OpLoad(spi, id->out_img_type, img,
819  SpvMemoryAccessMaskNone, 0);
820  }
821 
822  /* Load gl_GlobalInvocationID */
823  int gid = spi_OpLoad(spi, id->u32vec3_type, id->in_vars[0],
824  SpvMemoryAccessMaskNone, 0);
825 
826  /* ivec2(gl_GlobalInvocationID.xy) */
827  gid = spi_OpVectorShuffle(spi, id->u32vec2_type, gid, gid, 0, 1);
828  int gi2 = spi_OpBitcast(spi, id->i32vec2_type, gid);
829 
830  /* imageSize(out_img[0]); */
831  int img1_s = spi_OpImageQuerySize(spi, id->i32vec2_type, out_img[0]);
832  int scmp = spi_OpSGreaterThanEqual(spi, id->bvec2_type, gi2, img1_s);
833  scmp = spi_OpAny(spi, id->b_type, scmp);
834 
835  /* if (out of bounds) return */
836  int quit_label = spi_get_id(spi), merge_label = spi_get_id(spi);
837  spi_OpSelectionMerge(spi, merge_label, SpvSelectionControlMaskNone);
838  spi_OpBranchConditional(spi, scmp, quit_label, merge_label, 0);
839 
840  spi_OpLabel(spi, quit_label);
841  spi_OpReturn(spi); /* Quit if out of bounds here */
842  spi_OpLabel(spi, merge_label);
843 
844  /* Initialize main data state */
845  int data;
846  if (ops->ops[0].type == SWS_PIXEL_F32)
847  data = spi_OpCompositeConstruct(spi, id->f32vec4_type,
848  id->f32_p, id->f32_p,
849  id->f32_p, id->f32_p);
850  else
851  data = spi_OpCompositeConstruct(spi, id->u32vec4_type,
852  id->u32_p, id->u32_p,
853  id->u32_p, id->u32_p);
854 
855  /* Keep track of which constant/buffer to use */
856  int nb_const_ids = 0;
857  int nb_dither_bufs = 0;
858  int nb_linear_ops = 0;
859 
860  /* Operations */
861  for (int n = 0; n < ops->num_ops; n++) {
862  const SwsOp *op = &ops->ops[n];
863  SwsPixelType cur_type = op->op == SWS_OP_CONVERT ?
864  op->convert.to : op->type;
865  int type_v = cur_type == SWS_PIXEL_F32 ?
866  id->f32vec4_type : id->u32vec4_type;
867  int type_s = cur_type == SWS_PIXEL_F32 ?
868  id->f32_type : id->u32_type;
869  int uid = cur_type == SWS_PIXEL_F32 ?
870  id->f32_p : id->u32_p;
871 
872  switch (op->op) {
873  case SWS_OP_READ:
874  if (op->rw.frac || op->rw.filter) {
875  return AVERROR(ENOTSUP);
876  } else if (op->rw.packed) {
877  data = spi_OpImageRead(spi, type_v, in_img[ops->plane_src[0]],
878  gid, SpvImageOperandsMaskNone);
879  } else {
880  int tmp[4] = { uid, uid, uid, uid };
881  for (int i = 0; i < op->rw.elems; i++) {
882  tmp[i] = spi_OpImageRead(spi, type_v,
883  in_img[ops->plane_src[i]], gid,
884  SpvImageOperandsMaskNone);
885  tmp[i] = spi_OpCompositeExtract(spi, type_s, tmp[i], 0);
886  }
887  data = spi_OpCompositeConstruct(spi, type_v,
888  tmp[0], tmp[1], tmp[2], tmp[3]);
889  }
890  break;
891  case SWS_OP_WRITE:
892  if (op->rw.frac || op->rw.filter) {
893  return AVERROR(ENOTSUP);
894  } else if (op->rw.packed) {
895  spi_OpImageWrite(spi, out_img[ops->plane_dst[0]], gid, data,
896  SpvImageOperandsMaskNone);
897  } else {
898  for (int i = 0; i < op->rw.elems; i++) {
899  int tmp = spi_OpCompositeExtract(spi, type_s, data, i);
900  tmp = spi_OpCompositeConstruct(spi, type_v, tmp, tmp, tmp, tmp);
901  spi_OpImageWrite(spi, out_img[ops->plane_dst[i]], gid, tmp,
902  SpvImageOperandsMaskNone);
903  }
904  }
905  break;
906  case SWS_OP_CLEAR:
907  for (int i = 0; i < 4; i++) {
908  if (!op->clear.value[i].den)
909  continue;
910  data = spi_OpCompositeInsert(spi, type_v,
911  id->const_ids[nb_const_ids++],
912  data, i);
913  }
914  break;
915  case SWS_OP_SWIZZLE:
916  data = spi_OpVectorShuffle(spi, type_v, data, data,
917  op->swizzle.in[0],
918  op->swizzle.in[1],
919  op->swizzle.in[2],
920  op->swizzle.in[3]);
921  break;
922  case SWS_OP_CONVERT:
923  if (ff_sws_pixel_type_is_int(cur_type) && op->convert.expand)
924  data = spi_OpIMul(spi, type_v, data, id->const_ids[nb_const_ids++]);
925  else if (op->type == SWS_PIXEL_F32 && type_s == id->u32_type)
926  data = spi_OpConvertFToU(spi, type_v, data);
927  else if (op->type != SWS_PIXEL_F32 && type_s == id->f32_type)
928  data = spi_OpConvertUToF(spi, type_v, data);
929  break;
930  case SWS_OP_LSHIFT:
931  data = spi_OpShiftLeftLogical(spi, type_v, data,
932  id->const_ids[nb_const_ids++]);
933  break;
934  case SWS_OP_RSHIFT:
935  data = spi_OpShiftRightLogical(spi, type_v, data,
936  id->const_ids[nb_const_ids++]);
937  break;
938  case SWS_OP_SCALE:
939  if (op->type == SWS_PIXEL_F32)
940  data = spi_OpFMul(spi, type_v, data,
941  id->const_ids[nb_const_ids++]);
942  else
943  data = spi_OpIMul(spi, type_v, data,
944  id->const_ids[nb_const_ids++]);
945  break;
946  case SWS_OP_MIN:
947  case SWS_OP_MAX: {
948  int t = op->type == SWS_PIXEL_F32 ?
949  op->op == SWS_OP_MIN ? GLSLstd450FMin : GLSLstd450FMax :
950  op->op == SWS_OP_MIN ? GLSLstd450UMin : GLSLstd450UMax;
951  for (int i = 0; i < 4; i++) {
952  if (!op->clamp.limit[i].den)
953  continue;
954  int tmp = spi_OpCompositeExtract(spi, type_s, data, i);
955  tmp = spi_OpExtInst(spi, type_s, id->glfn, t,
956  tmp, id->const_ids[nb_const_ids++]);
957  data = spi_OpCompositeInsert(spi, type_v, tmp, data, i);
958  }
959  break;
960  }
961  case SWS_OP_DITHER: {
962  int did = nb_dither_bufs++;
963  int x_id = spi_OpCompositeExtract(spi, id->u32_type, gid, 0);
964  int y_pos = spi_OpCompositeExtract(spi, id->u32_type, gid, 1);
965  x_id = spi_OpBitwiseAnd(spi, id->u32_type, x_id,
966  id->dither[did].mask_id);
967  for (int i = 0; i < 4; i++) {
968  if (op->dither.y_offset[i] < 0)
969  continue;
970 
971  int y_id = spi_OpIAdd(spi, id->u32_type, y_pos,
972  id->const_ids[nb_const_ids++]);
973  y_id = spi_OpBitwiseAnd(spi, id->u32_type, y_id,
974  id->dither[did].mask_id);
975 
976  int ptr = spi_OpAccessChain(spi, id->dither_ptr_elem_id,
977  id->dither[did].id, id->u32_cid[0],
978  y_id, x_id);
979  int val = spi_OpLoad(spi, id->f32_type, ptr,
980  SpvMemoryAccessMaskNone, 0);
981 
982  int tmp = spi_OpCompositeExtract(spi, type_s, data, i);
983  tmp = spi_OpFAdd(spi, type_s, tmp, val);
984  data = spi_OpCompositeInsert(spi, type_v, tmp, data, i);
985  }
986  break;
987  }
988  case SWS_OP_LINEAR: {
989  if (sws->flags & SWS_BITEXACT)
990  data = insert_bitexact_linear(op, spi, id, data, nb_linear_ops, nb_const_ids);
991  else
992  data = insert_vmat_linear(op, spi, id, data, nb_const_ids);
993  nb_linear_ops++;
994  nb_const_ids += 5*5 + 1;
995  break;
996  }
997  case SWS_OP_UNPACK:
998  if (ops->src.format == AV_PIX_FMT_X2BGR10)
999  data = spi_OpVectorShuffle(spi, type_v, data, data, 3, 2, 1, 0);
1000  else
1001  data = spi_OpVectorShuffle(spi, type_v, data, data, 3, 0, 1, 2);
1002  break;
1003  case SWS_OP_PACK:
1004  if (ops->dst.format == AV_PIX_FMT_X2BGR10)
1005  data = spi_OpVectorShuffle(spi, type_v, data, data, 3, 2, 1, 0);
1006  else
1007  data = spi_OpVectorShuffle(spi, type_v, data, data, 1, 2, 3, 0);
1008  break;
1009  default:
1010  return AVERROR(ENOTSUP);
1011  }
1012  }
1013 
1014  /* Return and finalize */
1015  spi_OpReturn(spi);
1016  spi_OpFunctionEnd(spi);
1017 
1018  int len = spi_end(spi);
1019  if (len < 0)
1020  return AVERROR_INVALIDDATA;
1021 
1022  return ff_vk_shader_link(&s->vkctx, shd, spvbuf, len, "main");
1023 }
1024 #endif
1025 
1026 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1027 static void add_desc_read_write(FFVulkanDescriptorSetBinding *out_desc,
1028  enum FFVkShaderRepFormat *out_rep,
1029  const SwsOp *op)
1030 {
1031  const char *img_type = op->type == SWS_PIXEL_F32 ? "rgba32f" :
1032  op->type == SWS_PIXEL_U32 ? "rgba32ui" :
1033  op->type == SWS_PIXEL_U16 ? "rgba16ui" :
1034  "rgba8ui";
1035 
1036  *out_desc = (FFVulkanDescriptorSetBinding) {
1037  .name = op->op == SWS_OP_WRITE ? "dst_img" : "src_img",
1038  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1039  .mem_layout = img_type,
1040  .mem_quali = op->op == SWS_OP_WRITE ? "writeonly" : "readonly",
1041  .dimensions = 2,
1042  .elems = 4,
1043  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1044  };
1045 
1046  *out_rep = op->type == SWS_PIXEL_F32 ? FF_VK_REP_FLOAT : FF_VK_REP_UINT;
1047 }
1048 
/* QSTR is a printf-style fragment that prints a rational as "(num/den)";
 * QTYPE(Q) expands to the three matching arguments. The third argument
 * appends ".0f" to the denominator when cur_type is SWS_PIXEL_F32 and is
 * empty otherwise. QTYPE mentions Q twice and relies on a variable named
 * cur_type being in scope where it is expanded. */
1049 #define QSTR "(%i/%i%s)"
1050 #define QTYPE(Q) (Q).num, (Q).den, cur_type == SWS_PIXEL_F32 ? ".0f" : ""
1051 
1052 static void read_glsl(SwsOpList *ops, const SwsOp *op, FFVulkanShader *shd,
1053  int idx, const char *type_name,
1054  const char *type_v, const char *type_s)
1055 {
1056  const SwsFilterWeights *wd = op->rw.kernel;
1057  if (op->rw.filter) {
1058  const char *axis = op->rw.filter == SWS_OP_FILTER_H ? "pos.x" : "pos.y";
1059  const char *coord_x = op->rw.filter == SWS_OP_FILTER_H ? "o + i" : "pos.x";
1060  const char *coord_y = op->rw.filter == SWS_OP_FILTER_H ? "pos.y" : "o + i";
1061  GLSLC(1, tmp = vec4(0); );
1062  av_bprintf(&shd->src, " int o = filter_o%i[%s];\n", idx, axis);
1063  av_bprintf(&shd->src, " for (int i = 0; i < %i; i++) {\n",
1064  wd->filter_size);
1065  av_bprintf(&shd->src, " float w = filter_w%i[%s][i];\n",
1066  idx, axis);
1067  if (op->rw.packed) {
1068  GLSLF(2, tmp += w * %s(imageLoad(src_img[%i], ivec2(%s, %s))); ,
1069  type_v, ops->plane_src[0], coord_x, coord_y);
1070  } else {
1071  for (int i = 0; i < op->rw.elems; i++)
1072  GLSLF(2,
1073  tmp.%c += w * %s(imageLoad(src_img[%i], ivec2(%s, %s))[0]); ,
1074  "xyzw"[i], type_s, ops->plane_src[i], coord_x, coord_y);
1075  }
1076  GLSLC(1, } );
1077  GLSLC(1, f32 = tmp; );
1078  } else {
1079  if (op->rw.packed) {
1080  GLSLF(1, %s = %s(imageLoad(src_img[%i], pos)); ,
1081  type_name, type_v, ops->plane_src[0]);
1082  } else {
1083  for (int i = 0; i < op->rw.elems; i++)
1084  GLSLF(1, %s.%c = %s(imageLoad(src_img[%i], pos)[0]); ,
1085  type_name, "xyzw"[i], type_s, ops->plane_src[i]);
1086  }
1087  }
1088 }
1089 
/**
 * Generate GLSL source for an operation list, compile it to SPIR-V through
 * s->spvc and link the result into shd.
 *
 * The first descriptor set added holds the source image binding (only when
 * the list has an input op) and the destination image binding. When the
 * list contains dither or filter ops, one uniform-buffer binding per such
 * op is collected for a further descriptor set.
 *
 * Returns 0 on success. AVERROR(ENOTSUP) is returned for interlaced
 * formats, fractional reads/writes, filtered writes and op types without a
 * case in the switch below. Errors from ff_vk_shader_init(), create_bufs(),
 * the shader compiler and ff_vk_shader_link() are passed through.
 */
1090  static int add_ops_glsl(SwsContext *sws, VulkanPriv *p, FFVulkanOpsCtx *s,
1091  SwsOpList *ops, FFVulkanShader *shd)
1092 {
1093  int err;
1094  uint8_t *spv_data;
1095  size_t spv_len;
1096  void *spv_opaque = NULL;
1097 
1098  /* Interlaced formats are not currently supported */
1099  if (ops->src.interlaced || ops->dst.interlaced)
1100  return AVERROR(ENOTSUP);
1101 
/* Compute shader named "sws_pass" with a 32x32x1 local group size */
1102  err = ff_vk_shader_init(&s->vkctx, shd, "sws_pass",
1103  VK_SHADER_STAGE_COMPUTE_BIT,
1104  NULL, 0, 32, 32, 1, 0);
1105  if (err < 0)
1106  return err;
1107 
1108  int nb_desc = 0;
/* NOTE(review): buf_desc is used below but its declaration is not visible
 * in this listing (the numbering skips 1109) - confirm against the real
 * source file. */
1110 
1111  const SwsOp *read = ff_sws_op_list_input(ops);
1112  const SwsOp *write = ff_sws_op_list_output(ops);
/* The source binding is optional; the destination binding is added
 * unconditionally, so write is dereferenced without a NULL check. */
1113  if (read)
1114  add_desc_read_write(&buf_desc[nb_desc++], &p->src_rep, read);
1115  add_desc_read_write(&buf_desc[nb_desc++], &p->dst_rep, write);
1116  ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc, nb_desc, 0, 0);
1117 
1118  err = create_bufs(s, p, ops);
1119  if (err < 0)
1120  return err;
1121 
/* Second pass over buf_desc: one uniform buffer per dither/filter op.
 * Buffer and array names carry the op index n so the statements emitted
 * further down can refer to them. */
1122  nb_desc = 0;
1123  char data_buf_name[MAX_DATA_BUFS][256];
1124  char data_str_name[MAX_DATA_BUFS][256];
1125  for (int n = 0; n < ops->num_ops; n++) {
1126  const SwsOp *op = &ops->ops[n];
1127  if (op->op == SWS_OP_DITHER) {
1128  int size = (1 << op->dither.size_log2);
1129  av_assert0(size < 8192);
1130  snprintf(data_buf_name[nb_desc], 256, "dither_buf%i", n);
1131  snprintf(data_str_name[nb_desc], 256, "float dither_mat%i[%i][%i];",
1132  n, size, size);
1133  buf_desc[nb_desc] = (FFVulkanDescriptorSetBinding) {
1134  .name = data_buf_name[nb_desc],
1135  .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1136  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1137  .mem_layout = "scalar",
1138  .buf_content = data_str_name[nb_desc],
1139  };
1140  nb_desc++;
1141  } else if (op->op == SWS_OP_FILTER_H || op->op == SWS_OP_FILTER_V ||
1142  ((op->op == SWS_OP_READ || op->op == SWS_OP_WRITE) &&
1143  op->rw.filter)) {
/* Read/write ops keep their kernel in rw.kernel, standalone filter ops
 * in filter.kernel */
1144  const SwsFilterWeights *wd = (op->op == SWS_OP_READ ||
1145  op->op == SWS_OP_WRITE) ?
1146  op->rw.kernel : op->filter.kernel;
1147  snprintf(data_buf_name[nb_desc], 256, "filter_buf%i", n);
1148  snprintf(data_str_name[nb_desc], 256,
1149  "float filter_w%i[%i][%i];\n"
1150  " int filter_o%i[%i];",
1151  n, wd->dst_size, wd->filter_size,
1152  n, wd->dst_size);
1153  buf_desc[nb_desc] = (FFVulkanDescriptorSetBinding) {
1154  .name = data_buf_name[nb_desc],
1155  .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1156  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1157  .mem_layout = "scalar",
1158  .buf_content = data_str_name[nb_desc],
1159  };
1160  nb_desc++;
1161  }
1162  }
/* NOTE(review): the first line of the call guarded by this condition is
 * missing from the listing (the numbering skips 1164); presumably it is
 * ff_vk_shader_add_descriptor_set() on buf_desc - confirm. */
1163  if (nb_desc)
1165  nb_desc, 1, 0);
1166 
/* Shader prologue: invocations outside the destination image return early,
 * then one working vector per pixel type plus a scratch vector is declared */
1167  GLSLC(0, void main() );
1168  GLSLC(0, { );
1169  GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
1170  GLSLC(1, ivec2 size = imageSize(dst_img[0]); );
1171  GLSLC(1, if (any(greaterThanEqual(pos, size))) );
1172  GLSLC(2, return; );
1173  GLSLC(0, );
1174  GLSLC(1, u8vec4 u8; );
1175  GLSLC(1, u16vec4 u16; );
1176  GLSLC(1, u32vec4 u32; );
1177  GLSLC(1, precise f32vec4 f32; );
1178  GLSLC(1, precise f32vec4 tmp; );
1179  GLSLC(0, );
1180 
/* Emit the statements for each op in list order. For convert ops cur_type
 * is the destination type, so type_name/type_v/type_s describe the variable
 * being written. */
1181  for (int n = 0; n < ops->num_ops; n++) {
1182  const SwsOp *op = &ops->ops[n];
1183  SwsPixelType cur_type = op->op == SWS_OP_CONVERT ? op->convert.to :
1184  op->type;
1185  const char *type_name = ff_sws_pixel_type_name(cur_type);
1186  const char *type_v = cur_type == SWS_PIXEL_F32 ? "f32vec4" :
1187  cur_type == SWS_PIXEL_U32 ? "u32vec4" :
1188  cur_type == SWS_PIXEL_U16 ? "u16vec4" : "u8vec4";
1189  const char *type_s = cur_type == SWS_PIXEL_F32 ? "float" :
1190  cur_type == SWS_PIXEL_U32 ? "uint32_t" :
1191  cur_type == SWS_PIXEL_U16 ? "uint16_t" : "uint8_t";
1192  av_bprintf(&shd->src, " // %s\n", ff_sws_op_type_name(op->op));
1193 
1194  switch (op->op) {
1195  case SWS_OP_READ: {
1196  if (op->rw.frac)
1197  return AVERROR(ENOTSUP);
1198  read_glsl(ops, op, shd, n, type_name, type_v, type_s);
1199  break;
1200  }
1201  case SWS_OP_WRITE: {
1202  if (op->rw.frac || op->rw.filter) {
1203  return AVERROR(ENOTSUP);
1204  } else if (op->rw.packed) {
1205  GLSLF(1, imageStore(dst_img[%i], pos, %s(%s)); ,
1206  ops->plane_dst[0], type_v, type_name);
1207  } else {
1208  for (int i = 0; i < op->rw.elems; i++)
1209  GLSLF(1, imageStore(dst_img[%i], pos, %s(%s[%i])); ,
1210  ops->plane_dst[i], type_v, type_name, i);
1211  }
1212  break;
1213  }
1214  case SWS_OP_SWIZZLE: {
1215  av_bprintf(&shd->src, " %s = %s.", type_name, type_name);
1216  for (int i = 0; i < 4; i++)
1217  av_bprintf(&shd->src, "%c", "xyzw"[op->swizzle.in[i]]);
1218  av_bprintf(&shd->src, ";\n");
1219  break;
1220  }
1221  case SWS_OP_CLEAR: {
1222  for (int i = 0; i < 4; i++) {
1223  if (!SWS_COMP_TEST(op->clear.mask, i))
1224  continue;
1225  av_bprintf(&shd->src, " %s.%c = %s"QSTR";\n", type_name,
1226  "xyzw"[i], type_s, QTYPE(op->clear.value[i]));
1227  }
1228  break;
1229  }
1230  case SWS_OP_SCALE:
1231  av_bprintf(&shd->src, " %s = %s * "QSTR";\n",
1232  type_name, type_name, QTYPE(op->scale.factor));
1233  break;
1234  case SWS_OP_MIN:
1235  case SWS_OP_MAX:
/* Components whose limit has a zero denominator are left untouched */
1236  for (int i = 0; i < 4; i++) {
1237  if (!op->clamp.limit[i].den)
1238  continue;
1239  av_bprintf(&shd->src, " %s.%c = %s(%s.%c, "QSTR");\n",
1240  type_name, "xyzw"[i],
1241  op->op == SWS_OP_MIN ? "min" : "max",
1242  type_name, "xyzw"[i], QTYPE(op->clamp.limit[i]));
1243  }
1244  break;
1245  case SWS_OP_LSHIFT:
1246  case SWS_OP_RSHIFT:
1247  av_bprintf(&shd->src, " %s %s= %i;\n", type_name,
1248  op->op == SWS_OP_LSHIFT ? "<<" : ">>", op->shift.amount);
1249  break;
1250  case SWS_OP_CONVERT:
/* Expanding integer conversions multiply/divide by the rational from
 * ff_sws_pixel_expand(); all others are a plain vector cast */
1251  if (ff_sws_pixel_type_is_int(cur_type) && op->convert.expand) {
1252  const AVRational sc = ff_sws_pixel_expand(op->type, op->convert.to);
1253  av_bprintf(&shd->src, " %s = %s((%s*%i)/%i);\n",
1254  type_name, type_v, ff_sws_pixel_type_name(op->type),
1255  sc.num, sc.den);
1256  } else {
1257  av_bprintf(&shd->src, " %s = %s(%s);\n",
1258  type_name, type_v, ff_sws_pixel_type_name(op->type));
1259  }
1260  break;
1261  case SWS_OP_DITHER: {
/* size is 1 << size_log2, so (size - 1) serves as a wrap-around mask;
 * components with a negative y_offset are skipped */
1262  int size = (1 << op->dither.size_log2);
1263  for (int i = 0; i < 4; i++) {
1264  if (op->dither.y_offset[i] < 0)
1265  continue;
1266  av_bprintf(&shd->src, " %s.%c += dither_mat%i[(pos.y + %i) & %i]"
1267  "[pos.x & %i];\n",
1268  type_name, "xyzw"[i], n,
1269  op->dither.y_offset[i], size - 1,
1270  size - 1);
1271  }
1272  break;
1273  }
1274  case SWS_OP_LINEAR:
/* Each output row starts from column 4 of the matrix (or 0), then adds
 * the non-zero coefficients of columns 0..3 times the f32 input */
1275  for (int i = 0; i < 4; i++) {
1276  if (op->lin.m[i][4].num)
1277  av_bprintf(&shd->src, " tmp.%c = "QSTR";\n", "xyzw"[i],
1278  QTYPE(op->lin.m[i][4]));
1279  else
1280  av_bprintf(&shd->src, " tmp.%c = 0;\n", "xyzw"[i]);
1281  for (int j = 0; j < 4; j++) {
1282  if (!op->lin.m[i][j].num)
1283  continue;
1284  av_bprintf(&shd->src, " tmp.%c += f32.%c*"QSTR";\n",
1285  "xyzw"[i], "xyzw"[j], QTYPE(op->lin.m[i][j]));
1286  }
1287  }
1288  av_bprintf(&shd->src, " f32 = tmp;\n");
1289  break;
1290  case SWS_OP_UNPACK:
1291  /* MSB->LSB indexing */
1292  av_bprintf(&shd->src, " %s = %s.%s;\n", type_name, type_name,
1293  ops->src.format == AV_PIX_FMT_X2BGR10 ? "wzyx" : "wxyz");
1294  break;
1295  case SWS_OP_PACK:
1296  /* LSB->MSB indexing */
1297  av_bprintf(&shd->src, " %s = %s.%s;\n", type_name, type_name,
1298  ops->dst.format == AV_PIX_FMT_X2BGR10 ? "wzyx" : "yzwx");
1299  break;
1300  default:
1301  return AVERROR(ENOTSUP);
1302  }
1303  }
1304 
1305  GLSLC(0, } );
1306 
/* NOTE(review): spv_opaque is only released after a successful compile;
 * confirm compile_shader() cannot leave it set when it returns an error. */
1307  err = s->spvc->compile_shader(&s->vkctx, s->spvc, shd,
1308  &spv_data, &spv_len, "main",
1309  &spv_opaque);
1310  if (err < 0)
1311  return err;
1312 
1313  err = ff_vk_shader_link(&s->vkctx, shd, spv_data, spv_len, "main");
1314 
/* The compiler output is freed whether or not linking succeeded */
1315  if (spv_opaque)
1316  s->spvc->free_shader(s->spvc, &spv_opaque);
1317 
1318  if (err < 0)
1319  return err;
1320 
1321  return 0;
1322 }
1323 #endif
1324 
1325 static int compile(SwsContext *sws, SwsOpList *ops, SwsCompiledOp *out, int glsl)
1326 {
1327  int err;
1328  SwsInternal *c = sws_internal(sws);
1329  FFVulkanOpsCtx *s = c->hw_priv;
1330  if (!s)
1331  return AVERROR(ENOTSUP);
1332 
1333  VulkanPriv *p = av_mallocz(sizeof(*p));
1334  if (!p)
1335  return AVERROR(ENOMEM);
1336  p->s = av_refstruct_ref(c->hw_priv);
1337 
1338  err = ff_vk_exec_pool_init(&s->vkctx, s->qf, &p->e, 1,
1339  0, 0, 0, NULL);
1340  if (err < 0)
1341  goto fail;
1342 
1343  if (ops->src.format == AV_PIX_FMT_BGR0 ||
1344  ops->src.format == AV_PIX_FMT_BGRA ||
1345  ops->dst.format == AV_PIX_FMT_BGR0 ||
1346  ops->dst.format == AV_PIX_FMT_BGRA) {
1347  VkFormatProperties2 prop = {
1348  .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
1349  };
1350  FFVulkanFunctions *vk = &s->vkctx.vkfn;
1351  vk->GetPhysicalDeviceFormatProperties2(s->vkctx.hwctx->phys_dev,
1352  VK_FORMAT_B8G8R8A8_UNORM,
1353  &prop);
1354  if (!(prop.formatProperties.optimalTilingFeatures &
1355  VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT)) {
1356  err = AVERROR(ENOTSUP);
1357  goto fail;
1358  }
1359  }
1360 
1361  if (glsl) {
1362  err = AVERROR(ENOTSUP);
1363 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1364  err = add_ops_glsl(sws, p, s, ops, &p->shd);
1365 #endif
1366  } else {
1367  err = AVERROR(ENOTSUP);
1368 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1369  err = add_ops_spirv(sws, p, s, ops, &p->shd);
1370 #endif
1371  }
1372  if (err < 0)
1373  goto fail;
1374 
1375  err = ff_vk_shader_register_exec(&s->vkctx, &p->e, &p->shd);
1376  if (err < 0)
1377  goto fail;
1378 
1379  for (int i = 0; i < p->nb_data_bufs; i++)
1380  ff_vk_shader_update_desc_buffer(&s->vkctx, &p->e.contexts[0], &p->shd,
1381  1, i, 0, &p->data_bufs[i],
1382  0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED);
1383 
1384  *out = (SwsCompiledOp) {
1385  .opaque = true,
1386  .func_opaque = process,
1387  .priv = p,
1388  .free = free_fn,
1389  };
1390 
1391  return 0;
1392 
1393 fail:
1394  free_fn(p);
1395  return err;
1396 }
1397 
1398 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1399 static int compile_spirv(SwsContext *sws, SwsOpList *ops, SwsCompiledOp *out)
1400 {
1401  return compile(sws, ops, out, 0);
1402 }
1403 
1404 const SwsOpBackend backend_spirv = {
1405  .name = "spirv",
1406  .compile = compile_spirv,
1407  .hw_format = AV_PIX_FMT_VULKAN,
1408 };
1409 #endif
1410 
1411 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1412 static int compile_glsl(SwsContext *sws, SwsOpList *ops, SwsCompiledOp *out)
1413 {
1414  return compile(sws, ops, out, 1);
1415 }
1416 
1417 const SwsOpBackend backend_glsl = {
1418  .name = "glsl",
1419  .compile = compile_glsl,
1420  .hw_format = AV_PIX_FMT_VULKAN,
1421 };
1422 #endif
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:50
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:1050
spi_OpExecutionMode
static void spi_OpExecutionMode(SPICtx *spi, int entry_point_id, SpvExecutionMode mode, int *s, int nb_s)
Definition: spvasm.h:404
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:36
VulkanPriv::e
FFVkExecPool e
Definition: ops.c:98
VulkanPriv::data_bufs
FFVkBuffer data_bufs[MAX_DATA_BUFS]
Definition: ops.c:100
FFVulkanOpsCtx
Copyright (C) 2026 Lynne.
Definition: ops.h:31
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:53
SwsPass
Represents a single filter pass in the scaling graph.
Definition: graph.h:75
VulkanPriv::src_rep
enum FFVkShaderRepFormat src_rep
Definition: ops.c:102
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsFilterWeights::filter_size
int filter_size
The number of source texels to convolve over for each row.
Definition: filters.h:68
uid
UID uid
Definition: mxfenc.c:2488
spi_end
static int spi_end(SPICtx *spi)
Definition: spvasm.h:100
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:58
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:56
spi_OpVariable
static int spi_OpVariable(SPICtx *spi, int var_id, int ptr_type_id, SpvStorageClass storage_class, int initializer_id)
Definition: spvasm.h:536
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
Free a shader.
Definition: vulkan.c:2845
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, VkPipelineStageFlags stage, const char *extensions[], int nb_extensions, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size)
Initialize a shader object, with a specific set of extensions, type+bind, local group size,...
Definition: vulkan.c:2157
out
static FILE * out
Definition: movenc.c:55
create_filter_buf
static int create_filter_buf(FFVulkanOpsCtx *s, VulkanPriv *p, const SwsFilterWeights *wd, FFVkBuffer *buf)
Definition: ops.c:175
compile
static int compile(SwsContext *sws, SwsOpList *ops, SwsCompiledOp *out, int glsl)
Definition: ops.c:1325
MAX_DITHER_BUFS
#define MAX_DITHER_BUFS
Definition: ops.c:92
SwsFormat::interlaced
int interlaced
Definition: format.h:79
spi_OpTypeFunction
static int spi_OpTypeFunction(SPICtx *spi, int return_type_id, const int *args, int nb_args)
Definition: spvasm.h:497
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:671
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:62
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:357
AVRefStructOpaque
RefStruct is an API for creating reference-counted objects with minimal overhead.
Definition: refstruct.h:58
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:243
spi_OpConstantUInt
static int spi_OpConstantUInt(SPICtx *spi, int type_id, uint32_t val)
Definition: spvasm.h:564
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:459
spi_OpFunctionEnd
static void spi_OpFunctionEnd(SPICtx *spi)
Definition: spvasm.h:531
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:603
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:70
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:157
FFVulkanShader::src
AVBPrint src
Definition: vulkan.h:234
data
const char data[16]
Definition: mxf.c:149
spi_OpDecorate
#define spi_OpDecorate(spi, target, deco,...)
Definition: spvasm.h:355
SwsFilterWeights::offsets
int * offsets
The computed source pixel positions for each row of the filter.
Definition: filters.h:84
SwsContext::flags
unsigned flags
Bitmask of SWS_*.
Definition: swscale.h:219
VulkanPriv::s
FFVulkanOpsCtx * s
Definition: ops.c:97
AV_PIX_FMT_BGRA
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:102
ff_vk_init
int ff_vk_init(FFVulkanContext *s, void *log_parent, AVBufferRef *device_ref, AVBufferRef *frames_ref)
Initializes the AVClass, in case this context is not used as the main user's context.
Definition: vulkan.c:2883
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:568
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:37
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:2871
spi_OpAccessChain
#define spi_OpAccessChain(spi, res_type, ptr_id,...)
Definition: spvasm.h:311
spi_OpCompositeExtract
#define spi_OpCompositeExtract(spi, res_type, src,...)
Definition: spvasm.h:319
ff_vk_exec_bind_shader
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, const FFVulkanShader *shd)
Bind a shader.
Definition: vulkan.c:2822
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:131
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:800
FFVkShaderRepFormat
FFVkShaderRepFormat
Returns the format to use for images in shaders.
Definition: vulkan.h:447
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:33
SwsOpList::plane_dst
uint8_t plane_dst[4]
Definition: ops.h:296
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: ops.h:38
fail
#define fail()
Definition: checkasm.h:225
SwsOpList::num_ops
int num_ops
Definition: ops.h:290
SwsDitherOp
Definition: ops.h:186
ff_vk_shader_update_img_array
void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Update a descriptor in a buffer with an image array.
Definition: vulkan.c:2773
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags2 src_stage, VkPipelineStageFlags2 dst_stage, VkAccessFlagBits2 new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:2085
SPICtx
Definition: spvasm.h:52
SWS_COMP_TEST
#define SWS_COMP_TEST(mask, X)
Definition: ops.h:89
ff_vk_shader_register_exec
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd)
Register a shader with an exec pool.
Definition: vulkan.c:2638
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:92
create_dither_buf
static int create_dither_buf(FFVulkanOpsCtx *s, VulkanPriv *p, const SwsDitherOp *dd, FFVkBuffer *buf)
Definition: ops.c:210
val
static double val(void *priv, double ch)
Definition: aeval.c:77
spi_OpTypeBool
static int spi_OpTypeBool(SPICtx *spi)
Definition: spvasm.h:429
AVRational::num
int num
Numerator.
Definition: rational.h:59
spi_OpConstantComposite
#define spi_OpConstantComposite(spi, res_type, src,...)
Definition: spvasm.h:307
refstruct.h
spvasm.h
FFVulkanDescriptorSetBinding::type
VkDescriptorType type
Definition: vulkan.h:114
SwsFrame
Represents a view into a single field of frame data.
Definition: format.h:210
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:45
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
main
int main
Definition: dovi_rpuenc.c:38
spi_reserve
static int spi_reserve(SPICtx *spi, int len)
Definition: spvasm.h:108
SwsPass::priv
void * priv
Definition: graph.h:110
float
float
Definition: af_crystalizer.c:122
MAX_DATA_BUFS
#define MAX_DATA_BUFS
Definition: ops.c:94
ff_sws_vk_init
int ff_sws_vk_init(SwsContext *sws, AVBufferRef *dev_ref)
Definition: ops.c:44
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
s
#define s(width, name)
Definition: cbs_vp9.c:198
spi_init
static void spi_init(SPICtx *spi, uint8_t *spv_buf, int buf_len)
Definition: spvasm.h:86
spi_OpFunction
static void spi_OpFunction(SPICtx *spi, int fn_id, int result_type_id, SpvFunctionControlMask function_control, int function_type_id)
Definition: spvasm.h:508
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
ops.h
spi_OpMemoryModel
static void spi_OpMemoryModel(SPICtx *spi, SpvAddressingModel addressing_model, SpvMemoryModel memory_model)
Definition: spvasm.h:125
ff_vk_exec_wait
void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:573
spi_OpLabel
static int spi_OpLabel(SPICtx *spi, int label_id)
Definition: spvasm.h:519
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
FF_VK_REP_FLOAT
@ FF_VK_REP_FLOAT
Definition: vulkan.h:451
av_refstruct_alloc_ext
static void * av_refstruct_alloc_ext(size_t size, unsigned flags, void *opaque, void(*free_cb)(AVRefStructOpaque opaque, void *obj))
A wrapper around av_refstruct_alloc_ext_c() for the common case of a non-const qualified opaque.
Definition: refstruct.h:94
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
ff_sws_pixel_expand
static AVRational ff_sws_pixel_expand(SwsPixelType from, SwsPixelType to)
Definition: ops_internal.h:31
SPICtx::id
int id
Definition: spvasm.h:59
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:69
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:680
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:73
SPICtx::off
int off
Definition: spvasm.h:55
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:299
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SwsOpBackend
Definition: ops_dispatch.h:130
spi_OpUndef
static int spi_OpUndef(SPICtx *spi, int type_id)
Definition: spvasm.h:414
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
buf_desc
Definition: v4l2.c:128
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:57
SwsOp::dither
SwsDitherOp dither
Definition: ops.h:251
spi_OpBranchConditional
static void spi_OpBranchConditional(SPICtx *spi, int cond_id, int true_label, int false_label, uint32_t branch_weights)
Definition: spvasm.h:641
NULL
#define NULL
Definition: coverity.c:32
spi_OpExtInst
#define spi_OpExtInst(spi, res_type, instr_id, set_id,...)
Definition: spvasm.h:347
SwsFilterWeights::dst_size
int dst_size
Definition: filters.h:90
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
spi_OpTypeImage
static int spi_OpTypeImage(SPICtx *spi, int sampled_type_id, SpvDim dim, int depth, int arrayed, int ms, int sampled, SpvImageFormat image_format)
Definition: spvasm.h:452
ff_vk_shader_link
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, const char *spirv, size_t spirv_len, const char *entrypoint)
Link a shader into an executable.
Definition: vulkan.c:2411
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:74
AV_PIX_FMT_BGR0
@ AV_PIX_FMT_BGR0
packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
Definition: pixfmt.h:265
spi_OpEntryPoint
static int spi_OpEntryPoint(SPICtx *spi, SpvExecutionModel execution_model, const char *name, const int *args, int nb_args)
Definition: spvasm.h:371
spi_OpTypeStruct
#define spi_OpTypeStruct(spi, id,...)
Definition: spvasm.h:351
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
spi_OpSelectionMerge
static void spi_OpSelectionMerge(SPICtx *spi, int merge_block, SpvSelectionControlMask selection_control)
Definition: spvasm.h:633
AV_PIX_FMT_X2BGR10
#define AV_PIX_FMT_X2BGR10
Definition: pixfmt.h:614
FFVulkanDescriptorSetBinding
Definition: vulkan.h:112
SwsDitherOp::size_log2
int size_log2
Definition: ops.h:189
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsOp::type
SwsPixelType type
Definition: ops.h:240
SwsDitherOp::matrix
AVRational * matrix
Definition: ops.h:187
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:460
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:59
VulkanPriv::dst_rep
enum FFVkShaderRepFormat dst_rep
Definition: ops.c:103
SwsOpList::src
SwsFormat src
Definition: ops.h:293
FFVulkanShader
Definition: vulkan.h:225
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:51
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
img
#define img
Definition: vf_colormatrix.c:114
spi_OpTypeArray
static int spi_OpTypeArray(SPICtx *spi, int element_type_id, int id, int length_id)
Definition: spvasm.h:469
FFVkExecContext
Definition: vulkan.h:145
spi_OpExtInstImport
static int spi_OpExtInstImport(SPICtx *spi, const char *name)
Definition: spvasm.h:396
ff_vk_shader_update_desc_buffer
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt)
Update a descriptor in a buffer with a buffer.
Definition: vulkan.c:2786
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:113
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
EnumOpaque::opaque
void * opaque
Definition: ops.c:1089
SwsFormat::format
enum AVPixelFormat format
Definition: format.h:80
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:580
spi_get_id
static int spi_get_id(SPICtx *spi)
Definition: spvasm.h:133
FF_VK_REP_UINT
@ FF_VK_REP_UINT
Definition: vulkan.h:455
process
static void process(const SwsFrame *dst, const SwsFrame *src, int y, int h, const SwsPass *pass)
Definition: ops.c:106
SwsOpList::ops
SwsOp * ops
Definition: ops.h:289
VulkanPriv
Definition: ops.c:96
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:610
spi_OpCompositeConstruct
#define spi_OpCompositeConstruct(spi, res_type, src,...)
Definition: spvasm.h:315
spi_OpImageWrite
static void spi_OpImageWrite(SPICtx *spi, int img_id, int pos_id, int src_id, SpvImageOperandsMask image_operands)
Definition: spvasm.h:668
len
int len
Definition: vorbis_enc_data.h:426
SwsOp
Definition: ops.h:238
spi_OpVectorShuffle
#define spi_OpVectorShuffle(spi, res_type, src1, src2,...)
Definition: spvasm.h:363
ff_vk_free_buf
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
Definition: vulkan.c:1264
SwsInternal
Definition: swscale_internal.h:335
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:2002
spi_OpTypeVoid
static int spi_OpTypeVoid(SPICtx *spi)
Definition: spvasm.h:422
SwsOpList::dst
SwsFormat dst
Definition: ops.h:293
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
SwsCompiledOp
Definition: ops_dispatch.h:100
FFVkExecPool
Definition: vulkan.h:290
pos
unsigned int pos
Definition: spdifenc.c:414
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:122
ff_vk_qf_find
AVVulkanDeviceQueueFamily * ff_vk_qf_find(FFVulkanContext *s, VkQueueFlagBits dev_family, VkVideoCodecOperationFlagBitsKHR vid_ops)
Chooses an appropriate QF.
Definition: vulkan.c:286
id
enum AVCodecID id
Definition: dts2pts.c:550
spi_OpReturn
static void spi_OpReturn(SPICtx *spi)
Definition: spvasm.h:526
ff_vk_shader_add_descriptor_set
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, const FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only)
Add descriptor to a shader.
Definition: vulkan.c:2538
spi_OpImageRead
static int spi_OpImageRead(SPICtx *spi, int result_type_id, int img_id, int pos_id, SpvImageOperandsMask image_operands)
Definition: spvasm.h:655
FFVulkanShader::precompiled
int precompiled
Definition: vulkan.h:230
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:55
AVRational::den
int den
Denominator.
Definition: rational.h:60
SwsReadWriteOp::packed
bool packed
Definition: ops.h:128
spi_OpMemberDecorate
#define spi_OpMemberDecorate(spi, type, target, deco,...)
Definition: spvasm.h:359
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:62
spi_OpCapability
static void spi_OpCapability(SPICtx *spi, SpvCapability capability)
Definition: spvasm.h:119
FFVulkanShader::lg_size
uint32_t lg_size[3]
Definition: vulkan.h:237
spi_OpConstantFloat
static int spi_OpConstantFloat(SPICtx *spi, int type_id, float val)
Definition: spvasm.h:595
SwsFilterWeights::num_weights
size_t num_weights
Definition: filters.h:77
VulkanPriv::shd
FFVulkanShader shd
Definition: ops.c:99
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
free_fn
static void free_fn(void *priv)
Definition: ops.c:164
SwsReadWriteOp::elems
uint8_t elems
Examples: rgba = 4x u8 packed yuv444p = 3x u8 rgb565 = 1x u16 <- use SWS_OP_UNPACK to unpack monow = ...
Definition: ops.h:126
mem.h
spi_OpLoad
static int spi_OpLoad(SPICtx *spi, int result_type_id, int ptr_id, SpvMemoryAccessMask memory_access, int align)
Definition: spvasm.h:607
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
VulkanPriv::nb_data_bufs
int nb_data_bufs
Definition: ops.c:101
spi_OpTypePointer
static int spi_OpTypePointer(SPICtx *spi, SpvStorageClass storage_class, int type_id)
Definition: spvasm.h:487
spi_OpCompositeInsert
#define spi_OpCompositeInsert(spi, res_type, src1, src2,...)
Definition: spvasm.h:367
w
uint8_t w
Definition: llvidencdsp.c:39
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:63
FFVkBuffer
Definition: vulkan.h:125
ff_sws_vk_uninit
static void ff_sws_vk_uninit(AVRefStructOpaque opaque, void *obj)
Tears down an FFVulkanOpsCtx: uninitializes the SPIR-V compiler, if one was created, and then the Vulkan context.
Definition: ops.c:33
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:925
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
sws_internal
static SwsInternal * sws_internal(const SwsContext *sws)
Definition: swscale_internal.h:79
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
create_bufs
static int create_bufs(FFVulkanOpsCtx *s, VulkanPriv *p, SwsOpList *ops)
Definition: ops.c:245
h
h
Definition: vp9dsp_template.c:2070
SwsOpList::plane_src
uint8_t plane_src[4]
Definition: ops.h:296
ff_sws_vk_device_ref
AVBufferRef * ff_sws_vk_device_ref(SwsContext *sws)
Returns the Vulkan device reference associated with sws, or NULL if Vulkan has not been initialized f...
Definition: ops.c:85
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:288
ff_sws_op_type_name
const char * ff_sws_op_type_name(SwsOpType op)
Definition: ops.c:109
SwsContext
Main external API structure.
Definition: swscale.h:206
snprintf
#define snprintf
Definition: snprintf.h:34
SwsFilterWeights::weights
int * weights
The computed look-up table (LUT).
Definition: filters.h:76
FFVulkanFunctions
Definition: vulkan_functions.h:275
ff_vk_shader_load
int ff_vk_shader_load(FFVulkanShader *shd, VkPipelineStageFlags stage, VkSpecializationInfo *spec, uint32_t wg_size[3], uint32_t required_subgroup_size)
Initialize a shader object.
Definition: vulkan.c:2128
src
#define src
Definition: vp8dsp.c:248
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239