21 #define VK_NO_PROTOTYPES
22 #define VK_ENABLE_BETA_EXTENSIONS
26 #include <versionhelpers.h>
53 #include <va/va_drmcommon.h>
56 #include <sys/sysmacros.h>
60 #include <drm_fourcc.h>
64 #if HAVE_LINUX_DMA_BUF_H
65 #include <sys/ioctl.h>
66 #include <linux/dma-buf.h>
/* Error-check wrapper for dynamically loaded CUDA driver API calls;
 * presumably cuda_cu is the log context and cu the loaded function
 * table expected by FF_CUDA_CHECK_DL — confirm against ffmpeg cuda_check.h. */
72 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
85 #ifdef VK_EXT_zero_initialize_device_memory
86 VkPhysicalDeviceZeroInitializeDeviceMemoryFeaturesEXT zero_initialize;
89 #ifdef VK_KHR_shader_expect_assume
90 VkPhysicalDeviceShaderExpectAssumeFeaturesKHR expect_assume;
94 #ifdef VK_KHR_video_maintenance2
95 VkPhysicalDeviceVideoMaintenance2FeaturesKHR video_maintenance_2;
97 #ifdef VK_KHR_video_decode_vp9
98 VkPhysicalDeviceVideoDecodeVP9FeaturesKHR vp9_decode;
100 #ifdef VK_KHR_video_encode_av1
101 VkPhysicalDeviceVideoEncodeAV1FeaturesKHR av1_encode;
109 #ifdef VK_KHR_shader_relaxed_extended_instruction
110 VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR relaxed_extended_instruction;
130 VkPhysicalDeviceExternalMemoryHostPropertiesEXT
hprops;
211 feats->
device = (VkPhysicalDeviceFeatures2) {
212 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
216 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
218 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
220 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);
223 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);
225 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_ROTATE_FEATURES_KHR);
227 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT);
229 #ifdef VK_EXT_zero_initialize_device_memory
231 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_DEVICE_MEMORY_FEATURES_EXT);
234 #ifdef VK_KHR_shader_expect_assume
236 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EXPECT_ASSUME_FEATURES_KHR);
240 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR);
241 #ifdef VK_KHR_video_maintenance2
243 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_2_FEATURES_KHR);
245 #ifdef VK_KHR_video_decode_vp9
247 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR);
249 #ifdef VK_KHR_video_encode_av1
251 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR);
255 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT);
257 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR);
259 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT);
261 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT);
263 #ifdef VK_KHR_shader_relaxed_extended_instruction
265 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR);
272 #define COPY_VAL(VAL) \
274 dst->VAL = src->VAL; \
277 COPY_VAL(device.features.shaderImageGatherExtended);
278 COPY_VAL(device.features.shaderStorageImageReadWithoutFormat);
279 COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat);
280 COPY_VAL(device.features.fragmentStoresAndAtomics);
281 COPY_VAL(device.features.vertexPipelineStoresAndAtomics);
282 COPY_VAL(device.features.shaderInt64);
283 COPY_VAL(device.features.shaderInt16);
284 COPY_VAL(device.features.shaderFloat64);
285 COPY_VAL(device.features.shaderStorageImageReadWithoutFormat);
286 COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat);
288 COPY_VAL(vulkan_1_1.samplerYcbcrConversion);
289 COPY_VAL(vulkan_1_1.storagePushConstant16);
290 COPY_VAL(vulkan_1_1.storageBuffer16BitAccess);
291 COPY_VAL(vulkan_1_1.uniformAndStorageBuffer16BitAccess);
293 COPY_VAL(vulkan_1_2.timelineSemaphore);
294 COPY_VAL(vulkan_1_2.scalarBlockLayout);
295 COPY_VAL(vulkan_1_2.bufferDeviceAddress);
296 COPY_VAL(vulkan_1_2.hostQueryReset);
297 COPY_VAL(vulkan_1_2.storagePushConstant8);
299 COPY_VAL(vulkan_1_2.storageBuffer8BitAccess);
300 COPY_VAL(vulkan_1_2.uniformAndStorageBuffer8BitAccess);
302 COPY_VAL(vulkan_1_2.shaderBufferInt64Atomics);
303 COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics);
304 COPY_VAL(vulkan_1_2.vulkanMemoryModel);
305 COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);
306 COPY_VAL(vulkan_1_2.uniformBufferStandardLayout);
307 COPY_VAL(vulkan_1_2.runtimeDescriptorArray);
309 COPY_VAL(vulkan_1_3.dynamicRendering);
311 COPY_VAL(vulkan_1_3.synchronization2);
312 COPY_VAL(vulkan_1_3.computeFullSubgroups);
313 COPY_VAL(vulkan_1_3.subgroupSizeControl);
314 COPY_VAL(vulkan_1_3.shaderZeroInitializeWorkgroupMemory);
315 COPY_VAL(vulkan_1_3.dynamicRendering);
317 COPY_VAL(timeline_semaphore.timelineSemaphore);
318 COPY_VAL(subgroup_rotate.shaderSubgroupRotate);
319 COPY_VAL(host_image_copy.hostImageCopy);
321 #ifdef VK_EXT_zero_initialize_device_memory
322 COPY_VAL(zero_initialize.zeroInitializeDeviceMemory);
325 COPY_VAL(video_maintenance_1.videoMaintenance1);
326 #ifdef VK_KHR_video_maintenance2
327 COPY_VAL(video_maintenance_2.videoMaintenance2);
330 #ifdef VK_KHR_video_decode_vp9
331 COPY_VAL(vp9_decode.videoDecodeVP9);
334 #ifdef VK_KHR_video_encode_av1
335 COPY_VAL(av1_encode.videoEncodeAV1);
338 COPY_VAL(shader_object.shaderObject);
340 COPY_VAL(cooperative_matrix.cooperativeMatrix);
342 COPY_VAL(descriptor_buffer.descriptorBuffer);
343 COPY_VAL(descriptor_buffer.descriptorBufferPushDescriptors);
345 COPY_VAL(atomic_float.shaderBufferFloat32Atomics);
346 COPY_VAL(atomic_float.shaderBufferFloat32AtomicAdd);
348 #ifdef VK_KHR_shader_relaxed_extended_instruction
349 COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction);
352 #ifdef VK_KHR_shader_expect_assume
353 COPY_VAL(expect_assume.shaderExpectAssume);
/* Combined image-aspect masks selecting every plane of a multi-planar
 * Vulkan format: both planes of a 2-plane format (used by the P01x/P21x/P41x
 * entries below) and all three planes of a 3-plane format. */
359 #define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
360 #define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)
372 { VK_FORMAT_R8_UNORM,
AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } },
373 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GRAY10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
374 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GRAY12, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
375 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GRAY14, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
376 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
377 { VK_FORMAT_R32_UINT,
AV_PIX_FMT_GRAY32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_UINT } },
378 { VK_FORMAT_R32_SFLOAT,
AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },
381 { VK_FORMAT_B8G8R8A8_UNORM,
AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
382 { VK_FORMAT_R8G8B8A8_UNORM,
AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
383 { VK_FORMAT_R8G8B8_UNORM,
AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } },
384 { VK_FORMAT_B8G8R8_UNORM,
AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } },
385 { VK_FORMAT_R16G16B16_UNORM,
AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } },
386 { VK_FORMAT_R16G16B16A16_UNORM,
AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
387 { VK_FORMAT_R5G6B5_UNORM_PACK16,
AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
388 { VK_FORMAT_B5G6R5_UNORM_PACK16,
AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
389 { VK_FORMAT_B8G8R8A8_UNORM,
AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
390 { VK_FORMAT_R8G8B8A8_UNORM,
AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
391 { VK_FORMAT_A2R10G10B10_UNORM_PACK32,
AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
392 { VK_FORMAT_A2B10G10R10_UNORM_PACK32,
AV_PIX_FMT_X2BGR10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2B10G10R10_UNORM_PACK32 } },
393 { VK_FORMAT_R32G32B32_SFLOAT,
AV_PIX_FMT_RGBF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_SFLOAT } },
394 { VK_FORMAT_R32G32B32A32_SFLOAT,
AV_PIX_FMT_RGBAF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_SFLOAT } },
395 { VK_FORMAT_R32G32B32_UINT,
AV_PIX_FMT_RGB96, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_UINT } },
396 { VK_FORMAT_R32G32B32A32_UINT,
AV_PIX_FMT_RGBA128, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_UINT } },
399 { VK_FORMAT_R8_UNORM,
AV_PIX_FMT_GBRP, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
400 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRP10, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
401 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRP12, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
402 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRP14, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
403 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRP16, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
404 { VK_FORMAT_R32_SFLOAT,
AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
407 { VK_FORMAT_R8_UNORM,
AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
408 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRAP10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
409 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRAP12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
410 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRAP14, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
411 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
412 { VK_FORMAT_R32_UINT,
AV_PIX_FMT_GBRAP32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT } },
413 { VK_FORMAT_R32_SFLOAT,
AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
420 { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
AV_PIX_FMT_P010,
ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
421 { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
AV_PIX_FMT_P012,
ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
426 { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
AV_PIX_FMT_P210,
ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
427 { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
AV_PIX_FMT_P212,
ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
432 { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16,
AV_PIX_FMT_P410,
ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
433 { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16,
AV_PIX_FMT_P412,
ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
451 { VK_FORMAT_G8B8G8R8_422_UNORM,
AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
452 { VK_FORMAT_B8G8R8G8_422_UNORM,
AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
453 { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
454 { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
455 { VK_FORMAT_G16B16G16R16_422_UNORM,
AV_PIX_FMT_Y216, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
458 { VK_FORMAT_R8_UNORM,
AV_PIX_FMT_YUVA420P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
459 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA420P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
460 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA420P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
463 { VK_FORMAT_R8_UNORM,
AV_PIX_FMT_YUVA422P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
464 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA422P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
465 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA422P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
466 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA422P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
469 { VK_FORMAT_R8_UNORM,
AV_PIX_FMT_YUVA444P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
470 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA444P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
471 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA444P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
472 { VK_FORMAT_R16_UNORM,
AV_PIX_FMT_YUVA444P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
475 { VK_FORMAT_B8G8R8A8_UNORM,
AV_PIX_FMT_UYVA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
476 { VK_FORMAT_A2R10G10B10_UNORM_PACK32,
AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
477 { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16,
AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
478 { VK_FORMAT_R16G16B16A16_UNORM,
AV_PIX_FMT_XV48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
499 VkImageTiling tiling,
502 VkImageAspectFlags *
aspect,
503 VkImageUsageFlags *supported_usage,
504 int disable_multiplane,
int need_storage)
510 const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
511 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
512 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
516 VkFormatProperties3 fprops = {
517 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
519 VkFormatProperties2 prop = {
520 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
523 VkFormatFeatureFlagBits2 feats_primary, feats_secondary;
524 int basics_primary = 0, basics_secondary = 0;
525 int storage_primary = 0, storage_secondary = 0;
527 vk->GetPhysicalDeviceFormatProperties2(hwctx->
phys_dev,
531 feats_primary = tiling == VK_IMAGE_TILING_LINEAR ?
532 fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
533 basics_primary = (feats_primary & basic_flags) == basic_flags;
534 storage_primary = !!(feats_primary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
537 vk->GetPhysicalDeviceFormatProperties2(hwctx->
phys_dev,
540 feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ?
541 fprops.linearTilingFeatures : fprops.optimalTilingFeatures;
542 basics_secondary = (feats_secondary & basic_flags) == basic_flags;
543 storage_secondary = !!(feats_secondary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
545 basics_secondary = basics_primary;
546 storage_secondary = storage_primary;
549 if (basics_primary &&
551 (!need_storage || (need_storage && (storage_primary | storage_secondary)))) {
566 ((need_storage && (storage_primary | storage_secondary)) ?
567 VK_IMAGE_USAGE_STORAGE_BIT : 0);
569 }
else if (basics_secondary &&
570 (!need_storage || (need_storage && storage_secondary))) {
591 #if CONFIG_VULKAN_STATIC
592 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance,
601 #if CONFIG_VULKAN_STATIC
604 static const char *lib_names[] = {
607 #elif defined(__APPLE__)
618 p->libvulkan = dlopen(lib_names[
i], RTLD_NOW | RTLD_LOCAL);
628 hwctx->
get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(
p->libvulkan,
"vkGetInstanceProcAddr");
657 #ifdef VK_EXT_zero_initialize_device_memory
660 #ifdef VK_KHR_shader_expect_assume
664 #ifdef VK_KHR_video_maintenance2
687 #ifdef VK_KHR_video_decode_vp9
690 #ifdef VK_KHR_video_encode_av1
724 static VkBool32 VKAPI_CALL
vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
725 VkDebugUtilsMessageTypeFlagsEXT messageType,
726 const VkDebugUtilsMessengerCallbackDataEXT *
data,
733 switch (
data->messageIdNumber) {
744 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l =
AV_LOG_VERBOSE;
break;
745 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l =
AV_LOG_INFO;
break;
746 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l =
AV_LOG_WARNING;
break;
747 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l =
AV_LOG_ERROR;
break;
752 for (
int i = 0;
i <
data->cmdBufLabelCount;
i++)
758 #define ADD_VAL_TO_LIST(list, count, val) \
760 list = av_realloc_array(list, ++count, sizeof(*list)); \
762 err = AVERROR(ENOMEM); \
765 list[count - 1] = av_strdup(val); \
766 if (!list[count - 1]) { \
767 err = AVERROR(ENOMEM); \
772 #define RELEASE_PROPS(props, count) \
774 for (int i = 0; i < count; i++) \
775 av_free((void *)((props)[i])); \
776 av_free((void *)props); \
782 VkDeviceSize max_vram = 0, max_visible_vram = 0;
786 for (
int i = 0;
i <
p->mprops.memoryTypeCount;
i++) {
787 const VkMemoryType
type =
p->mprops.memoryTypes[
i];
788 const VkMemoryHeap heap =
p->mprops.memoryHeaps[
type.heapIndex];
789 if (!(
type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
791 max_vram =
FFMAX(max_vram, heap.size);
792 if (
type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
793 max_visible_vram =
FFMAX(max_visible_vram, heap.size);
796 return max_vram - max_visible_vram < 1024;
814 const char *
const **
dst, uint32_t *num,
818 const char **extension_names =
NULL;
822 int err = 0, found, extensions_found = 0;
825 int optional_exts_num;
826 uint32_t sup_ext_count;
827 char *user_exts_str =
NULL;
829 VkExtensionProperties *sup_ext;
839 if (!user_exts_str) {
844 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count,
NULL);
845 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
848 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count, sup_ext);
856 if (!user_exts_str) {
861 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
862 &sup_ext_count,
NULL);
863 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
866 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
867 &sup_ext_count, sup_ext);
870 for (
int i = 0;
i < optional_exts_num;
i++) {
871 tstr = optional_exts[
i].
name;
875 if (
p->dprops.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA &&
876 !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME))
880 if (!strcmp(tstr, VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME) &&
888 !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) {
892 for (
int j = 0; j < sup_ext_count; j++) {
893 if (!strcmp(tstr, sup_ext[j].extensionName)) {
902 p->vkctx.extensions |= optional_exts[
i].
flag;
910 tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
912 for (
int j = 0; j < sup_ext_count; j++) {
913 if (!strcmp(tstr, sup_ext[j].extensionName)) {
929 #ifdef VK_KHR_shader_relaxed_extended_instruction
932 tstr = VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME;
934 for (
int j = 0; j < sup_ext_count; j++) {
935 if (!strcmp(tstr, sup_ext[j].extensionName)) {
953 char *save, *token =
av_strtok(user_exts_str,
"+", &save);
956 for (
int j = 0; j < sup_ext_count; j++) {
957 if (!strcmp(token, sup_ext[j].extensionName)) {
973 *
dst = extension_names;
974 *num = extensions_found;
988 const char *
const **
dst, uint32_t *num,
995 static const char layer_standard_validation[] = {
"VK_LAYER_KHRONOS_validation" };
996 int layer_standard_validation_found = 0;
998 uint32_t sup_layer_count;
999 VkLayerProperties *sup_layers;
1002 char *user_layers_str =
NULL;
1005 const char **enabled_layers =
NULL;
1006 uint32_t enabled_layers_count = 0;
1014 vk->EnumerateInstanceLayerProperties(&sup_layer_count,
NULL);
1015 sup_layers =
av_malloc_array(sup_layer_count,
sizeof(VkLayerProperties));
1018 vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);
1021 for (
int i = 0;
i < sup_layer_count;
i++)
1025 if (!debug_opt && !user_layers)
1030 if (!strcmp(debug_opt->
value,
"profile")) {
1032 }
else if (!strcmp(debug_opt->
value,
"printf")) {
1034 }
else if (!strcmp(debug_opt->
value,
"validate")) {
1036 }
else if (!strcmp(debug_opt->
value,
"practices")) {
1039 char *end_ptr =
NULL;
1040 int idx = strtol(debug_opt->
value, &end_ptr, 10);
1041 if (end_ptr == debug_opt->
value || end_ptr[0] !=
'\0' ||
1056 for (
int i = 0;
i < sup_layer_count;
i++) {
1057 if (!strcmp(layer_standard_validation, sup_layers[
i].layerName)) {
1059 layer_standard_validation);
1060 ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation);
1062 layer_standard_validation_found = 1;
1066 if (!layer_standard_validation_found) {
1068 "Validation Layer \"%s\" not supported\n", layer_standard_validation);
1081 if (!user_layers_str) {
1086 token =
av_strtok(user_layers_str,
"+", &save);
1091 if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) {
1097 for (
int j = 0; j < sup_layer_count; j++) {
1098 if (!strcmp(token, sup_layers[j].layerName)) {
1109 if (!strcmp(layer_standard_validation, token))
1113 "Layer \"%s\" not supported\n", token);
1130 *
dst = enabled_layers;
1131 *num = enabled_layers_count;
1146 VkApplicationInfo application_info = {
1147 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
1148 .pApplicationName =
"ffmpeg",
1152 .pEngineName =
"libavutil",
1153 .apiVersion = VK_API_VERSION_1_3,
1158 VkValidationFeaturesEXT validation_features = {
1159 .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
1161 VkInstanceCreateInfo inst_props = {
1162 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
1163 .pApplicationInfo = &application_info,
1179 &inst_props.enabledLayerCount, debug_mode);
1185 &inst_props.enabledExtensionCount, *debug_mode);
1193 static const VkValidationFeatureEnableEXT feat_list_validate[] = {
1194 VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
1195 VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
1196 VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
1198 validation_features.pEnabledValidationFeatures = feat_list_validate;
1199 validation_features.enabledValidationFeatureCount =
FF_ARRAY_ELEMS(feat_list_validate);
1200 inst_props.pNext = &validation_features;
1202 static const VkValidationFeatureEnableEXT feat_list_debug[] = {
1203 VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
1204 VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
1205 VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
1207 validation_features.pEnabledValidationFeatures = feat_list_debug;
1208 validation_features.enabledValidationFeatureCount =
FF_ARRAY_ELEMS(feat_list_debug);
1209 inst_props.pNext = &validation_features;
1211 static const VkValidationFeatureEnableEXT feat_list_practices[] = {
1212 VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
1213 VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
1215 validation_features.pEnabledValidationFeatures = feat_list_practices;
1216 validation_features.enabledValidationFeatureCount =
FF_ARRAY_ELEMS(feat_list_practices);
1217 inst_props.pNext = &validation_features;
1221 for (
int i = 0;
i < inst_props.enabledExtensionCount;
i++) {
1222 if (!strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
1223 inst_props.ppEnabledExtensionNames[
i])) {
1224 inst_props.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
1231 ret = vk->CreateInstance(&inst_props, hwctx->
alloc, &hwctx->
inst);
1234 if (
ret != VK_SUCCESS) {
1251 VkDebugUtilsMessengerCreateInfoEXT dbg = {
1252 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
1253 .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
1254 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
1255 VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
1256 VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
1257 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
1258 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
1259 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
1264 vk->CreateDebugUtilsMessengerEXT(hwctx->
inst, &dbg,
1265 hwctx->
alloc, &
p->debug_ctx);
1271 RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
1290 case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
return "integrated";
1291 case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
return "discrete";
1292 case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
return "virtual";
1293 case VK_PHYSICAL_DEVICE_TYPE_CPU:
return "software";
1294 default:
return "unknown";
1301 int err = 0, choice = -1;
1307 VkPhysicalDevice *devices =
NULL;
1308 VkPhysicalDeviceIDProperties *idp =
NULL;
1309 VkPhysicalDeviceProperties2 *prop =
NULL;
1310 VkPhysicalDeviceDriverProperties *driver_prop =
NULL;
1311 VkPhysicalDeviceDrmPropertiesEXT *drm_prop =
NULL;
1313 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num,
NULL);
1314 if (
ret != VK_SUCCESS || !num) {
1323 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num, devices);
1324 if (
ret != VK_SUCCESS) {
1343 driver_prop =
av_calloc(num,
sizeof(*driver_prop));
1350 drm_prop =
av_calloc(num,
sizeof(*drm_prop));
1358 for (
int i = 0;
i < num;
i++) {
1360 drm_prop[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT;
1361 driver_prop[
i].pNext = &drm_prop[
i];
1363 driver_prop[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
1364 idp[
i].pNext = &driver_prop[
i];
1365 idp[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
1366 prop[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1367 prop[
i].pNext = &idp[
i];
1369 vk->GetPhysicalDeviceProperties2(devices[
i], &prop[
i]);
1371 prop[
i].properties.deviceName,
1373 prop[
i].properties.deviceID);
1377 for (
int i = 0;
i < num;
i++) {
1378 if (!strncmp(idp[
i].deviceUUID, select->
uuid, VK_UUID_SIZE)) {
1387 for (
int i = 0;
i < num;
i++) {
1388 if ((select->
drm_major == drm_prop[
i].primaryMajor &&
1389 select->
drm_minor == drm_prop[
i].primaryMinor) ||
1390 (select->
drm_major == drm_prop[
i].renderMajor &&
1391 select->
drm_minor == drm_prop[
i].renderMinor)) {
1400 }
else if (select->
name) {
1402 for (
int i = 0;
i < num;
i++) {
1403 if (strstr(prop[
i].properties.deviceName, select->
name)) {
1414 for (
int i = 0;
i < num;
i++) {
1415 if (select->
pci_device == prop[
i].properties.deviceID) {
1426 for (
int i = 0;
i < num;
i++) {
1427 if (select->
vendor_id == prop[
i].properties.vendorID) {
1437 if (select->
index < num) {
1438 choice = select->
index;
1450 choice, prop[choice].properties.deviceName,
1452 prop[choice].properties.deviceID);
1454 p->props = prop[choice];
1455 p->props.pNext =
NULL;
1456 p->dprops = driver_prop[choice];
1457 p->dprops.pNext =
NULL;
1471 VkQueueFlagBits
flags)
1474 uint32_t min_score = UINT32_MAX;
1476 for (
int i = 0;
i < num_qf;
i++) {
1477 VkQueueFlagBits qflags = qf[
i].queueFamilyProperties.queueFlags;
1480 if ((
flags & VK_QUEUE_TRANSFER_BIT) &&
1481 (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
1482 qflags |= VK_QUEUE_TRANSFER_BIT;
1484 if (qflags &
flags) {
1485 uint32_t score =
av_popcount(qflags) + qf[
i].queueFamilyProperties.timestampValidBits;
1486 if (score < min_score) {
1494 qf[
index].queueFamilyProperties.timestampValidBits++;
1500 VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
1501 VkVideoCodecOperationFlagsKHR
flags)
1504 uint32_t min_score = UINT32_MAX;
1506 for (
int i = 0;
i < num_qf;
i++) {
1507 const VkQueueFlags qflags = qf[
i].queueFamilyProperties.queueFlags;
1508 const VkVideoCodecOperationFlagsKHR vflags = qf_vid[
i].videoCodecOperations;
1510 if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
1513 if (vflags &
flags) {
1514 uint32_t score =
av_popcount(vflags) + qf[
i].queueFamilyProperties.timestampValidBits;
1515 if (score < min_score) {
1523 qf[
index].queueFamilyProperties.timestampValidBits++;
1535 VkQueueFamilyProperties2 *qf =
NULL;
1536 VkQueueFamilyVideoPropertiesKHR *qf_vid =
NULL;
1539 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &num,
NULL);
1550 qf_vid =
av_malloc_array(num,
sizeof(VkQueueFamilyVideoPropertiesKHR));
1554 for (uint32_t
i = 0;
i < num;
i++) {
1555 qf_vid[
i] = (VkQueueFamilyVideoPropertiesKHR) {
1556 .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
1558 qf[
i] = (VkQueueFamilyProperties2) {
1559 .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
1565 vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->
phys_dev, &num, qf);
1568 for (
int i = 0;
i < num;
i++) {
1570 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ?
" graphics" :
"",
1571 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ?
" compute" :
"",
1572 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ?
" transfer" :
"",
1573 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ?
" encode" :
"",
1574 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ?
" decode" :
"",
1575 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ?
" sparse" :
"",
1576 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ?
" optical_flow" :
"",
1577 ((qf[
i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ?
" protected" :
"",
1578 qf[
i].queueFamilyProperties.queueCount);
1582 qf[
i].queueFamilyProperties.timestampValidBits = 0;
1588 #define PICK_QF(type, vid_op) \
1594 idx = pick_video_queue_family(qf, qf_vid, num, vid_op); \
1596 idx = pick_queue_family(qf, num, type); \
1601 for (i = 0; i < hwctx->nb_qf; i++) { \
1602 if (hwctx->qf[i].idx == idx) { \
1603 hwctx->qf[i].flags |= type; \
1604 hwctx->qf[i].video_caps |= vid_op; \
1608 if (i == hwctx->nb_qf) { \
1609 hwctx->qf[i].idx = idx; \
1610 hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
1611 if (p->limit_queues || \
1612 p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) { \
1613 int max = p->limit_queues; \
1614 if (type == VK_QUEUE_GRAPHICS_BIT) \
1615 hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, \
1618 hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, max); \
1620 hwctx->qf[i].flags = type; \
1621 hwctx->qf[i].video_caps = vid_op; \
1626 PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
1627 PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
1628 PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
1630 PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
1631 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
1633 PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
1634 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
1636 #ifdef VK_KHR_video_decode_vp9
1637 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR);
1640 #ifdef VK_KHR_video_encode_av1
1641 PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR);
1643 PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
1651 sizeof(VkDeviceQueueCreateInfo));
1652 if (!cd->pQueueCreateInfos)
1655 for (uint32_t
i = 0;
i < hwctx->
nb_qf;
i++) {
1658 VkDeviceQueueCreateInfo *pc;
1659 for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
1660 if (hwctx->
qf[
i].
idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
1670 for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
1671 av_free((
void *)cd->pQueueCreateInfos[
i].pQueuePriorities);
1672 av_free((
void *)cd->pQueueCreateInfos);
1676 for (uint32_t j = 0; j < hwctx->
qf[
i].
num; j++)
1679 pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
1680 pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
1681 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
1682 .queueFamilyIndex = hwctx->
qf[
i].
idx,
1683 .queueCount = hwctx->
qf[
i].
num,
1688 #if FF_API_VULKAN_FIXED_QUEUES
1697 #define SET_OLD_QF(field, nb_field, type) \
1699 if (field < 0 && hwctx->qf[i].flags & type) { \
1700 field = hwctx->qf[i].idx; \
1701 nb_field = hwctx->qf[i].num; \
1705 for (uint32_t
i = 0;
i < hwctx->
nb_qf;
i++) {
1735 vk->DestroyDebugUtilsMessengerEXT(hwctx->
inst,
p->debug_ctx,
1739 vk->DestroyInstance(hwctx->
inst, hwctx->
alloc);
1742 dlclose(
p->libvulkan);
1752 for (uint32_t
i = 0;
i <
p->nb_tot_qfs;
i++) {
1763 int disable_multiplane,
1774 VkDeviceCreateInfo dev_info = {
1775 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
1787 vk->GetPhysicalDeviceMemoryProperties(hwctx->
phys_dev, &
p->mprops);
1791 &dev_info.enabledExtensionCount, debug_mode))) {
1792 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1793 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1794 av_free((
void *)dev_info.pQueueCreateInfos);
1800 vk->GetPhysicalDeviceFeatures2(hwctx->
phys_dev, &supported_feats.
device);
1805 dev_info.pNext =
p->feats.device.pNext;
1806 dev_info.pEnabledFeatures = &
p->feats.device.features;
1811 p->limit_queues = strtol(opt_d->
value,
NULL, 10);
1821 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1822 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1823 av_free((
void *)dev_info.pQueueCreateInfos);
1825 if (
ret != VK_SUCCESS) {
1828 for (
int i = 0;
i < dev_info.enabledExtensionCount;
i++)
1829 av_free((
void *)dev_info.ppEnabledExtensionNames[
i]);
1830 av_free((
void *)dev_info.ppEnabledExtensionNames);
1838 p->use_linear_images = strtol(opt_d->
value,
NULL, 10);
1841 p->disable_multiplane = disable_multiplane;
1842 if (!
p->disable_multiplane) {
1845 p->disable_multiplane = strtol(opt_d->
value,
NULL, 10);
1849 p->avoid_host_import =
p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
1852 p->avoid_host_import = strtol(opt_d->
value,
NULL, 10);
1887 VkQueueFamilyProperties2 *qf;
1888 VkQueueFamilyVideoPropertiesKHR *qf_vid;
1889 VkPhysicalDeviceExternalSemaphoreInfo ext_sem_props_info;
1890 int graph_index, comp_index, tx_index, enc_index, dec_index;
1909 p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1910 p->props.pNext = &
p->hprops;
1911 p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
1912 p->hprops.pNext = &
p->dprops;
1913 p->dprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
1915 vk->GetPhysicalDeviceProperties2(hwctx->
phys_dev, &
p->props);
1917 p->props.properties.deviceName);
1920 p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
1922 p->props.properties.limits.minMemoryMapAlignment);
1924 p->props.properties.limits.nonCoherentAtomSize);
1927 p->hprops.minImportedHostPointerAlignment);
1929 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &qf_num,
NULL);
1935 ext_sem_props_info = (VkPhysicalDeviceExternalSemaphoreInfo) {
1936 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO,
1940 ext_sem_props_info.handleType =
1942 IsWindows8OrGreater()
1943 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
1944 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
1946 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
1948 p->ext_sem_props_opaque.sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES;
1949 vk->GetPhysicalDeviceExternalSemaphoreProperties(hwctx->
phys_dev,
1950 &ext_sem_props_info,
1951 &
p->ext_sem_props_opaque);
1957 qf_vid =
av_malloc_array(qf_num,
sizeof(VkQueueFamilyVideoPropertiesKHR));
1963 for (uint32_t
i = 0;
i < qf_num;
i++) {
1964 qf_vid[
i] = (VkQueueFamilyVideoPropertiesKHR) {
1965 .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
1967 qf[
i] = (VkQueueFamilyProperties2) {
1968 .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
1973 vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->
phys_dev, &qf_num, qf);
1975 p->qf_mutex =
av_calloc(qf_num,
sizeof(*
p->qf_mutex));
1980 p->nb_tot_qfs = qf_num;
1982 for (uint32_t
i = 0;
i < qf_num;
i++) {
1983 p->qf_mutex[
i] =
av_calloc(qf[
i].queueFamilyProperties.queueCount,
1984 sizeof(**
p->qf_mutex));
1985 if (!
p->qf_mutex[
i]) {
1989 for (uint32_t j = 0; j < qf[
i].queueFamilyProperties.queueCount; j++) {
2000 #if FF_API_VULKAN_FIXED_QUEUES
2008 #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \
2010 if (ctx_qf < 0 && required) { \
2011 av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \
2012 " in the context!\n", type); \
2013 err = AVERROR(EINVAL); \
2015 } else if (fidx < 0 || ctx_qf < 0) { \
2017 } else if (ctx_qf >= qf_num) { \
2018 av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
2019 type, ctx_qf, qf_num); \
2020 err = AVERROR(EINVAL); \
2024 av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \
2025 " for%s%s%s%s%s\n", \
2027 ctx_qf == graph_index ? " graphics" : "", \
2028 ctx_qf == comp_index ? " compute" : "", \
2029 ctx_qf == tx_index ? " transfers" : "", \
2030 ctx_qf == enc_index ? " encode" : "", \
2031 ctx_qf == dec_index ? " decode" : ""); \
2032 graph_index = (ctx_qf == graph_index) ? -1 : graph_index; \
2033 comp_index = (ctx_qf == comp_index) ? -1 : comp_index; \
2034 tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
2035 enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
2036 dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
2049 if (!hwctx->
nb_qf) {
2050 #define ADD_QUEUE(ctx_qf, qc, flag) \
2052 if (ctx_qf != -1) { \
2053 hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \
2071 for (
int i = 0;
i < hwctx->
nb_qf;
i++) {
2073 hwctx->
qf[
i].
flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
2074 VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
2081 for (
int i = 0;
i < hwctx->
nb_qf;
i++) {
2085 for (
int j = (
i - 1); j >= 0; j--) {
2092 p->img_qfs[
p->nb_img_qfs++] = hwctx->
qf[
i].
idx;
2101 vk->GetPhysicalDeviceMemoryProperties(hwctx->
phys_dev, &
p->mprops);
2103 p->vkctx.device =
ctx;
2104 p->vkctx.hwctx = hwctx;
2111 vk->GetPhysicalDeviceMemoryProperties(hwctx->
phys_dev, &
p->mprops);
2123 if (device && device[0]) {
2125 dev_select.
index = strtol(device, &end, 10);
2126 if (end == device) {
2127 dev_select.
index = 0;
2128 dev_select.
name = device;
2144 switch(src_ctx->
type) {
2148 VADisplay dpy = src_hwctx->
display;
2149 #if VA_CHECK_VERSION(1, 15, 0)
2151 VADisplayAttribute attr = {
2152 .type = VADisplayPCIID,
2157 #if VA_CHECK_VERSION(1, 15, 0)
2158 vas = vaGetDisplayAttributes(dpy, &attr, 1);
2159 if (vas == VA_STATUS_SUCCESS && attr.flags != VA_DISPLAY_ATTRIB_NOT_SUPPORTED)
2160 dev_select.pci_device = (attr.value & 0xFFFF);
2163 if (!dev_select.pci_device) {
2164 vendor = vaQueryVendorString(dpy);
2170 if (strstr(vendor,
"AMD"))
2171 dev_select.vendor_id = 0x1002;
2180 struct stat drm_node_info;
2181 drmDevice *drm_dev_info;
2184 err = fstat(src_hwctx->
fd, &drm_node_info);
2191 dev_select.drm_major = major(drm_node_info.st_dev);
2192 dev_select.drm_minor = minor(drm_node_info.st_dev);
2193 dev_select.has_drm = 1;
2195 err = drmGetDevice(src_hwctx->
fd, &drm_dev_info);
2202 if (drm_dev_info->bustype == DRM_BUS_PCI)
2203 dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
2205 drmFreeDevice(&drm_dev_info);
2215 CudaFunctions *cu = cu_internal->
cuda_dl;
2217 int ret =
CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
2224 dev_select.has_uuid = 1;
2239 const void *hwconfig,
2247 p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
2248 VK_IMAGE_TILING_OPTIMAL,
2260 p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
2261 VK_IMAGE_TILING_OPTIMAL,
2271 constraints->
max_width =
p->props.properties.limits.maxImageDimension2D;
2272 constraints->
max_height =
p->props.properties.limits.maxImageDimension2D;
2285 VkMemoryPropertyFlagBits req_flags,
const void *alloc_extension,
2286 VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
2293 VkMemoryAllocateInfo alloc_info = {
2294 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2295 .pNext = alloc_extension,
2296 .allocationSize = req->size,
2301 for (
int i = 0;
i <
p->mprops.memoryTypeCount;
i++) {
2302 const VkMemoryType *
type = &
p->mprops.memoryTypes[
i];
2305 if (!(req->memoryTypeBits & (1 <<
i)))
2309 if ((
type->propertyFlags & req_flags) != req_flags)
2313 if (req->size >
p->mprops.memoryHeaps[
type->heapIndex].size)
2327 alloc_info.memoryTypeIndex =
index;
2329 ret = vk->AllocateMemory(dev_hwctx->
act_dev, &alloc_info,
2330 dev_hwctx->
alloc, mem);
2331 if (
ret != VK_SUCCESS) {
2337 *mem_flags |=
p->mprops.memoryTypes[
index].propertyFlags;
2347 if (internal->cuda_fc_ref) {
2353 CudaFunctions *cu = cu_internal->
cuda_dl;
2356 if (internal->cu_sem[
i])
2357 CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[
i]));
2358 if (internal->cu_mma[
i])
2359 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[
i]));
2360 if (internal->ext_mem[
i])
2361 CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[
i]));
2363 if (internal->ext_sem_handle[
i])
2364 CloseHandle(internal->ext_sem_handle[
i]);
2365 if (internal->ext_mem_handle[
i])
2366 CloseHandle(internal->ext_mem_handle[
i]);
2391 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
2393 .pSemaphores =
f->sem,
2394 .pValues =
f->sem_value,
2395 .semaphoreCount = nb_sems,
2403 for (
int i = 0;
i < nb_images;
i++) {
2418 void *alloc_pnext,
size_t alloc_pnext_stride)
2420 int img_cnt = 0, err;
2428 while (
f->img[img_cnt]) {
2430 VkImageMemoryRequirementsInfo2 req_desc = {
2431 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
2432 .image =
f->img[img_cnt],
2434 VkMemoryDedicatedAllocateInfo ded_alloc = {
2435 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
2436 .pNext = (
void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
2438 VkMemoryDedicatedRequirements ded_req = {
2439 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
2441 VkMemoryRequirements2 req = {
2442 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2446 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req);
2448 if (
f->tiling == VK_IMAGE_TILING_LINEAR)
2449 req.memoryRequirements.size =
FFALIGN(req.memoryRequirements.size,
2450 p->props.properties.limits.minMemoryMapAlignment);
2453 use_ded_mem = ded_req.prefersDedicatedAllocation |
2454 ded_req.requiresDedicatedAllocation;
2456 ded_alloc.image =
f->img[img_cnt];
2460 f->tiling == VK_IMAGE_TILING_LINEAR ?
2461 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
2462 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
2463 use_ded_mem ? &ded_alloc : (
void *)ded_alloc.pNext,
2464 &
f->flags, &
f->mem[img_cnt])))
2467 f->size[img_cnt] = req.memoryRequirements.size;
2468 bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
2469 bind_info[img_cnt].image =
f->img[img_cnt];
2470 bind_info[img_cnt].memory =
f->mem[img_cnt];
2476 ret = vk->BindImageMemory2(hwctx->
act_dev, img_cnt, bind_info);
2477 if (
ret != VK_SUCCESS) {
2497 VkAccessFlags2 *new_access)
/* NOTE(review): this is an extraction fragment — the leading numbers are
 * original-file line numbers and the switch/case labels between the pairs
 * below are elided from this view. Each pair writes a target image layout
 * and the access mask to use with it, presumably selected per image-usage
 * case — TODO confirm against the full file. Comments only; code untouched. */
2501 *new_layout = VK_IMAGE_LAYOUT_GENERAL;
2502 *new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
/* Transfer-destination case: optimal layout for copy targets. */
2505 *new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
2506 *new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
/* General layout with full read/write memory access. */
2509 *new_layout = VK_IMAGE_LAYOUT_GENERAL;
2510 *new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
2513 *new_layout = VK_IMAGE_LAYOUT_GENERAL;
2514 *new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
/* Video decode output image. */
2517 *new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
2518 *new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
/* Video decode reference (DPB) image: read and written by the decoder. */
2521 *new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
2522 *new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
/* Video encode reference (DPB) image: read and written by the encoder. */
2525 *new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR;
2526 *new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
2540 VkImageLayout new_layout;
2541 VkAccessFlags2 new_access;
2544 uint32_t dst_qf =
p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED :
p->img_qfs[0];
2545 VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE;
2547 dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
2548 src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
2555 .
data = (uint8_t *)hwfc,
2559 .hw_frames_ctx = &tmp_ref,
2562 VkCommandBuffer cmd_buf;
2564 cmd_buf = exec->
buf;
2568 VK_PIPELINE_STAGE_2_NONE,
2569 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
2575 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
2576 new_access, new_layout, dst_qf);
2578 vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
2579 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
2580 .pImageMemoryBarriers = img_bar,
2581 .imageMemoryBarrierCount = nb_img_bar,
2603 VkImageLayout new_layout;
2604 VkAccessFlags2 new_access;
2608 for (
i = 0;
i <
p->vkctx.host_image_props.copyDstLayoutCount;
i++) {
2609 if (
p->vkctx.host_image_props.pCopyDstLayouts[
i] == new_layout)
2612 if (
i ==
p->vkctx.host_image_props.copyDstLayoutCount)
2615 for (
i = 0;
i < nb_images;
i++) {
2616 layout_change[
i] = (VkHostImageLayoutTransitionInfoEXT) {
2617 .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT,
2619 .oldLayout =
frame->layout[
i],
2620 .newLayout = new_layout,
2621 .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2622 .subresourceRange.layerCount = 1,
2623 .subresourceRange.levelCount = 1,
2625 frame->layout[
i] = new_layout;
2628 ret = vk->TransitionImageLayoutEXT(
p->vkctx.hwctx->act_dev,
2629 nb_images, layout_change);
2630 if (
ret != VK_SUCCESS) {
2644 if (hwfc_vk->
usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT &&
2656 int frame_w,
int frame_h,
int plane)
2673 VkImageTiling tiling, VkImageUsageFlagBits
usage,
2674 VkImageCreateFlags
flags,
int nb_layers,
2686 VkSemaphoreTypeCreateInfo sem_type_info = {
2687 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2688 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2691 VkSemaphoreCreateInfo sem_spawn = {
2692 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2693 .pNext = &sem_type_info,
2696 VkExportSemaphoreCreateInfo ext_sem_info_opaque = {
2697 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
2699 .handleTypes = IsWindows8OrGreater()
2700 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
2701 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
2703 .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2708 if (
p->ext_sem_props_opaque.externalSemaphoreFeatures & VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT) {
2722 for (
int i = 0; (hwfc_vk->
format[
i] != VK_FORMAT_UNDEFINED);
i++) {
2723 VkImageCreateInfo create_info = {
2724 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2725 .pNext = create_pnext,
2726 .imageType = VK_IMAGE_TYPE_2D,
2730 .arrayLayers = nb_layers,
2733 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2735 .samples = VK_SAMPLE_COUNT_1_BIT,
2736 .pQueueFamilyIndices =
p->img_qfs,
2737 .queueFamilyIndexCount =
p->nb_img_qfs,
2738 .sharingMode =
p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2739 VK_SHARING_MODE_EXCLUSIVE,
2742 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2745 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
2747 if (
ret != VK_SUCCESS) {
2755 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
2757 if (
ret != VK_SUCCESS) {
2764 f->queue_family[
i] =
p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED :
p->img_qfs[0];
2765 f->layout[
i] = create_info.initialLayout;
2767 f->sem_value[
i] = 0;
2783 VkExternalMemoryHandleTypeFlags *comp_handle_types,
2784 VkExternalMemoryHandleTypeFlags *iexp,
2785 VkExternalMemoryHandleTypeFlagBits
exp)
2793 const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
2795 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
2796 int has_mods = hwctx->
tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
2799 VkExternalImageFormatProperties eprops = {
2800 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
2802 VkImageFormatProperties2 props = {
2803 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
2806 VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
2807 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
2809 .pQueueFamilyIndices =
p->img_qfs,
2810 .queueFamilyIndexCount =
p->nb_img_qfs,
2811 .sharingMode =
p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2812 VK_SHARING_MODE_EXCLUSIVE,
2814 VkPhysicalDeviceExternalImageFormatInfo enext = {
2815 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
2817 .pNext = has_mods ? &phy_dev_mod_info :
NULL,
2819 VkPhysicalDeviceImageFormatInfo2 pinfo = {
2820 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
2821 .pNext = !
exp ?
NULL : &enext,
2823 .type = VK_IMAGE_TYPE_2D,
2825 .usage = hwctx->
usage,
2826 .flags = VK_IMAGE_CREATE_ALIAS_BIT,
2829 nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
2830 for (
int i = 0;
i < nb_mods;
i++) {
2832 phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[
i];
2834 ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->
phys_dev,
2837 if (
ret == VK_SUCCESS) {
2839 *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
2853 VkExternalMemoryHandleTypeFlags e = 0x0;
2856 VkExternalMemoryImageCreateInfo eiinfo = {
2857 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2864 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
2865 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
2869 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
2872 hwctx->
tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
2874 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2878 eminfo[
i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
2880 eminfo[
i].handleTypes = e;
2893 if ( (hwctx->
usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
2894 !(hwctx->
usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
2896 else if (hwctx->
usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
2898 else if (hwctx->
usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)
2900 else if (hwctx->
usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)
2955 VkImageUsageFlags supported_usage;
2958 int disable_multiplane =
p->disable_multiplane ||
2966 if (
p->use_linear_images &&
2967 (hwctx->
tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT))
2968 hwctx->
tiling = VK_IMAGE_TILING_LINEAR;
2978 if (hwctx->
format[0] != VK_FORMAT_UNDEFINED) {
2983 "for the current sw_format %s!\n",
2995 (hwctx->
usage & VK_IMAGE_USAGE_STORAGE_BIT));
3004 NULL, &supported_usage,
3007 (hwctx->
usage & VK_IMAGE_USAGE_STORAGE_BIT));
3013 hwctx->
usage |= supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT |
3014 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3015 VK_IMAGE_USAGE_STORAGE_BIT |
3016 VK_IMAGE_USAGE_SAMPLED_BIT);
3019 !(
p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY))
3020 hwctx->
usage |= supported_usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT;
3023 if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
3026 hwctx->
usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
3032 int is_lone_dpb = ((hwctx->
usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) ||
3033 ((hwctx->
usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
3034 !(hwctx->
usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)));
3035 int sampleable = hwctx->
usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
3036 VK_IMAGE_USAGE_STORAGE_BIT);
3037 hwctx->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
3038 if (sampleable && !is_lone_dpb) {
3039 hwctx->
img_flags |= VK_IMAGE_CREATE_ALIAS_BIT;
3041 hwctx->
img_flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
3049 if ((hwctx->
usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) &&
3052 const VkVideoProfileListInfoKHR *pl;
3055 hwctx->
img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
3058 for (
i = 0;
i < pl->profileCount;
i++) {
3060 if (pl->pProfiles[
i].videoCodecOperation & 0xFFFF0000)
3063 if (
i == pl->profileCount)
3064 hwctx->
img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR;
3075 p->compute_qf->num, 0, 0, 0,
NULL);
3080 p->transfer_qf->num*2, 0, 0, 0,
NULL);
3085 p->transfer_qf->num, 0, 0, 0,
NULL);
3097 VkImageDrmFormatModifierPropertiesEXT drm_mod = {
3098 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
3100 err = vk->GetImageDrmFormatModifierPropertiesEXT(dev_hwctx->
act_dev,
f->img[0],
3102 if (err != VK_SUCCESS) {
3108 VkDrmFormatModifierPropertiesListEXT modp;
3109 VkFormatProperties2 fmtp;
3110 VkDrmFormatModifierPropertiesEXT *mod_props =
NULL;
3112 modp = (VkDrmFormatModifierPropertiesListEXT) {
3113 .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
3115 fmtp = (VkFormatProperties2) {
3116 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
3121 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt->
fallback[
i], &fmtp);
3123 modp.pDrmFormatModifierProperties =
3124 av_calloc(modp.drmFormatModifierCount,
sizeof(*modp.pDrmFormatModifierProperties));
3125 if (!modp.pDrmFormatModifierProperties) {
3129 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt->
fallback[
i], &fmtp);
3131 for (uint32_t
i = 0;
i < modp.drmFormatModifierCount; ++
i) {
3132 VkDrmFormatModifierPropertiesEXT *m = &modp.pDrmFormatModifierProperties[
i];
3133 if (m->drmFormatModifier == drm_mod.drmFormatModifier) {
3139 if (mod_props ==
NULL) {
3140 av_log(hwfc,
AV_LOG_ERROR,
"No DRM format modifier properties found for modifier 0x%016"PRIx64
"\n",
3141 drm_mod.drmFormatModifier);
3142 av_free(modp.pDrmFormatModifierProperties);
3148 av_free(modp.pDrmFormatModifierProperties);
/* Static translation table from DRM fourcc codes to Vulkan formats.
 * NOTE(review): extraction fragment — the leading numbers on each line are
 * original-file line numbers embedded by the extractor, and some original
 * lines (e.g. the VkFormat member declared at original line 3214, and the
 * loop header/braces of the lookup helper) are elided from this view.
 * Comments only; all code tokens left byte-identical. */
3212 static const struct {
3213 uint32_t drm_fourcc;
/* NOTE(review): the second struct member (the VkFormat, original line 3214,
 * referenced below as .vk_format) is elided from this fragment. */
3215 } vulkan_drm_format_map[] = {
/* Single-channel 8- and 16-bit formats. */
3216 { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
3217 { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
/* Two-channel formats: GR and RG fourccs share the same Vulkan format. */
3218 { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
3219 { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
3220 { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
3221 { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
/* 8-bit four-channel formats: X (no-alpha) fourccs reuse the alpha format. */
3222 { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
3223 { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
3224 { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
3225 { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
/* 10-bit packed formats. */
3226 { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
3227 { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },
3228 { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
3229 { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 },
/* Packed YUV fourccs, guarded since older libdrm headers lack them. */
3232 #ifdef DRM_FORMAT_XYUV8888
3233 { DRM_FORMAT_XYUV8888, VK_FORMAT_R8G8B8A8_UNORM },
3234 { DRM_FORMAT_XVYU2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 } ,
3235 { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 } ,
3236 { DRM_FORMAT_XVYU16161616, VK_FORMAT_R16G16B16A16_UNORM } ,
/* Linear scan of the table above; returns VK_FORMAT_UNDEFINED when the
 * fourcc has no mapping. NOTE(review): the surrounding loop header and
 * braces (original lines 3241-3242, 3246) are elided from this fragment. */
3240 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
3243 if (vulkan_drm_format_map[
i].drm_fourcc == drm_fourcc)
3244 return vulkan_drm_format_map[
i].vk_format;
3245 return VK_FORMAT_UNDEFINED;
3254 int bind_counts = 0;
3264 if (drm_to_vulkan_fmt(
desc->layers[
i].format) == VK_FORMAT_UNDEFINED) {
3266 desc->layers[
i].format);
3277 f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
3279 for (
int i = 0;
i <
desc->nb_layers;
i++) {
3283 VkSemaphoreTypeCreateInfo sem_type_info = {
3284 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
3285 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
3288 VkSemaphoreCreateInfo sem_spawn = {
3289 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
3290 .pNext = &sem_type_info,
3295 VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
3296 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
3297 .drmFormatModifier =
desc->objects[0].format_modifier,
3298 .drmFormatModifierPlaneCount =
planes,
3299 .pPlaneLayouts = (
const VkSubresourceLayout *)&ext_img_layouts,
3301 VkExternalMemoryImageCreateInfo ext_img_spec = {
3302 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
3303 .pNext = &ext_img_mod_spec,
3304 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3306 VkImageCreateInfo create_info = {
3307 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
3308 .pNext = &ext_img_spec,
3309 .imageType = VK_IMAGE_TYPE_2D,
3310 .format = drm_to_vulkan_fmt(
desc->layers[
i].format),
3315 .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
3316 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
3318 .samples = VK_SAMPLE_COUNT_1_BIT,
3319 .pQueueFamilyIndices =
p->img_qfs,
3320 .queueFamilyIndexCount =
p->nb_img_qfs,
3321 .sharingMode =
p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
3322 VK_SHARING_MODE_EXCLUSIVE,
3326 VkExternalImageFormatProperties ext_props = {
3327 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
3329 VkImageFormatProperties2 props_ret = {
3330 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
3331 .pNext = &ext_props,
3333 VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
3334 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
3335 .drmFormatModifier = ext_img_mod_spec.drmFormatModifier,
3336 .pQueueFamilyIndices = create_info.pQueueFamilyIndices,
3337 .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
3338 .sharingMode = create_info.sharingMode,
3340 VkPhysicalDeviceExternalImageFormatInfo props_ext = {
3341 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
3342 .pNext = &props_drm_mod,
3343 .handleType = ext_img_spec.handleTypes,
3345 VkPhysicalDeviceImageFormatInfo2 fmt_props;
3348 create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT |
3349 VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
3351 create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT |
3352 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
3354 fmt_props = (VkPhysicalDeviceImageFormatInfo2) {
3355 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
3356 .pNext = &props_ext,
3357 .format = create_info.format,
3358 .type = create_info.imageType,
3359 .tiling = create_info.tiling,
3360 .usage = create_info.usage,
3361 .flags = create_info.flags,
3365 ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->
phys_dev,
3366 &fmt_props, &props_ret);
3367 if (
ret != VK_SUCCESS) {
3375 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
3379 for (
int j = 0; j <
planes; j++) {
3380 ext_img_layouts[j].offset =
desc->layers[
i].planes[j].offset;
3381 ext_img_layouts[j].rowPitch =
desc->layers[
i].planes[j].pitch;
3382 ext_img_layouts[j].size = 0;
3383 ext_img_layouts[j].arrayPitch = 0;
3384 ext_img_layouts[j].depthPitch = 0;
3388 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
3390 if (
ret != VK_SUCCESS) {
3397 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
3399 if (
ret != VK_SUCCESS) {
3406 f->queue_family[
i] = VK_QUEUE_FAMILY_EXTERNAL;
3407 f->layout[
i] = create_info.initialLayout;
3409 f->sem_value[
i] = 0;
3412 for (
int i = 0;
i <
desc->nb_layers;
i++) {
3414 VkImageMemoryRequirementsInfo2 req_desc = {
3415 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
3418 VkMemoryDedicatedRequirements ded_req = {
3419 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
3421 VkMemoryRequirements2 req2 = {
3422 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
3427 VkMemoryFdPropertiesKHR fdmp = {
3428 .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
3434 VkImportMemoryFdInfoKHR idesc = {
3435 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
3436 .fd = dup(
desc->objects[
desc->layers[
i].planes[0].object_index].fd),
3437 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3439 VkMemoryDedicatedAllocateInfo ded_alloc = {
3440 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
3442 .image = req_desc.image,
3446 ret = vk->GetMemoryFdPropertiesKHR(hwctx->
act_dev,
3447 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3449 if (
ret != VK_SUCCESS) {
3457 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req2);
3460 req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;
3463 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
3464 (ded_req.prefersDedicatedAllocation ||
3465 ded_req.requiresDedicatedAllocation) ?
3466 &ded_alloc : ded_alloc.pNext,
3467 &
f->flags, &
f->mem[
i]);
3473 f->size[
i] = req2.memoryRequirements.size;
3476 for (
int i = 0;
i <
desc->nb_layers;
i++) {
3478 for (
int j = 0; j <
planes; j++) {
3479 VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
3480 j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
3481 VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
3483 plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
3485 plane_info[bind_counts].planeAspect = aspect;
3487 bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
3489 bind_info[bind_counts].image =
f->img[
i];
3490 bind_info[bind_counts].memory =
f->mem[
i];
3493 bind_info[bind_counts].memoryOffset = 0;
3500 ret = vk->BindImageMemory2(hwctx->
act_dev, bind_counts, bind_info);
3501 if (
ret != VK_SUCCESS) {
3531 #ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE
3533 VkCommandBuffer cmd_buf;
3539 for (
int i = 0;
i <
desc->nb_objects;
i++) {
3540 VkSemaphoreTypeCreateInfo sem_type_info = {
3541 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
3542 .semaphoreType = VK_SEMAPHORE_TYPE_BINARY,
3544 VkSemaphoreCreateInfo sem_spawn = {
3545 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
3546 .pNext = &sem_type_info,
3548 VkImportSemaphoreFdInfoKHR import_info;
3549 struct dma_buf_export_sync_file implicit_fd_info = {
3550 .flags = DMA_BUF_SYNC_READ,
3554 if (ioctl(
desc->objects[
i].fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
3555 &implicit_fd_info)) {
3560 vk->DestroySemaphore(hwctx->
act_dev, drm_sync_sem[
i], hwctx->
alloc);
3564 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
3565 hwctx->
alloc, &drm_sync_sem[
i]);
3566 if (
ret != VK_SUCCESS) {
3571 vk->DestroySemaphore(hwctx->
act_dev, drm_sync_sem[
i], hwctx->
alloc);
3575 import_info = (VkImportSemaphoreFdInfoKHR) {
3576 .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
3577 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
3578 .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
3579 .semaphore = drm_sync_sem[
i],
3580 .fd = implicit_fd_info.fd,
3583 ret = vk->ImportSemaphoreFdKHR(hwctx->
act_dev, &import_info);
3584 if (
ret != VK_SUCCESS) {
3589 vk->DestroySemaphore(hwctx->
act_dev, drm_sync_sem[
i], hwctx->
alloc);
3595 cmd_buf = exec->
buf;
3601 drm_sync_sem,
desc->nb_objects,
3602 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, 1);
3607 VK_PIPELINE_STAGE_2_NONE,
3608 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
3613 VK_PIPELINE_STAGE_2_NONE,
3614 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
3616 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT : 0x0) |
3618 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT : 0x0),
3619 VK_IMAGE_LAYOUT_GENERAL,
3620 p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED :
p->img_qfs[0]);
3622 vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
3623 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
3624 .pImageMemoryBarriers = img_bar,
3625 .imageMemoryBarrierCount = nb_img_bar,
3636 "image may be corrupted.\n");
3651 if ((err = vulkan_map_from_drm_frame_desc(hwfc, &
f,
src,
flags)))
3655 dst->data[0] = (uint8_t *)
f;
3657 dst->height =
src->height;
3660 &vulkan_unmap_from_drm,
f);
3664 err = vulkan_map_from_drm_frame_sync(hwfc,
dst,
src,
flags);
3687 VASurfaceID surface_id = (VASurfaceID)(uintptr_t)
src->data[3];
3693 vaSyncSurface(vaapi_ctx->display, surface_id);
3701 err = vulkan_map_from_drm(dst_fc,
dst,
tmp,
flags);
3718 VkDeviceMemory mem,
size_t size)
3726 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3727 .type = IsWindows8OrGreater()
3728 ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
3729 : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
3732 VkMemoryGetWin32HandleInfoKHR export_info = {
3733 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
3735 .handleType = IsWindows8OrGreater()
3736 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
3737 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3740 ret = vk->GetMemoryWin32HandleKHR(hwctx->
act_dev, &export_info,
3741 &ext_desc.handle.win32.handle);
3742 if (
ret != VK_SUCCESS) {
3747 dst_int->ext_mem_handle[idx] = ext_desc.handle.win32.handle;
3749 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3750 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
3753 VkMemoryGetFdInfoKHR export_info = {
3754 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
3756 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
3759 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
3760 &ext_desc.handle.fd);
3761 if (
ret != VK_SUCCESS) {
3768 ret =
CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[idx], &ext_desc));
3771 close(ext_desc.handle.fd);
3790 VkSemaphoreGetWin32HandleInfoKHR sem_export = {
3791 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
3793 .handleType = IsWindows8OrGreater()
3794 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
3795 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3797 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3801 VkSemaphoreGetFdInfoKHR sem_export = {
3802 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
3804 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
3806 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3812 ret = vk->GetSemaphoreWin32HandleKHR(hwctx->
act_dev, &sem_export,
3813 &ext_sem_desc.handle.win32.handle);
3815 ret = vk->GetSemaphoreFdKHR(hwctx->
act_dev, &sem_export,
3816 &ext_sem_desc.handle.fd);
3818 if (
ret != VK_SUCCESS) {
3824 dst_int->ext_sem_handle[idx] = ext_sem_desc.handle.win32.handle;
3827 ret =
CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[idx],
3831 close(ext_sem_desc.handle.fd);
3859 CudaFunctions *cu = cu_internal->
cuda_dl;
3860 CUarray_format cufmt =
desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
3861 CU_AD_FORMAT_UNSIGNED_INT8;
3866 if (!dst_int->cuda_fc_ref) {
3870 if (!dst_int->cuda_fc_ref)
3874 for (
int i = 0;
i < nb_images;
i++) {
3875 err = export_mem_to_cuda(
ctx, cuda_cu, cu, dst_int,
i,
3880 err = export_sem_to_cuda(
ctx, cuda_cu, cu, dst_int,
i,
3886 if (nb_images !=
planes) {
3888 VkImageSubresource subres = {
3889 .aspectMask =
i == 2 ? VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT :
3890 i == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
3891 VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT
3893 VkSubresourceLayout
layout = { 0 };
3894 vk->GetImageSubresourceLayout(hwctx->
act_dev, dst_f->
img[
FFMIN(
i, nb_images - 1)],
3901 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
3906 .NumChannels = 1 + ((
planes == 2) &&
i),
3914 tex_desc.arrayDesc.Width = p_w;
3915 tex_desc.arrayDesc.Height = p_h;
3917 ret =
CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[
i],
3918 dst_int->ext_mem[
FFMIN(
i, nb_images - 1)],
3925 ret =
CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[
i],
3926 dst_int->cu_mma[
i], 0));
3957 CudaFunctions *cu = cu_internal->
cuda_dl;
3967 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
3971 err = vulkan_export_to_cuda(hwfc,
src->hw_frames_ctx,
dst);
3980 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
3981 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
3984 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
3985 planes, cuda_dev->stream));
3990 CUDA_MEMCPY2D cpy = {
3991 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
3992 .srcDevice = (CUdeviceptr)
src->data[
i],
3993 .srcPitch =
src->linesize[
i],
3996 .dstMemoryType = CU_MEMORYTYPE_ARRAY,
3997 .dstArray = dst_int->cu_array[
i],
4003 cpy.WidthInBytes = p_w *
desc->comp[
i].step;
4006 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
4011 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
4012 planes, cuda_dev->stream));
4038 switch (
src->format) {
4043 return vulkan_map_from_vaapi(hwfc,
dst,
src,
flags);
4049 return vulkan_map_from_drm(hwfc,
dst,
src,
flags);
4059 typedef struct VulkanDRMMapping {
4074 static inline uint32_t vulkan_fmt_to_drm(
VkFormat vkfmt)
4077 if (vulkan_drm_format_map[
i].vk_format == vkfmt)
4078 return vulkan_drm_format_map[
i].drm_fourcc;
4079 return DRM_FORMAT_INVALID;
4082 #define MAX_MEMORY_PLANES 4
4083 static VkImageAspectFlags plane_index_to_aspect(
int plane) {
4084 if (plane == 0)
return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT;
4085 if (plane == 1)
return VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT;
4086 if (plane == 2)
return VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
4087 if (plane == 3)
return VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT;
4090 return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT;
4104 VkImageDrmFormatModifierPropertiesEXT drm_mod = {
4105 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
4107 VkSemaphoreWaitInfo wait_info = {
4108 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
4110 .semaphoreCount =
planes,
4122 wait_info.pSemaphores =
f->sem;
4123 wait_info.pValues =
f->sem_value;
4125 vk->WaitSemaphores(hwctx->
act_dev, &wait_info, UINT64_MAX);
4131 ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->
act_dev,
f->img[0],
4133 if (
ret != VK_SUCCESS) {
4139 for (
int i = 0; (
i <
planes) && (
f->mem[
i]);
i++) {
4140 VkMemoryGetFdInfoKHR export_info = {
4141 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
4142 .memory =
f->mem[
i],
4143 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
4146 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
4148 if (
ret != VK_SUCCESS) {
4163 drm_desc->
layers[
i].
format = vulkan_fmt_to_drm(plane_vkfmt);
4173 VkSubresourceLayout
layout;
4174 VkImageSubresource sub = {
4175 .aspectMask = plane_index_to_aspect(j),
4180 vk->GetImageSubresourceLayout(hwctx->
act_dev,
f->img[
i], &sub, &
layout);
4192 if (
f->tiling == VK_IMAGE_TILING_OPTIMAL)
4198 dst->height =
src->height;
4199 dst->data[0] = (uint8_t *)drm_desc;
4243 switch (
dst->format) {
4253 return vulkan_map_to_vaapi(hwfc,
dst,
src,
flags);
4265 AVFrame *swf, VkBufferImageCopy *region,
4275 region[
i].bufferRowLength,
4279 region[
i].imageExtent.height);
4282 if (err != VK_SUCCESS) {
4289 if (err != VK_SUCCESS) {
4299 region[
i].bufferRowLength,
4301 region[
i].imageExtent.height);
4308 AVFrame *swf, VkBufferImageCopy *region,
int upload)
4315 VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
4316 VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4318 size_t buf_offset = 0;
4322 region[
i] = (VkBufferImageCopy) {
4323 .bufferOffset = buf_offset,
4325 p->props.properties.limits.optimalBufferCopyRowPitchAlignment),
4326 .bufferImageHeight = p_h,
4327 .imageSubresource.layerCount = 1,
4328 .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
4332 buf_offset +=
FFALIGN(p_h*region[
i].bufferRowLength,
4333 p->props.properties.limits.optimalBufferCopyOffsetAlignment);
4338 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
4339 VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
4347 AVFrame *swf, VkBufferImageCopy *region,
int upload)
4354 VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
4355 VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4364 while (swf->
buf[nb_src_bufs])
4368 if (nb_src_bufs == 1) {
4379 }
else if (nb_src_bufs ==
planes) {
4398 for (
int i = 0;
i < (*nb_bufs);
i++)
4419 int nb_layout_ch = 0;
4423 for (
int i = 0;
i < nb_images;
i++) {
4425 for (
int j = 0; j <
p->vkctx.host_image_props.copySrcLayoutCount; j++) {
4426 if (hwf_vk->
layout[
i] ==
p->vkctx.host_image_props.pCopySrcLayouts[j]) {
4434 layout_ch_info[nb_layout_ch] = (VkHostImageLayoutTransitionInfoEXT) {
4435 .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT,
4436 .image = hwf_vk->
img[
i],
4437 .oldLayout = hwf_vk->
layout[
i],
4438 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
4439 .subresourceRange = {
4440 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
4446 hwf_vk->
layout[
i] = layout_ch_info[nb_layout_ch].newLayout;
4451 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
4452 .pSemaphores = hwf_vk->
sem,
4454 .semaphoreCount = nb_images,
4460 vk->TransitionImageLayoutEXT(hwctx->
act_dev,
4461 nb_layout_ch, layout_ch_info);
4464 VkMemoryToImageCopyEXT region_info = {
4465 .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT,
4466 .imageSubresource = {
4470 VkCopyMemoryToImageInfoEXT copy_info = {
4471 .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT,
4473 .pRegions = ®ion_info,
4476 int img_idx =
FFMIN(
i, (nb_images - 1));
4480 region_info.pHostPointer = swf->
data[
i];
4482 region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 };
4483 copy_info.dstImage = hwf_vk->
img[img_idx];
4484 copy_info.dstImageLayout = hwf_vk->
layout[img_idx];
4486 vk->CopyMemoryToImageEXT(hwctx->
act_dev, ©_info);
4489 VkImageToMemoryCopyEXT region_info = {
4490 .sType = VK_STRUCTURE_TYPE_IMAGE_TO_MEMORY_COPY_EXT,
4491 .imageSubresource = {
4495 VkCopyImageToMemoryInfoEXT copy_info = {
4496 .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO_EXT,
4498 .pRegions = ®ion_info,
4501 int img_idx =
FFMIN(
i, (nb_images - 1));
4505 region_info.pHostPointer = swf->
data[
i];
4506 region_info.memoryRowLength = swf->
linesize[
i] /
desc->comp[
i].step;
4508 region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 };
4509 copy_info.srcImage = hwf_vk->
img[img_idx];
4510 copy_info.srcImageLayout = hwf_vk->
layout[img_idx];
4512 vk->CopyImageToMemoryEXT(hwctx->
act_dev, ©_info);
4531 int host_mapped = 0;
4546 VkCommandBuffer cmd_buf;
4558 if (hwctx->
usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT &&
4559 !(
p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY))
4567 region[
i] = (VkBufferImageCopy) {
4570 .bufferImageHeight = p_h,
4571 .imageSubresource.layerCount = 1,
4572 .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
4598 cmd_buf = exec->
buf;
4604 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
4605 VK_PIPELINE_STAGE_2_TRANSFER_BIT);
4629 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
4630 VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
4631 upload ? VK_ACCESS_TRANSFER_WRITE_BIT :
4632 VK_ACCESS_TRANSFER_READ_BIT,
4633 upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL :
4634 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4635 p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED :
p->img_qfs[0]);
4637 vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
4638 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
4639 .pImageMemoryBarriers = img_bar,
4640 .imageMemoryBarrierCount = nb_img_bar,
4644 int buf_idx =
FFMIN(
i, (nb_bufs - 1));
4645 int img_idx =
FFMIN(
i, (nb_images - 1));
4648 uint32_t orig_stride = region[
i].bufferRowLength;
4649 region[
i].bufferRowLength /=
desc->comp[
i].step;
4653 vk->CmdCopyBufferToImage(cmd_buf, vkbuf->
buf,
4654 hwf_vk->
img[img_idx],
4655 img_bar[img_idx].newLayout,
4658 vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->
img[img_idx],
4659 img_bar[img_idx].newLayout,
4663 region[
i].bufferRowLength = orig_stride;
4669 }
else if (!upload) {
4676 for (
int i = 0;
i < nb_bufs;
i++)
4687 switch (
src->format) {
4697 return vulkan_transfer_data_from_cuda(hwfc,
dst,
src);
4700 if (
src->hw_frames_ctx)
4724 CudaFunctions *cu = cu_internal->
cuda_dl;
4735 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
4739 err = vulkan_export_to_cuda(hwfc,
dst->hw_frames_ctx,
src);
4748 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
4749 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
4752 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
4753 nb_images, cuda_dev->stream));
4758 CUDA_MEMCPY2D cpy = {
4759 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
4760 .dstDevice = (CUdeviceptr)
dst->data[
i],
4761 .dstPitch =
dst->linesize[
i],
4764 .srcMemoryType = CU_MEMORYTYPE_ARRAY,
4765 .srcArray = dst_int->cu_array[
i],
4771 cpy.WidthInBytes =
w *
desc->comp[
i].step;
4774 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
4779 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
4780 nb_images, cuda_dev->stream));
4806 switch (
dst->format) {
4816 return vulkan_transfer_data_to_cuda(hwfc,
dst,
src);
4819 if (
dst->hw_frames_ctx)