aboutsummaryrefslogtreecommitdiff
path: root/src/layer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/layer.cc')
-rw-r--r--src/layer.cc586
1 files changed, 336 insertions, 250 deletions
diff --git a/src/layer.cc b/src/layer.cc
index 5e652f0..cead7cd 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -5,6 +5,9 @@
#include <unordered_map>
#include <utility>
+// TODO(temporary): std::deque backs the capped ring of in-flight timestamp handles.
+#include <deque>
+
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vk_layer.h>
#include <vulkan/vk_platform.h>
@@ -16,6 +19,7 @@
#include "instance_context.hh"
#include "layer_context.hh"
#include "queue_context.hh"
+#include "timestamp_pool.hh"
namespace low_latency {
@@ -25,8 +29,9 @@ LayerContext layer_context;
} // namespace
-template <typename T, typename sType>
-static T* get_link_info(const void* const head, const sType& stype) {
+template <typename T, typename sType, typename fType>
+static T* get_link_info(const void* const head, const sType& stype,
+ const fType& ftype) {
for (auto i = reinterpret_cast<const VkBaseInStructure*>(head); i;
i = i->pNext) {
@@ -35,7 +40,7 @@ static T* get_link_info(const void* const head, const sType& stype) {
}
const auto info = reinterpret_cast<const T*>(i);
- if (info->function != VK_LAYER_LINK_INFO) {
+ if (info->function != ftype) {
continue;
}
@@ -49,7 +54,8 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) {
const auto link_info = get_link_info<VkLayerInstanceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO);
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO,
+ VK_LAYER_LINK_INFO);
if (!link_info || !link_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
@@ -78,23 +84,23 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
}
const auto key = layer_context.get_key(*pInstance);
+
+#define INSTANCE_VTABLE_LOAD(name) \
+ .name = reinterpret_cast<PFN_vk##name>(gipa(*pInstance, "vk" #name))
auto vtable = VkuInstanceDispatchTable{
- .DestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
- gipa(*pInstance, "vkDestroyInstance")),
- .EnumeratePhysicalDevices =
- reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
- gipa(*pInstance, "vkEnumeratePhysicalDevices")),
- .GetInstanceProcAddr = reinterpret_cast<PFN_vkGetInstanceProcAddr>(
- gipa(*pInstance, "vkGetInstanceProcAddr")),
- .EnumerateDeviceExtensionProperties =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
- gipa(*pInstance, "vkEnumerateDeviceExtensionProperties")),
+ INSTANCE_VTABLE_LOAD(DestroyInstance),
+ INSTANCE_VTABLE_LOAD(EnumeratePhysicalDevices),
+ INSTANCE_VTABLE_LOAD(GetInstanceProcAddr),
+ INSTANCE_VTABLE_LOAD(CreateDevice),
+ INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties),
};
+#undef INSTANCE_VTABLE_LOAD
const auto lock = std::scoped_lock{layer_context.mutex};
assert(!layer_context.contexts.contains(key));
+
layer_context.contexts.try_emplace(
- key, std::make_unique<InstanceContext>(*pInstance, std::move(vtable)));
+ key, std::make_shared<InstanceContext>(*pInstance, std::move(vtable)));
return VK_SUCCESS;
}
@@ -102,11 +108,55 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
static VKAPI_ATTR void VKAPI_CALL
DestroyInstance(VkInstance instance, const VkAllocationCallbacks* allocator) {
+ const auto destroy_instance_func = [&]() -> auto {
+ const auto context = layer_context.get_context(instance);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+
+ // Erase our physical devices owned by this instance from the global
+ // context.
+ for (const auto& [key, _] : context->phys_devices) {
+ assert(layer_context.contexts.erase(key));
+ }
+
+ const auto key = layer_context.get_key(instance);
+ assert(layer_context.contexts.erase(key));
+
+ // Should be the last ptr now like DestroyDevice.
+ assert(context.unique());
+ return context->vtable.DestroyInstance;
+ }();
+
+ destroy_instance_func(instance, allocator);
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL EnumeratePhysicalDevices(
+ VkInstance instance, std::uint32_t* count, VkPhysicalDevice* devices) {
+
+ const auto context = layer_context.get_context(instance);
+
+ if (const auto result =
+ context->vtable.EnumeratePhysicalDevices(instance, count, devices);
+ !devices || !count || result != VK_SUCCESS) {
+
+ return result;
+ }
+
const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto C = *count;
+ for (auto i = std::uint32_t{0}; i < C; ++i) {
+ const auto& device = devices[i];
+
+ const auto key = layer_context.get_key(device);
+ const auto [it, inserted] =
+ layer_context.contexts.try_emplace(key, nullptr);
- const auto key = layer_context.get_key(instance);
- assert(layer_context.contexts.contains(key));
- layer_context.contexts.erase(key);
+ if (inserted) {
+ it->second =
+ std::make_shared<PhysicalDeviceContext>(*context, device);
+ }
+ }
+
+ return VK_SUCCESS;
}
static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
@@ -114,56 +164,64 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) {
const auto create_info = get_link_info<VkLayerDeviceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO);
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
+ VK_LAYER_LINK_INFO);
if (!create_info || !create_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
}
+ const auto callback_info = get_link_info<VkLayerDeviceCreateInfo>(
+ pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
+ VK_LOADER_DATA_CALLBACK);
+ if (!callback_info || !callback_info->u.pLayerInfo) {
+ return VK_ERROR_INITIALIZATION_FAILED;
+ }
+
+ const auto sdld = callback_info->u.pfnSetDeviceLoaderData;
const auto gipa = create_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
const auto gdpa = create_info->u.pLayerInfo->pfnNextGetDeviceProcAddr;
- if (!gipa || !gdpa) {
+ if (!sdld || !gipa || !gdpa) {
return VK_ERROR_INITIALIZATION_FAILED;
}
create_info->u.pLayerInfo = create_info->u.pLayerInfo->pNext;
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& context = layer_context.get_context<InstanceContext>(physical_device);
+ const auto physical_device_context =
+ layer_context.get_context(physical_device);
+ auto& instance_context = physical_device_context->instance;
const auto next_extensions =
[&]() -> std::optional<std::vector<const char*>> {
- const auto supported_extensions =
- [&]() -> std::optional<std::vector<VkExtensionProperties>> {
- const auto enumerate_device_extensions =
- reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(gipa(
- context.instance, "vkEnumerateDeviceExtensionProperties"));
- if (!enumerate_device_extensions) {
- return std::nullopt;
- }
+ const auto enumerate_device_extensions =
+ reinterpret_cast<PFN_vkEnumerateDeviceExtensionProperties>(
+ gipa(instance_context.instance,
+ "vkEnumerateDeviceExtensionProperties"));
+ if (!enumerate_device_extensions) {
+ return std::nullopt;
+ }
- auto count = std::uint32_t{};
- if (enumerate_device_extensions(physical_device, nullptr, &count,
- nullptr) != VK_SUCCESS) {
+ auto count = std::uint32_t{};
+ if (enumerate_device_extensions(physical_device, nullptr, &count,
+ nullptr) != VK_SUCCESS) {
- return std::nullopt;
- }
+ return std::nullopt;
+ }
- auto supported_extensions =
- std::vector<VkExtensionProperties>(count);
- if (enumerate_device_extensions(physical_device, nullptr, &count,
- std::data(supported_extensions)) !=
- VK_SUCCESS) {
+ auto supported_extensions = std::vector<VkExtensionProperties>(count);
+ if (enumerate_device_extensions(physical_device, nullptr, &count,
+ std::data(supported_extensions)) !=
+ VK_SUCCESS) {
- return std::nullopt;
- }
+ return std::nullopt;
+ }
- return supported_extensions;
- }();
+ auto next_extensions = std::vector<const char*>{};
+ if (pCreateInfo->enabledExtensionCount &&
+ pCreateInfo->ppEnabledExtensionNames) {
- auto next_extensions =
- std::vector{*pCreateInfo->ppEnabledExtensionNames,
- std::next(*pCreateInfo->ppEnabledExtensionNames +
- pCreateInfo->enabledExtensionCount)};
+ std::ranges::copy_n(pCreateInfo->ppEnabledExtensionNames,
+ pCreateInfo->enabledExtensionCount,
+ std::back_inserter(next_extensions));
+ }
const auto wanted_extensions = {
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
@@ -180,12 +238,11 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
continue; // Already included, ignore it.
}
- if (std::ranges::none_of(*supported_extensions,
- [&](const auto& supported_extension) {
- return !std::strcmp(
- supported_extension.extensionName,
- wanted);
- })) {
+ if (std::ranges::none_of(
+ supported_extensions, [&](const auto& supported_extension) {
+ return !std::strcmp(supported_extension.extensionName,
+ wanted);
+ })) {
return std::nullopt; // We don't support it, the layer can't
// work.
@@ -201,8 +258,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return VK_ERROR_INITIALIZATION_FAILED;
}
- const auto create_device = reinterpret_cast<PFN_vkCreateDevice>(
- gipa(VK_NULL_HANDLE, "vkCreateDevice"));
+ const auto create_device = instance_context.vtable.CreateDevice;
if (!create_device) {
return VK_ERROR_INITIALIZATION_FAILED;
}
@@ -221,164 +277,199 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return result;
}
+#define DEVICE_VTABLE_LOAD(name) \
+ .name = reinterpret_cast<PFN_vk##name>(gdpa(*pDevice, "vk" #name))
auto vtable = VkuDeviceDispatchTable{
- .GetDeviceProcAddr = reinterpret_cast<PFN_vkGetDeviceProcAddr>(
- gdpa(*pDevice, "vkGetDeviceProcAddr")),
- .DestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
- gdpa(*pDevice, "vkDestroyDevice")),
- .GetDeviceQueue = reinterpret_cast<PFN_vkGetDeviceQueue>(
- gdpa(*pDevice, "vkGetDeviceQueue")),
- .QueueSubmit = reinterpret_cast<PFN_vkQueueSubmit>(
- gdpa(*pDevice, "vkQueueSubmit")),
- .CreateSemaphore = reinterpret_cast<PFN_vkCreateSemaphore>(
- gdpa(*pDevice, "vkCreateSemaphore")),
- .DestroySemaphore = reinterpret_cast<PFN_vkDestroySemaphore>(
- gdpa(*pDevice, "vkDestroySemaphore")),
- .CreateQueryPool = reinterpret_cast<PFN_vkCreateQueryPool>(
- gdpa(*pDevice, "vkCreateQueryPool")),
- .DestroyQueryPool = reinterpret_cast<PFN_vkDestroyQueryPool>(
- gdpa(*pDevice, "vkDestroyQueryPool")),
- .GetQueryPoolResults = reinterpret_cast<PFN_vkGetQueryPoolResults>(
- gdpa(*pDevice, "vkGetQueryPoolResults")),
- .CreateCommandPool = reinterpret_cast<PFN_vkCreateCommandPool>(
- gdpa(*pDevice, "vkCreateCommandPool")),
- .DestroyCommandPool = reinterpret_cast<PFN_vkDestroyCommandPool>(
- gdpa(*pDevice, "vkDestroyCommandPool")),
- .AllocateCommandBuffers =
- reinterpret_cast<PFN_vkAllocateCommandBuffers>(
- gdpa(*pDevice, "vkAllocateCommandBuffers")),
- .FreeCommandBuffers = reinterpret_cast<PFN_vkFreeCommandBuffers>(
- gdpa(*pDevice, "vkFreeCommandBuffers")),
- .BeginCommandBuffer = reinterpret_cast<PFN_vkBeginCommandBuffer>(
- gdpa(*pDevice, "vkBeginCommandBuffer")),
- .EndCommandBuffer = reinterpret_cast<PFN_vkEndCommandBuffer>(
- gdpa(*pDevice, "vkEndCommandBuffer")),
- .ResetCommandBuffer = reinterpret_cast<PFN_vkResetCommandBuffer>(
- gdpa(*pDevice, "vkResetCommandBuffer")),
- .CmdDraw = reinterpret_cast<PFN_vkCmdDraw>(gdpa(*pDevice, "vkCmdDraw")),
- .CmdDrawIndexed = reinterpret_cast<PFN_vkCmdDrawIndexed>(
- gdpa(*pDevice, "vkCmdDrawIndexed")),
- .CmdResetQueryPool = reinterpret_cast<PFN_vkCmdResetQueryPool>(
- gdpa(*pDevice, "vkCmdResetQueryPool")),
- .GetDeviceQueue2 = reinterpret_cast<PFN_vkGetDeviceQueue2>(
- gdpa(*pDevice, "vkGetDeviceQueue2")),
- .QueueSubmit2 = reinterpret_cast<PFN_vkQueueSubmit2>(
- gdpa(*pDevice, "vkQueueSubmit2")),
- .QueuePresentKHR = reinterpret_cast<PFN_vkQueuePresentKHR>(
- gdpa(*pDevice, "vkQueuePresentKHR")),
- .GetSemaphoreCounterValueKHR =
- reinterpret_cast<PFN_vkGetSemaphoreCounterValueKHR>(
- gdpa(*pDevice, "vkGetSemaphoreCounterValueKHR")),
- .CmdWriteTimestamp2KHR = reinterpret_cast<PFN_vkCmdWriteTimestamp2KHR>(
- gdpa(*pDevice, "vkCmdWriteTimestamp2KHR")),
- .QueueSubmit2KHR = reinterpret_cast<PFN_vkQueueSubmit2KHR>(
- gdpa(*pDevice, "vkQueueSubmit2KHR")),
+ DEVICE_VTABLE_LOAD(GetDeviceProcAddr),
+ DEVICE_VTABLE_LOAD(DestroyDevice),
+ DEVICE_VTABLE_LOAD(GetDeviceQueue),
+ DEVICE_VTABLE_LOAD(QueueSubmit),
+ DEVICE_VTABLE_LOAD(CreateSemaphore),
+ DEVICE_VTABLE_LOAD(DestroySemaphore),
+ DEVICE_VTABLE_LOAD(CreateQueryPool),
+ DEVICE_VTABLE_LOAD(DestroyQueryPool),
+ DEVICE_VTABLE_LOAD(GetQueryPoolResults),
+ DEVICE_VTABLE_LOAD(CreateCommandPool),
+ DEVICE_VTABLE_LOAD(DestroyCommandPool),
+ DEVICE_VTABLE_LOAD(AllocateCommandBuffers),
+ DEVICE_VTABLE_LOAD(FreeCommandBuffers),
+ DEVICE_VTABLE_LOAD(BeginCommandBuffer),
+ DEVICE_VTABLE_LOAD(EndCommandBuffer),
+ DEVICE_VTABLE_LOAD(ResetCommandBuffer),
+ DEVICE_VTABLE_LOAD(CmdResetQueryPool),
+ DEVICE_VTABLE_LOAD(CmdDraw),
+ DEVICE_VTABLE_LOAD(CmdDrawIndexed),
+ DEVICE_VTABLE_LOAD(GetDeviceQueue2),
+ DEVICE_VTABLE_LOAD(QueueSubmit2),
+ DEVICE_VTABLE_LOAD(AcquireNextImageKHR),
+ DEVICE_VTABLE_LOAD(QueuePresentKHR),
+ DEVICE_VTABLE_LOAD(AcquireNextImage2KHR),
+ DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR),
+ DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR),
+ DEVICE_VTABLE_LOAD(QueueSubmit2KHR),
};
+#undef DEVICE_VTABLE_LOAD
const auto key = layer_context.get_key(*pDevice);
+ const auto lock = std::scoped_lock{layer_context.mutex};
assert(!layer_context.contexts.contains(key));
+
layer_context.contexts.try_emplace(
- key,
- std::make_unique<DeviceContext>(context, *pDevice, std::move(vtable)));
+ key, std::make_shared<DeviceContext>(instance_context, *pDevice, sdld,
+ std::move(vtable)));
return VK_SUCCESS;
}
static VKAPI_ATTR void VKAPI_CALL
DestroyDevice(VkDevice device, const VkAllocationCallbacks* allocator) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- const auto key = layer_context.get_key(device);
- assert(layer_context.contexts.contains(key));
- layer_context.contexts.erase(key);
+
+ const auto destroy_device_func = [&]() -> auto {
+ const auto device_context = layer_context.get_context(device);
+
+ const auto func = device_context->vtable.DestroyDevice;
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ // Remove all owned queues from our global context pool.
+ for (const auto& [queue, _] : device_context->queues) {
+ const auto key = layer_context.get_key(queue);
+ assert(layer_context.contexts.erase(key));
+ }
+
+ const auto key = layer_context.get_key(device);
+ assert(layer_context.contexts.erase(key));
+
+ // should be the last shared ptr now, so its destructor can be called.
+ // the destructor should expect its owned queues to be unique as well!
+ assert(device_context.unique());
+
+ return func;
+ }();
+
+ destroy_device_func(device, allocator);
}
-// Small amount of duplication, we can't assume gdq2 is available apparently.
static VKAPI_ATTR void VKAPI_CALL
GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index,
std::uint32_t queue_index, VkQueue* queue) {
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& device_context = layer_context.get_context<DeviceContext>(device);
+ const auto device_context = layer_context.get_context(device);
- device_context.vtable.GetDeviceQueue(device, queue_family_index,
- queue_index, queue);
+ device_context->vtable.GetDeviceQueue(device, queue_family_index,
+ queue_index, queue);
if (!queue || !*queue) {
return;
}
- auto& queue_contexts = device_context.queue_contexts;
- if (!queue_contexts.contains(*queue)) {
- queue_contexts.try_emplace(
- *queue, std::make_unique<QueueContext>(device_context, *queue,
- queue_family_index));
+ // Look in our layer context, which has everything. If we were able to
+ // insert a nullptr key, then it didn't already exist so we should
+ // construct a new one.
+ const auto key = layer_context.get_key(*queue);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto [it, inserted] = layer_context.contexts.try_emplace(key);
+ if (inserted) {
+ it->second = std::make_shared<QueueContext>(*device_context, *queue,
+ queue_family_index);
}
+
+ // it->second should be QueueContext, also it might already be there
+ // but this is expected.
+ const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second);
+ assert(ptr);
+ device_context->queues.emplace(*queue, ptr);
}
+// Mirrors GetDeviceQueue above; the duplication is deliberate because we
+// cannot assume vkGetDeviceQueue is available when the app uses the 2 variant.
static VKAPI_ATTR void VKAPI_CALL GetDeviceQueue2(
VkDevice device, const VkDeviceQueueInfo2* info, VkQueue* queue) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& device_context = layer_context.get_context<DeviceContext>(device);
+ const auto device_context = layer_context.get_context(device);
- device_context.vtable.GetDeviceQueue2(device, info, queue);
+ device_context->vtable.GetDeviceQueue2(device, info, queue);
if (!queue || !*queue) {
return;
}
- auto& queue_contexts = device_context.queue_contexts;
- if (!queue_contexts.contains(*queue)) {
- queue_contexts.try_emplace(
- *queue, std::make_unique<QueueContext>(device_context, *queue,
- info->queueFamilyIndex));
+ const auto key = layer_context.get_key(*queue);
+ const auto lock = std::scoped_lock{layer_context.mutex};
+ const auto [it, inserted] = layer_context.contexts.try_emplace(key);
+ if (inserted) {
+ it->second = std::make_shared<QueueContext>(*device_context, *queue,
+ info->queueFamilyIndex);
+ }
+
+ const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second);
+ assert(ptr);
+ device_context->queues.emplace(*queue, ptr);
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
+ VkDevice device, VkSwapchainKHR swapchain, std::uint64_t timeout,
+ VkSemaphore semaphore, VkFence fence, std::uint32_t* pImageIndex) {
+
+ const auto context = layer_context.get_context(device);
+ if (const auto result = context->vtable.AcquireNextImageKHR(
+ device, swapchain, timeout, semaphore, fence, pImageIndex);
+ result != VK_SUCCESS) {
+
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR(
+ VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo,
+ std::uint32_t* pImageIndex) {
+
+ const auto context = layer_context.get_context(device);
+ if (const auto result = context->vtable.AcquireNextImage2KHR(
+ device, pAcquireInfo, pImageIndex);
+ result != VK_SUCCESS) {
+
+ return result;
}
+
+ return VK_SUCCESS;
}
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo* submit_info, VkFence fence) {
- const auto lock = std::scoped_lock{layer_context.mutex};
-
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto& queue_context = layer_context.get_context(queue);
+ const auto& vtable = queue_context->device_context.vtable;
if (!submit_count) { // no-op submit we shouldn't worry about
return vtable.QueueSubmit(queue, submit_count, submit_info, fence);
}
- // Create a new vector of submit infos, copy their existing ones.
+ // Create a new vector of submit infos.
auto next_submit_infos = std::vector<VkSubmitInfo>{};
- next_submit_infos.reserve(submit_count + 2);
- auto timestamp_handle = queue_context.timestamp_pool->acquire();
+ auto timestamp_handle = queue_context->timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
- // The first submit info we use will steal their wait semaphores.
- next_submit_infos.push_back(VkSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = submit_info->pNext,
- .waitSemaphoreCount = submit_info[0].waitSemaphoreCount,
- .pWaitSemaphores = submit_info[0].pWaitSemaphores,
- .pWaitDstStageMask = submit_info[0].pWaitDstStageMask,
- .commandBufferCount = 1,
- .pCommandBuffers = &head_cb,
- });
+ const auto next_command_buffers = [&]() -> auto {
+ auto next_command_buffers = std::vector<VkCommandBuffer>{head_cb};
+ std::ranges::copy_n(submit_info[0].pCommandBuffers,
+ submit_info[0].commandBufferCount,
+ std::back_inserter(next_command_buffers));
+ return next_command_buffers;
+ }();
- // Fill in original submit infos but erase the wait semaphores on the
- // first because we stole them earlier.
std::ranges::copy_n(submit_info, submit_count,
std::back_inserter(next_submit_infos));
- next_submit_infos[1].pWaitSemaphores = nullptr;
- next_submit_infos[1].waitSemaphoreCount = 0u;
+ next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers);
+ next_submit_infos[0].commandBufferCount = std::size(next_command_buffers);
- const auto TODO_next = std::uint64_t{layer_context.current_frame + 1};
+ const auto next_signal = queue_context->semaphore_sequence + 1;
const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.signalSemaphoreValueCount = 1,
- .pSignalSemaphoreValues = &TODO_next,
+ .pSignalSemaphoreValues = &next_signal,
};
next_submit_infos.push_back(VkSubmitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
@@ -386,7 +477,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
.commandBufferCount = 1,
.pCommandBuffers = &tail_cb,
.signalSemaphoreCount = 1,
- .pSignalSemaphores = &queue_context.semaphore,
+ .pSignalSemaphores = &queue_context->semaphore,
});
if (const auto res =
@@ -397,6 +488,14 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
return res;
}
+    // Temporary workaround: keep recently submitted timestamp handles alive in a
+ queue_context->handle_hack.push_front(std::move(timestamp_handle));
+ if (std::size(queue_context->handle_hack) > 250) {
+ queue_context->handle_hack.pop_back();
+ }
+
+ ++queue_context->semaphore_sequence;
+
return VK_SUCCESS;
}
@@ -405,55 +504,69 @@ static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
const VkSubmitInfo2* submit_infos, VkFence fence) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto queue_context = layer_context.get_context(queue);
+ const auto& vtable = queue_context->device_context.vtable;
- if (!submit_count) { // another no-op submit
+ if (!submit_count) {
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
}
- auto next_submit_infos = std::vector<VkSubmitInfo2>();
- next_submit_infos.reserve(submit_count + 2);
-
- auto timestamp_handle = queue_context.timestamp_pool->acquire();
+ auto timestamp_handle = queue_context->timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
- const auto head_cb_info = VkCommandBufferSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
- .commandBuffer = head_cb,
- };
- next_submit_infos.push_back(VkSubmitInfo2{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
- .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount,
- .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos,
- .commandBufferInfoCount = 1,
- .pCommandBufferInfos = &head_cb_info,
- });
+ const auto next_command_buffers = [&]() -> auto {
+ auto next_command_buffers = std::vector<VkCommandBufferSubmitInfo>{};
+ next_command_buffers.push_back(VkCommandBufferSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+ .commandBuffer = head_cb,
+ });
+ std::ranges::copy_n(submit_infos[0].pCommandBufferInfos,
+ submit_infos[0].commandBufferInfoCount,
+ std::back_inserter(next_command_buffers));
+ return next_command_buffers;
+ }();
+
+ auto next_submit_infos = std::vector<VkSubmitInfo2>();
std::ranges::copy_n(submit_infos, submit_count,
std::back_inserter(next_submit_infos));
- next_submit_infos[1].pWaitSemaphoreInfos = nullptr;
- next_submit_infos[1].waitSemaphoreInfoCount = 0;
-
- const auto tail_cb_info = VkCommandBufferSubmitInfo{
+ next_submit_infos[0].pCommandBufferInfos = std::data(next_command_buffers);
+ next_submit_infos[0].commandBufferInfoCount =
+ std::size(next_command_buffers);
+
+ const auto tail_ssi = VkSemaphoreSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = queue_context->semaphore,
+ .value = queue_context->semaphore_sequence + 1,
+ .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ };
+ const auto tail_cbsi = VkCommandBufferSubmitInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = tail_cb,
};
next_submit_infos.push_back(VkSubmitInfo2{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
- .waitSemaphoreInfoCount = submit_infos[0].waitSemaphoreInfoCount,
- .pWaitSemaphoreInfos = submit_infos[0].pWaitSemaphoreInfos,
.commandBufferInfoCount = 1,
- .pCommandBufferInfos = &tail_cb_info,
+ .pCommandBufferInfos = &tail_cbsi,
+ .signalSemaphoreInfoCount = 1,
+ .pSignalSemaphoreInfos = &tail_ssi,
});
if (const auto res =
- vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
+ vtable.QueueSubmit2(queue, std::size(next_submit_infos),
+ std::data(next_submit_infos), fence);
res != VK_SUCCESS) {
return res;
}
+    // Temporary workaround: retain recent timestamp handles (capped ring below).
+ queue_context->handle_hack.push_front(std::move(timestamp_handle));
+ if (std::size(queue_context->handle_hack) > 250) {
+ queue_context->handle_hack.pop_back();
+ }
+
+ ++queue_context->semaphore_sequence;
+
return VK_SUCCESS;
}
@@ -467,9 +580,8 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count,
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
- const auto lock = std::scoped_lock{layer_context.mutex};
- auto& queue_context = layer_context.get_context<QueueContext>(queue);
- const auto& vtable = queue_context.device_context.vtable;
+ const auto& vtable =
+ layer_context.get_context(queue)->device_context.vtable;
if (const auto res = vtable.QueuePresentKHR(queue, present_info);
res != VK_SUCCESS) {
@@ -477,69 +589,49 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
return res;
}
- std::cout << "queuePresentKHR called for queue " << queue << '\n';
-
- // Update all of our information about this queue's timestamp pool!
- queue_context.timestamp_pool->poll();
-
- // While we might be submitting on this queue, let's see what our timeline
- // semaphore says we're at.
- uint64_t value = 0;
- if (const auto res = vtable.GetSemaphoreCounterValueKHR(
- queue_context.device_context.device, queue_context.semaphore,
- &value);
- res != VK_SUCCESS) {
-
- return res;
- }
-
- std::cout << " frame_index: " << layer_context.current_frame << '\n';
- std::cout << " semaphore: " << value << '\n';
- std::cout << " queue: " << queue << '\n';
-
- ++layer_context.current_frame;
return VK_SUCCESS;
}
} // namespace low_latency
-static const auto instance_functions =
- std::unordered_map<std::string_view, const PFN_vkVoidFunction>{
- {"vkGetInstanceProcAddr",
- reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetInstanceProcAddr)},
+using func_map_t = std::unordered_map<std::string_view, PFN_vkVoidFunction>;
+#define HOOK_ENTRY(vk_name_literal, fn_sym) \
+ {vk_name_literal, reinterpret_cast<PFN_vkVoidFunction>(fn_sym)}
+static const auto instance_functions = func_map_t{
+ HOOK_ENTRY("vkCreateDevice", low_latency::CreateDevice),
- {"vkCreateInstance",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateInstance)},
- {"vkDestroyInstance",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyInstance)},
- };
+ HOOK_ENTRY("vkGetInstanceProcAddr", LowLatency_GetInstanceProcAddr),
+ HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr),
-static const auto device_functions =
- std::unordered_map<std::string_view, const PFN_vkVoidFunction>{
- {"vkGetDeviceProcAddr",
- reinterpret_cast<PFN_vkVoidFunction>(LowLatency_GetDeviceProcAddr)},
+ HOOK_ENTRY("vkEnumeratePhysicalDevices",
+ low_latency::EnumeratePhysicalDevices),
- {"vkCreateDevice",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::CreateDevice)},
- {"vkDestroyDevice",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::DestroyDevice)},
+ HOOK_ENTRY("vkCreateInstance", low_latency::CreateInstance),
+ HOOK_ENTRY("vkDestroyInstance", low_latency::DestroyInstance),
+};
+static const auto device_functions = func_map_t{
+ HOOK_ENTRY("vkGetDeviceProcAddr", LowLatency_GetDeviceProcAddr),
- {"vkGetDeviceQueue",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue)},
- {"vkGetDeviceQueue2",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::GetDeviceQueue2)},
+ HOOK_ENTRY("vkDestroyDevice", low_latency::DestroyDevice),
- {"vkQueueSubmit",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit)},
- {"vkQueueSubmit2",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueueSubmit2)},
+ HOOK_ENTRY("vkGetDeviceQueue", low_latency::GetDeviceQueue),
+ HOOK_ENTRY("vkGetDeviceQueue2", low_latency::GetDeviceQueue2),
- {"vkQueuePresentKHR",
- reinterpret_cast<PFN_vkVoidFunction>(low_latency::vkQueuePresentKHR)},
- };
+ HOOK_ENTRY("vkQueueSubmit", low_latency::vkQueueSubmit),
+ HOOK_ENTRY("vkQueueSubmit2", low_latency::vkQueueSubmit2),
+
+ HOOK_ENTRY("vkQueuePresentKHR", low_latency::vkQueuePresentKHR),
+
+ HOOK_ENTRY("vkAcquireNextImageKHR", low_latency::vkAcquireNextImageKHR),
+ HOOK_ENTRY("vkAcquireNextImage2KHR", low_latency::vkAcquireNextImage2KHR),
+};
+#undef HOOK_ENTRY
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
+ if (!pName || !device) {
+ return nullptr;
+ }
if (const auto it = device_functions.find(pName);
it != std::end(device_functions)) {
@@ -547,26 +639,20 @@ LowLatency_GetDeviceProcAddr(VkDevice device, const char* const pName) {
return it->second;
}
- const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
-
using namespace low_latency;
- const auto& context = layer_context.get_context<DeviceContext>(device);
- return context.vtable.GetDeviceProcAddr(device, pName);
+ const auto& vtable = layer_context.get_context(device)->vtable;
+ return vtable.GetDeviceProcAddr(device, pName);
}
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
LowLatency_GetInstanceProcAddr(VkInstance instance, const char* const pName) {
+ if (const auto it = instance_functions.find(pName);
+ it != std::end(instance_functions)) {
- for (const auto& functions : {device_functions, instance_functions}) {
-
- if (const auto it = functions.find(pName); it != std::end(functions)) {
- return it->second;
- }
+ return it->second;
}
- const auto lock = std::scoped_lock{low_latency::layer_context.mutex};
-
using namespace low_latency;
- const auto& context = layer_context.get_context<InstanceContext>(instance);
- return context.vtable.GetInstanceProcAddr(instance, pName);
-} \ No newline at end of file
+ const auto& vtable = layer_context.get_context(instance)->vtable;
+ return vtable.GetInstanceProcAddr(instance, pName);
+}