diff options
| -rw-r--r-- | src/context.hh | 15 | ||||
| -rw-r--r-- | src/device_context.cc | 6 | ||||
| -rw-r--r-- | src/instance_context.cc | 5 | ||||
| -rw-r--r-- | src/instance_context.hh | 9 | ||||
| -rw-r--r-- | src/layer.cc | 55 | ||||
| -rw-r--r-- | src/layer_context.cc | 10 | ||||
| -rw-r--r-- | src/layer_context.hh | 20 | ||||
| -rw-r--r-- | src/queue_context.cc | 18 |
8 files changed, 88 insertions, 50 deletions
diff --git a/src/context.hh b/src/context.hh index 5972740..91fbf91 100644 --- a/src/context.hh +++ b/src/context.hh @@ -1,13 +1,20 @@ #ifndef CONTEXT_HH_ #define CONTEXT_HH_ -// The purpose of this class is to provide a base class for Context classes. - namespace low_latency { +// A context class doesn't do much by itself. We just use it to provide a +// virtual destructor so we can store a bunch of shared_ptrs in the same +// container and rely on RTTI in the layer context. It also deletes the copy and +// move constructors for derived classes implicitly, and that's pretty much it. +// +// We _could_ do something weird and complicated where we define virtual pure +// hashing and equality functions so we can store them in an unordered_set, but +// it's just unnecessary complexity and doesn't allow us to perform 'do you exist' +// lookups without creating an object. class Context { - -public: + + public: Context(); Context(const Context& context) = delete; Context(Context&& context) = delete; diff --git a/src/device_context.cc b/src/device_context.cc index 49b7808..97103de 100644 --- a/src/device_context.cc +++ b/src/device_context.cc @@ -134,9 +134,11 @@ void DeviceContext::notify_antilag_update(const VkAntiLagDataAMD& data) { return; } - if (this->antilag_mode == VK_ANTI_LAG_MODE_ON_AMD) { - this->sleep_in_input(); + if (this->antilag_mode != VK_ANTI_LAG_MODE_ON_AMD) { + return; } + + this->sleep_in_input(); } void DeviceContext::notify_queue_present(const QueueContext& queue) { diff --git a/src/instance_context.cc b/src/instance_context.cc index d12766f..5a4d48a 100644 --- a/src/instance_context.cc +++ b/src/instance_context.cc @@ -5,9 +5,10 @@ namespace low_latency { -InstanceContext::InstanceContext(const VkInstance& instance, +InstanceContext::InstanceContext(const LayerContext& parent_context, + const VkInstance& instance, VkuInstanceDispatchTable&& vtable) - : instance(instance), vtable(std::move(vtable)) {} + : layer(parent_context), instance(instance), vtable(std::move(vtable)) {} InstanceContext::~InstanceContext() { // Similar to devices, we should own the only shared ptr at this point so diff --git a/src/instance_context.hh b/src/instance_context.hh index 3b71a82..001cde8 100644 --- a/src/instance_context.hh +++ b/src/instance_context.hh @@ -10,17 +10,22 @@ namespace low_latency { +class LayerContext; class PhysicalDeviceContext; struct InstanceContext final : public Context { + const LayerContext& layer; + const VkInstance instance; const VkuInstanceDispatchTable vtable; - std::unordered_map<void*, std::shared_ptr<PhysicalDeviceContext>> phys_devices; + std::unordered_map<void*, std::shared_ptr<PhysicalDeviceContext>> + phys_devices; public: - InstanceContext(const VkInstance& instance, + InstanceContext(const LayerContext& parent_context, + const VkInstance& instance, VkuInstanceDispatchTable&& vtable); virtual ~InstanceContext(); }; diff --git a/src/layer.cc b/src/layer.cc index d2977b7..3600a47 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -28,15 +28,15 @@ LayerContext layer_context; } // namespace +// Small templates which allow us to SFINAE find pNext structs. template <typename T> static T* find_next(void* const head, const VkStructureType& stype) { - for (auto i = reinterpret_cast<VkBaseOutStructure*>(head); i; + for (auto i = reinterpret_cast<VkBaseOutStructure*>(head)->pNext; i; i = i->pNext) { - if (i->sType != stype) { - continue; + if (i->sType == stype) { + return reinterpret_cast<T*>(i); } - return reinterpret_cast<T*>(i); } return nullptr; } @@ -44,13 +44,13 @@ static T* find_next(void* const head, const VkStructureType& stype) { template <typename T> static const T* find_next(const void* const head, const VkStructureType& stype) { - for (auto i = reinterpret_cast<const VkBaseInStructure*>(head); i; + + for (auto i = reinterpret_cast<const VkBaseInStructure*>(head)->pNext; i; i = i->pNext) { - if (i->sType != stype) { - continue; + if (i->sType == stype) { + return reinterpret_cast<const T*>(i); } - return reinterpret_cast<const T*>(i); } return nullptr; } @@ -59,12 +59,11 @@ template <typename T> static const T* find_link(const void* const head, const VkStructureType& stype) { for (auto info = find_next<T>(head, stype); info; - info = find_next<T>(info->pNext, stype)) { + info = find_next<T>(info, stype)) { - if (info->function != VK_LAYER_LINK_INFO) { - continue; + if (info->function == VK_LAYER_LINK_INFO) { + return reinterpret_cast<const T*>(info); } - return reinterpret_cast<const T*>(info); } return nullptr; } @@ -74,7 +73,7 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) { const auto link_info = find_link<VkLayerInstanceCreateInfo>( - pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO); + pCreateInfo, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO); if (!link_info || !link_info->u.pLayerInfo) { return VK_ERROR_INITIALIZATION_FAILED; @@ -122,7 +121,8 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, assert(!layer_context.contexts.contains(key)); layer_context.contexts.try_emplace( - key, std::make_shared<InstanceContext>(*pInstance, std::move(vtable))); + key, std::make_shared<InstanceContext>(layer_context, *pInstance, + std::move(vtable))); return VK_SUCCESS; } @@ -182,16 +182,18 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( VkPhysicalDevice physical_device, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) { + const auto enabled_extensions = + std::span{pCreateInfo->ppEnabledExtensionNames, + pCreateInfo->enabledExtensionCount}; + // Hook logic after create device looks like this. // !PHYS_SUPPORT && AL2_REQUESTED -> return INITIALIZATION_FAILED here. // !PHYS_SUPPORT && !AL2_REQUESTED -> hooks are no-ops // PHYS_SUPPORT -> hooks inject timestamps regardless // because AL1 might be used and it // costs virtually nothing to do. - const auto was_antilag_requested = std::ranges::any_of( - std::span{pCreateInfo->ppEnabledExtensionNames, - pCreateInfo->enabledExtensionCount}, - [](const auto& ext) { + const auto was_antilag_requested = + std::ranges::any_of(enabled_extensions, [](const auto& ext) { return std::string_view{ext} == VK_AMD_ANTI_LAG_EXTENSION_NAME; }); @@ -201,7 +203,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( } const auto create_info = find_link<VkLayerDeviceCreateInfo>( - pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO); + pCreateInfo, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO); if (!create_info || !create_info->u.pLayerInfo) { return VK_ERROR_INITIALIZATION_FAILED; } @@ -216,9 +218,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( // Build a next extensions vector from what they have requested. const auto next_extensions = [&]() -> std::vector<const char*> { - auto next_extensions = std::span{pCreateInfo->ppEnabledExtensionNames, - pCreateInfo->enabledExtensionCount} | - std::ranges::to<std::vector>(); + auto next_extensions = std::vector(std::from_range, enabled_extensions); // Don't append anything extra if we don't support what we need. if (!context->supports_required_extensions) { @@ -251,6 +251,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( if (const auto result = context->instance.vtable.CreateDevice( physical_device, &next_create_info, pAllocator, pDevice); result != VK_SUCCESS) { + return result; } @@ -327,6 +328,9 @@ GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index, const auto context = layer_context.get_context(device); + // Get device queue, unlike CreateDevice or CreateInstance, can be + // called multiple times to return the same queue object. Our insertion + // handling has to be a little different where we account for this. context->vtable.GetDeviceQueue(device, queue_family_index, queue_index, queue); if (!queue || !*queue) { @@ -344,8 +348,7 @@ GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index, queue_family_index); } - // it->second should be QueueContext, also it might already be there - // but this is expected. + // it->second should be QueueContext, also it might already be there. const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second); assert(ptr); context->queues.emplace(*queue, ptr); @@ -617,8 +620,7 @@ static VKAPI_ATTR void VKAPI_CALL GetPhysicalDeviceFeatures2( vtable.GetPhysicalDeviceFeatures2(physical_device, pFeatures); const auto feature = find_next<VkPhysicalDeviceAntiLagFeaturesAMD>( - pFeatures->pNext, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); + pFeatures, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); if (feature) { feature->antiLag = context->supports_required_extensions; @@ -633,6 +635,7 @@ static VKAPI_ATTR void VKAPI_CALL GetPhysicalDeviceFeatures2KHR( static VKAPI_ATTR void VKAPI_CALL AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD* pData) { const auto context = layer_context.get_context(device); + assert(pData); context->notify_antilag_update(*pData); } diff --git a/src/layer_context.cc b/src/layer_context.cc index ceb0030..28a94b5 100644 --- a/src/layer_context.cc +++ b/src/layer_context.cc @@ -1,8 +1,16 @@ #include "layer_context.hh" +#include <cstdlib> // for env var +#include <string_view> + namespace low_latency { -LayerContext::LayerContext() {} +LayerContext::LayerContext() { + this->is_antilag_1_enabled = []() -> auto { + const auto env = std::getenv(LayerContext::SLEEP_AFTER_PRESENT_ENV); + return env && std::string_view{env} == "1"; + }(); +} LayerContext::~LayerContext() {} diff --git a/src/layer_context.hh b/src/layer_context.hh index 44857d4..c98768b 100644 --- a/src/layer_context.hh +++ b/src/layer_context.hh @@ -13,17 +13,15 @@ // The purpose of this file is to provide a definition for the highest level // entry point struct of our vulkan state. -// -// All Context structs have deleted copy/move constructors. This is because we -// want to be extremely explicit with how/when we delete things, and this allows -// us to use destructors for cleanup without much worry about weird copies -// floating around. Most contexts will probably live inside std::unique_ptr's as -// a result so they can be used in standard containers. namespace low_latency { // All these templates do is make it so we can go from some DispatchableType -// to their respective context's with nice syntax. +// to their respective context's with nice syntax. This lets us write something +// like this for all DispatchableTypes: +// +// const auto device_context = get_context(some_vk_device); +// ^ It was automatically deduced as DeviceContext, wow! template <typename T> concept DispatchableType = @@ -49,10 +47,18 @@ template <DispatchableType D> using dispatch_context_t = typename context_for_t<D>::context; struct LayerContext final : public Context { + private: + // If this is not null and set to exactly "1", then we should sleep after + // present. + static constexpr auto SLEEP_AFTER_PRESENT_ENV = + "LOW_LATENCY_LAYER_SLEEP_AFTER_PRESENT"; + public: std::mutex mutex; std::unordered_map<void*, std::shared_ptr<Context>> contexts; + bool is_antilag_1_enabled = false; + public: LayerContext(); virtual ~LayerContext(); diff --git a/src/queue_context.cc b/src/queue_context.cc index 2096df3..9fe25b3 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -1,5 +1,6 @@ #include "queue_context.hh" #include "device_context.hh" +#include "layer_context.hh" #include "timestamp_pool.hh" #include <algorithm> @@ -158,9 +159,14 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) { // any particular queue. this->device_context.notify_queue_present(*this); - // If antilag is on, the sleep will occur in notify_antilag_update at the - // device context. - if (this->device_context.antilag_mode != VK_ANTI_LAG_MODE_ON_AMD) { + // We should only sleep in present if two conditions are met: + // 1. Our antilag_mode isn't set to on, because otherwise the sleep will + // be done in input and with far better results. + // 2. The 'is_antilag_1_enabled' flag, which exists at the layer's + // context, is set. + if (this->device_context.antilag_mode != VK_ANTI_LAG_MODE_ON_AMD && + this->device_context.instance.layer.is_antilag_1_enabled) { + this->sleep_in_present(); } } @@ -268,11 +274,11 @@ void QueueContext::drain_frames_to_timings() { const auto cpu_start = [&]() -> auto { if (const auto it = std::rbegin(this->timings); it != std::rend(this->timings)) { + return (*it)->frame.cpu_post_present_time; } - // This will happen *once*, and only for the first frame. We don't - // have a way of knowing when the CPU first started work obviously - // in this case because we're a vulkan layer and not omniscient. + // This will happen once, only for the first frame. We don't + // have a way of knowing when the CPU first started work here. // Just return our first submit's start for this edge case. return frame.submissions.front()->start_handle->get_time_required(); }(); |
