aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNicolas James <nj3ahxac@gmail.com>2026-03-11 10:32:26 +1100
committerNicolas James <nj3ahxac@gmail.com>2026-03-11 10:32:26 +1100
commit733955de0ae90de26fe98854a1debd6b80ccc27a (patch)
tree6424a26bb8f5434d1ad6952f16ba6a79b3b5991f /src
parentf10074d9897850b9b746ff8d8e0b2dc4af24f3ff (diff)
Add LOW_LATENCY_LAYER_SLEEP_AFTER_PRESENT env as an explicit AL1 toggle
Diffstat (limited to 'src')
-rw-r--r--src/context.hh15
-rw-r--r--src/device_context.cc6
-rw-r--r--src/instance_context.cc5
-rw-r--r--src/instance_context.hh9
-rw-r--r--src/layer.cc55
-rw-r--r--src/layer_context.cc10
-rw-r--r--src/layer_context.hh20
-rw-r--r--src/queue_context.cc18
8 files changed, 88 insertions, 50 deletions
diff --git a/src/context.hh b/src/context.hh
index 5972740..91fbf91 100644
--- a/src/context.hh
+++ b/src/context.hh
@@ -1,13 +1,20 @@
#ifndef CONTEXT_HH_
#define CONTEXT_HH_
-// The purpose of this class is to provide a base class for Context classes.
-
namespace low_latency {
+// A context class doesn't do much by itself. We just use it to provide a
+// virtual destructor so we can store a bunch of shared_ptrs in the same
+// container and rely on RTTI in the layer context. It also deletes the copy and
+// move constructors for derived classes implicitly, and that's pretty much it.
+//
+// We _could_ do something weird and complicated where we define pure virtual
+// hashing and equality functions so we can store them in an unordered_set, but
+// it's just unnecessary complexity and doesn't allow us to perform 'do you exist'
+// lookups without creating an object.
class Context {
-
-public:
+
+ public:
Context();
Context(const Context& context) = delete;
Context(Context&& context) = delete;
diff --git a/src/device_context.cc b/src/device_context.cc
index 49b7808..97103de 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -134,9 +134,11 @@ void DeviceContext::notify_antilag_update(const VkAntiLagDataAMD& data) {
return;
}
- if (this->antilag_mode == VK_ANTI_LAG_MODE_ON_AMD) {
- this->sleep_in_input();
+ if (this->antilag_mode != VK_ANTI_LAG_MODE_ON_AMD) {
+ return;
}
+
+ this->sleep_in_input();
}
void DeviceContext::notify_queue_present(const QueueContext& queue) {
diff --git a/src/instance_context.cc b/src/instance_context.cc
index d12766f..5a4d48a 100644
--- a/src/instance_context.cc
+++ b/src/instance_context.cc
@@ -5,9 +5,10 @@
namespace low_latency {
-InstanceContext::InstanceContext(const VkInstance& instance,
+InstanceContext::InstanceContext(const LayerContext& parent_context,
+ const VkInstance& instance,
VkuInstanceDispatchTable&& vtable)
- : instance(instance), vtable(std::move(vtable)) {}
+ : layer(parent_context), instance(instance), vtable(std::move(vtable)) {}
InstanceContext::~InstanceContext() {
// Similar to devices, we should own the only shared ptr at this point so
diff --git a/src/instance_context.hh b/src/instance_context.hh
index 3b71a82..001cde8 100644
--- a/src/instance_context.hh
+++ b/src/instance_context.hh
@@ -10,17 +10,22 @@
namespace low_latency {
+class LayerContext;
class PhysicalDeviceContext;
struct InstanceContext final : public Context {
+ const LayerContext& layer;
+
const VkInstance instance;
const VkuInstanceDispatchTable vtable;
- std::unordered_map<void*, std::shared_ptr<PhysicalDeviceContext>> phys_devices;
+ std::unordered_map<void*, std::shared_ptr<PhysicalDeviceContext>>
+ phys_devices;
public:
- InstanceContext(const VkInstance& instance,
+ InstanceContext(const LayerContext& parent_context,
+ const VkInstance& instance,
VkuInstanceDispatchTable&& vtable);
virtual ~InstanceContext();
};
diff --git a/src/layer.cc b/src/layer.cc
index d2977b7..3600a47 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -28,15 +28,15 @@ LayerContext layer_context;
} // namespace
+// Small templates which find a struct with a given sType in the pNext chain following head.
template <typename T>
static T* find_next(void* const head, const VkStructureType& stype) {
- for (auto i = reinterpret_cast<VkBaseOutStructure*>(head); i;
+ for (auto i = reinterpret_cast<VkBaseOutStructure*>(head)->pNext; i;
i = i->pNext) {
- if (i->sType != stype) {
- continue;
+ if (i->sType == stype) {
+ return reinterpret_cast<T*>(i);
}
- return reinterpret_cast<T*>(i);
}
return nullptr;
}
@@ -44,13 +44,13 @@ static T* find_next(void* const head, const VkStructureType& stype) {
template <typename T>
static const T* find_next(const void* const head,
const VkStructureType& stype) {
- for (auto i = reinterpret_cast<const VkBaseInStructure*>(head); i;
+
+ for (auto i = reinterpret_cast<const VkBaseInStructure*>(head)->pNext; i;
i = i->pNext) {
- if (i->sType != stype) {
- continue;
+ if (i->sType == stype) {
+ return reinterpret_cast<const T*>(i);
}
- return reinterpret_cast<const T*>(i);
}
return nullptr;
}
@@ -59,12 +59,11 @@ template <typename T>
static const T* find_link(const void* const head,
const VkStructureType& stype) {
for (auto info = find_next<T>(head, stype); info;
- info = find_next<T>(info->pNext, stype)) {
+ info = find_next<T>(info, stype)) {
- if (info->function != VK_LAYER_LINK_INFO) {
- continue;
+ if (info->function == VK_LAYER_LINK_INFO) {
+ return reinterpret_cast<const T*>(info);
}
- return reinterpret_cast<const T*>(info);
}
return nullptr;
}
@@ -74,7 +73,7 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkInstance* pInstance) {
const auto link_info = find_link<VkLayerInstanceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO);
+ pCreateInfo, VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO);
if (!link_info || !link_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
@@ -122,7 +121,8 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo,
assert(!layer_context.contexts.contains(key));
layer_context.contexts.try_emplace(
- key, std::make_shared<InstanceContext>(*pInstance, std::move(vtable)));
+ key, std::make_shared<InstanceContext>(layer_context, *pInstance,
+ std::move(vtable)));
return VK_SUCCESS;
}
@@ -182,16 +182,18 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
VkPhysicalDevice physical_device, const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator, VkDevice* pDevice) {
+ const auto enabled_extensions =
+ std::span{pCreateInfo->ppEnabledExtensionNames,
+ pCreateInfo->enabledExtensionCount};
+
// Hook logic after create device looks like this.
// !PHYS_SUPPORT && AL2_REQUESTED -> return INITIALIZATION_FAILED here.
// !PHYS_SUPPORT && !AL2_REQUESTED -> hooks are no-ops
// PHYS_SUPPORT -> hooks inject timestamps regardless
// because AL1 might be used and it
// costs virtually nothing to do.
- const auto was_antilag_requested = std::ranges::any_of(
- std::span{pCreateInfo->ppEnabledExtensionNames,
- pCreateInfo->enabledExtensionCount},
- [](const auto& ext) {
+ const auto was_antilag_requested =
+ std::ranges::any_of(enabled_extensions, [](const auto& ext) {
return std::string_view{ext} == VK_AMD_ANTI_LAG_EXTENSION_NAME;
});
@@ -201,7 +203,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
}
const auto create_info = find_link<VkLayerDeviceCreateInfo>(
- pCreateInfo->pNext, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO);
+ pCreateInfo, VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO);
if (!create_info || !create_info->u.pLayerInfo) {
return VK_ERROR_INITIALIZATION_FAILED;
}
@@ -216,9 +218,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
// Build a next extensions vector from what they have requested.
const auto next_extensions = [&]() -> std::vector<const char*> {
- auto next_extensions = std::span{pCreateInfo->ppEnabledExtensionNames,
- pCreateInfo->enabledExtensionCount} |
- std::ranges::to<std::vector>();
+ auto next_extensions = std::vector(std::from_range, enabled_extensions);
// Don't append anything extra if we don't support what we need.
if (!context->supports_required_extensions) {
@@ -251,6 +251,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
if (const auto result = context->instance.vtable.CreateDevice(
physical_device, &next_create_info, pAllocator, pDevice);
result != VK_SUCCESS) {
+
return result;
}
@@ -327,6 +328,9 @@ GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index,
const auto context = layer_context.get_context(device);
+ // GetDeviceQueue, unlike CreateDevice or CreateInstance, can be
+ // called multiple times to return the same queue object. Our insertion
+ // handling has to be a little different to account for this.
context->vtable.GetDeviceQueue(device, queue_family_index, queue_index,
queue);
if (!queue || !*queue) {
@@ -344,8 +348,7 @@ GetDeviceQueue(VkDevice device, std::uint32_t queue_family_index,
queue_family_index);
}
- // it->second should be QueueContext, also it might already be there
- // but this is expected.
+ // it->second should be QueueContext, also it might already be there.
const auto ptr = std::dynamic_pointer_cast<QueueContext>(it->second);
assert(ptr);
context->queues.emplace(*queue, ptr);
@@ -617,8 +620,7 @@ static VKAPI_ATTR void VKAPI_CALL GetPhysicalDeviceFeatures2(
vtable.GetPhysicalDeviceFeatures2(physical_device, pFeatures);
const auto feature = find_next<VkPhysicalDeviceAntiLagFeaturesAMD>(
- pFeatures->pNext,
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD);
+ pFeatures, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD);
if (feature) {
feature->antiLag = context->supports_required_extensions;
@@ -633,6 +635,7 @@ static VKAPI_ATTR void VKAPI_CALL GetPhysicalDeviceFeatures2KHR(
static VKAPI_ATTR void VKAPI_CALL
AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD* pData) {
const auto context = layer_context.get_context(device);
+ assert(pData);
context->notify_antilag_update(*pData);
}
diff --git a/src/layer_context.cc b/src/layer_context.cc
index ceb0030..28a94b5 100644
--- a/src/layer_context.cc
+++ b/src/layer_context.cc
@@ -1,8 +1,16 @@
#include "layer_context.hh"
+#include <cstdlib> // for env var
+#include <string_view>
+
namespace low_latency {
-LayerContext::LayerContext() {}
+LayerContext::LayerContext() {
+ this->is_antilag_1_enabled = []() -> auto {
+ const auto env = std::getenv(LayerContext::SLEEP_AFTER_PRESENT_ENV);
+ return env && std::string_view{env} == "1";
+ }();
+}
LayerContext::~LayerContext() {}
diff --git a/src/layer_context.hh b/src/layer_context.hh
index 44857d4..c98768b 100644
--- a/src/layer_context.hh
+++ b/src/layer_context.hh
@@ -13,17 +13,15 @@
// The purpose of this file is to provide a definition for the highest level
// entry point struct of our vulkan state.
-//
-// All Context structs have deleted copy/move constructors. This is because we
-// want to be extremely explicit with how/when we delete things, and this allows
-// us to use destructors for cleanup without much worry about weird copies
-// floating around. Most contexts will probably live inside std::unique_ptr's as
-// a result so they can be used in standard containers.
namespace low_latency {
// All these templates do is make it so we can go from some DispatchableType
-// to their respective context's with nice syntax.
+// to their respective contexts with nice syntax. This lets us write something
+// like this for all DispatchableTypes:
+//
+// const auto device_context = get_context(some_vk_device);
+// ^ It was automatically deduced as DeviceContext, wow!
template <typename T>
concept DispatchableType =
@@ -49,10 +47,18 @@ template <DispatchableType D>
using dispatch_context_t = typename context_for_t<D>::context;
struct LayerContext final : public Context {
+ private:
+ // Name of the environment variable that toggles sleeping after present.
+ // The toggle is on only when the variable is set to exactly "1".
+ static constexpr auto SLEEP_AFTER_PRESENT_ENV =
+ "LOW_LATENCY_LAYER_SLEEP_AFTER_PRESENT";
+
public:
std::mutex mutex;
std::unordered_map<void*, std::shared_ptr<Context>> contexts;
+ bool is_antilag_1_enabled = false;
+
public:
LayerContext();
virtual ~LayerContext();
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 2096df3..9fe25b3 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -1,5 +1,6 @@
#include "queue_context.hh"
#include "device_context.hh"
+#include "layer_context.hh"
#include "timestamp_pool.hh"
#include <algorithm>
@@ -158,9 +159,14 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
// any particular queue.
this->device_context.notify_queue_present(*this);
- // If antilag is on, the sleep will occur in notify_antilag_update at the
- // device context.
- if (this->device_context.antilag_mode != VK_ANTI_LAG_MODE_ON_AMD) {
+ // We should only sleep in present if two conditions are met:
+ // 1. Our antilag_mode isn't set to on, because otherwise the sleep will
+ // be done in input and with far better results.
+ // 2. The 'is_antilag_1_enabled' flag, which exists at the layer's
+ // context, is set.
+ if (this->device_context.antilag_mode != VK_ANTI_LAG_MODE_ON_AMD &&
+ this->device_context.instance.layer.is_antilag_1_enabled) {
+
this->sleep_in_present();
}
}
@@ -268,11 +274,11 @@ void QueueContext::drain_frames_to_timings() {
const auto cpu_start = [&]() -> auto {
if (const auto it = std::rbegin(this->timings);
it != std::rend(this->timings)) {
+
return (*it)->frame.cpu_post_present_time;
}
- // This will happen *once*, and only for the first frame. We don't
- // have a way of knowing when the CPU first started work obviously
- // in this case because we're a vulkan layer and not omniscient.
+ // This will happen once, only for the first frame. We don't
+ // have a way of knowing when the CPU first started work here.
// Just return our first submit's start for this edge case.
return frame.submissions.front()->start_handle->get_time_required();
}();