aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/device_context.cc39
-rw-r--r--src/device_context.hh24
-rw-r--r--src/layer.cc116
-rw-r--r--src/layer_context.hh6
-rw-r--r--src/queue_context.cc9
-rw-r--r--src/queue_context.hh5
6 files changed, 126 insertions, 73 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index cea0540..58737e2 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -1,5 +1,4 @@
#include "device_context.hh"
-#include "queue_context.hh"
#include <time.h>
#include <utility>
@@ -23,7 +22,6 @@ DeviceContext::DeviceContext(InstanceContext& parent_instance,
}
DeviceContext::~DeviceContext() {
- this->present_queue.reset();
// We will let the destructor handle clearing here, but they should be
// unique by now (ie, removed from the layer's context map).
for (const auto& [queue, queue_context] : this->queues) {
@@ -94,6 +92,9 @@ DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
}
void DeviceContext::sleep_in_input() {
+ // TODO
+
+ /*
// Present hasn't happened yet, we don't know what queue to attack.
if (!this->present_queue) {
return;
@@ -121,32 +122,30 @@ void DeviceContext::sleep_in_input() {
// would get huge frame drops, loss of throughput, and the GPU would even
// clock down. So naturally I am concerned about this approach, but it seems
// to perform well so far in my own testing and is just beautifully elegant.
+ */
}
-void DeviceContext::notify_antilag_update(const VkAntiLagDataAMD& data) {
- this->antilag_mode = data.mode;
- this->antilag_fps = data.maxFPS; // TODO
+void DeviceContext::update_swapchain_infos(
+ const std::optional<VkSwapchainKHR> target,
+ const std::chrono::milliseconds& present_delay,
+ const bool was_low_latency_requested) {
- // This might not be provided (probably just to set some settings?).
- if (!data.pPresentationInfo) {
- return;
- }
+ const auto write = SwapchainInfo{
+ .present_delay = present_delay,
+ .was_low_latency_requested = was_low_latency_requested,
+ };
- // Only care about the input stage for now.
- if (data.pPresentationInfo->stage != VK_ANTI_LAG_STAGE_INPUT_AMD) {
+ if (target.has_value()) {
+ const auto iter = this->swapchain_infos.find(*target);
+ assert(iter != std::end(this->swapchain_infos)); // Must exist (spec).
+ iter->second = write;
return;
}
- if (this->antilag_mode != VK_ANTI_LAG_MODE_ON_AMD) {
- return;
+ // If we don't have a target (AMD's anti_lag), just write it to everything.
+ for (auto& iter : this->swapchain_infos) {
+ iter.second = write;
}
-
- this->sleep_in_input();
-}
-
-void DeviceContext::notify_queue_present(const QueueContext& queue) {
- assert(this->queues.contains(queue.queue));
- this->present_queue = this->queues[queue.queue];
}
} // namespace low_latency \ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
index c76f376..6b5f000 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -28,6 +28,13 @@ struct DeviceContext final : public Context {
const VkDevice device;
const VkuDeviceDispatchTable vtable;
+ // Tiny struct to represent any swapchain's low latency state.
+ struct SwapchainInfo {
+ std::chrono::milliseconds present_delay = std::chrono::milliseconds{0};
+ bool was_low_latency_requested = false;
+ };
+ std::unordered_map<VkSwapchainKHR, SwapchainInfo> swapchain_infos{};
+
std::unordered_map<VkQueue, std::shared_ptr<QueueContext>> queues;
struct Clock {
@@ -58,15 +65,6 @@ struct DeviceContext final : public Context {
};
std::unique_ptr<Clock> clock;
- std::uint32_t antilag_fps = 0; // TODO
- VkAntiLagModeAMD antilag_mode = VK_ANTI_LAG_MODE_DRIVER_CONTROL_AMD;
-
- // The queue used in the last present.
- std::shared_ptr<QueueContext> present_queue;
-
- private:
- void sleep_in_input();
-
public:
DeviceContext(InstanceContext& parent_instance,
PhysicalDeviceContext& parent_physical,
@@ -75,9 +73,13 @@ struct DeviceContext final : public Context {
virtual ~DeviceContext();
public:
- void notify_antilag_update(const VkAntiLagDataAMD& data);
+ void sleep_in_input();
- void notify_queue_present(const QueueContext& queue);
+ // Updates the settings associated with that swapchain. If none is provided
+ // all swapchains are set to this value.
+ void update_swapchain_infos(const std::optional<VkSwapchainKHR> target,
+ const std::chrono::milliseconds& present_delay,
+ const bool was_low_latency_requested);
};
}; // namespace low_latency
diff --git a/src/layer.cc b/src/layer.cc
index 77ce296..438f331 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -295,6 +295,8 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
DEVICE_VTABLE_LOAD(GetCalibratedTimestampsKHR);
DEVICE_VTABLE_LOAD(ResetQueryPoolEXT);
DEVICE_VTABLE_LOAD(SignalSemaphore);
+ DEVICE_VTABLE_LOAD(CreateSwapchainKHR);
+ DEVICE_VTABLE_LOAD(DestroySwapchainKHR);
#undef DEVICE_VTABLE_LOAD
const auto key = layer_context.get_key(*pDevice);
@@ -735,17 +737,57 @@ static VKAPI_ATTR void VKAPI_CALL GetPhysicalDeviceSurfaceCapabilities2KHR(
lsc->presentModeCount = num_to_write;
}
+static VKAPI_ATTR VkResult VKAPI_CALL CreateSwapchainKHR(
+ VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain) {
+
+ const auto context = layer_context.get_context(device);
+
+ if (const auto result = context->vtable.CreateSwapchainKHR(
+ device, pCreateInfo, pAllocator, pSwapchain);
+ result != VK_SUCCESS) {
+
+ return result;
+ }
+
+ assert(context->swapchain_infos.try_emplace(*pSwapchain).second);
+
+ return VK_SUCCESS;
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain,
+ const VkAllocationCallbacks* pAllocator) {
+ const auto context = layer_context.get_context(device);
+
+ assert(context->swapchain_infos.erase(swapchain));
+
+ context->vtable.DestroySwapchainKHR(device, swapchain, pAllocator);
+}
+
static VKAPI_ATTR void VKAPI_CALL
AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD* pData) {
const auto context = layer_context.get_context(device);
assert(pData);
- context->notify_antilag_update(*pData);
-}
-// Stubs for nvidia low latency 2.
-void GetLatencyTimingsNV(VkDevice device, VkSwapchainKHR swapchain,
- VkGetLatencyMarkerInfoNV* pLatencyMarkerInfo) {
- // STUB
+  // AL2 is synchronous while NVIDIA's low_latency2 is asynchronous.
+ // It's difficult to model an asynchronous impl inside a synchronous impl,
+ // but it's easy to do the inverse. As a result, we should implement
+ // NVIDIA's method and then have a working AL2 implementation follow using
+ // that existing code path.
+
+ using namespace std::chrono;
+ const auto present_delay = duration_cast<milliseconds>(1s / pData->maxFPS);
+ context->update_swapchain_infos(std::nullopt, present_delay,
+ (pData->mode == VK_ANTI_LAG_MODE_ON_AMD));
+
+ if (!pData->pPresentationInfo) {
+ return;
+ }
+
+ if (pData->pPresentationInfo->stage == VK_ANTI_LAG_STAGE_INPUT_AMD) {
+ context->sleep_in_input();
+ }
}
VkResult LatencySleepNV(VkDevice device, VkSwapchainKHR swapchain,
@@ -754,50 +796,54 @@ VkResult LatencySleepNV(VkDevice device, VkSwapchainKHR swapchain,
const auto context = layer_context.get_context(device);
assert(pSleepInfo);
- // Keep going.
- if (pSleepInfo->signalSemaphore) {
-
- // This is a hack obviously. I will have to associate queue submits with
- // a semaphore and signal it correctly later. I'm not sure about the
- // implications regarding multithreading, will have to think a bit about how to do this cleanly
- // with our current anti lag.
- static std::uint32_t counter = 1024;
-
- const auto ssi = VkSemaphoreSignalInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
- .semaphore = pSleepInfo->signalSemaphore,
- .value = counter,
- };
+ // TODO sleep here
- // So we don't wait and this becomes a no-op instead of a freeze!
- context->vtable.SignalSemaphore(device, &ssi);
-
- ++counter;
- }
- // STUB
return VK_SUCCESS;
}
void QueueNotifyOutOfBandNV(VkQueue queue,
const VkOutOfBandQueueTypeInfoNV* pQueueTypeInfo) {
- // STUB
-}
+ // This is really thoughtful from NVIDIA. Having the application explicitly
+ // state which queues should be ignored for latency evaluation is far
+ // superior to AMD's guessing game.
+ // Kind of interesting how you can't turn it back on once it's turned off.
+ // Also I really have no idea why pQueueTypeInfo's VkOutOfBandQueueTypeNV
+ // enum even exists (I guess we will find out later when nothing works).
+ const auto context = layer_context.get_context(queue);
-void SetLatencyMarkerNV(VkDevice device, VkSwapchainKHR swapchain,
- const VkSetLatencyMarkerInfoNV* pLatencyMarkerInfo) {
- // STUB
+ context->should_ignore_latency = true;
}
VkResult SetLatencySleepModeNV(VkDevice device, VkSwapchainKHR swapchain,
const VkLatencySleepModeInfoNV* pSleepModeInfo) {
-
const auto context = layer_context.get_context(device);
- assert(pSleepModeInfo);
- // STUB
+ using namespace std::chrono;
+ if (pSleepModeInfo) {
+ context->update_swapchain_infos(
+ swapchain, milliseconds{pSleepModeInfo->minimumIntervalUs},
+ pSleepModeInfo->lowLatencyMode);
+ } else {
+ // If pSleepModeInfo is nullptr, it means no delay and no low latency.
+ context->update_swapchain_infos(swapchain, milliseconds{0}, false);
+ }
return VK_SUCCESS;
}
+void SetLatencyMarkerNV(VkDevice device, VkSwapchainKHR swapchain,
+ const VkSetLatencyMarkerInfoNV* pLatencyMarkerInfo) {
+ // STUB
+ // We will probably end up making use of this in the future, but afaict it's
+ // not relevant for this layer's operation just yet. This function is
+ // NVIDIA's way of giving developers insight into their render pipeline.
+}
+
+void GetLatencyTimingsNV(VkDevice device, VkSwapchainKHR swapchain,
+ VkGetLatencyMarkerInfoNV* pLatencyMarkerInfo) {
+ // STUB
+ // Just like SetLatencyMarkerNV this isn't relevant for us just yet.
+}
+
} // namespace low_latency
// This is a bit of template hackery which generates a wrapper function for each
@@ -885,6 +931,8 @@ static const auto device_functions = func_map_t{
HOOK_ENTRY("vkSetLatencyMarkerNV", low_latency::SetLatencyMarkerNV),
HOOK_ENTRY("vkSetLatencySleepModeNV", low_latency::SetLatencySleepModeNV),
+ HOOK_ENTRY("vkCreateSwapchainKHR", low_latency::CreateSwapchainKHR),
+ HOOK_ENTRY("vkDestroySwapchainKHR", low_latency::DestroySwapchainKHR),
};
#undef HOOK_ENTRY
diff --git a/src/layer_context.hh b/src/layer_context.hh
index da13dc6..95f1cd5 100644
--- a/src/layer_context.hh
+++ b/src/layer_context.hh
@@ -48,15 +48,15 @@ using dispatch_context_t = typename context_for_t<D>::context;
class LayerContext final : public Context {
private:
- // If this is not null and set to exactly "1", then we should sleep after
+ // If this is not null and set to exactly 1, then we should sleep after
// present.
static constexpr auto SLEEP_AFTER_PRESENT_ENV =
"LOW_LATENCY_LAYER_SLEEP_AFTER_PRESENT";
- // If this is not null and set to exactly "1", then VK_NV_LOW_LATENCY2
+ // If this is not null and set to exactly 1, then VK_NV_low_latency2
// should be provided instead of VK_AMD_anti_lag.
static constexpr auto SPOOF_NVIDIA_ENV =
- "LOW_LATENCY_LAYER_SPOOF_NV_LOWLATENCY2";
+ "LOW_LATENCY_LAYER_SPOOF_NVIDIA";
public:
std::mutex mutex;
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 29dcbfb..d12f03d 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -157,21 +157,20 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
this->drain_submissions_to_frame();
this->drain_frames_to_timings();
- // Call up to notify the device now that we're done with this frame.
- // We have to do this because antilag 2 data is sent to the device, not
- // any particular queue.
- this->device_context.notify_queue_present(*this);
-
// We should only sleep in present if two conditions are met:
// 1. Our antilag_mode isn't set to on, because otherwise the sleep will
// be done in input and with far better results.
// 2. The 'is_antilag_1_enabled' flag, which exists at the layer's
// context, is set.
+ //
+ /*
+ * WIP REFLEX
if (this->device_context.antilag_mode != VK_ANTI_LAG_MODE_ON_AMD &&
this->device_context.instance.layer.is_antilag_1_enabled) {
this->sleep_in_present();
}
+ */
}
const auto debug_log_time2 = [](auto& stream, const auto& diff) {
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 701fc0d..221626f 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -57,6 +57,11 @@ class QueueContext final : public Context {
std::unique_ptr<TimestampPool> timestamp_pool;
+ // NVIDIA's extension lets the application explicitly state that this queue
+ // does not contribute to the frame. AMD's extension has no such mechanism -
+ // so this will always be false.
+ bool should_ignore_latency = false;
+
public:
// Potentially in flight queue submissions that come from this queue.
struct Submission {