aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/device_context.cc67
-rw-r--r--src/device_context.hh16
-rw-r--r--src/layer.cc23
-rw-r--r--src/queue_context.cc42
-rw-r--r--src/queue_context.hh17
-rw-r--r--src/timestamp_pool.cc6
6 files changed, 131 insertions, 40 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index a8e0347..2214b71 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -1,7 +1,9 @@
#include "device_context.hh"
+#include "queue_context.hh"
#include <iostream>
#include <utility>
+#include <vulkan/vulkan_core.h>
namespace low_latency {
@@ -13,6 +15,7 @@ DeviceContext::DeviceContext(InstanceContext& parent_instance,
device(device), vtable(std::move(vtable)), clock(*this) {}
DeviceContext::~DeviceContext() {
+ this->present_queue.reset();
// We will let the destructor handle clearing here, but they should be
// unique by now (ie, removed from the layer's context map).
for (const auto& [queue, queue_context] : this->queues) {
@@ -24,9 +27,11 @@ void DeviceContext::notify_acquire(const VkSwapchainKHR& swapchain,
const std::uint32_t& image_index,
const VkSemaphore& signal_semaphore) {
+ /*
std::cerr << "notify acquire for swapchain: " << swapchain << " : "
<< image_index << '\n';
std::cerr << " signal semaphore: " << signal_semaphore << '\n';
+ */
const auto it = this->swapchain_signals.try_emplace(swapchain).first;
@@ -63,7 +68,7 @@ DeviceContext::Clock::time_point_t
DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
const auto& pd = device.physical_device.properties;
const auto ns_tick = static_cast<double>(pd->limits.timestampPeriod);
-
+
const auto diff = [&]() -> auto {
auto a = this->device_ticks;
auto b = ticks;
@@ -76,7 +81,7 @@ DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
const auto signed_abs_diff = static_cast<std::int64_t>(abs_diff);
return is_negative ? -signed_abs_diff : signed_abs_diff;
}();
-
+
// This will have issues because std::chrono::steady_clock::now(), which
// we use for cpu time, may not be in the same time domain as what was returned
// by GetCalibratedTimestamps. It would be more robust to use the posix
@@ -87,4 +92,62 @@ DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
return time_point_t{delta};
}
+const auto debug_log_time2 = [](auto& stream, const auto& diff) {
+ using namespace std::chrono;
+ const auto ms = duration_cast<milliseconds>(diff);
+ const auto us = duration_cast<microseconds>(diff - ms);
+ const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+ stream << ms << " " << us << " " << ns << '\n';
+};
+
+const auto debug_log_time = [](const auto& diff) {
+ debug_log_time2(std::cerr, diff);
+};
+
+void DeviceContext::sleep_in_input() {
+ // Present hasn't happened yet, we don't know what queue to attack.
+ if (!this->present_queue) {
+ return;
+ }
+
+ const auto before = std::chrono::steady_clock::now();
+ // If we're here, that means that there might be an outstanding frame that's
+ // sitting on our present_queue which hasn't yet completed, so we need to
+ // stall until it's finished.
+ const auto& frames = this->present_queue->in_flight_frames;
+ if (std::size(frames)) {
+ frames.back().submissions.back()->end_handle->get_time_spinlock();
+ }
+ const auto after = std::chrono::steady_clock::now();
+ //debug_log_time(after - before);
+
+  // FIXME: this should also take 'cpu_time' into account, which we currently
+  // do not; it is unclear whether the omission matters in practice.
+}
+
+void DeviceContext::notify_antilag_update(const VkAntiLagDataAMD& data) {
+ this->antilag_mode = data.mode;
+ this->antilag_fps = data.maxFPS;
+
+ // This might not be provided (probably just to set some settings).
+ if (!data.pPresentationInfo) {
+ return;
+ }
+
+ const auto& presentation_info = *data.pPresentationInfo;
+ // Only care about the input stage for now.
+ if (presentation_info.stage != VK_ANTI_LAG_STAGE_INPUT_AMD) {
+ return;
+ }
+
+ if (this->antilag_mode == VK_ANTI_LAG_MODE_ON_AMD) {
+ this->sleep_in_input();
+ }
+}
+
+void DeviceContext::notify_queue_present(const QueueContext& queue) {
+ assert(this->queues.contains(queue.queue));
+ this->present_queue = this->queues[queue.queue];
+}
+
} // namespace low_latency \ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
index 310b8a7..c73f97f 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -2,6 +2,7 @@
#define DEVICE_CONTEXT_HH_
#include <chrono>
+#include <deque>
#include <memory>
#include <unordered_map>
@@ -53,6 +54,16 @@ struct DeviceContext final : public Context {
};
Clock clock;
+
+ std::uint32_t antilag_fps = 0;
+ VkAntiLagModeAMD antilag_mode = VK_ANTI_LAG_MODE_DRIVER_CONTROL_AMD;
+
+ // The queue used in the last present.
+ std::shared_ptr<QueueContext> present_queue;
+
+ private:
+ void sleep_in_input();
+
public:
DeviceContext(InstanceContext& parent_instance,
PhysicalDeviceContext& parent_physical,
@@ -63,6 +74,11 @@ struct DeviceContext final : public Context {
void notify_acquire(const VkSwapchainKHR& swapchain,
const std::uint32_t& image_index,
const VkSemaphore& signal_semaphore);
+
+  // Handles VkAntiLagDataAMD forwarded from the layer's AntiLagUpdateAMD hook.
+ void notify_antilag_update(const VkAntiLagDataAMD& data);
+
+ void notify_queue_present(const QueueContext& queue);
};
}; // namespace low_latency
diff --git a/src/layer.cc b/src/layer.cc
index b5287f8..12067a0 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -56,7 +56,8 @@ static const T* find_next(const void* const head,
}
template <typename T>
-static const T* find_link(const void* head, const VkStructureType& stype) {
+static const T* find_link(const void* const head,
+ const VkStructureType& stype) {
for (auto info = find_next<T>(head, stype); info;
info = find_next<T>(info->pNext, stype)) {
@@ -610,11 +611,7 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) {
return res;
}
- if (present_info) { // might not be needed
- queue_context->notify_present(*present_info);
- }
-
- queue_context->sleep_in_present();
+ queue_context->notify_present(*present_info);
return VK_SUCCESS;
}
@@ -633,13 +630,13 @@ static VKAPI_ATTR VkResult VKAPI_CALL EnumerateDeviceExtensionProperties(
physical_device, pLayerName, pPropertyCount, pProperties);
}
+ auto& count = *pPropertyCount;
// !pProperties means they're querying how much space they need.
if (!pProperties) {
- *pPropertyCount = 1;
+ count = 1;
return VK_SUCCESS;
}
- auto& count = *pPropertyCount;
// Defensive - they gave us zero space to work with.
if (!count) {
return VK_INCOMPLETE;
@@ -678,14 +675,8 @@ static VKAPI_ATTR void VKAPI_CALL GetPhysicalDeviceFeatures2KHR(
static VKAPI_ATTR void VKAPI_CALL
AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD* pData) {
- std::cerr << "low_latency::AntiLagUpdateAMD\n";
- std::cerr << " maxFPS: " << pData->maxFPS << '\n';
- std::cerr << " mode: " << pData->mode << '\n';
- std::cerr << " pPresentInfo: " << pData->pPresentationInfo->frameIndex
- << '\n';
- std::cerr << " frameIndex: " << pData->pPresentationInfo->frameIndex
- << '\n';
- std::cerr << " stage: " << pData->pPresentationInfo->stage << '\n';
+ const auto device_context = layer_context.get_context(device);
+ device_context->notify_antilag_update(*pData);
}
} // namespace low_latency
diff --git a/src/queue_context.cc b/src/queue_context.cc
index d20cc79..388019c 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -8,6 +8,7 @@
#include <iostream>
#include <ranges>
#include <span>
+#include <vulkan/vulkan_core.h>
namespace low_latency {
@@ -62,6 +63,7 @@ void QueueContext::notify_submit(
std::span{info.pSignalSemaphores, info.signalSemaphoreCount},
std::inserter(signals, std::end(signals)));
+ /*
std::cerr << "submit1 notif for queue " << this->queue << '\n';
std::cerr << " signals: \n";
for (const auto& signal : signals) {
@@ -71,6 +73,7 @@ void QueueContext::notify_submit(
for (const auto& wait : waits) {
std::cerr << " " << wait << '\n';
}
+ */
this->submissions.emplace_back(std::make_unique<Submission>(
std::move(signals), std::move(waits), head_handle, tail_handle, now));
@@ -100,6 +103,7 @@ void QueueContext::notify_submit(
std::inserter(signals, std::end(signals)),
[](const auto& info) -> auto { return info.semaphore; });
+ /*
std::cerr << "submit2 notif for queue " << this->queue << '\n';
std::cerr << " signals: \n";
for (const auto& signal : signals) {
@@ -109,6 +113,7 @@ void QueueContext::notify_submit(
for (const auto& wait : waits) {
std::cerr << " " << wait << '\n';
}
+ */
this->submissions.emplace_back(std::make_unique<Submission>(
std::move(signals), std::move(waits), head_handle, tail_handle, now));
@@ -119,7 +124,7 @@ void QueueContext::notify_submit(
}
}
-void QueueContext::notify_present(const VkPresentInfoKHR& info) {
+void QueueContext::drain_submissions_to_frame() {
// We are going to assume that all queue submissions before and on the same
// queue contribute to the frame.
@@ -139,9 +144,6 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
}
const auto last_iter = std::prev(std::end(this->submissions));
- (*start_iter)->debug += "first_during_present ";
- (*last_iter)->debug += "last_during_present ";
-
// The last submission is either in flight, already processed, or we
// just happen to be the first frame and we can just set it to our start
// with little consequence.
@@ -173,6 +175,22 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
this->submissions.clear();
}
+void QueueContext::notify_present(const VkPresentInfoKHR& info) {
+ this->drain_submissions_to_frame();
+ this->drain_frames_to_timings();
+
+ // Call up to notify the device now that we're done with this frame.
+ // We have to do this because antilag 2 data is sent to the device, not
+ // any particular queue.
+ this->device_context.notify_queue_present(*this);
+
+ // If antilag is on, the sleep will occur in notify_antilag_update at the
+ // device context.
+ if (this->device_context.antilag_mode != VK_ANTI_LAG_MODE_ON_AMD) {
+ this->sleep_in_present();
+ }
+}
+
const auto debug_log_time2 = [](auto& stream, const auto& diff) {
using namespace std::chrono;
const auto ms = duration_cast<milliseconds>(diff);
@@ -185,7 +203,7 @@ const auto debug_log_time = [](const auto& diff) {
debug_log_time2(std::cerr, diff);
};
-void QueueContext::process_frames() {
+void QueueContext::drain_frames_to_timings() {
if (!std::size(this->in_flight_frames)) {
return;
}
@@ -201,9 +219,9 @@ void QueueContext::process_frames() {
while (std::size(this->in_flight_frames)) {
const auto& frame = this->in_flight_frames.front();
- // There should be at least one submission, we guarantee it in
- // notify_present.
- assert(std::size(frame.submissions));
+ if (!std::size(frame.submissions)) {
+ break;
+ }
const auto& last_submission = frame.submissions.back();
@@ -320,7 +338,7 @@ void QueueContext::sleep_in_present() {
// Call this to push all in flight frames into our timings structure,
// but only if they're completed. So now they are truly *in flight
// frames*.
- this->process_frames();
+ this->drain_frames_to_timings();
if (!std::size(this->in_flight_frames)) {
return;
@@ -335,9 +353,9 @@ void QueueContext::sleep_in_present() {
return first_submission->start_handle->get_time_spinlock();
}();
- // Process frames because as stated above, we might have multiple frames
- // now completed.
- this->process_frames();
+ // Drain frames again because as stated above, we might have multiple frames
+ // now completed after our wait spinlock.
+ this->drain_frames_to_timings();
// Check the size again because the frame we want to target may have already
// completed when we called process_frames().
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 67b9c5d..2a3ea39 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -26,7 +26,7 @@ class QueueContext final : public Context {
std::unique_ptr<TimestampPool> timestamp_pool;
- private:
+ public:
static constexpr auto MAX_TRACKED_TIMINGS = 50;
// Potentially in flight queue submissions that come from this queue.
@@ -38,8 +38,6 @@ class QueueContext final : public Context {
const std::shared_ptr<TimestampPool::Handle> end_handle;
const DeviceContext::Clock::time_point_t enqueued_time;
-
- std::string debug;
};
using submission_ptr_t = std::shared_ptr<Submission>;
std::deque<submission_ptr_t> submissions;
@@ -67,7 +65,15 @@ class QueueContext final : public Context {
std::deque<std::unique_ptr<Timing>> timings;
private:
- void process_frames();
+ // Drains submissions and promotes them into a single frame object.
+ void drain_submissions_to_frame();
+
+ // Drains in flight frames and promotes them into a Timing object if they
+ // have completed.
+ void drain_frames_to_timings();
+
+ // Antilag 1 equivalent where we sleep after present to reduce queueing.
+ void sleep_in_present();
public:
QueueContext(DeviceContext& device_context, const VkQueue& queue,
@@ -86,9 +92,6 @@ class QueueContext final : public Context {
const DeviceContext::Clock::time_point_t& now);
void notify_present(const VkPresentInfoKHR& info);
-
- public:
- void sleep_in_present();
public:
bool should_inject_timestamps() const;
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index e482654..5149747 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -3,6 +3,7 @@
#include "queue_context.hh"
#include <chrono>
+#include <span>
#include <ranges>
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan_core.h>
@@ -24,9 +25,8 @@ TimestampPool::QueryChunk::QueryChunk(const QueueContext& queue_context) {
return qp;
}();
- constexpr auto key_range = std::views::iota(0u, QueryChunk::CHUNK_SIZE);
- this->free_indices = std::make_unique<free_indices_t>(std::begin(key_range),
- std::end(key_range));
+ constexpr auto KEY_RANGE = std::views::iota(0u, QueryChunk::CHUNK_SIZE);
+ this->free_indices = std::make_unique<free_indices_t>(std::from_range, KEY_RANGE);
this->command_buffers = [&, this]() -> auto {
auto cbs = std::make_unique<std::vector<VkCommandBuffer>>(CHUNK_SIZE);