aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNicolas James <nj3ahxac@gmail.com>2026-02-19 13:16:14 +1100
committerNicolas James <nj3ahxac@gmail.com>2026-02-19 13:16:14 +1100
commit3a5299c81884e8b28fa6a1a57f31c3375a4b633d (patch)
treec494bf7c192cb965daf469615ff7c65f63fa6dc5 /src
parentbb6195afa0fc2ae2a5fe00b718fc71630a696855 (diff)
Don't mess with timeline semaphores in submit, spin on vkGetQueryPoolResults instead, fix start = end submission issue
Diffstat (limited to 'src')
-rw-r--r--src/layer.cc158
-rw-r--r--src/queue_context.cc152
-rw-r--r--src/queue_context.hh10
-rw-r--r--src/timestamp_pool.cc63
-rw-r--r--src/timestamp_pool.hh20
5 files changed, 164 insertions, 239 deletions
diff --git a/src/layer.cc b/src/layer.cc
index 160851f..c19fbfc 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -282,8 +282,6 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
DEVICE_VTABLE_LOAD(DestroyDevice),
DEVICE_VTABLE_LOAD(GetDeviceQueue),
DEVICE_VTABLE_LOAD(QueueSubmit),
- DEVICE_VTABLE_LOAD(CreateSemaphore),
- DEVICE_VTABLE_LOAD(DestroySemaphore),
DEVICE_VTABLE_LOAD(CreateQueryPool),
DEVICE_VTABLE_LOAD(DestroyQueryPool),
DEVICE_VTABLE_LOAD(GetQueryPoolResults),
@@ -302,8 +300,6 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
DEVICE_VTABLE_LOAD(AcquireNextImageKHR),
DEVICE_VTABLE_LOAD(QueuePresentKHR),
DEVICE_VTABLE_LOAD(AcquireNextImage2KHR),
- DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR),
- DEVICE_VTABLE_LOAD(WaitSemaphoresKHR),
DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR),
DEVICE_VTABLE_LOAD(QueueSubmit2KHR),
DEVICE_VTABLE_LOAD(GetCalibratedTimestampsKHR),
@@ -453,55 +449,44 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
return vtable.QueueSubmit(queue, submit_count, submit_infos, fence);
}
- // We have to avoid casting away the const* of the passed VkSubmitInfos.
- // So we end up copying a lot of stuff and wrapping them in unique_ptrs
- // so their position in memory is stable.
-
- using cb_vect = std::vector<VkCommandBuffer>;
- using tssi_t = VkTimelineSemaphoreSubmitInfo;
+ using cbs_t = std::vector<VkCommandBuffer>;
auto next_submits = std::vector<VkSubmitInfo>{};
- auto next_cbs = std::vector<std::unique_ptr<cb_vect>>{};
- auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{};
- auto tssis = std::vector<std::unique_ptr<tssi_t>>{};
- for (const auto& submit_info : std::span{submit_infos, submit_count}) {
- const auto head_handle = queue_context->timestamp_pool->acquire();
- const auto tail_handle = queue_context->timestamp_pool->acquire();
+ // We're making modifications to multiple vkQueueSubmits. These have raw
+ // pointers to our command buffer arrays - whose position in memory can
+ // change on vector reallocation. So we use unique_ptrs here.
+ auto next_cbs = std::vector<std::unique_ptr<cbs_t>>{};
+
+ // notify_submit() should take copies of these shared_ptrs and store
+ // them for the duration of our call, but saving them here is a bit
+ // more explicit + insurance if that changes.
+ auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{};
- next_cbs.emplace_back([&]() -> auto {
- auto cbs = std::make_unique<std::vector<VkCommandBuffer>>();
+ std::ranges::transform(
+ std::span{submit_infos, submit_count}, std::back_inserter(next_submits),
+ [&](const auto& submit) {
+ const auto head_handle = queue_context->timestamp_pool->acquire();
+ const auto tail_handle = queue_context->timestamp_pool->acquire();
head_handle->setup_command_buffers(*tail_handle, *queue_context);
- cbs->push_back(head_handle->command_buffer);
- std::ranges::copy_n(submit_info.pCommandBuffers,
- submit_info.commandBufferCount,
- std::back_inserter(*cbs));
- cbs->push_back(tail_handle->command_buffer);
- return cbs;
- }());
- next_submits.push_back(submit_info);
- next_submits.back().pCommandBuffers = std::data(*next_cbs.back());
- next_submits.back().commandBufferCount = std::size(*next_cbs.back());
- handles.push_back(head_handle);
- handles.push_back(tail_handle);
-
- // We submit an extra command which signals a timeline semaphore which
- // signals that this command has completed.
- const auto sequence = 1 + queue_context->semaphore_sequence++;
- queue_context->notify_submit(submit_info, sequence, head_handle,
- tail_handle);
-
- tssis.push_back(std::make_unique<tssi_t>(tssi_t{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
- .signalSemaphoreValueCount = 1,
- .pSignalSemaphoreValues = &sequence,
- }));
- next_submits.push_back(VkSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = tssis.back().get(),
- .signalSemaphoreCount = 1,
- .pSignalSemaphores = &queue_context->semaphore,
+ queue_context->notify_submit(submit, head_handle, tail_handle);
+
+ handles.emplace_back(head_handle);
+ handles.emplace_back(tail_handle);
+ next_cbs.emplace_back([&]() -> auto {
+ auto cbs = std::make_unique<cbs_t>();
+ cbs->push_back(head_handle->command_buffer);
+ std::ranges::copy_n(submit.pCommandBuffers,
+ submit.commandBufferCount,
+ std::back_inserter(*cbs));
+ cbs->push_back(tail_handle->command_buffer);
+ return cbs;
+ }());
+
+ auto next_submit = submit;
+ next_submit.pCommandBuffers = std::data(*next_cbs.back());
+ next_submit.commandBufferCount = std::size(*next_cbs.back());
+ return next_submit;
});
- }
return vtable.QueueSubmit(queue, std::size(next_submits),
std::data(next_submits), fence);
@@ -519,57 +504,42 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
}
- using cb_vect_t = std::vector<VkCommandBufferSubmitInfo>;
+ using cbs_t = std::vector<VkCommandBufferSubmitInfo>;
auto next_submits = std::vector<VkSubmitInfo2>{};
- auto next_cbs = std::vector<std::unique_ptr<cb_vect_t>>{};
+ auto next_cbs = std::vector<std::unique_ptr<cbs_t>>{};
auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{};
- auto next_ssis = std::vector<std::unique_ptr<VkSemaphoreSubmitInfo>>{};
-
- for (const auto& submit_info : std::span{submit_infos, submit_count}) {
- const auto head_handle = queue_context->timestamp_pool->acquire();
- const auto tail_handle = queue_context->timestamp_pool->acquire();
- next_cbs.emplace_back([&]() -> auto {
- auto cbs = std::make_unique<cb_vect_t>();
+ std::ranges::transform(
+ std::span{submit_infos, submit_count}, std::back_inserter(next_submits),
+ [&](const auto& submit) {
+ const auto head_handle = queue_context->timestamp_pool->acquire();
+ const auto tail_handle = queue_context->timestamp_pool->acquire();
head_handle->setup_command_buffers(*tail_handle, *queue_context);
- cbs->push_back(VkCommandBufferSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
- .commandBuffer = head_handle->command_buffer,
- });
- std::ranges::copy_n(submit_info.pCommandBufferInfos,
- submit_info.commandBufferInfoCount,
- std::back_inserter(*cbs));
- cbs->push_back(VkCommandBufferSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
- .commandBuffer = tail_handle->command_buffer,
- });
- return cbs;
- }());
-
- next_submits.push_back(submit_info);
- next_submits.back().pCommandBufferInfos = std::data(*next_cbs.back());
- next_submits.back().commandBufferInfoCount =
- std::size(*next_cbs.back());
- handles.push_back(head_handle);
- handles.push_back(tail_handle);
-
- const auto sequence = 1 + queue_context->semaphore_sequence++;
- queue_context->notify_submit(submit_info, sequence, head_handle,
- tail_handle);
-
- next_ssis.push_back(
- std::make_unique<VkSemaphoreSubmitInfo>(VkSemaphoreSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
- .semaphore = queue_context->semaphore,
- .value = sequence,
- .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- }));
- next_submits.push_back(VkSubmitInfo2{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
- .signalSemaphoreInfoCount = 1,
- .pSignalSemaphoreInfos = next_ssis.back().get(),
+ queue_context->notify_submit(submit, head_handle, tail_handle);
+
+ next_cbs.emplace_back([&]() -> auto {
+ auto cbs = std::make_unique<cbs_t>();
+ head_handle->setup_command_buffers(*tail_handle,
+ *queue_context);
+ cbs->push_back(VkCommandBufferSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+ .commandBuffer = head_handle->command_buffer,
+ });
+ std::ranges::copy_n(submit.pCommandBufferInfos,
+ submit.commandBufferInfoCount,
+ std::back_inserter(*cbs));
+ cbs->push_back(VkCommandBufferSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+ .commandBuffer = tail_handle->command_buffer,
+ });
+ return cbs;
+ }());
+
+ auto next_submit = submit;
+ next_submit.pCommandBufferInfos = std::data(*next_cbs.back());
+ next_submit.commandBufferInfoCount = std::size(*next_cbs.back());
+ return next_submit;
});
- }
return vtable.QueueSubmit2(queue, std::size(next_submits),
std::data(next_submits), fence);
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 3914691..2f0a89d 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -28,25 +28,6 @@ make_command_pool(const DeviceContext& device_context,
return command_pool;
}
-static VkSemaphore make_semaphore(const DeviceContext& device_context) {
-
- const auto stci = VkSemaphoreTypeCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
- .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
- .initialValue = 0,
- };
-
- const auto sci = VkSemaphoreCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- .pNext = &stci,
- };
-
- auto semaphore = VkSemaphore{};
- device_context.vtable.CreateSemaphore(device_context.device, &sci, nullptr,
- &semaphore);
- return semaphore;
-}
-
QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue,
const std::uint32_t& queue_family_index)
: device_context(device_context), queue(queue),
@@ -54,7 +35,6 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue,
// Important we make the command pool before the timestamp pool, because
// it's a dependency.
command_pool(make_command_pool(device_context, queue_family_index)),
- semaphore(make_semaphore(device_context)),
timestamp_pool(std::make_unique<TimestampPool>(*this)) {}
QueueContext::~QueueContext() {
@@ -64,14 +44,12 @@ QueueContext::~QueueContext() {
this->timestamp_pool.reset();
const auto& vtable = this->device_context.vtable;
- vtable.DestroySemaphore(this->device_context.device, this->semaphore,
- nullptr);
vtable.DestroyCommandPool(this->device_context.device, this->command_pool,
nullptr);
}
void QueueContext::notify_submit(
- const VkSubmitInfo& info, const std::uint64_t& sequence,
+ const VkSubmitInfo& info,
const std::shared_ptr<TimestampPool::Handle> head_handle,
const std::shared_ptr<TimestampPool::Handle> tail_handle) {
@@ -92,9 +70,8 @@ void QueueContext::notify_submit(
std::cerr << " " << wait << '\n';
}
- this->submissions.emplace_back(
- std::make_unique<Submission>(std::move(signals), std::move(waits),
- head_handle, tail_handle, sequence));
+ this->submissions.emplace_back(std::make_unique<Submission>(
+ std::move(signals), std::move(waits), head_handle, tail_handle));
// TODO HACK
if (std::size(this->submissions) > 100) {
@@ -103,7 +80,7 @@ void QueueContext::notify_submit(
}
void QueueContext::notify_submit(
- const VkSubmitInfo2& info, const std::uint64_t& sequence,
+ const VkSubmitInfo2& info,
const std::shared_ptr<TimestampPool::Handle> head_handle,
const std::shared_ptr<TimestampPool::Handle> tail_handle) {
@@ -130,9 +107,8 @@ void QueueContext::notify_submit(
std::cerr << " " << wait << '\n';
}
- this->submissions.emplace_back(
- std::make_unique<Submission>(std::move(signals), std::move(waits),
- head_handle, tail_handle, sequence));
+ this->submissions.emplace_back(std::make_unique<Submission>(
+ std::move(signals), std::move(waits), head_handle, tail_handle));
// TODO HACK
if (std::size(this->submissions) > 100) {
@@ -165,7 +141,7 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
// The last submission is either in flight, already processed, or we
// just happen to be the first frame and we can just set it to our start
- // with little conseuqence.
+ // with little consequence.
const auto prev_frame_last_submit = [&]() -> auto {
if (const auto iter = std::rbegin(this->in_flight_frames);
iter != std::rend(this->in_flight_frames)) {
@@ -189,7 +165,6 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
this->in_flight_frames.emplace_back(Frame{
.prev_frame_last_submit = prev_frame_last_submit,
.submissions = std::move(this->submissions),
- .sequence = (*last_iter)->sequence,
});
assert(std::size(this->in_flight_frames.back().submissions));
// *valid but unspecified state after move, so clear!*
@@ -211,22 +186,12 @@ void QueueContext::process_frames() {
// We used to collect all devices that were pointed to by all potential
// submissions, put them in a set and then call.calibrate() on each once.
- // This is unnecessary now - we can assume all submissions come from the
- // same queue (this one!).
+ // This is unnecessary now - we assume all submissions come from the same
+ // queue. FIXME: don't assume this.
auto& device_context = this->device_context;
auto& clock = device_context.clock;
clock.calibrate();
- // Get the queue's sequence number so we can quickly check
- // frames are finished without calling getCalibratedTimestamps.
- // This is somewhat a premature optimization but it's elegant.
- const auto seq = [&, this]() -> auto {
- auto seq = std::uint64_t{0};
- device_context.vtable.GetSemaphoreCounterValueKHR(
- device_context.device, this->semaphore, &seq);
- return seq;
- }();
-
while (std::size(this->in_flight_frames)) {
const auto& frame = this->in_flight_frames.front();
@@ -237,7 +202,7 @@ void QueueContext::process_frames() {
const auto& last_submission = frame.submissions.back();
// Not completed (so future frames definitely aren't) - stop early.
- if (seq < last_submission->sequence) {
+ if (!last_submission->end_handle->get_time().has_value()) {
break;
}
@@ -256,13 +221,9 @@ void QueueContext::process_frames() {
std::ranges::transform(
frame.submissions, std::back_inserter(intervals),
[&, this](const auto& submission) {
- const auto get_time = [&, this](const auto& handle) {
- return handle->get_time();
- };
-
return Interval{
- .start = get_time(submission->start_handle),
- .end = get_time(submission->end_handle),
+ .start = submission->start_handle->get_time_required(),
+ .end = submission->end_handle->get_time_required(),
};
});
@@ -309,7 +270,8 @@ void QueueContext::process_frames() {
return gputime + (end - start);
});
- const auto start = frame.prev_frame_last_submit->end_handle->get_time();
+ const auto start =
+ frame.prev_frame_last_submit->end_handle->get_time_required();
const auto end = merged.back().end;
const auto not_gputime = (end - start) - gputime;
@@ -341,32 +303,24 @@ void QueueContext::sleep_in_present() {
// frames*.
this->process_frames();
- if (const auto F = std::size(this->in_flight_frames); F > 1) {
- // In this case, we are so far ahead that there are multiple frames
- // in flight. Either that, or our bookkeeping has gone horribly
- // wrong! Wait on the 2nd last frame in flight to complete. This
- // shunts us to F=1.
- const auto second_iter = std::next(std::rbegin(this->in_flight_frames));
- assert(second_iter != std::rend(this->in_flight_frames));
-
- const auto swi = VkSemaphoreWaitInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
- .semaphoreCount = 1,
- .pSemaphores = &this->semaphore,
- .pValues = &second_iter->sequence,
- };
- vtable.WaitSemaphoresKHR(device.device, &swi,
- std::numeric_limits<std::uint64_t>::max());
-
- // Here
- this->process_frames(); // get rid of completed frames
- } else if (!F) {
- // We have completed all frames. DO NOT WAIT!
+ if (!std::size(this->in_flight_frames)) {
return;
}
- // We are checking size again because process_frames might have drained
- // it to zero.
+ // This is doing more than it looks like one line can do (tbf it is a long
+ // line). It's getting the most recent frame and waiting until its start has
+ // begun. This means that, in the case of >1 frame in flight, it's draining
+ // all of them before we're allowed to move forward.
+ const auto a = this->in_flight_frames.back()
+ .submissions.front()
+ ->start_handle->get_time_spinlock();
+
+ // Process frames because as stated above, we might have multiple frames
+ // now completed.
+ this->process_frames();
+
+ // Check the size again because the frame we want to target may have already
+ // completed when we called process_frames().
if (!std::size(this->in_flight_frames)) {
return;
}
@@ -397,56 +351,12 @@ void QueueContext::sleep_in_present() {
std::cerr << " expected not_gputime: ";
debug_log_time(expected_not_gputime);
- // PRESENT CALL
- // |----------------------------------|----------------|
- // first b c
- //
- // Us, the CPU on the host, is approximately at 'b'. We have a good
- // guess for the distance between a and b as gputime.
-
- const auto& frame = this->in_flight_frames.back();
-
- // We could be in the period where A hasn't signalled yet.
- // It's impossible to make a decision until we know a.
- // Doing this is fine because it won't affect throughput at all.
- // (ie, there's more work queued after regardless).
- // FIXME: If a == b, then we're waiting for the entire queue
- // to finish because the semaphore only says if it has finished.
- // The fix is to check the start timestamp instead of the query
- // in the case that it's...
- // Honestly it might be better to signal two semaphores because
- // we need to wait for when the submission starts work and
- // right now, we only signal when the submission finishes work.
- // Ideally we have both, so we can elegantly wait on the start
- // semaphore of A, then get A's start timestamp. This is BROKEN.
-
- [&]() -> void {
- const auto swi = VkSemaphoreWaitInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
- .semaphoreCount = 1,
- .pSemaphores = &this->semaphore,
- .pValues = &frame.submissions.front()->sequence,
- };
- vtable.WaitSemaphoresKHR(device.device, &swi,
- std::numeric_limits<std::uint64_t>::max());
- }();
-
- // We now know that A is available because its semaphore has been
- // signalled.
- const auto a = frame.submissions.front()->start_handle->get_time();
-
const auto now = std::chrono::steady_clock::now();
const auto dist = now - a;
const auto expected = expected_gputime - dist;
- const auto swi = VkSemaphoreWaitInfo{
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
- .semaphoreCount = 1,
- .pSemaphores = &this->semaphore,
- .pValues = &frame.sequence,
- };
- vtable.WaitSemaphoresKHR(device.device, &swi,
- std::max(expected.count(), 0l));
+ const auto& frame = this->in_flight_frames.back();
+ frame.submissions.back()->end_handle->get_time_spinlock(now + expected);
}
} // namespace low_latency
\ No newline at end of file
diff --git a/src/queue_context.hh b/src/queue_context.hh
index 219e6fb..f8782de 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -22,9 +22,6 @@ class QueueContext final : public Context {
const VkQueue queue;
const std::uint32_t queue_family_index;
- std::uint64_t semaphore_sequence = 0;
- VkSemaphore semaphore;
-
VkCommandPool command_pool;
std::unique_ptr<TimestampPool> timestamp_pool;
@@ -40,8 +37,6 @@ class QueueContext final : public Context {
const std::shared_ptr<TimestampPool::Handle> start_handle;
const std::shared_ptr<TimestampPool::Handle> end_handle;
- std::uint64_t sequence;
-
std::string debug;
};
using submission_ptr_t = std::shared_ptr<Submission>;
@@ -55,7 +50,6 @@ class QueueContext final : public Context {
struct Frame {
submission_ptr_t prev_frame_last_submit;
std::deque<submission_ptr_t> submissions;
- std::uint64_t sequence;
};
std::deque<Frame> in_flight_frames;
@@ -77,12 +71,12 @@ class QueueContext final : public Context {
public:
void
- notify_submit(const VkSubmitInfo& info, const std::uint64_t& sequence,
+ notify_submit(const VkSubmitInfo& info,
const std::shared_ptr<TimestampPool::Handle> head_handle,
const std::shared_ptr<TimestampPool::Handle> tail_handle);
void
- notify_submit(const VkSubmitInfo2& info, const std::uint64_t& sequence,
+ notify_submit(const VkSubmitInfo2& info,
const std::shared_ptr<TimestampPool::Handle> head_handle,
const std::shared_ptr<TimestampPool::Handle> tail_handle);
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index 854fae1..a66bb2a 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -2,7 +2,9 @@
#include "device_context.hh"
#include "queue_context.hh"
+#include <chrono>
#include <ranges>
+#include <thread>
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan_core.h>
@@ -123,37 +125,68 @@ void TimestampPool::Handle::setup_command_buffers(
vtable.EndCommandBuffer(tail.command_buffer);
}
-DeviceContext::Clock::time_point_t TimestampPool::Handle::get_time() {
+std::optional<DeviceContext::Clock::time_point_t>
+TimestampPool::Handle::get_time() {
const auto& device_ctx = this->timestamp_pool.queue_context.device_context;
const auto& vtable = device_ctx.vtable;
- // For debug builds, we're going to query the availability bit so we can
- // assert that after the semaphore has flagged it as naturally available.
struct QueryResult {
std::uint64_t value;
-#ifndef NDEBUG
std::uint64_t available;
-#endif
};
auto query_result = QueryResult{};
- constexpr auto query_flags = []() -> auto {
- auto flag = VkQueryResultFlags{VK_QUERY_RESULT_64_BIT};
-#ifndef NDEBUG
- flag |= VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
-#endif
- return flag;
- }();
-
const auto r = vtable.GetQueryPoolResults(
device_ctx.device, query_pool, this->query_index, 1,
- sizeof(query_result), &query_result, sizeof(query_result), query_flags);
+ sizeof(query_result), &query_result, sizeof(query_result),
+ VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
- assert(r == VK_SUCCESS && query_result.available);
+ assert(r == VK_SUCCESS || r == VK_NOT_READY);
+
+ if (!query_result.available) {
+ return std::nullopt;
+ }
return device_ctx.clock.ticks_to_time(query_result.value);
}
+std::optional<DeviceContext::Clock::time_point_t>
+TimestampPool::Handle::get_time_spinlock(
+ const DeviceContext::Clock::time_point_t& until) {
+
+ auto time = this->get_time();
+ if (time.has_value()) { // fast path, avoid now().
+ return time;
+ }
+
+ auto last = std::chrono::steady_clock::now();
+ for (; !time.has_value(); time = this->get_time()) {
+
+ if (const auto now = std::chrono::steady_clock::now(); now >= until) {
+ break;
+ }
+
+ // Afaik no-op if it's too far behind, which is ideal.
+ std::this_thread::sleep_until(std::min(last + this->SPINLOCK_MAX_DELAY, until));
+
+ last = std::chrono::steady_clock::now();
+ }
+
+ return time;
+}
+
+DeviceContext::Clock::time_point_t TimestampPool::Handle::get_time_spinlock() {
+ const auto time = this->get_time_spinlock(DeviceContext::Clock::time_point_t::max());
+ assert(time.has_value());
+ return *time;
+}
+
+DeviceContext::Clock::time_point_t TimestampPool::Handle::get_time_required() {
+ const auto time = this->get_time();
+ assert(time.has_value());
+ return *time;
+}
+
TimestampPool::~TimestampPool() {
const auto& device = this->queue_context.device_context.device;
const auto& vtable = this->queue_context.device_context.vtable;
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index b7aa54e..bfdad2e 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -60,6 +60,11 @@ class TimestampPool final {
friend class TimestampPool;
private:
+ // For our spinlock functions this is the period in which we sleep
+ // between attempts.
+ static constexpr auto SPINLOCK_MAX_DELAY = std::chrono::microseconds(1);
+
+ private:
const TimestampPool& timestamp_pool;
const std::weak_ptr<QueryChunk> origin_chunk;
@@ -82,7 +87,20 @@ class TimestampPool final {
void setup_command_buffers(const Handle& tail,
const QueueContext& queue_context) const;
- DeviceContext::Clock::time_point_t get_time();
+ // Attempts to get_time(); returns std::nullopt if the timestamp is not
+ // available yet.
+ std::optional<DeviceContext::Clock::time_point_t> get_time();
+
+ // Calls get_time() repeatedly under a spinlock, giving up and returning
+ // std::nullopt once `until` is reached.
+ std::optional<DeviceContext::Clock::time_point_t>
+ get_time_spinlock(const DeviceContext::Clock::time_point_t& until);
+
+ // Calls get_time() repeatedly under a spinlock until it's available.
+ DeviceContext::Clock::time_point_t get_time_spinlock();
+
+ // Calls get_time with the assumption it's already available.
+ DeviceContext::Clock::time_point_t get_time_required();
};
public: