aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/device_context.cc46
-rw-r--r--src/device_context.hh11
-rw-r--r--src/layer.cc116
-rw-r--r--src/queue_context.cc429
-rw-r--r--src/queue_context.hh35
-rw-r--r--src/timestamp_pool.cc208
-rw-r--r--src/timestamp_pool.hh63
7 files changed, 527 insertions, 381 deletions
diff --git a/src/device_context.cc b/src/device_context.cc
index 4b39210..f849df1 100644
--- a/src/device_context.cc
+++ b/src/device_context.cc
@@ -31,50 +31,58 @@ void DeviceContext::notify_acquire(const VkSwapchainKHR& swapchain,
it->second.insert_or_assign(image_index, signal_semaphore);
}
-DeviceContext::Clock::Clock(const DeviceContext& context) {
+DeviceContext::Clock::Clock(const DeviceContext& context) : device(context) {
+ this->calibrate();
+}
+
+DeviceContext::Clock::~Clock() {}
+void DeviceContext::Clock::calibrate() {
const auto infos = std::vector<VkCalibratedTimestampInfoKHR>{
{VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr,
VK_TIME_DOMAIN_DEVICE_EXT},
{VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr,
VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT}};
- auto device_host = std::array<std::uint64_t, 2>{};
+ struct CalibratedResult {
+ std::uint64_t device;
+ std::uint64_t host;
+ };
+ auto calibrated_result = CalibratedResult{};
+ // we probably want to use this instead bc clock_gettime isn't guaranteed
+ // by steady clock afaik
+ /*
+ struct timespec tv;
+ clock_gettime(CLOCK_MONOTONIC, &tv);
+ return tv.tv_nsec + tv.tv_sec*1000000000ull;
+ */
const auto steady_before = std::chrono::steady_clock::now();
- context.vtable.GetCalibratedTimestampsKHR(
- context.device, 2, std::data(infos), std::data(device_host),
- &this->error_bound);
+ device.vtable.GetCalibratedTimestampsKHR(device.device, 2, std::data(infos),
+ &calibrated_result.device,
+ &this->error_bound);
const auto steady_after = std::chrono::steady_clock::now();
this->cpu_time = steady_before + (steady_after - steady_before) / 2;
- this->device_ticks = device_host[0];
- this->host_ns = device_host[1];
+ this->device_ticks = calibrated_result.device;
+ this->host_ns = calibrated_result.host;
- // Might need to get physical limits again?
- this->ticks_per_ns =
- context.physical_device.properties->limits.timestampPeriod;
+ // Might need to get physical limits every now and then?
+ const auto& pd = device.physical_device.properties;
+ this->ticks_per_ns = pd->limits.timestampPeriod;
}
DeviceContext::Clock::time_point_t
DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
- /*
- struct timespec tv;
- clock_gettime(CLOCK_MONOTONIC, &tv);
- return tv.tv_nsec + tv.tv_sec*1000000000ull;
- */
-
auto a = this->device_ticks;
auto b = ticks;
-
const auto was_before = a > b;
if (was_before) { // it's happened before
std::swap(a, b);
}
+
const auto nsec = std::chrono::nanoseconds((b - a) * this->ticks_per_ns);
return this->cpu_time + (was_before ? -nsec : nsec);
}
-void DeviceContext::calibrate_timestamps() { this->clock = Clock{*this}; }
-
} // namespace low_latency \ No newline at end of file
diff --git a/src/device_context.hh b/src/device_context.hh
index b55b70c..c08cec2 100644
--- a/src/device_context.hh
+++ b/src/device_context.hh
@@ -35,8 +35,11 @@ struct DeviceContext final : public Context {
std::unordered_map<VkSwapchainKHR, index_semaphores_t> swapchain_signals;
struct Clock {
+ public:
using time_point_t = std::chrono::steady_clock::time_point;
+ const DeviceContext& device;
+ public:
time_point_t cpu_time;
std::uint64_t error_bound;
std::uint64_t device_ticks;
@@ -45,7 +48,10 @@ struct DeviceContext final : public Context {
public:
Clock(const DeviceContext& device);
-
+ ~Clock();
+
+ public:
+ void calibrate();
time_point_t ticks_to_time(const std::uint64_t& ticks) const;
};
Clock clock;
@@ -61,9 +67,6 @@ struct DeviceContext final : public Context {
void notify_acquire(const VkSwapchainKHR& swapchain,
const std::uint32_t& image_index,
const VkSemaphore& signal_semaphore);
-
- public:
- void calibrate_timestamps();
};
}; // namespace low_latency
diff --git a/src/layer.cc b/src/layer.cc
index c521bb9..1b1d9e7 100644
--- a/src/layer.cc
+++ b/src/layer.cc
@@ -1,9 +1,12 @@
#include "layer.hh"
+#include <memory>
+#include <span>
#include <string_view>
#include <thread>
#include <unordered_map>
#include <utility>
+#include <vector>
#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vk_layer.h>
@@ -224,7 +227,8 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
const auto wanted_extensions = {
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME,
- VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME};
+ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
+ VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME};
for (const auto& wanted : wanted_extensions) {
@@ -274,7 +278,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
return result;
}
-
+
#define DEVICE_VTABLE_LOAD(name) \
.name = reinterpret_cast<PFN_vk##name>(gdpa(*pDevice, "vk" #name))
auto vtable = VkuDeviceDispatchTable{
@@ -294,9 +298,9 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
DEVICE_VTABLE_LOAD(BeginCommandBuffer),
DEVICE_VTABLE_LOAD(EndCommandBuffer),
DEVICE_VTABLE_LOAD(ResetCommandBuffer),
- DEVICE_VTABLE_LOAD(CmdResetQueryPool),
DEVICE_VTABLE_LOAD(CmdDraw),
DEVICE_VTABLE_LOAD(CmdDrawIndexed),
+ DEVICE_VTABLE_LOAD(CmdResetQueryPool),
DEVICE_VTABLE_LOAD(GetDeviceQueue2),
DEVICE_VTABLE_LOAD(QueueSubmit2),
DEVICE_VTABLE_LOAD(AcquireNextImageKHR),
@@ -306,6 +310,7 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice(
DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR),
DEVICE_VTABLE_LOAD(QueueSubmit2KHR),
DEVICE_VTABLE_LOAD(GetCalibratedTimestampsKHR),
+ DEVICE_VTABLE_LOAD(ResetQueryPoolEXT),
};
#undef DEVICE_VTABLE_LOAD
@@ -442,61 +447,81 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR(
static VKAPI_ATTR VkResult VKAPI_CALL
vkQueueSubmit(VkQueue queue, std::uint32_t submit_count,
- const VkSubmitInfo* submit_info, VkFence fence) {
+ const VkSubmitInfo* submit_infos, VkFence fence) {
const auto& queue_context = layer_context.get_context(queue);
const auto& vtable = queue_context->device_context.vtable;
if (!submit_count) { // no-op submit we shouldn't worry about
- return vtable.QueueSubmit(queue, submit_count, submit_info, fence);
+ return vtable.QueueSubmit(queue, submit_count, submit_infos, fence);
}
- // Create a new vector of submit infos.
- auto next_submit_infos = std::vector<VkSubmitInfo>{};
-
- auto timestamp_handle = queue_context->timestamp_pool->acquire();
- timestamp_handle->setup_command_buffers(vtable);
-
- const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
+ // We have to avoid casting away the const* of the passed VkSubmitInfos.
+ // We wrap every single submission with *two* extra VkSubmitInfos to
+ // accomplish this. The first executes a command buffer that
+
+ using cb_vect = std::vector<VkCommandBuffer>;
+ using tssi_ptr_t = std::unique_ptr<VkTimelineSemaphoreSubmitInfo>;
+ auto next_submits = std::vector<VkSubmitInfo>{};
+ auto next_cbs = std::vector<std::unique_ptr<cb_vect>>{};
+ auto next_signals = std::vector<std::unique_ptr<std::uint64_t>>{};
+ auto next_tssis = std::vector<tssi_ptr_t>{};
+ auto handles = std::vector<std::shared_ptr<TimestampPool::Handle>>{};
+
+ for (const auto& submit_info : std::span{submit_infos, submit_count}) {
+ const auto head_handle = queue_context->timestamp_pool->acquire();
+ const auto tail_handle = queue_context->timestamp_pool->acquire();
+
+ // Head is special as we need to inject a CB into a copy of
+ // their command buffers that records the time the waits completed.
+ next_cbs.emplace_back([&]() -> auto {
+ auto cbs = std::make_unique<std::vector<VkCommandBuffer>>();
+ head_handle->setup_command_buffers(*tail_handle, *queue_context);
+ cbs->push_back(head_handle->command_buffer);
+ std::ranges::copy_n(submit_info.pCommandBuffers,
+ submit_info.commandBufferCount,
+ std::back_inserter(*cbs));
+ cbs->push_back(tail_handle->command_buffer);
+ return cbs;
+ }());
+ next_submits.push_back(submit_info);
+ next_submits.back().pCommandBuffers = std::data(*next_cbs.back());
+ next_submits.back().commandBufferCount = std::size(*next_cbs.back());
+
+ const auto next_signal = 1 + queue_context->semaphore_sequence++;
+
+ next_signals.push_back(std::make_unique<std::uint64_t>(next_signal));
+
+ next_tssis.push_back(std::make_unique<VkTimelineSemaphoreSubmitInfo>(
+ VkTimelineSemaphoreSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+ .signalSemaphoreValueCount = 1,
+ .pSignalSemaphoreValues = next_signals.back().get(),
+ }));
+ next_submits.push_back(VkSubmitInfo{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = next_tssis.back().get(),
+ .commandBufferCount = 1,
+ .pCommandBuffers = &tail_handle->command_buffer,
+ .signalSemaphoreCount = 1,
+ .pSignalSemaphores = &queue_context->semaphore,
+ });
- const auto next_command_buffers = [&]() -> auto {
- auto next_command_buffers = std::vector<VkCommandBuffer>{head_cb};
- std::ranges::copy_n(submit_info[0].pCommandBuffers,
- submit_info[0].commandBufferCount,
- std::back_inserter(next_command_buffers));
- return next_command_buffers;
- }();
+ queue_context->notify_submit(submit_info, next_signal, head_handle,
+ tail_handle);
- std::ranges::copy_n(submit_info, submit_count,
- std::back_inserter(next_submit_infos));
- next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers);
- next_submit_infos[0].commandBufferCount = std::size(next_command_buffers);
-
- const auto next_signal = 1 + queue_context->semaphore_sequence++;
- const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
- .signalSemaphoreValueCount = 1,
- .pSignalSemaphoreValues = &next_signal,
- };
- next_submit_infos.push_back(VkSubmitInfo{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &tail_tssi,
- .commandBufferCount = 1,
- .pCommandBuffers = &tail_cb,
- .signalSemaphoreCount = 1,
- .pSignalSemaphores = &queue_context->semaphore,
- });
+ handles.push_back(head_handle);
+ handles.push_back(tail_handle);
+ }
- if (const auto res =
- vtable.QueueSubmit(queue, std::size(next_submit_infos),
- std::data(next_submit_infos), fence);
+ if (const auto res = vtable.QueueSubmit(queue, std::size(next_submits),
+ std::data(next_submits), fence);
res != VK_SUCCESS) {
return res;
}
- queue_context->notify_submit(std::span{submit_info, submit_count},
- next_signal, std::move(timestamp_handle));
+ // ?!?
return VK_SUCCESS;
}
@@ -509,10 +534,12 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
const auto queue_context = layer_context.get_context(queue);
const auto& vtable = queue_context->device_context.vtable;
- if (!submit_count) {
+ // TODO
+ if (!submit_count || true) {
return vtable.QueueSubmit2(queue, submit_count, submit_infos, fence);
}
+ /*
auto timestamp_handle = queue_context->timestamp_pool->acquire();
timestamp_handle->setup_command_buffers(vtable);
const auto& [head_cb, tail_cb] = timestamp_handle->command_buffers;
@@ -568,6 +595,7 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count,
std::move(timestamp_handle));
return VK_SUCCESS;
+ */
}
static VKAPI_ATTR VkResult VKAPI_CALL
diff --git a/src/queue_context.cc b/src/queue_context.cc
index 9b46773..99cf51e 100644
--- a/src/queue_context.cc
+++ b/src/queue_context.cc
@@ -67,25 +67,20 @@ QueueContext::~QueueContext() {
}
void QueueContext::notify_submit(
- std::span<const VkSubmitInfo> infos,
- const std::uint64_t target_semaphore_sequence,
- std::shared_ptr<TimestampPool::Handle>&& handle) {
-
- // This has an issue where we're collecting all signals and waits and
- // treating a single submit call as finishing
+ const VkSubmitInfo& info, const std::uint64_t& target_semaphore_sequence,
+ const std::shared_ptr<TimestampPool::Handle> head_handle,
+ const std::shared_ptr<TimestampPool::Handle> tail_handle) {
auto signals = std::unordered_set<VkSemaphore>{};
auto waits = std::unordered_set<VkSemaphore>{};
- for (const auto& info : infos) {
- std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
- std::inserter(waits, std::end(waits)));
- std::ranges::copy_n(info.pSignalSemaphores, info.signalSemaphoreCount,
- std::inserter(signals, std::end(signals)));
- }
+ std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
+ std::inserter(waits, std::end(waits)));
+ std::ranges::copy_n(info.pSignalSemaphores, info.signalSemaphoreCount,
+ std::inserter(signals, std::end(signals)));
this->submissions.emplace_back(std::make_unique<Submission>(
std::move(signals), std::move(waits), target_semaphore_sequence,
- std::move(handle)));
+ head_handle, tail_handle));
// TODO HACK
if (std::size(this->submissions) > 100) {
@@ -93,6 +88,7 @@ void QueueContext::notify_submit(
}
}
+/*
void QueueContext::notify_submit(
std::span<const VkSubmitInfo2> infos,
const std::uint64_t target_semaphore_sequence,
@@ -100,6 +96,7 @@ void QueueContext::notify_submit(
auto signals = std::unordered_set<VkSemaphore>{};
auto waits = std::unordered_set<VkSemaphore>{};
+
for (const auto& info : infos) {
constexpr auto get_semaphore = [](const auto& semaphore_info) {
return semaphore_info.semaphore;
@@ -124,21 +121,18 @@ void QueueContext::notify_submit(
this->submissions.pop_front();
}
}
+*/
void QueueContext::notify_present(const VkPresentInfoKHR& info) {
- auto frame = [&]() -> std::unique_ptr<Frame> {
- const auto waits = [&]() {
- auto waits = std::unordered_set<VkSemaphore>{};
- std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
- std::inserter(waits, std::end(waits)));
- return waits;
- }();
-
- const auto wait_semaphores = std::unordered_set<VkSemaphore>{
- info.pWaitSemaphores,
- std::next(info.pWaitSemaphores, info.waitSemaphoreCount)};
+ const auto waits = [&]() {
+ auto waits = std::unordered_set<VkSemaphore>{};
+ std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount,
+ std::inserter(waits, std::end(waits)));
+ return waits;
+ }();
+ const auto collected_semaphores = [&info, this]() {
auto collected_semaphores = std::unordered_set<VkSemaphore>{};
for (auto i = std::uint32_t{0}; i < info.swapchainCount; ++i) {
const auto& swapchain = info.pSwapchains[i];
@@ -153,112 +147,147 @@ void QueueContext::notify_present(const VkPresentInfoKHR& info) {
const auto index_it = swapchain_it->second.find(index);
assert(index_it != std::end(swapchain_it->second));
- const auto semaphore = index_it->second;
+ const auto& semaphore = index_it->second;
collected_semaphores.emplace(index_it->second);
}
+ return collected_semaphores;
+ }();
- const auto start_submission_it = std::ranges::find_if(
- std::rbegin(this->submissions), std::rend(this->submissions),
- [&](const auto& submission) {
- return std::ranges::any_of(
- submission->waits, [&](const auto& wait) {
- return collected_semaphores.contains(wait);
- });
- });
-
- if (start_submission_it == std::rend(this->submissions)) {
- std::cout << "couldn't find starting submission!\n";
- return nullptr;
- }
- const auto& start_submission = *start_submission_it;
-
- const auto end_submission_it = std::ranges::find_if(
- std::rbegin(this->submissions), std::rend(this->submissions),
- [&](const auto& submission) {
- return std::ranges::any_of(
- submission->signals, [&](const auto& signal) {
- return wait_semaphores.contains(signal);
- });
- });
-
- if (end_submission_it == std::rend(this->submissions)) {
- std::cout << "couldn't find ending submission!\n";
- return nullptr;
- }
- const auto& end_submission = *end_submission_it;
-
- return std::make_unique<Frame>(Frame{
- .start_context = *this,
- .start = start_submission->timestamp_handle,
- .target_start_sequence =
- start_submission->target_semaphore_sequence,
- .end_context = *this,
- .end = start_submission->timestamp_handle,
- .target_end_sequence = start_submission->target_semaphore_sequence,
+ const auto start_iter = std::ranges::find_if(
+ std::rbegin(this->submissions), std::rend(this->submissions),
+ [&](const auto& submission) {
+ return std::ranges::any_of(
+ submission->waits, [&](const auto& wait) {
+ return collected_semaphores.contains(wait);
+ });
});
- }();
- this->in_flight_frames.emplace_back(std::move(frame));
-
+ if (start_iter == std::rend(this->submissions)) {
+ std::cout << "couldn't find starting submission!\n";
+ return;
+ }
+ const auto& start = *start_iter;
+
+ const auto end_iter = std::ranges::find_if(
+ std::rbegin(this->submissions), std::rend(this->submissions),
+ [&](const auto& submission) {
+ return std::ranges::any_of(
+ submission->signals,
+ [&](const auto& signal) { return waits.contains(signal); });
+ });
+
+ if (end_iter == std::rend(this->submissions)) {
+ std::cout << "couldn't find ending submission!\n";
+ return;
+ }
+ const auto& end = *end_iter;
+
+ auto frame = Frame{.start =
+ Frame::Timepoint{
+ .context = *this,
+ .handle = start->start_handle,
+ .sequence = start->sequence,
+ },
+ .end = Frame::Timepoint{
+ .context = *this,
+ .handle = end->end_handle,
+ .sequence = end->sequence,
+ }};
+ this->in_flight_frames.emplace_back(
+ std::make_unique<Frame>(std::move(frame)));
+
// hack
if (this->in_flight_frames.size() > 5) {
this->in_flight_frames.pop_front();
}
}
-// now it's all coming together
std::optional<QueueContext::duration_t> QueueContext::get_delay_time() {
if (!std::size(this->in_flight_frames)) {
return std::nullopt;
}
- auto seq = std::uint64_t{};
- this->device_context.vtable.GetSemaphoreCounterValueKHR(
- this->device_context.device, this->semaphore, &seq);
-
- // Get semaphore first, then poll!
- this->timestamp_pool->poll();
+ // We are about to query the wait semaphores of all of our current
+ // frames in flight. They may come from the same device, so we're going
+ // to build a mapping here to reduce vulkan calls. Not only that,
+ // we have to do this or else our timing information becomes broken
+ // as this loop iterates.
+ const auto target_devices = [this]() -> auto {
+ using context_ref_t = std::reference_wrapper<DeviceContext>;
+ auto target_devices = std::unordered_map<VkDevice, context_ref_t>{};
+ for (const auto& frame : this->in_flight_frames) {
+ auto& start = frame->start.context.device_context;
+ auto& end = frame->end.context.device_context;
+
+ target_devices.try_emplace(start.device, std::ref(start));
+ target_devices.try_emplace(end.device, std::ref(end));
+ }
+ return target_devices;
+ }();
- // idk how frequently we should call this.
- this->device_context.calibrate_timestamps();
+ // Calibrate timestamps before we acquire semaphores.
+ for (const auto& pair : target_devices) {
+ auto& device = pair.second;
+ device_context.clock.calibrate();
+ }
- static auto gpu_frametimes = std::deque<uint64_t>{};
- static auto cpu_frametimes = std::deque<uint64_t>{};
+ // Now we have all owned devices and their clocks are in a good state.
+ // We need to build another mapping of semaphores to their queries now.
+ const auto queue_sequences = [this]() -> auto {
+ auto queue_sequences = std::unordered_map<VkQueue, std::uint64_t>{};
+ for (const auto& frame : this->in_flight_frames) {
+ auto& start = frame->start.context;
+ auto& end = frame->end.context;
+
+ for (const auto& queue_ptr : {&start, &end}) {
+ if (queue_sequences.contains(queue_ptr->queue)) {
+ continue;
+ }
+
+ const auto& vtable = queue_ptr->device_context.vtable;
+ auto seq = std::uint64_t{};
+ vtable.GetSemaphoreCounterValueKHR(this->device_context.device,
+ this->semaphore, &seq);
+ queue_sequences.emplace(queue_ptr->queue, seq);
+ }
+ }
+ return queue_sequences;
+ }();
+ // Now all devices we are about to query are primed to query.
+ // We have all sequence numbers from all queus we could possibly query.
const auto S = std::size(this->in_flight_frames);
+ for (auto i = std::size_t{0}; i < S; ++i) {
+ assert(this->in_flight_frames[i]);
+ const auto& frame = *this->in_flight_frames[i];
+ const auto& start = frame.start;
+ const auto& end = frame.end;
- std::cout << "\nSTART FRAME READOUT\n";
- std::cout << "error bound: " << this->device_context.clock.error_bound
- << '\n';
- std::cout << "num frames in flight: " << S << '\n';
- std::cout << "from oldest -> newest\n";
-
- // const auto b_seq = semaphore_from_context(*this);
- const auto now = std::chrono::steady_clock::now();
-
- auto i = std::size_t{0};
- for (; i < std::size(this->in_flight_frames); ++i) {
- const auto& frame = this->in_flight_frames[i];
std::cout << " Evaluating the frame that's " << S - i - 1
<< " behind\n";
- if (!frame) {
- std::cout << " nullptr!\n";
+
+ std::cout << " target start seq: " << start.sequence << '\n';
+ std::cout << " target end seq: " << end.sequence << '\n';
+
+ const auto start_seq_it = queue_sequences.find(start.context.queue);
+ assert(start_seq_it != std::end(queue_sequences));
+ const auto& start_seq = start_seq_it->second;
+ if (start_seq < start.sequence) {
+ std::cout << " frame hasn't started yet !\n ";
continue;
}
- std::cout << " target start: " << frame->target_start_sequence << '\n';
- std::cout << " target end: " << frame->target_end_sequence << '\n';
- if (seq < frame->target_start_sequence) {
- std::cout << " frame hasn't started yet!\n";
- continue;
+ /*
+ const auto start_ticks_opt =
+ start.handle->get_ticks(*start.context.timestamp_pool);
+ if (!start_ticks_opt.has_value()) {
+ std::cout << " frame hasn't started yet !\n ";
}
- const auto start_ticks =
- frame->start_context.timestamp_pool->get_polled(*frame->start);
std::cout << " START TICKS: " << start_ticks << '\n';
- const auto& a_clock = frame->start_context.device_context.clock;
- const auto a = a_clock.ticks_to_time(start_ticks);
-
+ const auto start_time =
+ start.context.device_context.clock.ticks_to_time(start_ticks);
+
{
using namespace std::chrono;
const auto diff = now - a;
@@ -269,85 +298,161 @@ std::optional<QueueContext::duration_t> QueueContext::get_delay_time() {
<< " us " << ns << " ns ago\n";
}
- if (seq < frame->target_end_sequence) {
- std::cout << " frame hasn't ended yet!\n";
+ const auto end_seq_it = queue_sequences.find(end.context.queue);
+ assert(end_seq_it != std::end(queue_sequences));
+ const auto& end_seq = end_seq_it->second;
+ if (start_seq < end.sequence) {
+ std::cout << " frame hasn't started yet !\n ";
continue;
}
+ */
+ }
+ return std::nullopt;
+ //
+}
- const auto end_ticks =
- frame->end_context.timestamp_pool->get_polled(*frame->end, true);
- const auto& b_clock = frame->end_context.device_context.clock;
- std::cout << " END_TICKS: " << end_ticks << '\n';
- const auto b = b_clock.ticks_to_time(end_ticks);
- {
- using namespace std::chrono;
- if (now <= b) {
- std::cout << "b happened before now?\n";
- }
- const auto diff = now - b;
- const auto ms = duration_cast<milliseconds>(diff);
- const auto us = duration_cast<microseconds>(diff - ms);
- const auto ns = duration_cast<nanoseconds>(diff - ms - us);
- std::cout << " frame ended: " << ms << " ms " << us
- << " us " << ns << " ns ago\n";
- }
+// now it's all coming together
+// std::optional<QueueContext::duration_t> QueueContext::get_delay_time() {
+/*
+if (!std::size(this->in_flight_frames)) {
+ return std::nullopt;
+}
- const auto gpu_time = b - a;
- {
- using namespace std::chrono;
- const auto diff = gpu_time;
- const auto ms = duration_cast<milliseconds>(diff);
- const auto us = duration_cast<microseconds>(diff - ms);
- const auto ns = duration_cast<nanoseconds>(diff - ms - us);
- std::cout << " gpu_time: " << ms << " ms " << us
- << " us " << ns << " ns ago\n";
- }
+auto seq = std::uint64_t{};
+this->device_context.vtable.GetSemaphoreCounterValueKHR(
+ this->device_context.device, this->semaphore, &seq);
- /*
- cpu_frametimes.emplace_back(cpu_time);
- gpu_frametimes.emplace_back(gpu_time);
- */
- }
+// Get semaphore first, then poll!
+this->timestamp_pool->poll();
- /*
- if (remove_index.has_value()) {
- this->in_flight_frames.erase(std::begin(this->in_flight_frames),
- std::begin(this->in_flight_frames) +
- *remove_index);
+// idk how frequently we should call this.
+this->device_context.calibrate_timestamps();
+
+static auto gpu_frametimes = std::deque<uint64_t>{};
+static auto cpu_frametimes = std::deque<uint64_t>{};
+
+const auto S = std::size(this->in_flight_frames);
+
+std::cout << "\nSTART FRAME READOUT\n";
+std::cout << "error bound: " << this->device_context.clock.error_bound
+ << '\n';
+std::cout << "num frames in flight: " << S << '\n';
+std::cout << "from oldest -> newest\n";
+
+// const auto b_seq = semaphore_from_context(*this);
+const auto now = std::chrono::steady_clock::now();
+
+auto i = std::size_t{0};
+for (; i < std::size(this->in_flight_frames); ++i) {
+ const auto& frame = this->in_flight_frames[i];
+ std::cout << " Evaluating the frame that's " << S - i - 1
+ << " behind\n";
+ if (!frame) {
+ std::cout << " nullptr!\n";
+ continue;
}
- */
- /*
- auto g_copy = gpu_frametimes;
- auto c_copy = cpu_frametimes;
- std::ranges::sort(g_copy);
- std::ranges::sort(c_copy);
+ std::cout << " target start: " << frame->target_start_sequence <<
+'\n'; std::cout << " target end: " << frame->target_end_sequence << '\n'; if
+(seq < frame->target_start_sequence) { std::cout << " frame hasn't
+started yet!\n"; continue;
+ }
- constexpr auto N = 49;
- if (std::size(cpu_frametimes) < N) {
- return std::nullopt;
+ const auto start_ticks =
+ frame->start_context.timestamp_pool->get_polled(*frame->start);
+ std::cout << " START TICKS: " << start_ticks << '\n';
+ const auto& a_clock = frame->start_context.device_context.clock;
+ const auto a = a_clock.ticks_to_time(start_ticks);
+
+ {
+ using namespace std::chrono;
+ const auto diff = now - a;
+ const auto ms = duration_cast<milliseconds>(diff);
+ const auto us = duration_cast<microseconds>(diff - ms);
+ const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+ std::cout << " frame started: " << ms << " ms " << us
+ << " us " << ns << " ns ago\n";
}
- const auto F = std::size(g_copy);
- // close enough to median lol
- const auto g = g_copy[F / 2];
- const auto c = c_copy[F / 2];
+ if (seq < frame->target_end_sequence) {
+ std::cout << " frame hasn't ended yet!\n";
+ continue;
+ }
- std::cout << g << '\n';
- std::cout << " median gpu: " << (g / 1'000'000) << " ms " << g / 1'000
- << " us " << g << " ns\n";
- std::cout << " median cpu: " << c / 1'000'000 << " ms " << c / 1'000
- << " us " << c << " ns\n";
+ const auto end_ticks =
+ frame->end_context.timestamp_pool->get_polled(*frame->end, true);
+ const auto& b_clock = frame->end_context.device_context.clock;
+ std::cout << " END_TICKS: " << end_ticks << '\n';
+ const auto b = b_clock.ticks_to_time(end_ticks);
+ {
+ using namespace std::chrono;
+ if (now <= b) {
+ std::cout << "b happened before now?\n";
+ }
+ const auto diff = now - b;
+ const auto ms = duration_cast<milliseconds>(diff);
+ const auto us = duration_cast<microseconds>(diff - ms);
+ const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+ std::cout << " frame ended: " << ms << " ms " << us
+ << " us " << ns << " ns ago\n";
+ }
- if (F > N) {
- gpu_frametimes.pop_front();
- cpu_frametimes.pop_front();
+ const auto gpu_time = b - a;
+ {
+ using namespace std::chrono;
+ const auto diff = gpu_time;
+ const auto ms = duration_cast<milliseconds>(diff);
+ const auto us = duration_cast<microseconds>(diff - ms);
+ const auto ns = duration_cast<nanoseconds>(diff - ms - us);
+ std::cout << " gpu_time: " << ms << " ms " << us
+ << " us " << ns << " ns ago\n";
}
- */
+ /*
+ cpu_frametimes.emplace_back(cpu_time);
+ gpu_frametimes.emplace_back(gpu_time);
+}
+
+/*
+if (remove_index.has_value()) {
+ this->in_flight_frames.erase(std::begin(this->in_flight_frames),
+ std::begin(this->in_flight_frames) +
+ *remove_index);
+}
+*/
+
+/*
+auto g_copy = gpu_frametimes;
+auto c_copy = cpu_frametimes;
+std::ranges::sort(g_copy);
+std::ranges::sort(c_copy);
+
+constexpr auto N = 49;
+if (std::size(cpu_frametimes) < N) {
return std::nullopt;
}
+const auto F = std::size(g_copy);
+// close enough to median lol
+const auto g = g_copy[F / 2];
+const auto c = c_copy[F / 2];
+
+std::cout << g << '\n';
+
+std::cout << " median gpu: " << (g / 1'000'000) << " ms " << g / 1'000
+ << " us " << g << " ns\n";
+std::cout << " median cpu: " << c / 1'000'000 << " ms " << c / 1'000
+ << " us " << c << " ns\n";
+
+if (F > N) {
+ gpu_frametimes.pop_front();
+ cpu_frametimes.pop_front();
+}
+
+return std::nullopt;
+}
+*/
+
} // namespace low_latency \ No newline at end of file
diff --git a/src/queue_context.hh b/src/queue_context.hh
index a6f43e5..3df6af4 100644
--- a/src/queue_context.hh
+++ b/src/queue_context.hh
@@ -10,7 +10,6 @@
#include <chrono>
#include <deque>
#include <memory>
-#include <span>
#include <unordered_set>
namespace low_latency {
@@ -35,24 +34,26 @@ class QueueContext final : public Context {
struct Submission {
const std::unordered_set<VkSemaphore> signals;
const std::unordered_set<VkSemaphore> waits;
- const std::uint64_t target_semaphore_sequence;
- const std::shared_ptr<TimestampPool::Handle> timestamp_handle;
+ const std::uint64_t sequence;
+
+ const std::shared_ptr<TimestampPool::Handle> start_handle;
+ const std::shared_ptr<TimestampPool::Handle> end_handle;
};
std::deque<std::shared_ptr<Submission>> submissions;
// In flight frames!
// These might come from different contexts.
struct Frame {
- const QueueContext& start_context;
- const std::shared_ptr<TimestampPool::Handle> start;
- const std::uint64_t target_start_sequence;
- const QueueContext& end_context;
- const std::shared_ptr<TimestampPool::Handle> end;
- const std::uint64_t target_end_sequence;
+ struct Timepoint {
+ const QueueContext& context;
+ const std::shared_ptr<TimestampPool::Handle> handle;
+ const std::uint64_t sequence;
+ };
+
+ const Timepoint start;
+ const Timepoint end;
};
- // These can be null, it means we made presented without finding the
- // timestamps associated with the present.
std::deque<std::unique_ptr<Frame>> in_flight_frames;
public:
@@ -61,12 +62,12 @@ class QueueContext final : public Context {
virtual ~QueueContext();
public:
- void notify_submit(std::span<const VkSubmitInfo> infos,
- const std::uint64_t target_semaphore_sequence,
- std::shared_ptr<TimestampPool::Handle>&& handle);
- void notify_submit(std::span<const VkSubmitInfo2> infos,
- const std::uint64_t target_semaphore_sequence,
- std::shared_ptr<TimestampPool::Handle>&& handle);
+ void
+ notify_submit(const VkSubmitInfo& info, const std::uint64_t& sequence,
+ const std::shared_ptr<TimestampPool::Handle> head_handle,
+ const std::shared_ptr<TimestampPool::Handle> tail_handle);
+
+ // TODO submit2
void notify_present(const VkPresentInfoKHR& info);
diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc
index b4dc3c9..cf48873 100644
--- a/src/timestamp_pool.cc
+++ b/src/timestamp_pool.cc
@@ -3,142 +3,152 @@
#include "queue_context.hh"
#include <ranges>
+#include <vulkan/utility/vk_dispatch_table.h>
#include <vulkan/vulkan_core.h>
namespace low_latency {
-TimestampPool::Block TimestampPool::allocate() {
- const auto& device_context = this->queue_context.device_context;
+TimestampPool::QueryChunk::QueryChunk(const QueueContext& queue_context) {
+ const auto& device_context = queue_context.device_context;
+ const auto& vtable = device_context.vtable;
- const auto query_pool = [&]() -> VkQueryPool {
+ this->query_pool = [&]() {
const auto qpci = VkQueryPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.queryType = VK_QUERY_TYPE_TIMESTAMP,
- .queryCount = this->TIMESTAMP_QUERY_POOL_SIZE};
-
- auto query_pool = VkQueryPool{};
+ .queryCount = QueryChunk::CHUNK_SIZE};
- device_context.vtable.CreateQueryPool(device_context.device, &qpci,
- nullptr, &query_pool);
- return query_pool;
+ auto qp = VkQueryPool{};
+ vtable.CreateQueryPool(device_context.device, &qpci, nullptr, &qp);
+ return qp;
}();
- const auto key_range =
- std::views::iota(0u, this->TIMESTAMP_QUERY_POOL_SIZE / 2) |
- std::views::transform([](const std::uint64_t& i) { return 2 * i; });
-
- auto available_indices = std::make_unique<available_query_indicies_t>(
- available_query_indicies_t{std::begin(key_range), std::end(key_range)});
-
- auto command_buffers = [&, this]() -> auto {
- auto command_buffers =
- std::vector<VkCommandBuffer>(this->TIMESTAMP_QUERY_POOL_SIZE);
+ constexpr auto key_range = std::views::iota(0u, QueryChunk::CHUNK_SIZE);
+ this->free_indices = std::make_unique<free_indices_t>(std::begin(key_range),
+ std::end(key_range));
+ this->command_buffers = [&, this]() -> auto {
+ auto cbs = std::make_unique<std::vector<VkCommandBuffer>>(CHUNK_SIZE);
const auto cbai = VkCommandBufferAllocateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .commandPool = this->queue_context.command_pool,
+ .commandPool = queue_context.command_pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
- .commandBufferCount =
- static_cast<std::uint32_t>(std::size(command_buffers)),
+ .commandBufferCount = static_cast<std::uint32_t>(std::size(*cbs)),
};
- device_context.vtable.AllocateCommandBuffers(
- device_context.device, &cbai, std::data(command_buffers));
- std::ranges::for_each(command_buffers, [&](const auto& cb) {
- device_context.sdld(device_context.device, cb);
- });
- return std::make_unique<std::vector<VkCommandBuffer>>(command_buffers);
+ vtable.AllocateCommandBuffers(device_context.device, &cbai,
+ std::data(*cbs));
+ return cbs;
}();
-
- return Block{.query_pool = query_pool,
- .available_indicies = std::move(available_indices),
- .command_buffers = std::move(command_buffers)};
}
+TimestampPool::QueryChunk::~QueryChunk() {}
+
TimestampPool::TimestampPool(QueueContext& queue_context)
: queue_context(queue_context) {
- // Allocate one block on construction, it's likely more than enough!
- this->blocks.emplace_back(this->allocate());
+ // Allocate one block on construction, it's likely more than enough.
+ auto query_chunk = std::make_shared<QueryChunk>(this->queue_context);
+ this->query_chunks.emplace(std::move(query_chunk));
}
std::shared_ptr<TimestampPool::Handle> TimestampPool::acquire() {
- const auto vacant_iter = [this]() -> auto {
- const auto it =
- std::ranges::find_if(this->blocks, [](const auto& block) {
- return std::size(*block.available_indicies);
+
+ // Finds a chunk with free indices, or inserts a new one and returns it.
+ const auto not_empty_iter = [this]() -> auto {
+ const auto not_empty_iter =
+ std::ranges::find_if(this->query_chunks, [](const auto& qc) {
+ assert(qc);
+ return std::size(*qc->free_indices);
});
- if (it != std::end(this->blocks)) {
- return it;
+ if (not_empty_iter != std::end(this->query_chunks)) {
+ return not_empty_iter;
}
- this->blocks.emplace_back(this->allocate());
- return std::prev(std::end(this->blocks));
- }();
-
- const auto query_pool = vacant_iter->query_pool;
- auto& available_indices = *vacant_iter->available_indicies;
- // Grab any element from our set and erase it immediately after.
- const auto query_index = *std::begin(available_indices);
- available_indices.erase(std::begin(available_indices));
-
- const auto command_buffers = [&]() -> auto {
- auto command_buffers = std::array<VkCommandBuffer, 2>{};
- std::ranges::copy_n(
- std::next(std::begin(*vacant_iter->command_buffers), query_index),
- std::size(command_buffers), std::begin(command_buffers));
- return command_buffers;
+ const auto insert = std::make_shared<QueryChunk>(this->queue_context);
+ const auto [iter, did_insert] = this->query_chunks.emplace(insert);
+ assert(did_insert);
+ return iter;
}();
- const auto block_index = static_cast<std::size_t>(
- std::distance(std::begin(this->blocks), vacant_iter));
+ // Grab any free index and remove it. NOTE(review): erase() runs inside assert() below — compiled out under NDEBUG; hoist the call out of the assert.
+ auto& indices = *(*not_empty_iter)->free_indices;
+ const auto query_index = *std::begin(indices);
+ assert(indices.erase(query_index));
- return std::make_shared<Handle>(available_indices, block_index, query_pool,
- query_index, command_buffers);
+ return std::make_shared<Handle>(*not_empty_iter, query_index);
}
-TimestampPool::Handle::Handle(
- TimestampPool::available_query_indicies_t& index_origin,
- const std::size_t block_index, const VkQueryPool& query_pool,
- const std::uint64_t query_index,
- const std::array<VkCommandBuffer, 2>& command_buffers)
- : index_origin(index_origin), block_index(block_index),
- query_pool(query_pool), query_index(query_index),
- command_buffers(command_buffers) {}
+TimestampPool::Handle::Handle(const std::shared_ptr<QueryChunk>& origin_chunk,
+ const std::uint64_t& query_index)
+ : query_pool(origin_chunk->query_pool), query_index(query_index),
+ origin_chunk(origin_chunk),
+ command_buffer((*origin_chunk->command_buffers)[query_index]) {}
TimestampPool::Handle::~Handle() {
- assert(this->index_origin.insert(this->query_index).second);
+ // Return our index to the owning chunk, unless the chunk was already destroyed.
+ // NOTE(review): insert() inside assert() is compiled out under NDEBUG — the index would never be returned; hoist it out of the assert.
+ if (const auto ptr = this->origin_chunk.lock(); ptr) {
+ assert(ptr->free_indices->insert(this->query_index).second);
+ }
}
void TimestampPool::Handle::setup_command_buffers(
- const VkuDeviceDispatchTable& vtable) const {
-
- const auto& [head, tail] = this->command_buffers;
+ const Handle& tail, const QueueContext& queue_context) const {
const auto cbbi = VkCommandBufferBeginInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
};
- // Heads
- vtable.ResetCommandBuffer(head, 0);
- vtable.BeginCommandBuffer(head, &cbbi);
- // Reset the next two and make them unavailable when they are run!
- vtable.CmdResetQueryPool(head, this->query_pool, this->query_index, 2);
- vtable.CmdWriteTimestamp2KHR(head, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+
+ const auto& device_context = queue_context.device_context;
+ const auto& vtable = device_context.vtable;
+
+ vtable.ResetQueryPoolEXT(device_context.device, this->query_pool,
+ this->query_index, 1);
+
+ vtable.BeginCommandBuffer(this->command_buffer, &cbbi);
+ vtable.CmdWriteTimestamp2KHR(this->command_buffer,
+ VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
this->query_pool, this->query_index);
- vtable.EndCommandBuffer(head);
-
- // Tails
- vtable.ResetCommandBuffer(tail, 0);
- vtable.BeginCommandBuffer(tail, &cbbi);
- vtable.CmdWriteTimestamp2KHR(tail, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
- this->query_pool, this->query_index + 1);
- vtable.EndCommandBuffer(tail);
+ vtable.EndCommandBuffer(this->command_buffer);
+
+ vtable.ResetQueryPoolEXT(device_context.device, tail.query_pool,
+ tail.query_index, 1);
+ vtable.ResetCommandBuffer(tail.command_buffer, 0);
+ vtable.BeginCommandBuffer(tail.command_buffer, &cbbi);
+ vtable.CmdWriteTimestamp2KHR(tail.command_buffer,
+ VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
+ tail.query_pool, tail.query_index);
+ vtable.EndCommandBuffer(tail.command_buffer);
}
-void TimestampPool::poll() {
- this->cached_timestamps.clear();
- this->cached_timestamps.reserve(std::size(this->blocks));
+std::optional<std::uint64_t>
+TimestampPool::Handle::get_ticks(const TimestampPool& pool) {
+
+ const auto& device_context = pool.queue_context.device_context;
+ const auto& vtable = device_context.vtable;
+
+ struct QueryResult {
+ std::uint64_t value;
+ std::uint64_t available;
+ };
+ auto query_result = QueryResult{};
+
+ const auto r = vtable.GetQueryPoolResults(
+ device_context.device, query_pool, this->query_index, 1,
+ sizeof(query_result), &query_result, sizeof(query_result),
+ VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
+
+ assert(r == VK_SUCCESS || r == VK_NOT_READY);
+
+ if (!query_result.available) {
+ return std::nullopt;
+ }
+ return query_result.value;
+}
+/*
+void TimestampPool::poll() {
const auto& device_context = this->queue_context.device_context;
std::ranges::transform(
@@ -163,26 +173,16 @@ void TimestampPool::poll() {
return timestamps;
});
};
-
-std::uint64_t TimestampPool::get_polled(const Handle& handle, const bool hack) {
-
- assert(handle.block_index < std::size(this->cached_timestamps));
-
- const auto& cached_timestamp = this->cached_timestamps[handle.block_index];
- assert(cached_timestamp != nullptr);
- assert(handle.query_index < std::size(*cached_timestamp));
-
- return (*cached_timestamp)[handle.query_index + hack];
-}
+*/
TimestampPool::~TimestampPool() {
const auto& device = this->queue_context.device_context.device;
const auto& vtable = this->queue_context.device_context.vtable;
- for (const auto& block : this->blocks) {
+ for (const auto& query_chunk : this->query_chunks) {
vtable.FreeCommandBuffers(device, this->queue_context.command_pool,
- std::size(*block.command_buffers),
- std::data(*block.command_buffers));
- vtable.DestroyQueryPool(device, block.query_pool, nullptr);
+ std::size(*query_chunk->command_buffers),
+ std::data(*query_chunk->command_buffers));
+ vtable.DestroyQueryPool(device, query_chunk->query_pool, nullptr);
}
}
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
index a4aa429..f69b06f 100644
--- a/src/timestamp_pool.hh
+++ b/src/timestamp_pool.hh
@@ -40,6 +40,7 @@
#include <memory>
#include <unordered_set>
+#include <vector>
namespace low_latency {
@@ -47,58 +48,62 @@ class QueueContext;
class TimestampPool final {
private:
- static constexpr auto TIMESTAMP_QUERY_POOL_SIZE = 512u;
- static_assert(TIMESTAMP_QUERY_POOL_SIZE % 2 == 0);
-
- private:
QueueContext& queue_context;
- // VkQueryPool with an unordered set of keys available for reading.
- using available_query_indicies_t = std::unordered_set<std::uint64_t>;
+ // A chunk of data which is useful for making timestamp queries.
+ // Allows association of an index to a query pool and command buffer.
+ // We reuse these when they're released.
+ struct QueryChunk final {
+ private:
+ using free_indices_t = std::unordered_set<std::uint64_t>;
+ static constexpr auto CHUNK_SIZE = 512u;
- struct Block {
+ public:
VkQueryPool query_pool;
- std::unique_ptr<available_query_indicies_t> available_indicies;
+ std::unique_ptr<free_indices_t> free_indices;
std::unique_ptr<std::vector<VkCommandBuffer>> command_buffers;
- };
- std::vector<Block> blocks; // multiple blocks
- // A snapshot of all available blocks for reading after each poll.
- std::vector<std::unique_ptr<std::vector<std::uint64_t>>> cached_timestamps;
+ public:
+ QueryChunk(const QueueContext& queue_context);
+ QueryChunk(const QueryChunk& handle) = delete;
+ QueryChunk(QueryChunk&&) = delete;
+ QueryChunk operator=(const QueryChunk& handle) = delete;
+ QueryChunk operator=(QueryChunk&&) = delete;
+ ~QueryChunk();
+ };
+ std::unordered_set<std::shared_ptr<QueryChunk>> query_chunks;
public:
- // A handle represents two std::uint64_t blocks oftimestamp memory and two
- // command buffers.
+ // A handle represents a VkCommandBuffer and a query index.
+ // Once the Handle goes out of scope, the query index will be returned
+ // to the parent pool.
struct Handle final {
private:
friend class TimestampPool;
private:
- available_query_indicies_t& index_origin;
- const std::size_t block_index;
+ const std::weak_ptr<QueryChunk> origin_chunk;
public:
const VkQueryPool query_pool;
const std::uint64_t query_index;
- const std::array<VkCommandBuffer, 2> command_buffers;
+ const VkCommandBuffer command_buffer;
public:
- Handle(TimestampPool::available_query_indicies_t& index_origin,
- const std::size_t block_index, const VkQueryPool& query_pool,
- const std::uint64_t query_index,
- const std::array<VkCommandBuffer, 2>& command_buffers);
+ Handle(const std::shared_ptr<QueryChunk>& origin_chunk,
+ const std::uint64_t& query_index);
Handle(const Handle& handle) = delete;
Handle(Handle&&) = delete;
Handle operator=(const Handle& handle) = delete;
Handle operator=(Handle&&) = delete;
- ~Handle(); // frees from the pool
+ ~Handle();
public:
- void setup_command_buffers(const VkuDeviceDispatchTable& vtable) const;
- };
+ void setup_command_buffers(const Handle& tail,
+ const QueueContext& queue_context) const;
- private:
- Block allocate();
+ std::optional<std::uint64_t> get_ticks(const TimestampPool& pool);
+ };
public:
TimestampPool(QueueContext& queue_context);
@@ -109,12 +114,8 @@ class TimestampPool final {
~TimestampPool();
public:
- // Hands out a Handle with a pool and index of two uint64_t's.
+ // Hands out a Handle!
std::shared_ptr<Handle> acquire();
-
- void poll(); // saves the current state for future get's.
-
- std::uint64_t get_polled(const Handle& handle, const bool hack = false);
};
} // namespace low_latency