From 8f4501215c0dbbbde59da2d015fdec3dbe5131bc Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Fri, 13 Feb 2026 18:21:04 +1100 Subject: add working frame tracking and commit before i break everything --- src/device_context.cc | 61 ++++++++- src/device_context.hh | 39 +++++- src/layer.cc | 58 ++++---- src/physical_device_context.cc | 10 +- src/physical_device_context.hh | 2 + src/queue_context.cc | 296 ++++++++++++++++++++++++++++++++++++++++- src/queue_context.hh | 46 ++++++- src/timestamp_pool.cc | 12 +- src/timestamp_pool.hh | 6 +- 9 files changed, 481 insertions(+), 49 deletions(-) diff --git a/src/device_context.cc b/src/device_context.cc index 5f5c1f7..4b39210 100644 --- a/src/device_context.cc +++ b/src/device_context.cc @@ -6,11 +6,12 @@ namespace low_latency { DeviceContext::DeviceContext(InstanceContext& parent_instance, + PhysicalDeviceContext& parent_physical_device, const VkDevice& device, const PFN_vkSetDeviceLoaderData& sdld, VkuDeviceDispatchTable&& vtable) - : instance(parent_instance), device(device), sdld(sdld), - vtable(std::move(vtable)) {} + : instance(parent_instance), physical_device(parent_physical_device), + device(device), sdld(sdld), vtable(std::move(vtable)), clock(*this) {} DeviceContext::~DeviceContext() { // We will let the destructor handle clearing here, but they should be @@ -20,4 +21,60 @@ DeviceContext::~DeviceContext() { } } +void DeviceContext::notify_acquire(const VkSwapchainKHR& swapchain, + const std::uint32_t& image_index, + const VkSemaphore& signal_semaphore) { + + const auto it = this->swapchain_signals.try_emplace(swapchain).first; + + // Doesn't matter if it was already there, overwrite it. + it->second.insert_or_assign(image_index, signal_semaphore); +} + +DeviceContext::Clock::Clock(const DeviceContext& context) { + + const auto infos = std::vector{ + {VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, + VK_TIME_DOMAIN_DEVICE_EXT}, + {VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, + VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT}}; + + auto device_host = std::array{}; + + const auto steady_before = std::chrono::steady_clock::now(); + context.vtable.GetCalibratedTimestampsKHR( + context.device, 2, std::data(infos), std::data(device_host), + &this->error_bound); + const auto steady_after = std::chrono::steady_clock::now(); + + this->cpu_time = steady_before + (steady_after - steady_before) / 2; + this->device_ticks = device_host[0]; + this->host_ns = device_host[1]; + + // Might need to get physical limits again? + this->ticks_per_ns = + context.physical_device.properties->limits.timestampPeriod; +} + +DeviceContext::Clock::time_point_t +DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const { + /* + struct timespec tv; + clock_gettime(CLOCK_MONOTONIC, &tv); + return tv.tv_nsec + tv.tv_sec*1000000000ull; + */ + + auto a = this->device_ticks; + auto b = ticks; + + const auto was_before = a > b; + if (was_before) { // it's happened before + std::swap(a, b); + } + const auto nsec = std::chrono::nanoseconds((b - a) * this->ticks_per_ns); + return this->cpu_time + (was_before ? -nsec : nsec); +} + +void DeviceContext::calibrate_timestamps() { this->clock = Clock{*this}; } + } // namespace low_latency \ No newline at end of file diff --git a/src/device_context.hh b/src/device_context.hh index 3406da1..b55b70c 100644 --- a/src/device_context.hh +++ b/src/device_context.hh @@ -1,36 +1,69 @@ #ifndef DEVICE_CONTEXT_HH_ #define DEVICE_CONTEXT_HH_ +#include #include #include #include #include #include +#include #include "context.hh" #include "instance_context.hh" +#include "physical_device_context.hh" namespace low_latency { class QueueContext; struct DeviceContext final : public Context { + public: InstanceContext& instance; + PhysicalDeviceContext& physical_device; const VkDevice device; const VkuDeviceDispatchTable vtable; - // Do we need to use this unless we wrap dispatchable objects? const PFN_vkSetDeviceLoaderData sdld; std::unordered_map> queues; + // We map swapchains to image indexes and their last signalled semaphore. + using index_semaphores_t = std::unordered_map; + std::unordered_map swapchain_signals; + + struct Clock { + using time_point_t = std::chrono::steady_clock::time_point; + + time_point_t cpu_time; + std::uint64_t error_bound; + std::uint64_t device_ticks; + std::uint64_t host_ns; + std::uint64_t ticks_per_ns; + + public: + Clock(const DeviceContext& device); + + time_point_t ticks_to_time(const std::uint64_t& ticks) const; + }; + Clock clock; + public: - DeviceContext(InstanceContext& parent_instance, const VkDevice& device, - const PFN_vkSetDeviceLoaderData& sdld, + DeviceContext(InstanceContext& parent_instance, + PhysicalDeviceContext& parent_physical, + const VkDevice& device, const PFN_vkSetDeviceLoaderData& sdld, VkuDeviceDispatchTable&& vtable); virtual ~DeviceContext(); + + public: + void notify_acquire(const VkSwapchainKHR& swapchain, + const std::uint32_t& image_index, + const VkSemaphore& signal_semaphore); + + public: + void calibrate_timestamps(); }; }; // namespace low_latency diff --git a/src/layer.cc b/src/layer.cc index cead7cd..c521bb9 100644 --- a/src/layer.cc +++ b/src/layer.cc @@ -1,13 +1,10 @@ #include "layer.hh" -#include #include +#include #include #include -// hack -#include - #include #include #include @@ -90,6 +87,7 @@ CreateInstance(const VkInstanceCreateInfo* pCreateInfo, auto vtable = VkuInstanceDispatchTable{ INSTANCE_VTABLE_LOAD(DestroyInstance), INSTANCE_VTABLE_LOAD(EnumeratePhysicalDevices), + INSTANCE_VTABLE_LOAD(GetPhysicalDeviceProperties), INSTANCE_VTABLE_LOAD(GetInstanceProcAddr), INSTANCE_VTABLE_LOAD(CreateDevice), INSTANCE_VTABLE_LOAD(EnumerateDeviceExtensionProperties), @@ -307,16 +305,20 @@ static VKAPI_ATTR VkResult VKAPI_CALL CreateDevice( DEVICE_VTABLE_LOAD(GetSemaphoreCounterValueKHR), DEVICE_VTABLE_LOAD(CmdWriteTimestamp2KHR), DEVICE_VTABLE_LOAD(QueueSubmit2KHR), + DEVICE_VTABLE_LOAD(GetCalibratedTimestampsKHR), }; #undef DEVICE_VTABLE_LOAD + const auto physical_context = layer_context.get_context(physical_device); + const auto key = layer_context.get_key(*pDevice); const auto lock = std::scoped_lock{layer_context.mutex}; assert(!layer_context.contexts.contains(key)); layer_context.contexts.try_emplace( - key, std::make_shared(instance_context, *pDevice, sdld, - std::move(vtable))); + key, + std::make_shared(instance_context, *physical_context, + *pDevice, sdld, std::move(vtable))); return VK_SUCCESS; } @@ -415,6 +417,8 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( return result; } + context->notify_acquire(swapchain, *pImageIndex, semaphore); + return VK_SUCCESS; } @@ -430,6 +434,9 @@ static VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR( return result; } + context->notify_acquire(pAcquireInfo->swapchain, *pImageIndex, + pAcquireInfo->semaphore); + return VK_SUCCESS; } @@ -465,7 +472,7 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, next_submit_infos[0].pCommandBuffers = std::data(next_command_buffers); next_submit_infos[0].commandBufferCount = std::size(next_command_buffers); - const auto next_signal = queue_context->semaphore_sequence + 1; + const auto next_signal = 1 + queue_context->semaphore_sequence++; const auto tail_tssi = VkTimelineSemaphoreSubmitInfo{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .signalSemaphoreValueCount = 1, @@ -488,13 +495,8 @@ vkQueueSubmit(VkQueue queue, std::uint32_t submit_count, return res; } - // Hack for now, store timestamp handles. - queue_context->handle_hack.push_front(std::move(timestamp_handle)); - if (std::size(queue_context->handle_hack) > 250) { - queue_context->handle_hack.pop_back(); - } - - ++queue_context->semaphore_sequence; + queue_context->notify_submit(std::span{submit_info, submit_count}, + next_signal, std::move(timestamp_handle)); return VK_SUCCESS; } @@ -534,10 +536,12 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, next_submit_infos[0].commandBufferInfoCount = std::size(next_command_buffers); + const auto target_semaphore_sequence = + 1 + queue_context->semaphore_sequence++; const auto tail_ssi = VkSemaphoreSubmitInfo{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = queue_context->semaphore, - .value = queue_context->semaphore_sequence + 1, + .value = target_semaphore_sequence, .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, }; const auto tail_cbsi = VkCommandBufferSubmitInfo{ @@ -559,13 +563,9 @@ vkQueueSubmit2(VkQueue queue, std::uint32_t submit_count, return res; } - // hack - queue_context->handle_hack.push_front(std::move(timestamp_handle)); - if (std::size(queue_context->handle_hack) > 250) { - queue_context->handle_hack.pop_back(); - } - - ++queue_context->semaphore_sequence; + queue_context->notify_submit({submit_infos, submit_count}, + target_semaphore_sequence, + std::move(timestamp_handle)); return VK_SUCCESS; } @@ -580,8 +580,8 @@ vkQueueSubmit2KHR(VkQueue queue, std::uint32_t submit_count, static VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { - const auto& vtable = - layer_context.get_context(queue)->device_context.vtable; + const auto queue_context = layer_context.get_context(queue); + const auto& vtable = queue_context->device_context.vtable; if (const auto res = vtable.QueuePresentKHR(queue, present_info); res != VK_SUCCESS) { @@ -589,6 +589,16 @@ vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* present_info) { return res; } + if (present_info) { // might not be needed + queue_context->notify_present(*present_info); + } + + if (const auto sleep_time = queue_context->get_delay_time(); + sleep_time.has_value()) { + + std::this_thread::sleep_for(*sleep_time); + } + return VK_SUCCESS; } diff --git a/src/physical_device_context.cc b/src/physical_device_context.cc index 105b840..d265c9d 100644 --- a/src/physical_device_context.cc +++ b/src/physical_device_context.cc @@ -1,10 +1,16 @@ #include "physical_device_context.hh" namespace low_latency { - + PhysicalDeviceContext::PhysicalDeviceContext( InstanceContext& instance_context, const VkPhysicalDevice& physical_device) - : instance(instance_context), physical_device(physical_device) {} + : instance(instance_context), physical_device(physical_device) { + + auto props = VkPhysicalDeviceProperties{}; + instance.vtable.GetPhysicalDeviceProperties(this->physical_device, &props); + this->properties = + std::make_unique(std::move(props)); +} PhysicalDeviceContext::~PhysicalDeviceContext() {} diff --git a/src/physical_device_context.hh b/src/physical_device_context.hh index 639fa0f..8eb4a1a 100644 --- a/src/physical_device_context.hh +++ b/src/physical_device_context.hh @@ -14,6 +14,8 @@ class PhysicalDeviceContext final : public Context { InstanceContext& instance; const VkPhysicalDevice physical_device; + + std::unique_ptr properties; public: PhysicalDeviceContext(InstanceContext& instance_context, diff --git a/src/queue_context.cc b/src/queue_context.cc index 930b0c5..9b46773 100644 --- a/src/queue_context.cc +++ b/src/queue_context.cc @@ -2,6 +2,9 @@ #include "device_context.hh" #include "timestamp_pool.hh" +#include +#include + namespace low_latency { static VkCommandPool @@ -51,12 +54,9 @@ QueueContext::QueueContext(DeviceContext& device_context, const VkQueue& queue, timestamp_pool(std::make_unique(*this)) {} QueueContext::~QueueContext() { - - // nuke our handles, so we avoid segfaults for now - this->handle_hack.clear(); - - // Ugly - destructors of timestamp_pool should be called before we destroy - // our vulkan objects. + + this->in_flight_frames.clear(); + this->submissions.clear(); this->timestamp_pool.reset(); const auto& vtable = this->device_context.vtable; @@ -66,4 +66,288 @@ QueueContext::~QueueContext() { nullptr); } +void QueueContext::notify_submit( + std::span infos, + const std::uint64_t target_semaphore_sequence, + std::shared_ptr&& handle) { + + // This has an issue where we're collecting all signals and waits and + // treating a single submit call as finishing + + auto signals = std::unordered_set{}; + auto waits = std::unordered_set{}; + for (const auto& info : infos) { + std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount, + std::inserter(waits, std::end(waits))); + std::ranges::copy_n(info.pSignalSemaphores, info.signalSemaphoreCount, + std::inserter(signals, std::end(signals))); + } + + this->submissions.emplace_back(std::make_unique( + std::move(signals), std::move(waits), target_semaphore_sequence, + std::move(handle))); + + // TODO HACK + if (std::size(this->submissions) > 100) { + this->submissions.pop_front(); + } +} + +void QueueContext::notify_submit( + std::span infos, + const std::uint64_t target_semaphore_sequence, + std::shared_ptr&& handle) { + + auto signals = std::unordered_set{}; + auto waits = std::unordered_set{}; + for (const auto& info : infos) { + constexpr auto get_semaphore = [](const auto& semaphore_info) { + return semaphore_info.semaphore; + }; + std::ranges::transform(info.pSignalSemaphoreInfos, + std::next(info.pSignalSemaphoreInfos, + info.signalSemaphoreInfoCount), + std::inserter(signals, std::end(signals)), + get_semaphore); + std::ranges::transform( + info.pWaitSemaphoreInfos, + std::next(info.pWaitSemaphoreInfos, info.waitSemaphoreInfoCount), + std::inserter(waits, std::end(waits)), get_semaphore); + } + + this->submissions.emplace_back(std::make_unique( + std::move(signals), std::move(waits), target_semaphore_sequence, + std::move(handle))); + + // TODO HACK + if (std::size(this->submissions) > 100) { + this->submissions.pop_front(); + } +} + +void QueueContext::notify_present(const VkPresentInfoKHR& info) { + + auto frame = [&]() -> std::unique_ptr { + const auto waits = [&]() { + auto waits = std::unordered_set{}; + std::ranges::copy_n(info.pWaitSemaphores, info.waitSemaphoreCount, + std::inserter(waits, std::end(waits))); + return waits; + }(); + + const auto wait_semaphores = std::unordered_set{ + info.pWaitSemaphores, + std::next(info.pWaitSemaphores, info.waitSemaphoreCount)}; + + auto collected_semaphores = std::unordered_set{}; + for (auto i = std::uint32_t{0}; i < info.swapchainCount; ++i) { + const auto& swapchain = info.pSwapchains[i]; + const auto& index = info.pImageIndices[i]; + + // Shouldn't be possible to present to a swapchain that wasn't + // waited in + + const auto& signals = this->device_context.swapchain_signals; + const auto swapchain_it = signals.find(swapchain); + assert(swapchain_it != std::end(signals)); + const auto index_it = swapchain_it->second.find(index); + assert(index_it != std::end(swapchain_it->second)); + + const auto semaphore = index_it->second; + collected_semaphores.emplace(index_it->second); + } + + const auto start_submission_it = std::ranges::find_if( + std::rbegin(this->submissions), std::rend(this->submissions), + [&](const auto& submission) { + return std::ranges::any_of( + submission->waits, [&](const auto& wait) { + return collected_semaphores.contains(wait); + }); + }); + + if (start_submission_it == std::rend(this->submissions)) { + std::cout << "couldn't find starting submission!\n"; + return nullptr; + } + const auto& start_submission = *start_submission_it; + + const auto end_submission_it = std::ranges::find_if( + std::rbegin(this->submissions), std::rend(this->submissions), + [&](const auto& submission) { + return std::ranges::any_of( + submission->signals, [&](const auto& signal) { + return wait_semaphores.contains(signal); + }); + }); + + if (end_submission_it == std::rend(this->submissions)) { + std::cout << "couldn't find ending submission!\n"; + return nullptr; + } + const auto& end_submission = *end_submission_it; + + return std::make_unique(Frame{ + .start_context = *this, + .start = start_submission->timestamp_handle, + .target_start_sequence = + start_submission->target_semaphore_sequence, + .end_context = *this, + .end = start_submission->timestamp_handle, + .target_end_sequence = start_submission->target_semaphore_sequence, + }); + }(); + + this->in_flight_frames.emplace_back(std::move(frame)); + + // hack + if (this->in_flight_frames.size() > 5) { + this->in_flight_frames.pop_front(); + } +} + +// now it's all coming together +std::optional QueueContext::get_delay_time() { + if (!std::size(this->in_flight_frames)) { + return std::nullopt; + } + + auto seq = std::uint64_t{}; + this->device_context.vtable.GetSemaphoreCounterValueKHR( + this->device_context.device, this->semaphore, &seq); + + // Get semaphore first, then poll! + this->timestamp_pool->poll(); + + // idk how frequently we should call this. + this->device_context.calibrate_timestamps(); + + static auto gpu_frametimes = std::deque{}; + static auto cpu_frametimes = std::deque{}; + + const auto S = std::size(this->in_flight_frames); + + std::cout << "\nSTART FRAME READOUT\n"; + std::cout << "error bound: " << this->device_context.clock.error_bound + << '\n'; + std::cout << "num frames in flight: " << S << '\n'; + std::cout << "from oldest -> newest\n"; + + // const auto b_seq = semaphore_from_context(*this); + const auto now = std::chrono::steady_clock::now(); + + auto i = std::size_t{0}; + for (; i < std::size(this->in_flight_frames); ++i) { + const auto& frame = this->in_flight_frames[i]; + std::cout << " Evaluating the frame that's " << S - i - 1 + << " behind\n"; + if (!frame) { + std::cout << " nullptr!\n"; + continue; + } + + std::cout << " target start: " << frame->target_start_sequence << '\n'; + std::cout << " target end: " << frame->target_end_sequence << '\n'; + if (seq < frame->target_start_sequence) { + std::cout << " frame hasn't started yet!\n"; + continue; + } + + const auto start_ticks = + frame->start_context.timestamp_pool->get_polled(*frame->start); + std::cout << " START TICKS: " << start_ticks << '\n'; + const auto& a_clock = frame->start_context.device_context.clock; + const auto a = a_clock.ticks_to_time(start_ticks); + + { + using namespace std::chrono; + const auto diff = now - a; + const auto ms = duration_cast(diff); + const auto us = duration_cast(diff - ms); + const auto ns = duration_cast(diff - ms - us); + std::cout << " frame started: " << ms << " ms " << us + << " us " << ns << " ns ago\n"; + } + + if (seq < frame->target_end_sequence) { + std::cout << " frame hasn't ended yet!\n"; + continue; + } + + + const auto end_ticks = + frame->end_context.timestamp_pool->get_polled(*frame->end, true); + const auto& b_clock = frame->end_context.device_context.clock; + std::cout << " END_TICKS: " << end_ticks << '\n'; + const auto b = b_clock.ticks_to_time(end_ticks); + { + using namespace std::chrono; + if (now <= b) { + std::cout << "b happened before now?\n"; + } + const auto diff = now - b; + const auto ms = duration_cast(diff); + const auto us = duration_cast(diff - ms); + const auto ns = duration_cast(diff - ms - us); + std::cout << " frame ended: " << ms << " ms " << us + << " us " << ns << " ns ago\n"; + } + + const auto gpu_time = b - a; + { + using namespace std::chrono; + const auto diff = gpu_time; + const auto ms = duration_cast(diff); + const auto us = duration_cast(diff - ms); + const auto ns = duration_cast(diff - ms - us); + std::cout << " gpu_time: " << ms << " ms " << us + << " us " << ns << " ns ago\n"; + } + + /* + cpu_frametimes.emplace_back(cpu_time); + gpu_frametimes.emplace_back(gpu_time); + */ + } + + /* + if (remove_index.has_value()) { + this->in_flight_frames.erase(std::begin(this->in_flight_frames), + std::begin(this->in_flight_frames) + + *remove_index); + } + */ + + /* + auto g_copy = gpu_frametimes; + auto c_copy = cpu_frametimes; + std::ranges::sort(g_copy); + std::ranges::sort(c_copy); + + constexpr auto N = 49; + if (std::size(cpu_frametimes) < N) { + return std::nullopt; + } + + const auto F = std::size(g_copy); + // close enough to median lol + const auto g = g_copy[F / 2]; + const auto c = c_copy[F / 2]; + + std::cout << g << '\n'; + + std::cout << " median gpu: " << (g / 1'000'000) << " ms " << g / 1'000 + << " us " << g << " ns\n"; + std::cout << " median cpu: " << c / 1'000'000 << " ms " << c / 1'000 + << " us " << c << " ns\n"; + + if (F > N) { + gpu_frametimes.pop_front(); + cpu_frametimes.pop_front(); + } + */ + + return std::nullopt; +} + } // namespace low_latency \ No newline at end of file diff --git a/src/queue_context.hh b/src/queue_context.hh index 184e31d..a6f43e5 100644 --- a/src/queue_context.hh +++ b/src/queue_context.hh @@ -7,8 +7,11 @@ #include #include -#include +#include #include +#include +#include +#include namespace low_latency { @@ -21,19 +24,56 @@ class QueueContext final : public Context { const VkQueue queue; const std::uint32_t queue_family_index; - // this is incremented and tied to our semaphore std::uint64_t semaphore_sequence = 0; VkSemaphore semaphore; VkCommandPool command_pool; std::unique_ptr timestamp_pool; - std::deque> handle_hack; + + // Potentially in flight queue submissions + struct Submission { + const std::unordered_set signals; + const std::unordered_set waits; + const std::uint64_t target_semaphore_sequence; + const std::shared_ptr timestamp_handle; + }; + std::deque> submissions; + + // In flight frames! + // These might come from different contexts. + struct Frame { + const QueueContext& start_context; + const std::shared_ptr start; + const std::uint64_t target_start_sequence; + + const QueueContext& end_context; + const std::shared_ptr end; + const std::uint64_t target_end_sequence; + }; + // These can be null, it means we made presented without finding the + // timestamps associated with the present. + std::deque> in_flight_frames; public: QueueContext(DeviceContext& device_context, const VkQueue& queue, const std::uint32_t& queue_family_index); virtual ~QueueContext(); + + public: + void notify_submit(std::span infos, + const std::uint64_t target_semaphore_sequence, + std::shared_ptr&& handle); + void notify_submit(std::span infos, + const std::uint64_t target_semaphore_sequence, + std::shared_ptr&& handle); + + void notify_present(const VkPresentInfoKHR& info); + + public: + // Computes the amount we should delay... + using duration_t = std::chrono::steady_clock::duration; + std::optional get_delay_time(); }; }; // namespace low_latency diff --git a/src/timestamp_pool.cc b/src/timestamp_pool.cc index e37dcd2..b4dc3c9 100644 --- a/src/timestamp_pool.cc +++ b/src/timestamp_pool.cc @@ -61,8 +61,8 @@ TimestampPool::TimestampPool(QueueContext& queue_context) this->blocks.emplace_back(this->allocate()); } -std::unique_ptr TimestampPool::acquire() { - const auto& vacant_iter = [this]() -> auto { +std::shared_ptr TimestampPool::acquire() { + const auto vacant_iter = [this]() -> auto { const auto it = std::ranges::find_if(this->blocks, [](const auto& block) { return std::size(*block.available_indicies); @@ -93,7 +93,7 @@ std::unique_ptr TimestampPool::acquire() { const auto block_index = static_cast( std::distance(std::begin(this->blocks), vacant_iter)); - return std::make_unique(available_indices, block_index, query_pool, + return std::make_shared(available_indices, block_index, query_pool, query_index, command_buffers); } @@ -164,15 +164,15 @@ void TimestampPool::poll() { }); }; -std::uint64_t TimestampPool::get_polled(const Handle& handle) { +std::uint64_t TimestampPool::get_polled(const Handle& handle, const bool hack) { assert(handle.block_index < std::size(this->cached_timestamps)); const auto& cached_timestamp = this->cached_timestamps[handle.block_index]; assert(cached_timestamp != nullptr); - assert(std::size(*cached_timestamp) < handle.query_index); + assert(handle.query_index < std::size(*cached_timestamp)); - return handle.query_index; + return (*cached_timestamp)[handle.query_index + hack]; } TimestampPool::~TimestampPool() { diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh index cc67b18..a4aa429 100644 --- a/src/timestamp_pool.hh +++ b/src/timestamp_pool.hh @@ -67,7 +67,7 @@ class TimestampPool final { std::vector>> cached_timestamps; public: - // A handle represents two std::uint64_t blocks of timestamp memory and two + // A handle represents two std::uint64_t blocks oftimestamp memory and two // command buffers. struct Handle final { private: @@ -110,11 +110,11 @@ class TimestampPool final { public: // Hands out a Handle with a pool and index of two uint64_t's. - std::unique_ptr acquire(); + std::shared_ptr acquire(); void poll(); // saves the current state for future get's. - std::uint64_t get_polled(const Handle& handle); + std::uint64_t get_polled(const Handle& handle, const bool hack = false); }; } // namespace low_latency -- cgit v1.2.3