From 5ab5046b643b04b9c31fd41cdfca39b9d5f6b99e Mon Sep 17 00:00:00 2001 From: Nicolas James Date: Sun, 8 Feb 2026 17:49:48 +1100 Subject: track queue submits WIP --- src/timestamp_pool.hh | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 src/timestamp_pool.hh (limited to 'src/timestamp_pool.hh') diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh new file mode 100644 index 0000000..7efa4ee --- /dev/null +++ b/src/timestamp_pool.hh @@ -0,0 +1,123 @@ +#ifndef TIMESTAMP_POOL_HH_ +#define TIMESTAMP_POOL_HH_ + +// The purpose of this file is to provide the definition of a 'timestamp pool'. +// It manages blocks of timestamp query pools, hands them out when requested, +// and allocates more when (if) we run out. It also efficiently reads them back. +// This class solves some key issues: +// +// 1. We need a potentially infinite amount of timestamps available to the +// GPU. While I imagine most (good) applications will limit the amount of +// times they call vkQueueSubmit, there's no bound we can place on the +// amount of times this function will be called. Also, +// the amount of frames in flight might vary, so really we need +// num_queue_submits * max_frames_in_flight timestamps. Obviously, we don't +// know what these numbers are at runtime and can't assume that they are +// reasonable or even constant either. We solve this by allocating more +// timestamps when necessary. + +// 2. We don't want to hammer vulkan with expensive timestamp read +// operations. If we have hundreds of query pools lying around, reading them +// back will take hundreds of individual vulkan calls. They +// should be batched as to perform as few reads as possible. So if we allocate +// multiple big query pool strips, then reading them will only require that many +// calls. We then can cache off the result of reading as well so iterating +// through later doesn't require any vulkan interaction at all. +// +// +// Usage: +// 1. Get handle with .acquire(). +// 2. Write start/end timestamp operations with the handle's pool and index +// into the provided command buffer. +// 3. With the command buffer signalled completion via some semaphore / +// fence, call .poll(). This will cache off all outstanding handles. +// Retrieving with handles which have not been signalled are undefined. +// 4. Retrieve timestamp results with .get_polled(your_handle). +// 5. Destruct the handle to return the key to the pool. + +#include +#include + +#include +#include + +namespace low_latency { + +class TimestampPool final { + private: + static constexpr auto TIMESTAMP_QUERY_POOL_SIZE = 512u; + static_assert(TIMESTAMP_QUERY_POOL_SIZE % 2 == 0); + + private: + VkuDeviceDispatchTable vtable; + VkDevice device; + VkCommandPool command_pool; + + // VkQueryPool with an unordered set of keys available for reading. + using available_query_indicies_t = std::unordered_set; + + struct block { + VkQueryPool query_pool; + std::shared_ptr available_indicies; + std::unique_ptr> command_buffers; + }; + std::vector blocks; // multiple blocks + + // A snapshot of all available blocks for reading after each poll. + std::vector>> cached_timestamps; + + public: + // A handle represents two std::uint64_t blocks of timestamp memory and two + // command buffers. + struct Handle { + private: + friend class TimestampPool; + + private: + std::weak_ptr index_origin; + std::size_t block_index; + + public: + VkQueryPool query_pool; + std::uint64_t query_index; + std::array command_buffers; + + public: + Handle(const std::weak_ptr& + index_origin, + const std::size_t block_index, const VkQueryPool& query_pool, + const std::uint64_t query_index, + const std::array& command_buffers); + Handle(const Handle& handle) = delete; + Handle(Handle&&) = delete; + Handle operator==(const Handle& handle) = delete; + Handle operator==(Handle&&) = delete; + ~Handle(); // frees from the pool + + public: + void setup_command_buffers(const VkuDeviceDispatchTable& vtable) const; + }; + + private: + block allocate(); + + public: + TimestampPool(const VkDevice& device, const VkuDeviceDispatchTable& vtable, + const VkCommandPool& command_pool); + TimestampPool(const TimestampPool&) = delete; + TimestampPool(TimestampPool&&) = delete; + TimestampPool operator==(const TimestampPool&) = delete; + TimestampPool operator==(TimestampPool&&) = delete; + + public: + // Hands out a Handle with a pool and index of two uint64_t's. + std::unique_ptr acquire(); + + void poll(); // saves the current state for future get's. + + std::uint64_t get_polled(const Handle& handle); +}; + +} // namespace low_latency + +#endif \ No newline at end of file -- cgit v1.2.3