diff options
| author | Nicolas James <nj3ahxac@gmail.com> | 2026-02-08 17:49:48 +1100 |
|---|---|---|
| committer | Nicolas James <nj3ahxac@gmail.com> | 2026-02-08 17:49:48 +1100 |
| commit | 5ab5046b643b04b9c31fd41cdfca39b9d5f6b99e (patch) | |
| tree | da5383b41b2fff17362425d2a57cbd129f681498 /src/timestamp_pool.hh | |
| parent | c7363b6165a7795d10a8989c241dcdec84d0c7d7 (diff) | |
track queue submits WIP
Diffstat (limited to 'src/timestamp_pool.hh')
| -rw-r--r-- | src/timestamp_pool.hh | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh new file mode 100644 index 0000000..7efa4ee --- /dev/null +++ b/src/timestamp_pool.hh @@ -0,0 +1,123 @@ +#ifndef TIMESTAMP_POOL_HH_ +#define TIMESTAMP_POOL_HH_ + +// The purpose of this file is to provide the definition of a 'timestamp pool'. +// It manages blocks of timestamp query pools, hands them out when requested, +// and allocates more when (if) we run out. It also efficiently reads them back. +// This class solves some key issues: +// +// 1. We need a potentially infinite amount of timestamps available to the +// GPU. While I imagine most (good) applications will limit the amount of +// times they call vkQueueSubmit, there's no bound we can place on the +// amount of times this function will be called. Also, +// the amount of frames in flight might vary, so really we need +// num_queue_submits * max_frames_in_flight timestamps. Obviously, we don't +// know what these numbers are at runtime and can't assume that they are +// reasonable or even constant either. We solve this by allocating more +// timestamps when necessary. + +// 2. We don't want to hammer vulkan with expensive timestamp read +// operations. If we have hundreds of query pools lying around, reading them +// back will take hundreds of individual vulkan calls. They +// should be batched as to perform as few reads as possible. So if we allocate +// multiple big query pool strips, then reading them will only require that many +// calls. We then can cache off the result of reading as well so iterating +// through later doesn't require any vulkan interaction at all. +// +// +// Usage: +// 1. Get handle with .acquire(). +// 2. Write start/end timestamp operations with the handle's pool and index +// into the provided command buffer. +// 3. With the command buffer signalled completion via some semaphore / +// fence, call .poll(). This will cache off all outstanding handles. +// Retrieving with handles which have not been signalled are undefined. +// 4. Retrieve timestamp results with .get_polled(your_handle). +// 5. Destruct the handle to return the key to the pool. + +#include <vulkan/utility/vk_dispatch_table.h> +#include <vulkan/vulkan.hpp> + +#include <memory> +#include <unordered_set> + +namespace low_latency { + +class TimestampPool final { + private: + static constexpr auto TIMESTAMP_QUERY_POOL_SIZE = 512u; + static_assert(TIMESTAMP_QUERY_POOL_SIZE % 2 == 0); + + private: + VkuDeviceDispatchTable vtable; + VkDevice device; + VkCommandPool command_pool; + + // VkQueryPool with an unordered set of keys available for reading. + using available_query_indicies_t = std::unordered_set<std::uint64_t>; + + struct block { + VkQueryPool query_pool; + std::shared_ptr<available_query_indicies_t> available_indicies; + std::unique_ptr<std::vector<VkCommandBuffer>> command_buffers; + }; + std::vector<block> blocks; // multiple blocks + + // A snapshot of all available blocks for reading after each poll. + std::vector<std::unique_ptr<std::vector<std::uint64_t>>> cached_timestamps; + + public: + // A handle represents two std::uint64_t blocks of timestamp memory and two + // command buffers. + struct Handle { + private: + friend class TimestampPool; + + private: + std::weak_ptr<available_query_indicies_t> index_origin; + std::size_t block_index; + + public: + VkQueryPool query_pool; + std::uint64_t query_index; + std::array<VkCommandBuffer, 2> command_buffers; + + public: + Handle(const std::weak_ptr<TimestampPool::available_query_indicies_t>& + index_origin, + const std::size_t block_index, const VkQueryPool& query_pool, + const std::uint64_t query_index, + const std::array<VkCommandBuffer, 2>& command_buffers); + Handle(const Handle& handle) = delete; + Handle(Handle&&) = delete; + Handle operator==(const Handle& handle) = delete; + Handle operator==(Handle&&) = delete; + ~Handle(); // frees from the pool + + public: + void setup_command_buffers(const VkuDeviceDispatchTable& vtable) const; + }; + + private: + block allocate(); + + public: + TimestampPool(const VkDevice& device, const VkuDeviceDispatchTable& vtable, + const VkCommandPool& command_pool); + TimestampPool(const TimestampPool&) = delete; + TimestampPool(TimestampPool&&) = delete; + TimestampPool operator==(const TimestampPool&) = delete; + TimestampPool operator==(TimestampPool&&) = delete; + + public: + // Hands out a Handle with a pool and index of two uint64_t's. + std::unique_ptr<Handle> acquire(); + + void poll(); // saves the current state for future get's. + + std::uint64_t get_polled(const Handle& handle); +}; + +} // namespace low_latency + +#endif
\ No newline at end of file |
