track queue submits WIP

author: Nicolas James <nj3ahxac@gmail.com> 2026-02-08 17:49:48 +1100
committer: Nicolas James <nj3ahxac@gmail.com> 2026-02-08 17:49:48 +1100
commit: 5ab5046b643b04b9c31fd41cdfca39b9d5f6b99e (patch)
tree: da5383b41b2fff17362425d2a57cbd129f681498 /src/timestamp_pool.hh
parent: c7363b6165a7795d10a8989c241dcdec84d0c7d7 (diff)
1 files changed, 123 insertions, 0 deletions
diff --git a/src/timestamp_pool.hh b/src/timestamp_pool.hh
new file mode 100644
index 0000000..7efa4ee
--- /dev/null
+++ b/src/timestamp_pool.hh
@@ -0,0 +1,123 @@
+#ifndef TIMESTAMP_POOL_HH_
+#define TIMESTAMP_POOL_HH_
+
+// The purpose of this file is to provide the definition of a 'timestamp pool'.
+// It manages blocks of timestamp query pools, hands them out when requested,
+// and allocates more when (if) we run out. It also efficiently reads them back.
+// This class solves some key issues:
+//
+// 1. We need a potentially unbounded number of timestamps available to the
+// GPU. While I imagine most (good) applications will limit the number of
+// times they call vkQueueSubmit, there's no bound we can place on the
+// number of times this function will be called. Also,
+// the number of frames in flight might vary, so really we need
+// num_queue_submits * max_frames_in_flight timestamps. Obviously, we don't
+// know what these numbers are ahead of time and can't assume that they are
+// reasonable or even constant either. We solve this by allocating more
+// timestamps when necessary.
+
+// 2. We don't want to hammer Vulkan with expensive timestamp read
+// operations. If we have hundreds of query pools lying around, reading them
+// back will take hundreds of individual Vulkan calls. Reads
+// should be batched so as to perform as few of them as possible. So if we
+// allocate a few big query pool strips, reading them only requires one call
+// per strip. We can then cache the result of each read as well, so iterating
+// through later doesn't require any Vulkan interaction at all.
+//
+//
+// Usage:
+//     1. Get handle with .acquire().
+//     2. Write start/end timestamp operations with the handle's pool and index
+//     into the provided command buffer.
+//     3. Once the command buffer has signalled completion via some semaphore /
+//     fence, call .poll(). This will cache off all outstanding handles.
+//     Retrieving a handle which has not been signalled is undefined.
+//     4. Retrieve timestamp results with .get_polled(your_handle).
+//     5. Destruct the handle to return the key to the pool.
+
+#include <vulkan/utility/vk_dispatch_table.h>
+#include <vulkan/vulkan.hpp>
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+namespace low_latency {
+
+// Manages blocks of timestamp query pools: hands out query slots via
+// acquire(), snapshots results via poll(), and serves them back via
+// get_polled(). Instances are neither copyable nor movable.
+class TimestampPool final {
+  private:
+    // Presumably the number of queries in each block's VkQueryPool - confirm
+    // against allocate(). Must be even: a Handle covers two timestamps.
+    static constexpr auto TIMESTAMP_QUERY_POOL_SIZE = 512u;
+    static_assert(TIMESTAMP_QUERY_POOL_SIZE % 2 == 0);
+
+  private:
+    VkuDeviceDispatchTable vtable;
+    VkDevice device;
+    VkCommandPool command_pool;
+
+    // Set of query indices within one block that are currently available.
+    using available_query_indicies_t = std::unordered_set<std::uint64_t>;
+
+    // One VkQueryPool together with its available-index set and the command
+    // buffers associated with it.
+    struct block {
+        VkQueryPool query_pool;
+        // Shared so that a Handle can observe it through a weak_ptr (see
+        // Handle::index_origin).
+        std::shared_ptr<available_query_indicies_t> available_indicies;
+        std::unique_ptr<std::vector<VkCommandBuffer>> command_buffers;
+    };
+    std::vector<block> blocks; // multiple blocks
+
+    // A snapshot of all available blocks for reading after each poll.
+    std::vector<std::unique_ptr<std::vector<std::uint64_t>>> cached_timestamps;
+
+  public:
+    // A handle represents two std::uint64_t blocks of timestamp memory and two
+    // command buffers. Handles are neither copyable nor movable; acquire()
+    // returns them behind a std::unique_ptr.
+    struct Handle {
+      private:
+        friend class TimestampPool;
+
+      private:
+        // Non-owning reference to an available-index set; presumably the one
+        // of the block this handle was acquired from - confirm in acquire().
+        std::weak_ptr<available_query_indicies_t> index_origin;
+        // Presumably this handle's position in TimestampPool::blocks -
+        // confirm in acquire().
+        std::size_t block_index;
+
+      public:
+        VkQueryPool query_pool;
+        std::uint64_t query_index;
+        std::array<VkCommandBuffer, 2> command_buffers;
+
+      public:
+        Handle(const std::weak_ptr<TimestampPool::available_query_indicies_t>&
+                   index_origin,
+               const std::size_t block_index, const VkQueryPool& query_pool,
+               const std::uint64_t query_index,
+               const std::array<VkCommandBuffer, 2>& command_buffers);
+        Handle(const Handle& handle) = delete;
+        Handle(Handle&&) = delete;
+        Handle& operator=(const Handle& handle) = delete;
+        Handle& operator=(Handle&&) = delete;
+        ~Handle(); // frees from the pool
+
+      public:
+        // Prepares this handle's command buffers using the given dispatch
+        // table. NOTE(review): exact commands recorded live in the
+        // implementation file - confirm there.
+        void setup_command_buffers(const VkuDeviceDispatchTable& vtable) const;
+    };
+
+  private:
+    // Creates and returns a new block.
+    block allocate();
+
+  public:
+    TimestampPool(const VkDevice& device, const VkuDeviceDispatchTable& vtable,
+                  const VkCommandPool& command_pool);
+    TimestampPool(const TimestampPool&) = delete;
+    TimestampPool(TimestampPool&&) = delete;
+    TimestampPool& operator=(const TimestampPool&) = delete;
+    TimestampPool& operator=(TimestampPool&&) = delete;
+
+  public:
+    // Hands out a Handle with a pool and index of two uint64_t's.
+    std::unique_ptr<Handle> acquire();
+
+    void poll(); // saves the current state for future get's.
+
+    // Returns the timestamp cached for `handle` by the most recent poll().
+    // Per the usage notes for this file, the result is undefined if the
+    // handle's work had not signalled completion before that poll().
+    std::uint64_t get_polled(const Handle& handle);
+};
+
+} // namespace low_latency
+
+#endif
+\ No newline at end of file
author	Nicolas James <nj3ahxac@gmail.com>	2026-02-08 17:49:48 +1100
committer	Nicolas James <nj3ahxac@gmail.com>	2026-02-08 17:49:48 +1100
commit	5ab5046b643b04b9c31fd41cdfca39b9d5f6b99e (patch)
tree	da5383b41b2fff17362425d2a57cbd129f681498 /src/timestamp_pool.hh
parent	c7363b6165a7795d10a8989c241dcdec84d0c7d7 (diff)