#include "device_context.hh"
#include "queue_context.hh"

#include <array>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>

#include <vulkan/vulkan_core.h>

namespace low_latency {

/// Builds the per-device context.
///
/// Stores references to the owning instance and physical-device contexts,
/// copies the device handle and takes ownership of the dispatch table. A
/// calibrated Clock is created only when the physical device reports support
/// for the extensions this layer requires; otherwise `clock` is left unset.
DeviceContext::DeviceContext(InstanceContext& parent_instance,
                             PhysicalDeviceContext& parent_physical_device,
                             const VkDevice& device,
                             VkuDeviceDispatchTable&& vtable)
    : instance(parent_instance), physical_device(parent_physical_device),
      device(device), vtable(std::move(vtable)) {

    // Only create our clock if we can support creating it.
    if (this->physical_device.supports_required_extensions) {
        this->clock = std::make_unique<Clock>(*this);
    }
}

/// Drops our reference to the present queue and, in debug builds, checks that
/// every remaining queue context is owned solely by this device context.
DeviceContext::~DeviceContext() {
    this->present_queue.reset();

    // We will let the destructor handle clearing here, but they should be
    // unique by now (ie, removed from the layer's context map).
    // `use_count() == 1` replaces shared_ptr::unique(), which was deprecated
    // in C++17 and removed in C++20.
    for ([[maybe_unused]] const auto& [queue, queue_context] : this->queues) {
        assert(queue_context.use_count() == 1);
    }
}

/// Binds the clock to its device context and takes an initial calibration.
DeviceContext::Clock::Clock(const DeviceContext& context) : device(context) {
    this->calibrate();
}

DeviceContext::Clock::~Clock() = default;

/// Samples the device and CLOCK_MONOTONIC time domains in one call and stores
/// the pair in `device_ticks` / `host_ns`, with the reported maximum deviation
/// in `error_bound`.
void DeviceContext::Clock::calibrate() {
    constexpr std::uint32_t timestamp_count = 2;

    // Index 0 is the device domain, index 1 the host monotonic domain; the
    // results array below is filled in the same order.
    const auto infos = std::array<VkCalibratedTimestampInfoEXT, timestamp_count>{{
        {VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr,
         VK_TIME_DOMAIN_DEVICE_EXT},
        {VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr,
         VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT},
    }};

    // A real array of timestamp_count elements: the driver writes one value
    // per requested domain, so the destination must not be a pointer to a
    // single struct member.
    auto timestamps = std::array<std::uint64_t, timestamp_count>{};

    [[maybe_unused]] const VkResult result =
        this->device.vtable.GetCalibratedTimestampsKHR(
            this->device.device, timestamp_count, std::data(infos),
            std::data(timestamps), &this->error_bound);
    assert(result == VK_SUCCESS);

    this->device_ticks = timestamps[0];
    this->host_ns = timestamps[1];
}

/// Converts a raw device timestamp into a host time point.
///
/// The signed tick distance from the last calibration sample is scaled by the
/// device's `timestampPeriod` (nanoseconds per tick) and added to the host
/// time captured at calibration.
///
/// @param ticks Raw device timestamp value.
/// @return The corresponding point on `time_point_t`'s clock.
DeviceContext::Clock::time_point_t
DeviceContext::Clock::ticks_to_time(const std::uint64_t& ticks) const {
    const auto& pd = this->device.physical_device.properties;
    const auto ns_tick = static_cast<double>(pd->limits.timestampPeriod);

    // Signed difference (ticks - device_ticks) computed without relying on
    // unsigned wrap-around: take the magnitude first, then apply the sign.
    const auto diff = [&]() -> std::int64_t {
        const auto is_negative = this->device_ticks > ticks;
        const std::uint64_t abs_diff = is_negative
                                           ? this->device_ticks - ticks
                                           : ticks - this->device_ticks;
        assert(abs_diff <= static_cast<std::uint64_t>(
                               std::numeric_limits<std::int64_t>::max()));
        const auto signed_abs_diff = static_cast<std::int64_t>(abs_diff);
        return is_negative ? -signed_abs_diff : signed_abs_diff;
    }();

    // This will have issues because std::chrono::steady_clock::now(), which
    // we use for cpu time, may not be on the same time domain as what was
    // returned by GetCalibratedTimestamps. It would be more robust to use the
    // posix gettime that vulkan guarantees it can be compared to instead.
    //
    // llround rounds to nearest for both signs; adding 0.5 and truncating
    // rounds negative deltas towards zero instead.
    const auto diff_nsec = static_cast<std::int64_t>(
        std::llround(static_cast<double>(diff) * ns_tick));
    const auto delta = std::chrono::nanoseconds(this->host_ns + diff_nsec);
    return time_point_t{delta};
}

/// Stalls the calling thread until the most recent in-flight frame on the
/// present queue has finished, by spinning on the end handle of that frame's
/// last submission. Returns immediately if no present queue is known yet or
/// there is nothing in flight.
void DeviceContext::sleep_in_input() {
    // Present hasn't happened yet, we don't know what queue to attack.
    if (!this->present_queue) {
        return;
    }

    const auto& frames = this->present_queue->in_flight_frames;
    // No frame here means we're behind the GPU and do not need to delay.
    // If anything we should speed up...
    if (std::empty(frames)) {
        return;
    }

    // If we're here, that means that there might be an outstanding frame that's
    // sitting on our present_queue which hasn't yet completed, so we need to
    // stall until it's finished.
    const auto& last_frame = frames.back();
    assert(!std::empty(last_frame.submissions));
    // Release builds compile the assert out; calling back() on an empty
    // container would be undefined behaviour, so bail out instead.
    if (std::empty(last_frame.submissions)) {
        return;
    }

    const auto& last_frame_submission = last_frame.submissions.back();
    last_frame_submission->end_handle->get_time_spinlock();

    // From our sleep in present implementation, just spinning until
    // the previous frame has completed did not work well. This was because
    // there was a delay between presentation and when new work was given
    // to the GPU. If we stalled the CPU without trying to account for this, we
    // would get huge frame drops, loss of throughput, and the GPU would even
    // clock down. So naturally I am concerned about this approach, but it seems
    // to perform well so far in my own testing and is just beautifully elegant.
}

/// Records the latest anti-lag settings and, when the update marks the input
/// stage with anti-lag switched on, delays the caller via sleep_in_input().
///
/// @param data Anti-lag update as supplied by the application.
void DeviceContext::notify_antilag_update(const VkAntiLagDataAMD& data) {
    this->antilag_mode = data.mode;
    this->antilag_fps = data.maxFPS; // TODO

    // This might not be provided (probably just to set some settings?).
    if (!data.pPresentationInfo) {
        return;
    }

    // Only care about the input stage for now.
    if (data.pPresentationInfo->stage != VK_ANTI_LAG_STAGE_INPUT_AMD) {
        return;
    }

    if (this->antilag_mode != VK_ANTI_LAG_MODE_ON_AMD) {
        return;
    }

    this->sleep_in_input();
}

/// Remembers which queue the application presents on so sleep_in_input()
/// knows which in-flight frames to wait for.
///
/// @param queue Context of the queue that just presented; expected to be
///              registered in `queues`.
void DeviceContext::notify_queue_present(const QueueContext& queue) {
    // Single lookup. operator[] would default-insert a null entry for an
    // unknown queue once the assert is compiled out.
    const auto it = this->queues.find(queue.queue);
    assert(it != std::end(this->queues));
    if (it == std::end(this->queues)) {
        // Unknown queue: end up with no present queue, without polluting the
        // map with a phantom entry.
        this->present_queue.reset();
        return;
    }

    this->present_queue = it->second;
}

} // namespace low_latency