1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
#include "device_context.hh"
#include <utility>
#include <vulkan/vulkan_core.h>
namespace low_latency {
// Constructs a DeviceContext from its parent contexts, the device handle and
// the device dispatch table.
//
//   parent_instance          - stored in `instance`.
//   parent_physical_device   - stored in `physical_device`.
//   device                   - stored in `device`.
//   was_capability_requested - stored in the member of the same name.
//   vtable                   - moved into the `vtable` member.
DeviceContext::DeviceContext(InstanceContext& parent_instance,
                             PhysicalDeviceContext& parent_physical_device,
                             const VkDevice& device,
                             const bool was_capability_requested,
                             VkuDeviceDispatchTable&& vtable)
    : instance(parent_instance),
      physical_device(parent_physical_device),
      was_capability_requested(was_capability_requested),
      device(device),
      vtable(std::move(vtable)) {

    // A clock can only be created when the physical device reports support
    // for the required extensions; otherwise `clock` is left untouched.
    if (!this->physical_device.supports_required_extensions) {
        return;
    }

    this->clock = std::make_unique<DeviceClock>(*this);
}
// Destroys the device context.
//
// Nothing is cleared explicitly here; the members' own destructors handle
// that. In builds where assertions are enabled we additionally verify that
// every entry in `queues` is the only remaining owner of its queue context
// (ie, it has already been removed from the layer's context map).
DeviceContext::~DeviceContext() {
    // [[maybe_unused]]: with NDEBUG the assert below compiles away, which
    // would otherwise leave both bindings unused.
    for ([[maybe_unused]] const auto& [queue, queue_context] : this->queues) {
        // NOTE(review): presumes queue_context is a std::shared_ptr.
        // shared_ptr::unique() is deprecated in C++17 and removed in C++20;
        // use_count() == 1 is the equivalent check.
        assert(queue_context.use_count() == 1);
    }
}
/*
void DeviceContext::sleep_in_input() {
// TODO
// Present hasn't happened yet, we don't know what queue to attack.
if (!this->present_queue) {
return;
}
const auto& frames = this->present_queue->in_flight_frames;
// No frame here means we're behind the GPU and do not need to delay.
// If anything we should speed up...
if (!std::size(frames)) {
return;
}
// If we're here, that means that there might be an outstanding frame that's
// sitting on our present_queue which hasn't yet completed, so we need to
// stall until it's finished.
const auto& last_frame = frames.back();
assert(std::size(last_frame.submissions));
const auto& last_frame_submission = last_frame.submissions.back();
last_frame_submission->end_handle->get_time_spinlock();
// From our sleep in present implementation, just spinning until
// the previous frame has completed did not work well. This was because
// there was a delay between presentation and when new work was given
// to the GPU. If we stalled the CPU without trying to account for this, we
// would get huge frame drops, loss of throughput, and the GPU would even
// clock down. So naturally I am concerned about this approach, but it seems
// to perform well so far in my own testing and is just beautifully elegant.
}
*/
// Forwards updated low-latency parameters to the swapchain monitor(s).
//
//   target                    - swapchain whose monitor should be updated.
//                               When empty, every monitor in
//                               `swapchain_monitors` receives the update.
//   present_delay             - delay value passed through to the monitor(s).
//   was_low_latency_requested - flag passed through to the monitor(s).
//
// A non-empty `target` must already be present in `swapchain_monitors`; this
// is only checked via assert.
void DeviceContext::update_params(
    const std::optional<VkSwapchainKHR> target,
    const std::chrono::milliseconds& present_delay,
    const bool was_low_latency_requested) {

    auto& monitors = this->swapchain_monitors;

    // A specific swapchain was named: update only its monitor.
    if (target.has_value()) {
        const auto found = monitors.find(*target);
        assert(found != monitors.end());
        found->second.update_params(was_low_latency_requested, present_delay);
        return;
    }

    // No target (AMD's anti_lag doesn't differentiate between swapchains),
    // so broadcast the parameters to every monitor we know about.
    for (auto& entry : monitors) {
        entry.second.update_params(was_low_latency_requested, present_delay);
    }
}
// Passes the given submissions to the monitor registered for `swapchain`.
//
//   swapchain   - key used to look up the monitor in `swapchain_monitors`;
//                 it must already be registered (checked via assert only).
//   submissions - forwarded unchanged to the monitor's notify_present().
void DeviceContext::notify_present(
    const VkSwapchainKHR& swapchain,
    const QueueContext::submissions_t& submissions) {

    auto& monitors = this->swapchain_monitors;

    const auto found = monitors.find(swapchain);
    assert(found != monitors.end());

    auto& monitor = found->second;
    monitor.notify_present(submissions);
}
} // namespace low_latency
|