summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt8
-rw-r--r--src/core/frontend/framebuffer_layout.cpp21
-rw-r--r--src/core/frontend/framebuffer_layout.h15
-rw-r--r--src/core/hle/service/ldn/ldn.cpp10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp12
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h8
-rw-r--r--src/core/settings.h1
-rw-r--r--src/video_core/CMakeLists.txt5
-rw-r--r--src/video_core/engines/maxwell_3d.cpp77
-rw-r--r--src/video_core/engines/maxwell_3d.h42
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/memory_manager.cpp14
-rw-r--r--src/video_core/memory_manager.h7
-rw-r--r--src/video_core/query_cache.h359
-rw-r--r--src/video_core/rasterizer_interface.h12
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp120
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h78
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp68
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h41
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h25
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp122
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h104
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp17
-rw-r--r--src/video_core/shader/decode/conversion.cpp14
-rw-r--r--src/yuzu/configuration/config.cpp2
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp2
-rw-r--r--src/yuzu/configuration/configure_graphics.ui35
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_tester/config.cpp2
-rw-r--r--src/yuzu_tester/default_ini.h4
37 files changed, 1226 insertions, 84 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 44ed4196d..467d769a2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -157,8 +157,14 @@ if (ENABLE_SDL2)
157 target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}") 157 target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}")
158 else() 158 else()
159 find_package(SDL2 REQUIRED) 159 find_package(SDL2 REQUIRED)
160 include_directories(${SDL2_INCLUDE_DIRS})
161 160
161 # Some installations don't set SDL2_LIBRARIES
162 if("${SDL2_LIBRARIES}" STREQUAL "")
163 message(WARNING "SDL2_LIBRARIES wasn't set, manually setting to SDL2::SDL2")
164 set(SDL2_LIBRARIES "SDL2::SDL2")
165 endif()
166
167 include_directories(${SDL2_INCLUDE_DIRS})
162 add_library(SDL2 INTERFACE) 168 add_library(SDL2 INTERFACE)
163 target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}") 169 target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}")
164 endif() 170 endif()
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index d6d2cf3f0..2dc795d56 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -27,9 +27,9 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) {
27 // so just calculate them both even if the other isn't showing. 27 // so just calculate them both even if the other isn't showing.
28 FramebufferLayout res{width, height}; 28 FramebufferLayout res{width, height};
29 29
30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / 30 const float window_aspect_ratio = static_cast<float>(height) / width;
31 ScreenUndocked::Width}; 31 const float emulation_aspect_ratio = EmulationAspectRatio(
32 const auto window_aspect_ratio = static_cast<float>(height) / width; 32 static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio);
33 33
34 const Common::Rectangle<u32> screen_window_area{0, 0, width, height}; 34 const Common::Rectangle<u32> screen_window_area{0, 0, width, height};
35 Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); 35 Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
@@ -58,4 +58,19 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
58 return DefaultFrameLayout(width, height); 58 return DefaultFrameLayout(width, height);
59} 59}
60 60
61float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) {
62 switch (aspect) {
63 case AspectRatio::Default:
64 return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width;
65 case AspectRatio::R4_3:
66 return 3.0f / 4.0f;
67 case AspectRatio::R21_9:
68 return 9.0f / 21.0f;
69 case AspectRatio::StretchToWindow:
70 return window_aspect_ratio;
71 default:
72 return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width;
73 }
74}
75
61} // namespace Layout 76} // namespace Layout
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index d2370adde..1d39c1faf 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -18,6 +18,13 @@ enum ScreenDocked : u32 {
18 HeightDocked = 1080, 18 HeightDocked = 1080,
19}; 19};
20 20
/// Aspect ratio modes accepted by EmulationAspectRatio.
/// The numeric values are spelled out because the setting is stored as a plain integer index.
enum class AspectRatio {
    Default = 0,         ///< Undocked screen ratio
    R4_3 = 1,            ///< 4:3
    R21_9 = 2,           ///< 21:9
    StretchToWindow = 3, ///< Use the host window's own ratio
};
27
21/// Describes the layout of the window framebuffer 28/// Describes the layout of the window framebuffer
22struct FramebufferLayout { 29struct FramebufferLayout {
23 u32 width{ScreenUndocked::Width}; 30 u32 width{ScreenUndocked::Width};
@@ -48,4 +55,12 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height);
48 */ 55 */
49FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); 56FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale);
50 57
58/**
59 * Convenience method to determine emulation aspect ratio
60 * @param aspect Represents the index of aspect ratio stored in Settings::values.aspect_ratio
61 * @param window_aspect_ratio Current window aspect ratio
62 * @return Emulation render window aspect ratio
63 */
64float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio);
65
51} // namespace Layout 66} // namespace Layout
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index ed5059047..92adde6d4 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -129,12 +129,20 @@ public:
129 {304, nullptr, "Disconnect"}, 129 {304, nullptr, "Disconnect"},
130 {400, nullptr, "Initialize"}, 130 {400, nullptr, "Initialize"},
131 {401, nullptr, "Finalize"}, 131 {401, nullptr, "Finalize"},
132 {402, nullptr, "SetOperationMode"}, 132 {402, &IUserLocalCommunicationService::Initialize2, "Initialize2"}, // 7.0.0+
133 }; 133 };
134 // clang-format on 134 // clang-format on
135 135
136 RegisterHandlers(functions); 136 RegisterHandlers(functions);
137 } 137 }
138
139 void Initialize2(Kernel::HLERequestContext& ctx) {
140 LOG_WARNING(Service_LDN, "(STUBBED) called");
141 // Result success seem make this services start network and continue.
142 // If we just pass result error then it will stop and maybe try again and again.
143 IPC::ResponseBuilder rb{ctx, 2};
144 rb.Push(RESULT_UNKNOWN);
145 }
138}; 146};
139 147
140class LDNS final : public ServiceFramework<LDNS> { 148class LDNS final : public ServiceFramework<LDNS> {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 6d8bca8bb..f1966ac0e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -44,6 +44,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
44 return GetWaitbase(input, output); 44 return GetWaitbase(input, output);
45 case IoctlCommand::IocChannelSetTimeoutCommand: 45 case IoctlCommand::IocChannelSetTimeoutCommand:
46 return ChannelSetTimeout(input, output); 46 return ChannelSetTimeout(input, output);
47 case IoctlCommand::IocChannelSetTimeslice:
48 return ChannelSetTimeslice(input, output);
47 default: 49 default:
48 break; 50 break;
49 } 51 }
@@ -228,4 +230,14 @@ u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>&
228 return 0; 230 return 0;
229} 231}
230 232
233u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) {
234 IoctlSetTimeslice params{};
235 std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
236 LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
237
238 channel_timeslice = params.timeslice;
239
240 return 0;
241}
242
231} // namespace Service::Nvidia::Devices 243} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index d056dd046..2ac74743f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -48,6 +48,7 @@ private:
48 IocAllocObjCtxCommand = 0xC0104809, 48 IocAllocObjCtxCommand = 0xC0104809,
49 IocChannelGetWaitbaseCommand = 0xC0080003, 49 IocChannelGetWaitbaseCommand = 0xC0080003,
50 IocChannelSetTimeoutCommand = 0x40044803, 50 IocChannelSetTimeoutCommand = 0x40044803,
51 IocChannelSetTimeslice = 0xC004481D,
51 }; 52 };
52 53
53 enum class CtxObjects : u32_le { 54 enum class CtxObjects : u32_le {
@@ -101,6 +102,11 @@ private:
101 static_assert(sizeof(IoctlChannelSetPriority) == 4, 102 static_assert(sizeof(IoctlChannelSetPriority) == 4,
102 "IoctlChannelSetPriority is incorrect size"); 103 "IoctlChannelSetPriority is incorrect size");
103 104
105 struct IoctlSetTimeslice {
106 u32_le timeslice;
107 };
108 static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size");
109
104 struct IoctlEventIdControl { 110 struct IoctlEventIdControl {
105 u32_le cmd; // 0=disable, 1=enable, 2=clear 111 u32_le cmd; // 0=disable, 1=enable, 2=clear
106 u32_le id; 112 u32_le id;
@@ -174,6 +180,7 @@ private:
174 u64_le user_data{}; 180 u64_le user_data{};
175 IoctlZCullBind zcull_params{}; 181 IoctlZCullBind zcull_params{};
176 u32_le channel_priority{}; 182 u32_le channel_priority{};
183 u32_le channel_timeslice{};
177 184
178 u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); 185 u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
179 u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output); 186 u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output);
@@ -188,6 +195,7 @@ private:
188 const std::vector<u8>& input2, IoctlVersion version); 195 const std::vector<u8>& input2, IoctlVersion version);
189 u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); 196 u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
190 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); 197 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
198 u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
191 199
192 std::shared_ptr<nvmap> nvmap_dev; 200 std::shared_ptr<nvmap> nvmap_dev;
193 u32 assigned_syncpoints{}; 201 u32 assigned_syncpoints{};
diff --git a/src/core/settings.h b/src/core/settings.h
index e1a9a0ffa..f837d3fbc 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -429,6 +429,7 @@ struct Values {
429 int vulkan_device; 429 int vulkan_device;
430 430
431 float resolution_factor; 431 float resolution_factor;
432 int aspect_ratio;
432 bool use_frame_limit; 433 bool use_frame_limit;
433 u16 frame_limit; 434 u16 frame_limit;
434 bool use_disk_shader_cache; 435 bool use_disk_shader_cache;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index db9332d00..4b0c6346f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -37,6 +37,7 @@ add_library(video_core STATIC
37 memory_manager.h 37 memory_manager.h
38 morton.cpp 38 morton.cpp
39 morton.h 39 morton.h
40 query_cache.h
40 rasterizer_accelerated.cpp 41 rasterizer_accelerated.cpp
41 rasterizer_accelerated.h 42 rasterizer_accelerated.h
42 rasterizer_cache.cpp 43 rasterizer_cache.cpp
@@ -74,6 +75,8 @@ add_library(video_core STATIC
74 renderer_opengl/gl_stream_buffer.h 75 renderer_opengl/gl_stream_buffer.h
75 renderer_opengl/gl_texture_cache.cpp 76 renderer_opengl/gl_texture_cache.cpp
76 renderer_opengl/gl_texture_cache.h 77 renderer_opengl/gl_texture_cache.h
78 renderer_opengl/gl_query_cache.cpp
79 renderer_opengl/gl_query_cache.h
77 renderer_opengl/maxwell_to_gl.h 80 renderer_opengl/maxwell_to_gl.h
78 renderer_opengl/renderer_opengl.cpp 81 renderer_opengl/renderer_opengl.cpp
79 renderer_opengl/renderer_opengl.h 82 renderer_opengl/renderer_opengl.h
@@ -177,6 +180,8 @@ if (ENABLE_VULKAN)
177 renderer_vulkan/vk_memory_manager.h 180 renderer_vulkan/vk_memory_manager.h
178 renderer_vulkan/vk_pipeline_cache.cpp 181 renderer_vulkan/vk_pipeline_cache.cpp
179 renderer_vulkan/vk_pipeline_cache.h 182 renderer_vulkan/vk_pipeline_cache.h
183 renderer_vulkan/vk_query_cache.cpp
184 renderer_vulkan/vk_query_cache.h
180 renderer_vulkan/vk_rasterizer.cpp 185 renderer_vulkan/vk_rasterizer.cpp
181 renderer_vulkan/vk_rasterizer.h 186 renderer_vulkan/vk_rasterizer.h
182 renderer_vulkan/vk_renderpass_cache.cpp 187 renderer_vulkan/vk_renderpass_cache.cpp
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5a74d1c2a..b28de1092 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cinttypes> 5#include <cinttypes>
6#include <cstring> 6#include <cstring>
7#include <optional>
7#include "common/assert.h" 8#include "common/assert.h"
8#include "core/core.h" 9#include "core/core.h"
9#include "core/core_timing.h" 10#include "core/core_timing.h"
@@ -16,6 +17,8 @@
16 17
17namespace Tegra::Engines { 18namespace Tegra::Engines {
18 19
20using VideoCore::QueryType;
21
19/// First register id that is actually a Macro call. 22/// First register id that is actually a Macro call.
20constexpr u32 MacroRegistersStart = 0xE00; 23constexpr u32 MacroRegistersStart = 0xE00;
21 24
@@ -400,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
400 ProcessQueryCondition(); 403 ProcessQueryCondition();
401 break; 404 break;
402 } 405 }
406 case MAXWELL3D_REG_INDEX(counter_reset): {
407 ProcessCounterReset();
408 break;
409 }
403 case MAXWELL3D_REG_INDEX(sync_info): { 410 case MAXWELL3D_REG_INDEX(sync_info): {
404 ProcessSyncPoint(); 411 ProcessSyncPoint();
405 break; 412 break;
@@ -544,40 +551,28 @@ void Maxwell3D::ProcessQueryGet() {
544 "Units other than CROP are unimplemented"); 551 "Units other than CROP are unimplemented");
545 552
546 switch (regs.query.query_get.operation) { 553 switch (regs.query.query_get.operation) {
547 case Regs::QueryOperation::Release: { 554 case Regs::QueryOperation::Release:
548 const u64 result = regs.query.query_sequence; 555 StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
549 StampQueryResult(result, regs.query.query_get.short_query == 0);
550 break; 556 break;
551 } 557 case Regs::QueryOperation::Acquire:
552 case Regs::QueryOperation::Acquire: { 558 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
553 // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU 559 // matches the current payload.
554 // to write a value that matches the current payload.
555 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); 560 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
556 break; 561 break;
557 } 562 case Regs::QueryOperation::Counter:
558 case Regs::QueryOperation::Counter: { 563 if (const std::optional<u64> result = GetQueryResult()) {
559 u64 result{}; 564 // If the query returns an empty optional it means it's cached and deferred.
560 switch (regs.query.query_get.select) { 565 // In this case we have a non-empty result, so we stamp it immediately.
561 case Regs::QuerySelect::Zero: 566 StampQueryResult(*result, regs.query.query_get.short_query == 0);
562 result = 0;
563 break;
564 default:
565 result = 1;
566 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
567 static_cast<u32>(regs.query.query_get.select.Value()));
568 } 567 }
569 StampQueryResult(result, regs.query.query_get.short_query == 0);
570 break; 568 break;
571 } 569 case Regs::QueryOperation::Trap:
572 case Regs::QueryOperation::Trap: {
573 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); 570 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
574 break; 571 break;
575 } 572 default:
576 default: {
577 UNIMPLEMENTED_MSG("Unknown query operation"); 573 UNIMPLEMENTED_MSG("Unknown query operation");
578 break; 574 break;
579 } 575 }
580 }
581} 576}
582 577
583void Maxwell3D::ProcessQueryCondition() { 578void Maxwell3D::ProcessQueryCondition() {
@@ -593,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() {
593 } 588 }
594 case Regs::ConditionMode::ResNonZero: { 589 case Regs::ConditionMode::ResNonZero: {
595 Regs::QueryCompare cmp; 590 Regs::QueryCompare cmp;
596 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); 591 memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
597 execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; 592 execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
598 break; 593 break;
599 } 594 }
600 case Regs::ConditionMode::Equal: { 595 case Regs::ConditionMode::Equal: {
601 Regs::QueryCompare cmp; 596 Regs::QueryCompare cmp;
602 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); 597 memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
603 execute_on = 598 execute_on =
604 cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; 599 cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
605 break; 600 break;
606 } 601 }
607 case Regs::ConditionMode::NotEqual: { 602 case Regs::ConditionMode::NotEqual: {
608 Regs::QueryCompare cmp; 603 Regs::QueryCompare cmp;
609 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); 604 memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
610 execute_on = 605 execute_on =
611 cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; 606 cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
612 break; 607 break;
@@ -619,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() {
619 } 614 }
620} 615}
621 616
617void Maxwell3D::ProcessCounterReset() {
618 switch (regs.counter_reset) {
619 case Regs::CounterReset::SampleCnt:
620 rasterizer.ResetCounter(QueryType::SamplesPassed);
621 break;
622 default:
623 LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
624 static_cast<int>(regs.counter_reset));
625 break;
626 }
627}
628
622void Maxwell3D::ProcessSyncPoint() { 629void Maxwell3D::ProcessSyncPoint() {
623 const u32 sync_point = regs.sync_info.sync_point.Value(); 630 const u32 sync_point = regs.sync_info.sync_point.Value();
624 const u32 increment = regs.sync_info.increment.Value(); 631 const u32 increment = regs.sync_info.increment.Value();
@@ -661,6 +668,22 @@ void Maxwell3D::DrawArrays() {
661 } 668 }
662} 669}
663 670
671std::optional<u64> Maxwell3D::GetQueryResult() {
672 switch (regs.query.query_get.select) {
673 case Regs::QuerySelect::Zero:
674 return 0;
675 case Regs::QuerySelect::SamplesPassed:
676 // Deferred.
677 rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
678 system.GPU().GetTicks());
679 return {};
680 default:
681 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
682 static_cast<u32>(regs.query.query_get.select.Value()));
683 return 1;
684 }
685}
686
664void Maxwell3D::ProcessCBBind(std::size_t stage_index) { 687void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
665 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 688 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
666 auto& shader = state.shader_stages[stage_index]; 689 auto& shader = state.shader_stages[stage_index];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0a2af54e5..26939be3f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <bitset> 8#include <bitset>
9#include <optional>
9#include <type_traits> 10#include <type_traits>
10#include <unordered_map> 11#include <unordered_map>
11#include <vector> 12#include <vector>
@@ -409,6 +410,27 @@ public:
409 Linear = 1, 410 Linear = 1,
410 }; 411 };
411 412
/// Values of the counter_reset register, selecting which counter a reset applies to.
/// Maxwell3D::ProcessCounterReset only acts on SampleCnt; every other value is logged as
/// unimplemented. The UnkXX entries are presumably selectors that are not yet identified.
413 enum class CounterReset : u32 {
414 SampleCnt = 0x01,
415 Unk02 = 0x02,
416 Unk03 = 0x03,
417 Unk04 = 0x04,
418 EmittedPrimitives = 0x10, // Not tested
419 Unk11 = 0x11,
420 Unk12 = 0x12,
421 Unk13 = 0x13,
422 Unk15 = 0x15,
423 Unk16 = 0x16,
424 Unk17 = 0x17,
425 Unk18 = 0x18,
426 Unk1A = 0x1A,
427 Unk1B = 0x1B,
428 Unk1C = 0x1C,
429 Unk1D = 0x1D,
430 Unk1E = 0x1E,
431 GeneratedPrimitives = 0x1F,
432 };
433
412 struct Cull { 434 struct Cull {
413 enum class FrontFace : u32 { 435 enum class FrontFace : u32 {
414 ClockWise = 0x0900, 436 ClockWise = 0x0900,
@@ -857,7 +879,7 @@ public:
857 BitField<7, 1, u32> c7; 879 BitField<7, 1, u32> c7;
858 } clip_distance_enabled; 880 } clip_distance_enabled;
859 881
860 INSERT_UNION_PADDING_WORDS(0x1); 882 u32 samplecnt_enable;
861 883
862 float point_size; 884 float point_size;
863 885
@@ -865,7 +887,11 @@ public:
865 887
866 u32 point_sprite_enable; 888 u32 point_sprite_enable;
867 889
868 INSERT_UNION_PADDING_WORDS(0x5); 890 INSERT_UNION_PADDING_WORDS(0x3);
891
892 CounterReset counter_reset;
893
894 INSERT_UNION_PADDING_WORDS(0x1);
869 895
870 u32 zeta_enable; 896 u32 zeta_enable;
871 897
@@ -1412,12 +1438,15 @@ private:
1412 /// Handles a write to the QUERY_GET register. 1438 /// Handles a write to the QUERY_GET register.
1413 void ProcessQueryGet(); 1439 void ProcessQueryGet();
1414 1440
1415 // Writes the query result accordingly 1441 /// Writes the query result accordingly.
1416 void StampQueryResult(u64 payload, bool long_query); 1442 void StampQueryResult(u64 payload, bool long_query);
1417 1443
1418 // Handles Conditional Rendering 1444 /// Handles conditional rendering.
1419 void ProcessQueryCondition(); 1445 void ProcessQueryCondition();
1420 1446
1447 /// Handles counter resets.
1448 void ProcessCounterReset();
1449
1421 /// Handles writes to syncing register. 1450 /// Handles writes to syncing register.
1422 void ProcessSyncPoint(); 1451 void ProcessSyncPoint();
1423 1452
@@ -1434,6 +1463,9 @@ private:
1434 1463
1435 // Handles a instance drawcall from MME 1464 // Handles a instance drawcall from MME
1436 void StepInstance(MMEDrawMode expected_mode, u32 count); 1465 void StepInstance(MMEDrawMode expected_mode, u32 count);
1466
1467 /// Returns a query's value or an empty object if the value will be deferred through a cache.
1468 std::optional<u64> GetQueryResult();
1437}; 1469};
1438 1470
1439#define ASSERT_REG_POSITION(field_name, position) \ 1471#define ASSERT_REG_POSITION(field_name, position) \
@@ -1499,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
1499ASSERT_REG_POSITION(vb_element_base, 0x50D); 1531ASSERT_REG_POSITION(vb_element_base, 0x50D);
1500ASSERT_REG_POSITION(vb_base_instance, 0x50E); 1532ASSERT_REG_POSITION(vb_base_instance, 0x50E);
1501ASSERT_REG_POSITION(clip_distance_enabled, 0x544); 1533ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
1534ASSERT_REG_POSITION(samplecnt_enable, 0x545);
1502ASSERT_REG_POSITION(point_size, 0x546); 1535ASSERT_REG_POSITION(point_size, 0x546);
1503ASSERT_REG_POSITION(point_sprite_enable, 0x548); 1536ASSERT_REG_POSITION(point_sprite_enable, 0x548);
1537ASSERT_REG_POSITION(counter_reset, 0x54C);
1504ASSERT_REG_POSITION(zeta_enable, 0x54E); 1538ASSERT_REG_POSITION(zeta_enable, 0x54E);
1505ASSERT_REG_POSITION(multisample_control, 0x54F); 1539ASSERT_REG_POSITION(multisample_control, 0x54F);
1506ASSERT_REG_POSITION(condition, 0x554); 1540ASSERT_REG_POSITION(condition, 0x554);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4419ab735..7d7137109 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -24,7 +24,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
24GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) 24GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
25 : system{system}, renderer{renderer}, is_async{is_async} { 25 : system{system}, renderer{renderer}, is_async{is_async} {
26 auto& rasterizer{renderer.Rasterizer()}; 26 auto& rasterizer{renderer.Rasterizer()};
27 memory_manager = std::make_unique<Tegra::MemoryManager>(system); 27 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
28 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 28 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
29 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 29 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
30 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); 30 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f1d50be3e..11848fbce 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,12 +9,13 @@
9#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
10#include "core/hle/kernel/vm_manager.h" 10#include "core/hle/kernel/vm_manager.h"
11#include "core/memory.h" 11#include "core/memory.h"
12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h"
14 14
15namespace Tegra { 15namespace Tegra {
16 16
17MemoryManager::MemoryManager(Core::System& system) : system{system} { 17MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
18 : rasterizer{rasterizer}, system{system} {
18 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 19 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
19 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 20 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
20 Common::PageType::Unmapped); 21 Common::PageType::Unmapped);
@@ -83,8 +84,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
83 const auto cpu_addr = GpuToCpuAddress(gpu_addr); 84 const auto cpu_addr = GpuToCpuAddress(gpu_addr);
84 ASSERT(cpu_addr); 85 ASSERT(cpu_addr);
85 86
86 system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); 87 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
87
88 UnmapRange(gpu_addr, aligned_size); 88 UnmapRange(gpu_addr, aligned_size);
89 ASSERT(system.CurrentProcess() 89 ASSERT(system.CurrentProcess()
90 ->VMManager() 90 ->VMManager()
@@ -242,7 +242,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
242 switch (page_table.attributes[page_index]) { 242 switch (page_table.attributes[page_index]) {
243 case Common::PageType::Memory: { 243 case Common::PageType::Memory: {
244 const u8* src_ptr{page_table.pointers[page_index] + page_offset}; 244 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
245 system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); 245 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
246 std::memcpy(dest_buffer, src_ptr, copy_amount); 246 std::memcpy(dest_buffer, src_ptr, copy_amount);
247 break; 247 break;
248 } 248 }
@@ -292,7 +292,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
292 switch (page_table.attributes[page_index]) { 292 switch (page_table.attributes[page_index]) {
293 case Common::PageType::Memory: { 293 case Common::PageType::Memory: {
294 u8* dest_ptr{page_table.pointers[page_index] + page_offset}; 294 u8* dest_ptr{page_table.pointers[page_index] + page_offset};
295 system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); 295 rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
296 std::memcpy(dest_ptr, src_buffer, copy_amount); 296 std::memcpy(dest_ptr, src_buffer, copy_amount);
297 break; 297 break;
298 } 298 }
@@ -340,7 +340,7 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::
340 switch (page_table.attributes[page_index]) { 340 switch (page_table.attributes[page_index]) {
341 case Common::PageType::Memory: { 341 case Common::PageType::Memory: {
342 const u8* src_ptr{page_table.pointers[page_index] + page_offset}; 342 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
343 system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); 343 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
344 WriteBlock(dest_addr, src_ptr, copy_amount); 344 WriteBlock(dest_addr, src_ptr, copy_amount);
345 break; 345 break;
346 } 346 }
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 393447eb4..aea010087 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,6 +10,10 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/page_table.h" 11#include "common/page_table.h"
12 12
13namespace VideoCore {
14class RasterizerInterface;
15}
16
13namespace Core { 17namespace Core {
14class System; 18class System;
15} 19}
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
47 51
48class MemoryManager final { 52class MemoryManager final {
49public: 53public:
50 explicit MemoryManager(Core::System& system); 54 explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
51 ~MemoryManager(); 55 ~MemoryManager();
52 56
53 GPUVAddr AllocateSpace(u64 size, u64 align); 57 GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -172,6 +176,7 @@ private:
172 176
173 Common::PageTable page_table{page_bits}; 177 Common::PageTable page_table{page_bits};
174 VMAMap vma_map; 178 VMAMap vma_map;
179 VideoCore::RasterizerInterface& rasterizer;
175 180
176 Core::System& system; 181 Core::System& system;
177}; 182};
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
new file mode 100644
index 000000000..e66054ed0
--- /dev/null
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,359 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <cstring>
10#include <iterator>
11#include <memory>
12#include <mutex>
13#include <optional>
14#include <unordered_map>
15#include <vector>
16
17#include "common/assert.h"
18#include "core/core.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h"
21#include "video_core/memory_manager.h"
22#include "video_core/rasterizer_interface.h"
23
24namespace VideoCommon {
25
26template <class QueryCache, class HostCounter>
27class CounterStreamBase {
28public:
29 explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
30 : cache{cache}, type{type} {}
31
32 /// Updates the state of the stream, enabling or disabling as needed.
33 void Update(bool enabled) {
34 if (enabled) {
35 Enable();
36 } else {
37 Disable();
38 }
39 }
40
41 /// Resets the stream to zero. It doesn't disable the query after resetting.
42 void Reset() {
43 if (current) {
44 current->EndQuery();
45
46 // Immediately start a new query to avoid disabling its state.
47 current = cache.Counter(nullptr, type);
48 }
49 last = nullptr;
50 }
51
52 /// Returns the current counter slicing as needed.
53 std::shared_ptr<HostCounter> Current() {
54 if (!current) {
55 return nullptr;
56 }
57 current->EndQuery();
58 last = std::move(current);
59 current = cache.Counter(last, type);
60 return last;
61 }
62
63 /// Returns true when the counter stream is enabled.
64 bool IsEnabled() const {
65 return current != nullptr;
66 }
67
68private:
69 /// Enables the stream.
70 void Enable() {
71 if (current) {
72 return;
73 }
74 current = cache.Counter(last, type);
75 }
76
77 // Disables the stream.
78 void Disable() {
79 if (current) {
80 current->EndQuery();
81 }
82 last = std::exchange(current, nullptr);
83 }
84
85 QueryCache& cache;
86 const VideoCore::QueryType type;
87
88 std::shared_ptr<HostCounter> current;
89 std::shared_ptr<HostCounter> last;
90};
91
92template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
93 class QueryPool>
94class QueryCacheBase {
95public:
96 explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
97 : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
98 static_cast<QueryCache&>(*this),
99 VideoCore::QueryType::SamplesPassed}}} {}
100
101 void InvalidateRegion(CacheAddr addr, std::size_t size) {
102 std::unique_lock lock{mutex};
103 FlushAndRemoveRegion(addr, size);
104 }
105
106 void FlushRegion(CacheAddr addr, std::size_t size) {
107 std::unique_lock lock{mutex};
108 FlushAndRemoveRegion(addr, size);
109 }
110
111 /**
112 * Records a query in GPU mapped memory, potentially marked with a timestamp.
113 * @param gpu_addr GPU address to flush to when the mapped memory is read.
114 * @param type Query type, e.g. SamplesPassed.
115 * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
116 */
117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
118 std::unique_lock lock{mutex};
119 auto& memory_manager = system.GPU().MemoryManager();
120 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
121
122 CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
123 if (!query) {
124 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
125 ASSERT_OR_EXECUTE(cpu_addr, return;);
126
127 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
128 }
129
130 query->BindCounter(Stream(type).Current(), timestamp);
131 }
132
133 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
134 void UpdateCounters() {
135 std::unique_lock lock{mutex};
136 const auto& regs = system.GPU().Maxwell3D().regs;
137 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
138 }
139
140 /// Resets a counter to zero. It doesn't disable the query after resetting.
141 void ResetCounter(VideoCore::QueryType type) {
142 std::unique_lock lock{mutex};
143 Stream(type).Reset();
144 }
145
146 /// Disable all active streams. Expected to be called at the end of a command buffer.
147 void DisableStreams() {
148 std::unique_lock lock{mutex};
149 for (auto& stream : streams) {
150 stream.Update(false);
151 }
152 }
153
154 /// Returns a new host counter.
155 std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
156 VideoCore::QueryType type) {
157 return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
158 type);
159 }
160
161 /// Returns the counter stream of the specified type.
162 CounterStream& Stream(VideoCore::QueryType type) {
163 return streams[static_cast<std::size_t>(type)];
164 }
165
166 /// Returns the counter stream of the specified type.
167 const CounterStream& Stream(VideoCore::QueryType type) const {
168 return streams[static_cast<std::size_t>(type)];
169 }
170
171protected:
172 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
173
174private:
175 /// Flushes a memory range to guest memory and removes it from the cache.
176 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
177 const u64 addr_begin = static_cast<u64>(addr);
178 const u64 addr_end = addr_begin + static_cast<u64>(size);
179 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
180 const u64 cache_begin = query.GetCacheAddr();
181 const u64 cache_end = cache_begin + query.SizeInBytes();
182 return cache_begin < addr_end && addr_begin < cache_end;
183 };
184
185 const u64 page_end = addr_end >> PAGE_SHIFT;
186 for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
187 const auto& it = cached_queries.find(page);
188 if (it == std::end(cached_queries)) {
189 continue;
190 }
191 auto& contents = it->second;
192 for (auto& query : contents) {
193 if (!in_range(query)) {
194 continue;
195 }
196 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
197 query.Flush();
198 }
199 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
200 std::end(contents));
201 }
202 }
203
204 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
205 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
206 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
207 const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
208 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
209 host_ptr);
210 }
211
212 /// Tries to a get a cached query. Returns nullptr on failure.
213 CachedQuery* TryGet(CacheAddr addr) {
214 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
215 const auto it = cached_queries.find(page);
216 if (it == std::end(cached_queries)) {
217 return nullptr;
218 }
219 auto& contents = it->second;
220 const auto found =
221 std::find_if(std::begin(contents), std::end(contents),
222 [addr](auto& query) { return query.GetCacheAddr() == addr; });
223 return found != std::end(contents) ? &*found : nullptr;
224 }
225
226 static constexpr std::uintptr_t PAGE_SIZE = 4096;
227 static constexpr unsigned PAGE_SHIFT = 12;
228
229 Core::System& system;
230 VideoCore::RasterizerInterface& rasterizer;
231
232 std::recursive_mutex mutex;
233
234 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
235
236 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
237};
238
239template <class QueryCache, class HostCounter>
240class HostCounterBase {
241public:
242 explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
243 : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
244 // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
245 constexpr u64 depth_threshold = 96;
246 if (depth > depth_threshold) {
247 depth = 0;
248 base_result = dependency->Query();
249 dependency = nullptr;
250 }
251 }
252 virtual ~HostCounterBase() = default;
253
254 /// Returns the current value of the query.
255 u64 Query() {
256 if (result) {
257 return *result;
258 }
259
260 u64 value = BlockingQuery() + base_result;
261 if (dependency) {
262 value += dependency->Query();
263 dependency = nullptr;
264 }
265
266 result = value;
267 return *result;
268 }
269
270 /// Returns true when flushing this query will potentially wait.
271 bool WaitPending() const noexcept {
272 return result.has_value();
273 }
274
275 u64 Depth() const noexcept {
276 return depth;
277 }
278
279protected:
280 /// Returns the value of query from the backend API blocking as needed.
281 virtual u64 BlockingQuery() const = 0;
282
283private:
284 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
285 std::optional<u64> result; ///< Filled with the already returned value.
286 u64 depth; ///< Number of nested dependencies.
287 u64 base_result = 0; ///< Equivalent to nested dependencies value.
288};
289
290template <class HostCounter>
291class CachedQueryBase {
292public:
293 explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
294 : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
295 virtual ~CachedQueryBase() = default;
296
297 CachedQueryBase(CachedQueryBase&&) noexcept = default;
298 CachedQueryBase(const CachedQueryBase&) = delete;
299
300 CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
301 CachedQueryBase& operator=(const CachedQueryBase&) = delete;
302
303 /// Flushes the query to guest memory.
304 virtual void Flush() {
305 // When counter is nullptr it means that it's just been reseted. We are supposed to write a
306 // zero in these cases.
307 const u64 value = counter ? counter->Query() : 0;
308 std::memcpy(host_ptr, &value, sizeof(u64));
309
310 if (timestamp) {
311 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
312 }
313 }
314
315 /// Binds a counter to this query.
316 void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
317 if (counter) {
318 // If there's an old counter set it means the query is being rewritten by the game.
319 // To avoid losing the data forever, flush here.
320 Flush();
321 }
322 counter = std::move(counter_);
323 timestamp = timestamp_;
324 }
325
326 VAddr CpuAddr() const noexcept {
327 return cpu_addr;
328 }
329
330 CacheAddr GetCacheAddr() const noexcept {
331 return ToCacheAddr(host_ptr);
332 }
333
334 u64 SizeInBytes() const noexcept {
335 return SizeInBytes(timestamp.has_value());
336 }
337
338 static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
339 return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
340 }
341
342protected:
343 /// Returns true when querying the counter may potentially block.
344 bool WaitPending() const noexcept {
345 return counter && counter->WaitPending();
346 }
347
348private:
349 static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
350 static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
351 static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
352
353 VAddr cpu_addr; ///< Guest CPU address.
354 u8* host_ptr; ///< Writable host pointer.
355 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
356 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
357};
358
359} // namespace VideoCommon
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index a8fc66711..f18eaf4bc 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
6 6
7#include <atomic> 7#include <atomic>
8#include <functional> 8#include <functional>
9#include <optional>
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "video_core/engines/fermi_2d.h" 11#include "video_core/engines/fermi_2d.h"
11#include "video_core/gpu.h" 12#include "video_core/gpu.h"
@@ -17,6 +18,11 @@ class MemoryManager;
17 18
18namespace VideoCore { 19namespace VideoCore {
19 20
21enum class QueryType {
22 SamplesPassed,
23};
24constexpr std::size_t NumQueryTypes = 1;
25
20enum class LoadCallbackStage { 26enum class LoadCallbackStage {
21 Prepare, 27 Prepare,
22 Decompile, 28 Decompile,
@@ -38,6 +44,12 @@ public:
38 /// Dispatches a compute shader invocation 44 /// Dispatches a compute shader invocation
39 virtual void DispatchCompute(GPUVAddr code_addr) = 0; 45 virtual void DispatchCompute(GPUVAddr code_addr) = 0;
40 46
47 /// Resets the counter of a query
48 virtual void ResetCounter(QueryType type) = 0;
49
50 /// Records a GPU query and caches it
51 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
52
41 /// Notify rasterizer that all caches should be flushed to Switch memory 53 /// Notify rasterizer that all caches should be flushed to Switch memory
42 virtual void FlushAll() = 0; 54 virtual void FlushAll() = 0;
43 55
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
new file mode 100644
index 000000000..f12e9f55f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -0,0 +1,120 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <memory>
8#include <unordered_map>
9#include <utility>
10#include <vector>
11
12#include <glad/glad.h>
13
14#include "common/assert.h"
15#include "core/core.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/memory_manager.h"
18#include "video_core/renderer_opengl/gl_query_cache.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h"
20
21namespace OpenGL {
22
23namespace {
24
25constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
26
27constexpr GLenum GetTarget(VideoCore::QueryType type) {
28 return QueryTargets[static_cast<std::size_t>(type)];
29}
30
31} // Anonymous namespace
32
33QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
34 : VideoCommon::QueryCacheBase<
35 QueryCache, CachedQuery, CounterStream, HostCounter,
36 std::vector<OGLQuery>>{system,
37 static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
38 gl_rasterizer{gl_rasterizer} {}
39
40QueryCache::~QueryCache() = default;
41
42OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
43 auto& reserve = query_pools[static_cast<std::size_t>(type)];
44 OGLQuery query;
45 if (reserve.empty()) {
46 query.Create(GetTarget(type));
47 return query;
48 }
49
50 query = std::move(reserve.back());
51 reserve.pop_back();
52 return query;
53}
54
55void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
56 query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
57}
58
59bool QueryCache::AnyCommandQueued() const noexcept {
60 return gl_rasterizer.AnyCommandQueued();
61}
62
63HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
64 VideoCore::QueryType type)
65 : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
66 type{type}, query{cache.AllocateQuery(type)} {
67 glBeginQuery(GetTarget(type), query.handle);
68}
69
70HostCounter::~HostCounter() {
71 cache.Reserve(type, std::move(query));
72}
73
74void HostCounter::EndQuery() {
75 if (!cache.AnyCommandQueued()) {
76 // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
77 // having any of these causes a lock. glFlush is considered a command, so we can safely wait
78 // for this. Insert to the OpenGL command stream a flush.
79 glFlush();
80 }
81 glEndQuery(GetTarget(type));
82}
83
84u64 HostCounter::BlockingQuery() const {
85 GLint64 value;
86 glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
87 return static_cast<u64>(value);
88}
89
90CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
91 : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
92
93CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
94 : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
95
96CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
97 VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
98 cache = rhs.cache;
99 type = rhs.type;
100 return *this;
101}
102
103void CachedQuery::Flush() {
104 // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
105 // To avoid this disable and re-enable keeping the dependency stream.
106 // But we only have to do this if we have pending waits to be done.
107 auto& stream = cache->Stream(type);
108 const bool slice_counter = WaitPending() && stream.IsEnabled();
109 if (slice_counter) {
110 stream.Update(false);
111 }
112
113 VideoCommon::CachedQueryBase<HostCounter>::Flush();
114
115 if (slice_counter) {
116 stream.Update(true);
117 }
118}
119
120} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
new file mode 100644
index 000000000..d8e7052a1
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -0,0 +1,78 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <vector>
10
11#include "common/common_types.h"
12#include "video_core/query_cache.h"
13#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
15
16namespace Core {
17class System;
18}
19
20namespace OpenGL {
21
22class CachedQuery;
23class HostCounter;
24class QueryCache;
25class RasterizerOpenGL;
26
27using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
28
29class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
30 HostCounter, std::vector<OGLQuery>> {
31public:
32 explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
33 ~QueryCache();
34
35 OGLQuery AllocateQuery(VideoCore::QueryType type);
36
37 void Reserve(VideoCore::QueryType type, OGLQuery&& query);
38
39 bool AnyCommandQueued() const noexcept;
40
41private:
42 RasterizerOpenGL& gl_rasterizer;
43};
44
45class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
46public:
47 explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
48 VideoCore::QueryType type);
49 ~HostCounter();
50
51 void EndQuery();
52
53private:
54 u64 BlockingQuery() const override;
55
56 QueryCache& cache;
57 const VideoCore::QueryType type;
58 OGLQuery query;
59};
60
61class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
62public:
63 explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
64 u8* host_ptr);
65 CachedQuery(CachedQuery&& rhs) noexcept;
66 CachedQuery(const CachedQuery&) = delete;
67
68 CachedQuery& operator=(CachedQuery&& rhs) noexcept;
69 CachedQuery& operator=(const CachedQuery&) = delete;
70
71 void Flush() override;
72
73private:
74 QueryCache* cache;
75 VideoCore::QueryType type;
76};
77
78} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 048d43b89..e1965fb21 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,6 +25,7 @@
25#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
26#include "video_core/engines/shader_type.h" 26#include "video_core/engines/shader_type.h"
27#include "video_core/memory_manager.h" 27#include "video_core/memory_manager.h"
28#include "video_core/renderer_opengl/gl_query_cache.h"
28#include "video_core/renderer_opengl/gl_rasterizer.h" 29#include "video_core/renderer_opengl/gl_rasterizer.h"
29#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
30#include "video_core/renderer_opengl/gl_shader_gen.h" 31#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
92RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 93RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
93 ScreenInfo& info) 94 ScreenInfo& info)
94 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, 95 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
95 shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, 96 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
96 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { 97 screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
97 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 98 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
98 state.draw.shader_program = 0; 99 state.draw.shader_program = 0;
99 state.Apply(); 100 state.Apply();
@@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() {
541 } else if (use_stencil) { 542 } else if (use_stencil) {
542 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); 543 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
543 } 544 }
545
546 ++num_queued_commands;
544} 547}
545 548
546void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { 549void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
547 MICROPROFILE_SCOPE(OpenGL_Drawing); 550 MICROPROFILE_SCOPE(OpenGL_Drawing);
548 auto& gpu = system.GPU().Maxwell3D(); 551 auto& gpu = system.GPU().Maxwell3D();
552 const auto& regs = gpu.regs;
553
554 query_cache.UpdateCounters();
549 555
550 SyncRasterizeEnable(state); 556 SyncRasterizeEnable(state);
551 SyncColorMask(); 557 SyncColorMask();
@@ -611,7 +617,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
611 617
612 // Setup shaders and their used resources. 618 // Setup shaders and their used resources.
613 texture_cache.GuardSamplers(true); 619 texture_cache.GuardSamplers(true);
614 const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); 620 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
615 SetupShaders(primitive_mode); 621 SetupShaders(primitive_mode);
616 texture_cache.GuardSamplers(false); 622 texture_cache.GuardSamplers(false);
617 623
@@ -638,22 +644,44 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
638 glTextureBarrier(); 644 glTextureBarrier();
639 } 645 }
640 646
647 ++num_queued_commands;
648
641 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); 649 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
642 const GLsizei num_instances = 650 const GLsizei num_instances =
643 static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); 651 static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
644 if (is_indexed) { 652 if (is_indexed) {
645 const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
646 const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); 653 const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
647 const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); 654 const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
648 glDrawElementsInstancedBaseVertexBaseInstance( 655 const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
649 primitive_mode, num_vertices, index_format, 656 const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
650 reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex, 657 if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
651 base_instance); 658 glDrawElements(primitive_mode, num_vertices, format, offset);
659 } else if (num_instances == 1 && base_instance == 0) {
660 glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
661 } else if (base_vertex == 0 && base_instance == 0) {
662 glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
663 } else if (base_vertex == 0) {
664 glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
665 num_instances, base_instance);
666 } else if (base_instance == 0) {
667 glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
668 num_instances, base_vertex);
669 } else {
670 glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
671 offset, num_instances, base_vertex,
672 base_instance);
673 }
652 } else { 674 } else {
653 const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); 675 const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
654 const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); 676 const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
655 glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances, 677 if (num_instances == 1 && base_instance == 0) {
656 base_instance); 678 glDrawArrays(primitive_mode, base_vertex, num_vertices);
679 } else if (base_instance == 0) {
680 glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
681 } else {
682 glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
683 num_instances, base_instance);
684 }
657 } 685 }
658} 686}
659 687
@@ -697,6 +725,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
697 state.ApplyProgramPipeline(); 725 state.ApplyProgramPipeline();
698 726
699 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 727 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
728 ++num_queued_commands;
729}
730
731void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
732 query_cache.ResetCounter(type);
733}
734
735void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
736 std::optional<u64> timestamp) {
737 query_cache.Query(gpu_addr, type, timestamp);
700} 738}
701 739
702void RasterizerOpenGL::FlushAll() {} 740void RasterizerOpenGL::FlushAll() {}
@@ -708,6 +746,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
708 } 746 }
709 texture_cache.FlushRegion(addr, size); 747 texture_cache.FlushRegion(addr, size);
710 buffer_cache.FlushRegion(addr, size); 748 buffer_cache.FlushRegion(addr, size);
749 query_cache.FlushRegion(addr, size);
711} 750}
712 751
713void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 752void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -718,6 +757,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
718 texture_cache.InvalidateRegion(addr, size); 757 texture_cache.InvalidateRegion(addr, size);
719 shader_cache.InvalidateRegion(addr, size); 758 shader_cache.InvalidateRegion(addr, size);
720 buffer_cache.InvalidateRegion(addr, size); 759 buffer_cache.InvalidateRegion(addr, size);
760 query_cache.InvalidateRegion(addr, size);
721} 761}
722 762
723void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 763void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
@@ -728,10 +768,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
728} 768}
729 769
730void RasterizerOpenGL::FlushCommands() { 770void RasterizerOpenGL::FlushCommands() {
771 // Only flush when we have commands queued to OpenGL.
772 if (num_queued_commands == 0) {
773 return;
774 }
775 num_queued_commands = 0;
731 glFlush(); 776 glFlush();
732} 777}
733 778
734void RasterizerOpenGL::TickFrame() { 779void RasterizerOpenGL::TickFrame() {
780 // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
781 num_queued_commands = 0;
782
735 buffer_cache.TickFrame(); 783 buffer_cache.TickFrame();
736} 784}
737 785
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bc28a3bcf..68abe9a21 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,6 +24,7 @@
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_query_cache.h"
27#include "video_core/renderer_opengl/gl_resource_manager.h" 28#include "video_core/renderer_opengl/gl_resource_manager.h"
28#include "video_core/renderer_opengl/gl_sampler_cache.h" 29#include "video_core/renderer_opengl/gl_sampler_cache.h"
29#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -60,6 +61,8 @@ public:
60 void Draw(bool is_indexed, bool is_instanced) override; 61 void Draw(bool is_indexed, bool is_instanced) override;
61 void Clear() override; 62 void Clear() override;
62 void DispatchCompute(GPUVAddr code_addr) override; 63 void DispatchCompute(GPUVAddr code_addr) override;
64 void ResetCounter(VideoCore::QueryType type) override;
65 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
63 void FlushAll() override; 66 void FlushAll() override;
64 void FlushRegion(CacheAddr addr, u64 size) override; 67 void FlushRegion(CacheAddr addr, u64 size) override;
65 void InvalidateRegion(CacheAddr addr, u64 size) override; 68 void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -74,6 +77,11 @@ public:
74 void LoadDiskResources(const std::atomic_bool& stop_loading, 77 void LoadDiskResources(const std::atomic_bool& stop_loading,
75 const VideoCore::DiskResourceLoadCallback& callback) override; 78 const VideoCore::DiskResourceLoadCallback& callback) override;
76 79
80 /// Returns true when there are commands queued to the OpenGL server.
81 bool AnyCommandQueued() const {
82 return num_queued_commands > 0;
83 }
84
77private: 85private:
78 /// Configures the color and depth framebuffer states. 86 /// Configures the color and depth framebuffer states.
79 void ConfigureFramebuffers(); 87 void ConfigureFramebuffers();
@@ -176,10 +184,23 @@ private:
176 /// Syncs the alpha test state to match the guest state 184 /// Syncs the alpha test state to match the guest state
177 void SyncAlphaTest(); 185 void SyncAlphaTest();
178 186
179 /// Check for extension that are not strictly required 187 /// Check for extension that are not strictly required but are needed for correct emulation
180 /// but are needed for correct emulation
181 void CheckExtensions(); 188 void CheckExtensions();
182 189
190 std::size_t CalculateVertexArraysSize() const;
191
192 std::size_t CalculateIndexBufferSize() const;
193
194 /// Updates and returns a vertex array object representing current vertex format
195 GLuint SetupVertexFormat();
196
197 void SetupVertexBuffer(GLuint vao);
198 void SetupVertexInstances(GLuint vao);
199
200 GLintptr SetupIndexBuffer();
201
202 void SetupShaders(GLenum primitive_mode);
203
183 const Device device; 204 const Device device;
184 OpenGLState state; 205 OpenGLState state;
185 206
@@ -187,6 +208,7 @@ private:
187 ShaderCacheOpenGL shader_cache; 208 ShaderCacheOpenGL shader_cache;
188 SamplerCacheOpenGL sampler_cache; 209 SamplerCacheOpenGL sampler_cache;
189 FramebufferCacheOpenGL framebuffer_cache; 210 FramebufferCacheOpenGL framebuffer_cache;
211 QueryCache query_cache;
190 212
191 Core::System& system; 213 Core::System& system;
192 ScreenInfo& screen_info; 214 ScreenInfo& screen_info;
@@ -204,19 +226,8 @@ private:
204 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 226 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
205 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 227 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
206 228
207 std::size_t CalculateVertexArraysSize() const; 229 /// Number of commands queued to the OpenGL driver. Reset on flush.
208 230 std::size_t num_queued_commands = 0;
209 std::size_t CalculateIndexBufferSize() const;
210
211 /// Updates and returns a vertex array object representing current vertex format
212 GLuint SetupVertexFormat();
213
214 void SetupVertexBuffer(GLuint vao);
215 void SetupVertexInstances(GLuint vao);
216
217 GLintptr SetupIndexBuffer();
218
219 void SetupShaders(GLenum primitive_mode);
220}; 231};
221 232
222} // namespace OpenGL 233} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 5c96c1d46..f0ddfb276 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -207,4 +207,21 @@ void OGLFramebuffer::Release() {
207 handle = 0; 207 handle = 0;
208} 208}
209 209
210void OGLQuery::Create(GLenum target) {
211 if (handle != 0)
212 return;
213
214 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
215 glCreateQueries(target, 1, &handle);
216}
217
218void OGLQuery::Release() {
219 if (handle == 0)
220 return;
221
222 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
223 glDeleteQueries(1, &handle);
224 handle = 0;
225}
226
210} // namespace OpenGL 227} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 3a85a1d4c..514d1d165 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -266,4 +266,29 @@ public:
266 GLuint handle = 0; 266 GLuint handle = 0;
267}; 267};
268 268
269class OGLQuery : private NonCopyable {
270public:
271 OGLQuery() = default;
272
273 OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
274
275 ~OGLQuery() {
276 Release();
277 }
278
279 OGLQuery& operator=(OGLQuery&& o) noexcept {
280 Release();
281 handle = std::exchange(o.handle, 0);
282 return *this;
283 }
284
285 /// Creates a new internal OpenGL resource and stores the handle
286 void Create(GLenum target);
287
288 /// Deletes the internal OpenGL resource
289 void Release();
290
291 GLuint handle = 0;
292};
293
269} // namespace OpenGL 294} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 9840f26e5..588a6835f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
104 features.depthBiasClamp = true; 104 features.depthBiasClamp = true;
105 features.geometryShader = true; 105 features.geometryShader = true;
106 features.tessellationShader = true; 106 features.tessellationShader = true;
107 features.occlusionQueryPrecise = true;
107 features.fragmentStoresAndAtomics = true; 108 features.fragmentStoresAndAtomics = true;
108 features.shaderImageGatherExtended = true; 109 features.shaderImageGatherExtended = true;
109 features.shaderStorageImageWriteWithoutFormat = true; 110 features.shaderStorageImageWriteWithoutFormat = true;
@@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
117 bit8_storage.uniformAndStorageBuffer8BitAccess = true; 118 bit8_storage.uniformAndStorageBuffer8BitAccess = true;
118 SetNext(next, bit8_storage); 119 SetNext(next, bit8_storage);
119 120
121 vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
122 host_query_reset.hostQueryReset = true;
123 SetNext(next, host_query_reset);
124
120 vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; 125 vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
121 if (is_float16_supported) { 126 if (is_float16_supported) {
122 float16_int8.shaderFloat16 = true; 127 float16_int8.shaderFloat16 = true;
@@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
273 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 278 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
274 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 279 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
275 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 280 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
281 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
276 }; 282 };
277 std::bitset<required_extensions.size()> available_extensions{}; 283 std::bitset<required_extensions.size()> available_extensions{};
278 284
@@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
340 std::make_pair(features.depthBiasClamp, "depthBiasClamp"), 346 std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
341 std::make_pair(features.geometryShader, "geometryShader"), 347 std::make_pair(features.geometryShader, "geometryShader"),
342 std::make_pair(features.tessellationShader, "tessellationShader"), 348 std::make_pair(features.tessellationShader, "tessellationShader"),
349 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
343 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), 350 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
344 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 351 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
345 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 352 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
@@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
376 } 383 }
377 }; 384 };
378 385
379 extensions.reserve(13); 386 extensions.reserve(14);
380 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 387 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
381 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); 388 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
382 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); 389 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
384 extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); 391 extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
385 extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); 392 extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
386 extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); 393 extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
394 extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
387 395
388 [[maybe_unused]] const bool nsight = 396 [[maybe_unused]] const bool nsight =
389 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 397 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
new file mode 100644
index 000000000..ffbf60dda
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -0,0 +1,122 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7#include <cstdint>
8#include <utility>
9#include <vector>
10
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_query_cache.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16
17namespace Vulkan {
18
19namespace {
20
21constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
22
23constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
24 return QUERY_TARGETS[static_cast<std::size_t>(type)];
25}
26
27} // Anonymous namespace
28
29QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
30
31QueryPool::~QueryPool() = default;
32
33void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
34 device = &device_;
35 type = type_;
36}
37
38std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
39 std::size_t index;
40 do {
41 index = CommitResource(fence);
42 } while (usage[index]);
43 usage[index] = true;
44
45 return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)};
46}
47
48void QueryPool::Allocate(std::size_t begin, std::size_t end) {
49 usage.resize(end);
50
51 const auto dev = device->GetLogical();
52 const u32 size = static_cast<u32>(end - begin);
53 const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
54 pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
55}
56
57void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
58 const auto it =
59 std::find_if(std::begin(pools), std::end(pools),
60 [query_pool = query.first](auto& pool) { return query_pool == *pool; });
61 ASSERT(it != std::end(pools));
62
63 const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
64 usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
65}
66
67VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
68 const VKDevice& device, VKScheduler& scheduler)
69 : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
70 QueryPool>{system, rasterizer},
71 device{device}, scheduler{scheduler} {
72 for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
73 query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
74 }
75}
76
77VKQueryCache::~VKQueryCache() = default;
78
79std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
80 return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
81}
82
83void VKQueryCache::Reserve(VideoCore::QueryType type,
84 std::pair<vk::QueryPool, std::uint32_t> query) {
85 query_pools[static_cast<std::size_t>(type)].Reserve(query);
86}
87
88HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
89 VideoCore::QueryType type)
90 : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
91 type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
92 const auto dev = cache.Device().GetLogical();
93 cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) {
94 dev.resetQueryPoolEXT(query.first, query.second, 1, dld);
95 cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld);
96 });
97}
98
99HostCounter::~HostCounter() {
100 cache.Reserve(type, query);
101}
102
103void HostCounter::EndQuery() {
104 cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
105 cmdbuf.endQuery(query.first, query.second, dld);
106 });
107}
108
109u64 HostCounter::BlockingQuery() const {
110 if (ticks >= cache.Scheduler().Ticks()) {
111 cache.Scheduler().Flush();
112 }
113
114 const auto dev = cache.Device().GetLogical();
115 const auto& dld = cache.Device().GetDispatchLoader();
116 u64 value;
117 dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
118 vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
119 return value;
120}
121
122} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
new file mode 100644
index 000000000..c3092ee96
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -0,0 +1,104 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <cstdint>
9#include <memory>
10#include <utility>
11#include <vector>
12
13#include "common/common_types.h"
14#include "video_core/query_cache.h"
15#include "video_core/renderer_vulkan/declarations.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h"
17
18namespace VideoCore {
19class RasterizerInterface;
20}
21
22namespace Vulkan {
23
24class CachedQuery;
25class HostCounter;
26class VKDevice;
27class VKQueryCache;
28class VKScheduler;
29
30using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
31
32class QueryPool final : public VKFencedPool {
33public:
34 explicit QueryPool();
35 ~QueryPool() override;
36
37 void Initialize(const VKDevice& device, VideoCore::QueryType type);
38
39 std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
40
41 void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
42
43protected:
44 void Allocate(std::size_t begin, std::size_t end) override;
45
46private:
47 static constexpr std::size_t GROW_STEP = 512;
48
49 const VKDevice* device = nullptr;
50 VideoCore::QueryType type = {};
51
52 std::vector<UniqueQueryPool> pools;
53 std::vector<bool> usage;
54};
55
56class VKQueryCache final
57 : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
58 QueryPool> {
59public:
60 explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
61 const VKDevice& device, VKScheduler& scheduler);
62 ~VKQueryCache();
63
64 std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
65
66 void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
67
68 const VKDevice& Device() const noexcept {
69 return device;
70 }
71
72 VKScheduler& Scheduler() const noexcept {
73 return scheduler;
74 }
75
76private:
77 const VKDevice& device;
78 VKScheduler& scheduler;
79};
80
81class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
82public:
83 explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
84 VideoCore::QueryType type);
85 ~HostCounter();
86
87 void EndQuery();
88
89private:
90 u64 BlockingQuery() const override;
91
92 VKQueryCache& cache;
93 const VideoCore::QueryType type;
94 const std::pair<vk::QueryPool, std::uint32_t> query;
95 const u64 ticks;
96};
97
98class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
99public:
100 explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
101 : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
102};
103
104} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index bfeaf98ac..31c078f6a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
289 staging_pool), 289 staging_pool),
290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), 290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
292 sampler_cache(device) {} 292 sampler_cache(device), query_cache(system, *this, device, scheduler) {
293 scheduler.SetQueryCache(query_cache);
294}
293 295
294RasterizerVulkan::~RasterizerVulkan() = default; 296RasterizerVulkan::~RasterizerVulkan() = default;
295 297
@@ -298,6 +300,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
298 300
299 FlushWork(); 301 FlushWork();
300 302
303 query_cache.UpdateCounters();
304
301 const auto& gpu = system.GPU().Maxwell3D(); 305 const auto& gpu = system.GPU().Maxwell3D();
302 GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; 306 GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
303 307
@@ -352,6 +356,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
352void RasterizerVulkan::Clear() { 356void RasterizerVulkan::Clear() {
353 MICROPROFILE_SCOPE(Vulkan_Clearing); 357 MICROPROFILE_SCOPE(Vulkan_Clearing);
354 358
359 query_cache.UpdateCounters();
360
355 const auto& gpu = system.GPU().Maxwell3D(); 361 const auto& gpu = system.GPU().Maxwell3D();
356 if (!system.GPU().Maxwell3D().ShouldExecute()) { 362 if (!system.GPU().Maxwell3D().ShouldExecute()) {
357 return; 363 return;
@@ -419,6 +425,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
419 sampled_views.clear(); 425 sampled_views.clear();
420 image_views.clear(); 426 image_views.clear();
421 427
428 query_cache.UpdateCounters();
429
422 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 430 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
423 const ComputePipelineCacheKey key{ 431 const ComputePipelineCacheKey key{
424 code_addr, 432 code_addr,
@@ -461,17 +469,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
461 }); 469 });
462} 470}
463 471
472void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
473 query_cache.ResetCounter(type);
474}
475
476void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
477 std::optional<u64> timestamp) {
478 query_cache.Query(gpu_addr, type, timestamp);
479}
480
464void RasterizerVulkan::FlushAll() {} 481void RasterizerVulkan::FlushAll() {}
465 482
466void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { 483void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
467 texture_cache.FlushRegion(addr, size); 484 texture_cache.FlushRegion(addr, size);
468 buffer_cache.FlushRegion(addr, size); 485 buffer_cache.FlushRegion(addr, size);
486 query_cache.FlushRegion(addr, size);
469} 487}
470 488
471void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { 489void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
472 texture_cache.InvalidateRegion(addr, size); 490 texture_cache.InvalidateRegion(addr, size);
473 pipeline_cache.InvalidateRegion(addr, size); 491 pipeline_cache.InvalidateRegion(addr, size);
474 buffer_cache.InvalidateRegion(addr, size); 492 buffer_cache.InvalidateRegion(addr, size);
493 query_cache.InvalidateRegion(addr, size);
475} 494}
476 495
477void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 496void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ff74de164..138903d60 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -24,6 +24,7 @@
24#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 24#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
25#include "video_core/renderer_vulkan/vk_memory_manager.h" 25#include "video_core/renderer_vulkan/vk_memory_manager.h"
26#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 26#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
27#include "video_core/renderer_vulkan/vk_query_cache.h"
27#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 28#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
28#include "video_core/renderer_vulkan/vk_resource_manager.h" 29#include "video_core/renderer_vulkan/vk_resource_manager.h"
29#include "video_core/renderer_vulkan/vk_sampler_cache.h" 30#include "video_core/renderer_vulkan/vk_sampler_cache.h"
@@ -96,7 +97,7 @@ struct ImageView {
96 vk::ImageLayout* layout = nullptr; 97 vk::ImageLayout* layout = nullptr;
97}; 98};
98 99
99class RasterizerVulkan : public VideoCore::RasterizerAccelerated { 100class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
100public: 101public:
101 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, 102 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
102 VKScreenInfo& screen_info, const VKDevice& device, 103 VKScreenInfo& screen_info, const VKDevice& device,
@@ -107,6 +108,8 @@ public:
107 void Draw(bool is_indexed, bool is_instanced) override; 108 void Draw(bool is_indexed, bool is_instanced) override;
108 void Clear() override; 109 void Clear() override;
109 void DispatchCompute(GPUVAddr code_addr) override; 110 void DispatchCompute(GPUVAddr code_addr) override;
111 void ResetCounter(VideoCore::QueryType type) override;
112 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
110 void FlushAll() override; 113 void FlushAll() override;
111 void FlushRegion(CacheAddr addr, u64 size) override; 114 void FlushRegion(CacheAddr addr, u64 size) override;
112 void InvalidateRegion(CacheAddr addr, u64 size) override; 115 void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -244,6 +247,7 @@ private:
244 VKPipelineCache pipeline_cache; 247 VKPipelineCache pipeline_cache;
245 VKBufferCache buffer_cache; 248 VKBufferCache buffer_cache;
246 VKSamplerCache sampler_cache; 249 VKSamplerCache sampler_cache;
250 VKQueryCache query_cache;
247 251
248 std::array<View, Maxwell::NumRenderTargets> color_attachments; 252 std::array<View, Maxwell::NumRenderTargets> color_attachments;
249 View zeta_attachment; 253 View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index d66133ad1..92bd6c344 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "video_core/renderer_vulkan/declarations.h" 7#include "video_core/renderer_vulkan/declarations.h"
8#include "video_core/renderer_vulkan/vk_device.h" 8#include "video_core/renderer_vulkan/vk_device.h"
9#include "video_core/renderer_vulkan/vk_query_cache.h"
9#include "video_core/renderer_vulkan/vk_resource_manager.h" 10#include "video_core/renderer_vulkan/vk_resource_manager.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h" 11#include "video_core/renderer_vulkan/vk_scheduler.h"
11 12
@@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
139} 140}
140 141
141void VKScheduler::AllocateNewContext() { 142void VKScheduler::AllocateNewContext() {
143 ++ticks;
144
142 std::unique_lock lock{mutex}; 145 std::unique_lock lock{mutex};
143 current_fence = next_fence; 146 current_fence = next_fence;
144 next_fence = &resource_manager.CommitFence(); 147 next_fence = &resource_manager.CommitFence();
@@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
146 current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); 149 current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
147 current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, 150 current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
148 device.GetDispatchLoader()); 151 device.GetDispatchLoader());
152 // Enable counters once again. These are disabled when a command buffer is finished.
153 if (query_cache) {
154 query_cache->UpdateCounters();
155 }
149} 156}
150 157
151void VKScheduler::InvalidateState() { 158void VKScheduler::InvalidateState() {
@@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
159} 166}
160 167
161void VKScheduler::EndPendingOperations() { 168void VKScheduler::EndPendingOperations() {
169 query_cache->DisableStreams();
162 EndRenderPass(); 170 EndRenderPass();
163} 171}
164 172
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index bcdffbba0..62fd7858b 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <condition_variable> 8#include <condition_variable>
8#include <memory> 9#include <memory>
9#include <optional> 10#include <optional>
@@ -18,6 +19,7 @@ namespace Vulkan {
18 19
19class VKDevice; 20class VKDevice;
20class VKFence; 21class VKFence;
22class VKQueryCache;
21class VKResourceManager; 23class VKResourceManager;
22 24
23class VKFenceView { 25class VKFenceView {
@@ -67,6 +69,11 @@ public:
67 /// Binds a pipeline to the current execution context. 69 /// Binds a pipeline to the current execution context.
68 void BindGraphicsPipeline(vk::Pipeline pipeline); 70 void BindGraphicsPipeline(vk::Pipeline pipeline);
69 71
72 /// Assigns the query cache.
73 void SetQueryCache(VKQueryCache& query_cache_) {
74 query_cache = &query_cache_;
75 }
76
70 /// Returns true when viewports have been set in the current command buffer. 77 /// Returns true when viewports have been set in the current command buffer.
71 bool TouchViewports() { 78 bool TouchViewports() {
72 return std::exchange(state.viewports, true); 79 return std::exchange(state.viewports, true);
@@ -112,6 +119,11 @@ public:
112 return current_fence; 119 return current_fence;
113 } 120 }
114 121
122 /// Returns the current command buffer tick.
123 u64 Ticks() const {
124 return ticks;
125 }
126
115private: 127private:
116 class Command { 128 class Command {
117 public: 129 public:
@@ -205,6 +217,8 @@ private:
205 217
206 const VKDevice& device; 218 const VKDevice& device;
207 VKResourceManager& resource_manager; 219 VKResourceManager& resource_manager;
220 VKQueryCache* query_cache = nullptr;
221
208 vk::CommandBuffer current_cmdbuf; 222 vk::CommandBuffer current_cmdbuf;
209 VKFence* current_fence = nullptr; 223 VKFence* current_fence = nullptr;
210 VKFence* next_fence = nullptr; 224 VKFence* next_fence = nullptr;
@@ -227,6 +241,7 @@ private:
227 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; 241 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
228 std::mutex mutex; 242 std::mutex mutex;
229 std::condition_variable cv; 243 std::condition_variable cv;
244 std::atomic<u64> ticks = 0;
230 bool quit = false; 245 bool quit = false;
231}; 246};
232 247
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 24a658dce..f64f5da28 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -275,12 +275,14 @@ public:
275 AddCapability(spv::Capability::ImageGatherExtended); 275 AddCapability(spv::Capability::ImageGatherExtended);
276 AddCapability(spv::Capability::SampledBuffer); 276 AddCapability(spv::Capability::SampledBuffer);
277 AddCapability(spv::Capability::StorageImageWriteWithoutFormat); 277 AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
278 AddCapability(spv::Capability::DrawParameters);
278 AddCapability(spv::Capability::SubgroupBallotKHR); 279 AddCapability(spv::Capability::SubgroupBallotKHR);
279 AddCapability(spv::Capability::SubgroupVoteKHR); 280 AddCapability(spv::Capability::SubgroupVoteKHR);
280 AddExtension("SPV_KHR_shader_ballot"); 281 AddExtension("SPV_KHR_shader_ballot");
281 AddExtension("SPV_KHR_subgroup_vote"); 282 AddExtension("SPV_KHR_subgroup_vote");
282 AddExtension("SPV_KHR_storage_buffer_storage_class"); 283 AddExtension("SPV_KHR_storage_buffer_storage_class");
283 AddExtension("SPV_KHR_variable_pointers"); 284 AddExtension("SPV_KHR_variable_pointers");
285 AddExtension("SPV_KHR_shader_draw_parameters");
284 286
285 if (ir.UsesViewportIndex()) { 287 if (ir.UsesViewportIndex()) {
286 AddCapability(spv::Capability::MultiViewport); 288 AddCapability(spv::Capability::MultiViewport);
@@ -492,9 +494,11 @@ private:
492 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); 494 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
493 495
494 // Declare input attributes 496 // Declare input attributes
495 vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_uint, "vertex_index"); 497 vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index");
496 instance_index = 498 instance_index =
497 DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_uint, "instance_index"); 499 DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index");
500 base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex");
501 base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance");
498 } 502 }
499 503
500 void DeclareTessControl() { 504 void DeclareTessControl() {
@@ -1068,9 +1072,12 @@ private:
1068 return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), 1072 return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)),
1069 Type::Float}; 1073 Type::Float};
1070 case 2: 1074 case 2:
1071 return {OpLoad(t_uint, instance_index), Type::Uint}; 1075 return {
1076 OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)),
1077 Type::Int};
1072 case 3: 1078 case 3:
1073 return {OpLoad(t_uint, vertex_index), Type::Uint}; 1079 return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)),
1080 Type::Int};
1074 } 1081 }
1075 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); 1082 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1076 return {Constant(t_uint, 0U), Type::Uint}; 1083 return {Constant(t_uint, 0U), Type::Uint};
@@ -2542,6 +2549,8 @@ private:
2542 2549
2543 Id instance_index{}; 2550 Id instance_index{};
2544 Id vertex_index{}; 2551 Id vertex_index{};
2552 Id base_instance{};
2553 Id base_vertex{};
2545 std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; 2554 std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
2546 Id frag_depth{}; 2555 Id frag_depth{};
2547 Id frag_coord{}; 2556 Id frag_coord{};
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 0eeb75559..6ead42070 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -83,14 +83,14 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
83 83
84 const bool input_signed = instr.conversion.is_input_signed; 84 const bool input_signed = instr.conversion.is_input_signed;
85 85
86 if (instr.conversion.src_size == Register::Size::Byte) { 86 if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
87 const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; 87 ASSERT(instr.conversion.src_size == Register::Size::Byte ||
88 if (offset > 0) { 88 instr.conversion.src_size == Register::Size::Short);
89 value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, 89 if (instr.conversion.src_size == Register::Size::Short) {
90 std::move(value), Immediate(offset)); 90 ASSERT(offset == 0 || offset == 2);
91 } 91 }
92 } else { 92 value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
93 UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); 93 std::move(value), Immediate(offset * 8));
94 } 94 }
95 95
96 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); 96 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index cd94693c1..6209fff75 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -630,6 +630,7 @@ void Config::ReadRendererValues() {
630 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); 630 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
631 Settings::values.resolution_factor = 631 Settings::values.resolution_factor =
632 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); 632 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
633 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
633 Settings::values.use_frame_limit = 634 Settings::values.use_frame_limit =
634 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); 635 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
635 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); 636 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
@@ -1064,6 +1065,7 @@ void Config::SaveRendererValues() {
1064 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); 1065 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
1065 WriteSetting(QStringLiteral("resolution_factor"), 1066 WriteSetting(QStringLiteral("resolution_factor"),
1066 static_cast<double>(Settings::values.resolution_factor), 1.0); 1067 static_cast<double>(Settings::values.resolution_factor), 1.0);
1068 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
1067 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); 1069 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
1068 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); 1070 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
1069 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, 1071 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index f57a24e36..ea899c080 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -97,6 +97,7 @@ void ConfigureGraphics::SetConfiguration() {
97 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); 97 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
98 ui->resolution_factor_combobox->setCurrentIndex( 98 ui->resolution_factor_combobox->setCurrentIndex(
99 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); 99 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
100 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
100 ui->use_disk_shader_cache->setEnabled(runtime_lock); 101 ui->use_disk_shader_cache->setEnabled(runtime_lock);
101 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 102 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
102 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 103 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
@@ -114,6 +115,7 @@ void ConfigureGraphics::ApplyConfiguration() {
114 Settings::values.vulkan_device = vulkan_device; 115 Settings::values.vulkan_device = vulkan_device;
115 Settings::values.resolution_factor = 116 Settings::values.resolution_factor =
116 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 117 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
118 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
117 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 119 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
118 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 120 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
119 Settings::values.use_asynchronous_gpu_emulation = 121 Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index e24372204..db60426ab 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -139,6 +139,41 @@
139 </layout> 139 </layout>
140 </item> 140 </item>
141 <item> 141 <item>
142 <layout class="QHBoxLayout" name="horizontalLayout_6">
143 <item>
144 <widget class="QLabel" name="ar_label">
145 <property name="text">
146 <string>Aspect Ratio:</string>
147 </property>
148 </widget>
149 </item>
150 <item>
151 <widget class="QComboBox" name="aspect_ratio_combobox">
152 <item>
153 <property name="text">
154 <string>Default (16:9)</string>
155 </property>
156 </item>
157 <item>
158 <property name="text">
159 <string>Force 4:3</string>
160 </property>
161 </item>
162 <item>
163 <property name="text">
164 <string>Force 21:9</string>
165 </property>
166 </item>
167 <item>
168 <property name="text">
169 <string>Stretch to Window</string>
170 </property>
171 </item>
172 </widget>
173 </item>
174 </layout>
175 </item>
176 <item>
142 <layout class="QHBoxLayout" name="horizontalLayout_3"> 177 <layout class="QHBoxLayout" name="horizontalLayout_3">
143 <item> 178 <item>
144 <widget class="QLabel" name="bg_label"> 179 <widget class="QLabel" name="bg_label">
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index b01a36023..96f1ce3af 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -379,6 +379,8 @@ void Config::ReadValues() {
379 379
380 Settings::values.resolution_factor = 380 Settings::values.resolution_factor =
381 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); 381 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
382 Settings::values.aspect_ratio =
383 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
382 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 384 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
383 Settings::values.frame_limit = 385 Settings::values.frame_limit =
384 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 386 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 00fd88279..8a2b658cd 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -122,6 +122,10 @@ use_shader_jit =
122# factor for the Switch resolution 122# factor for the Switch resolution
123resolution_factor = 123resolution_factor =
124 124
125# Aspect ratio
126# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
127aspect_ratio =
128
125# Whether to enable V-Sync (caps the framerate at 60FPS) or not. 129# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
126# 0 (default): Off, 1: On 130# 0 (default): Off, 1: On
127use_vsync = 131use_vsync =
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 84ab4d687..0ac93b62a 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -118,6 +118,8 @@ void Config::ReadValues() {
118 // Renderer 118 // Renderer
119 Settings::values.resolution_factor = 119 Settings::values.resolution_factor =
120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); 120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
121 Settings::values.aspect_ratio =
122 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
121 Settings::values.use_frame_limit = false; 123 Settings::values.use_frame_limit = false;
122 Settings::values.frame_limit = 100; 124 Settings::values.frame_limit = 100;
123 Settings::values.use_disk_shader_cache = 125 Settings::values.use_disk_shader_cache =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 9a3e86d68..8d93f7b88 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -26,6 +26,10 @@ use_shader_jit =
26# factor for the Switch resolution 26# factor for the Switch resolution
27resolution_factor = 27resolution_factor =
28 28
29# Aspect ratio
30# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
31aspect_ratio =
32
29# Whether to enable V-Sync (caps the framerate at 60FPS) or not. 33# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
30# 0 (default): Off, 1: On 34# 0 (default): Off, 1: On
31use_vsync = 35use_vsync =