diff options
Diffstat (limited to 'src')
36 files changed, 1219 insertions, 83 deletions
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index d6d2cf3f0..2dc795d56 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp | |||
| @@ -27,9 +27,9 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { | |||
| 27 | // so just calculate them both even if the other isn't showing. | 27 | // so just calculate them both even if the other isn't showing. |
| 28 | FramebufferLayout res{width, height}; | 28 | FramebufferLayout res{width, height}; |
| 29 | 29 | ||
| 30 | const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / | 30 | const float window_aspect_ratio = static_cast<float>(height) / width; |
| 31 | ScreenUndocked::Width}; | 31 | const float emulation_aspect_ratio = EmulationAspectRatio( |
| 32 | const auto window_aspect_ratio = static_cast<float>(height) / width; | 32 | static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio); |
| 33 | 33 | ||
| 34 | const Common::Rectangle<u32> screen_window_area{0, 0, width, height}; | 34 | const Common::Rectangle<u32> screen_window_area{0, 0, width, height}; |
| 35 | Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); | 35 | Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); |
| @@ -58,4 +58,19 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { | |||
| 58 | return DefaultFrameLayout(width, height); | 58 | return DefaultFrameLayout(width, height); |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) { | ||
| 62 | switch (aspect) { | ||
| 63 | case AspectRatio::Default: | ||
| 64 | return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; | ||
| 65 | case AspectRatio::R4_3: | ||
| 66 | return 3.0f / 4.0f; | ||
| 67 | case AspectRatio::R21_9: | ||
| 68 | return 9.0f / 21.0f; | ||
| 69 | case AspectRatio::StretchToWindow: | ||
| 70 | return window_aspect_ratio; | ||
| 71 | default: | ||
| 72 | return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 61 | } // namespace Layout | 76 | } // namespace Layout |
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index d2370adde..1d39c1faf 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h | |||
| @@ -18,6 +18,13 @@ enum ScreenDocked : u32 { | |||
| 18 | HeightDocked = 1080, | 18 | HeightDocked = 1080, |
| 19 | }; | 19 | }; |
| 20 | 20 | ||
| 21 | enum class AspectRatio { | ||
| 22 | Default, | ||
| 23 | R4_3, | ||
| 24 | R21_9, | ||
| 25 | StretchToWindow, | ||
| 26 | }; | ||
| 27 | |||
| 21 | /// Describes the layout of the window framebuffer | 28 | /// Describes the layout of the window framebuffer |
| 22 | struct FramebufferLayout { | 29 | struct FramebufferLayout { |
| 23 | u32 width{ScreenUndocked::Width}; | 30 | u32 width{ScreenUndocked::Width}; |
| @@ -48,4 +55,12 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height); | |||
| 48 | */ | 55 | */ |
| 49 | FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); | 56 | FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); |
| 50 | 57 | ||
| 58 | /** | ||
| 59 | * Convenience method to determine emulation aspect ratio | ||
| 60 | * @param aspect Represents the index of aspect ratio stored in Settings::values.aspect_ratio | ||
| 61 | * @param window_aspect_ratio Current window aspect ratio | ||
| 62 | * @return Emulation render window aspect ratio | ||
| 63 | */ | ||
| 64 | float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio); | ||
| 65 | |||
| 51 | } // namespace Layout | 66 | } // namespace Layout |
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index ed5059047..92adde6d4 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp | |||
| @@ -129,12 +129,20 @@ public: | |||
| 129 | {304, nullptr, "Disconnect"}, | 129 | {304, nullptr, "Disconnect"}, |
| 130 | {400, nullptr, "Initialize"}, | 130 | {400, nullptr, "Initialize"}, |
| 131 | {401, nullptr, "Finalize"}, | 131 | {401, nullptr, "Finalize"}, |
| 132 | {402, nullptr, "SetOperationMode"}, | 132 | {402, &IUserLocalCommunicationService::Initialize2, "Initialize2"}, // 7.0.0+ |
| 133 | }; | 133 | }; |
| 134 | // clang-format on | 134 | // clang-format on |
| 135 | 135 | ||
| 136 | RegisterHandlers(functions); | 136 | RegisterHandlers(functions); |
| 137 | } | 137 | } |
| 138 | |||
| 139 | void Initialize2(Kernel::HLERequestContext& ctx) { | ||
| 140 | LOG_WARNING(Service_LDN, "(STUBBED) called"); | ||
| 141 | // Returning a success result seems to make this service start the network and continue. | ||
| 142 | // If we just return an error result, it will stop and may retry again and again. | ||
| 143 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 144 | rb.Push(RESULT_UNKNOWN); | ||
| 145 | } | ||
| 138 | }; | 146 | }; |
| 139 | 147 | ||
| 140 | class LDNS final : public ServiceFramework<LDNS> { | 148 | class LDNS final : public ServiceFramework<LDNS> { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 6d8bca8bb..f1966ac0e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -44,6 +44,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve | |||
| 44 | return GetWaitbase(input, output); | 44 | return GetWaitbase(input, output); |
| 45 | case IoctlCommand::IocChannelSetTimeoutCommand: | 45 | case IoctlCommand::IocChannelSetTimeoutCommand: |
| 46 | return ChannelSetTimeout(input, output); | 46 | return ChannelSetTimeout(input, output); |
| 47 | case IoctlCommand::IocChannelSetTimeslice: | ||
| 48 | return ChannelSetTimeslice(input, output); | ||
| 47 | default: | 49 | default: |
| 48 | break; | 50 | break; |
| 49 | } | 51 | } |
| @@ -228,4 +230,14 @@ u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& | |||
| 228 | return 0; | 230 | return 0; |
| 229 | } | 231 | } |
| 230 | 232 | ||
| 233 | u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 234 | IoctlSetTimeslice params{}; | ||
| 235 | std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice)); | ||
| 236 | LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); | ||
| 237 | |||
| 238 | channel_timeslice = params.timeslice; | ||
| 239 | |||
| 240 | return 0; | ||
| 241 | } | ||
| 242 | |||
| 231 | } // namespace Service::Nvidia::Devices | 243 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index d056dd046..2ac74743f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -48,6 +48,7 @@ private: | |||
| 48 | IocAllocObjCtxCommand = 0xC0104809, | 48 | IocAllocObjCtxCommand = 0xC0104809, |
| 49 | IocChannelGetWaitbaseCommand = 0xC0080003, | 49 | IocChannelGetWaitbaseCommand = 0xC0080003, |
| 50 | IocChannelSetTimeoutCommand = 0x40044803, | 50 | IocChannelSetTimeoutCommand = 0x40044803, |
| 51 | IocChannelSetTimeslice = 0xC004481D, | ||
| 51 | }; | 52 | }; |
| 52 | 53 | ||
| 53 | enum class CtxObjects : u32_le { | 54 | enum class CtxObjects : u32_le { |
| @@ -101,6 +102,11 @@ private: | |||
| 101 | static_assert(sizeof(IoctlChannelSetPriority) == 4, | 102 | static_assert(sizeof(IoctlChannelSetPriority) == 4, |
| 102 | "IoctlChannelSetPriority is incorrect size"); | 103 | "IoctlChannelSetPriority is incorrect size"); |
| 103 | 104 | ||
| 105 | struct IoctlSetTimeslice { | ||
| 106 | u32_le timeslice; | ||
| 107 | }; | ||
| 108 | static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size"); | ||
| 109 | |||
| 104 | struct IoctlEventIdControl { | 110 | struct IoctlEventIdControl { |
| 105 | u32_le cmd; // 0=disable, 1=enable, 2=clear | 111 | u32_le cmd; // 0=disable, 1=enable, 2=clear |
| 106 | u32_le id; | 112 | u32_le id; |
| @@ -174,6 +180,7 @@ private: | |||
| 174 | u64_le user_data{}; | 180 | u64_le user_data{}; |
| 175 | IoctlZCullBind zcull_params{}; | 181 | IoctlZCullBind zcull_params{}; |
| 176 | u32_le channel_priority{}; | 182 | u32_le channel_priority{}; |
| 183 | u32_le channel_timeslice{}; | ||
| 177 | 184 | ||
| 178 | u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); | 185 | u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); |
| 179 | u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output); | 186 | u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output); |
| @@ -188,6 +195,7 @@ private: | |||
| 188 | const std::vector<u8>& input2, IoctlVersion version); | 195 | const std::vector<u8>& input2, IoctlVersion version); |
| 189 | u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); | 196 | u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); |
| 190 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); | 197 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); |
| 198 | u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); | ||
| 191 | 199 | ||
| 192 | std::shared_ptr<nvmap> nvmap_dev; | 200 | std::shared_ptr<nvmap> nvmap_dev; |
| 193 | u32 assigned_syncpoints{}; | 201 | u32 assigned_syncpoints{}; |
diff --git a/src/core/settings.h b/src/core/settings.h index e1a9a0ffa..f837d3fbc 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -429,6 +429,7 @@ struct Values { | |||
| 429 | int vulkan_device; | 429 | int vulkan_device; |
| 430 | 430 | ||
| 431 | float resolution_factor; | 431 | float resolution_factor; |
| 432 | int aspect_ratio; | ||
| 432 | bool use_frame_limit; | 433 | bool use_frame_limit; |
| 433 | u16 frame_limit; | 434 | u16 frame_limit; |
| 434 | bool use_disk_shader_cache; | 435 | bool use_disk_shader_cache; |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index db9332d00..4b0c6346f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -37,6 +37,7 @@ add_library(video_core STATIC | |||
| 37 | memory_manager.h | 37 | memory_manager.h |
| 38 | morton.cpp | 38 | morton.cpp |
| 39 | morton.h | 39 | morton.h |
| 40 | query_cache.h | ||
| 40 | rasterizer_accelerated.cpp | 41 | rasterizer_accelerated.cpp |
| 41 | rasterizer_accelerated.h | 42 | rasterizer_accelerated.h |
| 42 | rasterizer_cache.cpp | 43 | rasterizer_cache.cpp |
| @@ -74,6 +75,8 @@ add_library(video_core STATIC | |||
| 74 | renderer_opengl/gl_stream_buffer.h | 75 | renderer_opengl/gl_stream_buffer.h |
| 75 | renderer_opengl/gl_texture_cache.cpp | 76 | renderer_opengl/gl_texture_cache.cpp |
| 76 | renderer_opengl/gl_texture_cache.h | 77 | renderer_opengl/gl_texture_cache.h |
| 78 | renderer_opengl/gl_query_cache.cpp | ||
| 79 | renderer_opengl/gl_query_cache.h | ||
| 77 | renderer_opengl/maxwell_to_gl.h | 80 | renderer_opengl/maxwell_to_gl.h |
| 78 | renderer_opengl/renderer_opengl.cpp | 81 | renderer_opengl/renderer_opengl.cpp |
| 79 | renderer_opengl/renderer_opengl.h | 82 | renderer_opengl/renderer_opengl.h |
| @@ -177,6 +180,8 @@ if (ENABLE_VULKAN) | |||
| 177 | renderer_vulkan/vk_memory_manager.h | 180 | renderer_vulkan/vk_memory_manager.h |
| 178 | renderer_vulkan/vk_pipeline_cache.cpp | 181 | renderer_vulkan/vk_pipeline_cache.cpp |
| 179 | renderer_vulkan/vk_pipeline_cache.h | 182 | renderer_vulkan/vk_pipeline_cache.h |
| 183 | renderer_vulkan/vk_query_cache.cpp | ||
| 184 | renderer_vulkan/vk_query_cache.h | ||
| 180 | renderer_vulkan/vk_rasterizer.cpp | 185 | renderer_vulkan/vk_rasterizer.cpp |
| 181 | renderer_vulkan/vk_rasterizer.h | 186 | renderer_vulkan/vk_rasterizer.h |
| 182 | renderer_vulkan/vk_renderpass_cache.cpp | 187 | renderer_vulkan/vk_renderpass_cache.cpp |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5a74d1c2a..b28de1092 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <cinttypes> | 5 | #include <cinttypes> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <optional> | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "core/core.h" | 9 | #include "core/core.h" |
| 9 | #include "core/core_timing.h" | 10 | #include "core/core_timing.h" |
| @@ -16,6 +17,8 @@ | |||
| 16 | 17 | ||
| 17 | namespace Tegra::Engines { | 18 | namespace Tegra::Engines { |
| 18 | 19 | ||
| 20 | using VideoCore::QueryType; | ||
| 21 | |||
| 19 | /// First register id that is actually a Macro call. | 22 | /// First register id that is actually a Macro call. |
| 20 | constexpr u32 MacroRegistersStart = 0xE00; | 23 | constexpr u32 MacroRegistersStart = 0xE00; |
| 21 | 24 | ||
| @@ -400,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 400 | ProcessQueryCondition(); | 403 | ProcessQueryCondition(); |
| 401 | break; | 404 | break; |
| 402 | } | 405 | } |
| 406 | case MAXWELL3D_REG_INDEX(counter_reset): { | ||
| 407 | ProcessCounterReset(); | ||
| 408 | break; | ||
| 409 | } | ||
| 403 | case MAXWELL3D_REG_INDEX(sync_info): { | 410 | case MAXWELL3D_REG_INDEX(sync_info): { |
| 404 | ProcessSyncPoint(); | 411 | ProcessSyncPoint(); |
| 405 | break; | 412 | break; |
| @@ -544,40 +551,28 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 544 | "Units other than CROP are unimplemented"); | 551 | "Units other than CROP are unimplemented"); |
| 545 | 552 | ||
| 546 | switch (regs.query.query_get.operation) { | 553 | switch (regs.query.query_get.operation) { |
| 547 | case Regs::QueryOperation::Release: { | 554 | case Regs::QueryOperation::Release: |
| 548 | const u64 result = regs.query.query_sequence; | 555 | StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); |
| 549 | StampQueryResult(result, regs.query.query_get.short_query == 0); | ||
| 550 | break; | 556 | break; |
| 551 | } | 557 | case Regs::QueryOperation::Acquire: |
| 552 | case Regs::QueryOperation::Acquire: { | 558 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that |
| 553 | // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU | 559 | // matches the current payload. |
| 554 | // to write a value that matches the current payload. | ||
| 555 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); | 560 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); |
| 556 | break; | 561 | break; |
| 557 | } | 562 | case Regs::QueryOperation::Counter: |
| 558 | case Regs::QueryOperation::Counter: { | 563 | if (const std::optional<u64> result = GetQueryResult()) { |
| 559 | u64 result{}; | 564 | // If the query returns an empty optional it means it's cached and deferred. |
| 560 | switch (regs.query.query_get.select) { | 565 | // In this case we have a non-empty result, so we stamp it immediately. |
| 561 | case Regs::QuerySelect::Zero: | 566 | StampQueryResult(*result, regs.query.query_get.short_query == 0); |
| 562 | result = 0; | ||
| 563 | break; | ||
| 564 | default: | ||
| 565 | result = 1; | ||
| 566 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | ||
| 567 | static_cast<u32>(regs.query.query_get.select.Value())); | ||
| 568 | } | 567 | } |
| 569 | StampQueryResult(result, regs.query.query_get.short_query == 0); | ||
| 570 | break; | 568 | break; |
| 571 | } | 569 | case Regs::QueryOperation::Trap: |
| 572 | case Regs::QueryOperation::Trap: { | ||
| 573 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); | 570 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); |
| 574 | break; | 571 | break; |
| 575 | } | 572 | default: |
| 576 | default: { | ||
| 577 | UNIMPLEMENTED_MSG("Unknown query operation"); | 573 | UNIMPLEMENTED_MSG("Unknown query operation"); |
| 578 | break; | 574 | break; |
| 579 | } | 575 | } |
| 580 | } | ||
| 581 | } | 576 | } |
| 582 | 577 | ||
| 583 | void Maxwell3D::ProcessQueryCondition() { | 578 | void Maxwell3D::ProcessQueryCondition() { |
| @@ -593,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 593 | } | 588 | } |
| 594 | case Regs::ConditionMode::ResNonZero: { | 589 | case Regs::ConditionMode::ResNonZero: { |
| 595 | Regs::QueryCompare cmp; | 590 | Regs::QueryCompare cmp; |
| 596 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | 591 | memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); |
| 597 | execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; | 592 | execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U; |
| 598 | break; | 593 | break; |
| 599 | } | 594 | } |
| 600 | case Regs::ConditionMode::Equal: { | 595 | case Regs::ConditionMode::Equal: { |
| 601 | Regs::QueryCompare cmp; | 596 | Regs::QueryCompare cmp; |
| 602 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | 597 | memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); |
| 603 | execute_on = | 598 | execute_on = |
| 604 | cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; | 599 | cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode; |
| 605 | break; | 600 | break; |
| 606 | } | 601 | } |
| 607 | case Regs::ConditionMode::NotEqual: { | 602 | case Regs::ConditionMode::NotEqual: { |
| 608 | Regs::QueryCompare cmp; | 603 | Regs::QueryCompare cmp; |
| 609 | memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp)); | 604 | memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp)); |
| 610 | execute_on = | 605 | execute_on = |
| 611 | cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; | 606 | cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode; |
| 612 | break; | 607 | break; |
| @@ -619,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 619 | } | 614 | } |
| 620 | } | 615 | } |
| 621 | 616 | ||
| 617 | void Maxwell3D::ProcessCounterReset() { | ||
| 618 | switch (regs.counter_reset) { | ||
| 619 | case Regs::CounterReset::SampleCnt: | ||
| 620 | rasterizer.ResetCounter(QueryType::SamplesPassed); | ||
| 621 | break; | ||
| 622 | default: | ||
| 623 | LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", | ||
| 624 | static_cast<int>(regs.counter_reset)); | ||
| 625 | break; | ||
| 626 | } | ||
| 627 | } | ||
| 628 | |||
| 622 | void Maxwell3D::ProcessSyncPoint() { | 629 | void Maxwell3D::ProcessSyncPoint() { |
| 623 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 630 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 624 | const u32 increment = regs.sync_info.increment.Value(); | 631 | const u32 increment = regs.sync_info.increment.Value(); |
| @@ -661,6 +668,22 @@ void Maxwell3D::DrawArrays() { | |||
| 661 | } | 668 | } |
| 662 | } | 669 | } |
| 663 | 670 | ||
| 671 | std::optional<u64> Maxwell3D::GetQueryResult() { | ||
| 672 | switch (regs.query.query_get.select) { | ||
| 673 | case Regs::QuerySelect::Zero: | ||
| 674 | return 0; | ||
| 675 | case Regs::QuerySelect::SamplesPassed: | ||
| 676 | // Deferred. | ||
| 677 | rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, | ||
| 678 | system.GPU().GetTicks()); | ||
| 679 | return {}; | ||
| 680 | default: | ||
| 681 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | ||
| 682 | static_cast<u32>(regs.query.query_get.select.Value())); | ||
| 683 | return 1; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | |||
| 664 | void Maxwell3D::ProcessCBBind(std::size_t stage_index) { | 687 | void Maxwell3D::ProcessCBBind(std::size_t stage_index) { |
| 665 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. | 688 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. |
| 666 | auto& shader = state.shader_stages[stage_index]; | 689 | auto& shader = state.shader_stages[stage_index]; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a2af54e5..26939be3f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bitset> | 8 | #include <bitset> |
| 9 | #include <optional> | ||
| 9 | #include <type_traits> | 10 | #include <type_traits> |
| 10 | #include <unordered_map> | 11 | #include <unordered_map> |
| 11 | #include <vector> | 12 | #include <vector> |
| @@ -409,6 +410,27 @@ public: | |||
| 409 | Linear = 1, | 410 | Linear = 1, |
| 410 | }; | 411 | }; |
| 411 | 412 | ||
| 413 | enum class CounterReset : u32 { | ||
| 414 | SampleCnt = 0x01, | ||
| 415 | Unk02 = 0x02, | ||
| 416 | Unk03 = 0x03, | ||
| 417 | Unk04 = 0x04, | ||
| 418 | EmittedPrimitives = 0x10, // Not tested | ||
| 419 | Unk11 = 0x11, | ||
| 420 | Unk12 = 0x12, | ||
| 421 | Unk13 = 0x13, | ||
| 422 | Unk15 = 0x15, | ||
| 423 | Unk16 = 0x16, | ||
| 424 | Unk17 = 0x17, | ||
| 425 | Unk18 = 0x18, | ||
| 426 | Unk1A = 0x1A, | ||
| 427 | Unk1B = 0x1B, | ||
| 428 | Unk1C = 0x1C, | ||
| 429 | Unk1D = 0x1D, | ||
| 430 | Unk1E = 0x1E, | ||
| 431 | GeneratedPrimitives = 0x1F, | ||
| 432 | }; | ||
| 433 | |||
| 412 | struct Cull { | 434 | struct Cull { |
| 413 | enum class FrontFace : u32 { | 435 | enum class FrontFace : u32 { |
| 414 | ClockWise = 0x0900, | 436 | ClockWise = 0x0900, |
| @@ -857,7 +879,7 @@ public: | |||
| 857 | BitField<7, 1, u32> c7; | 879 | BitField<7, 1, u32> c7; |
| 858 | } clip_distance_enabled; | 880 | } clip_distance_enabled; |
| 859 | 881 | ||
| 860 | INSERT_UNION_PADDING_WORDS(0x1); | 882 | u32 samplecnt_enable; |
| 861 | 883 | ||
| 862 | float point_size; | 884 | float point_size; |
| 863 | 885 | ||
| @@ -865,7 +887,11 @@ public: | |||
| 865 | 887 | ||
| 866 | u32 point_sprite_enable; | 888 | u32 point_sprite_enable; |
| 867 | 889 | ||
| 868 | INSERT_UNION_PADDING_WORDS(0x5); | 890 | INSERT_UNION_PADDING_WORDS(0x3); |
| 891 | |||
| 892 | CounterReset counter_reset; | ||
| 893 | |||
| 894 | INSERT_UNION_PADDING_WORDS(0x1); | ||
| 869 | 895 | ||
| 870 | u32 zeta_enable; | 896 | u32 zeta_enable; |
| 871 | 897 | ||
| @@ -1412,12 +1438,15 @@ private: | |||
| 1412 | /// Handles a write to the QUERY_GET register. | 1438 | /// Handles a write to the QUERY_GET register. |
| 1413 | void ProcessQueryGet(); | 1439 | void ProcessQueryGet(); |
| 1414 | 1440 | ||
| 1415 | // Writes the query result accordingly | 1441 | /// Writes the query result accordingly. |
| 1416 | void StampQueryResult(u64 payload, bool long_query); | 1442 | void StampQueryResult(u64 payload, bool long_query); |
| 1417 | 1443 | ||
| 1418 | // Handles Conditional Rendering | 1444 | /// Handles conditional rendering. |
| 1419 | void ProcessQueryCondition(); | 1445 | void ProcessQueryCondition(); |
| 1420 | 1446 | ||
| 1447 | /// Handles counter resets. | ||
| 1448 | void ProcessCounterReset(); | ||
| 1449 | |||
| 1421 | /// Handles writes to syncing register. | 1450 | /// Handles writes to syncing register. |
| 1422 | void ProcessSyncPoint(); | 1451 | void ProcessSyncPoint(); |
| 1423 | 1452 | ||
| @@ -1434,6 +1463,9 @@ private: | |||
| 1434 | 1463 | ||
| 1435 | // Handles an instance drawcall from MME | 1464 | // Handles an instance drawcall from MME |
| 1436 | void StepInstance(MMEDrawMode expected_mode, u32 count); | 1465 | void StepInstance(MMEDrawMode expected_mode, u32 count); |
| 1466 | |||
| 1467 | /// Returns a query's value or an empty object if the value will be deferred through a cache. | ||
| 1468 | std::optional<u64> GetQueryResult(); | ||
| 1437 | }; | 1469 | }; |
| 1438 | 1470 | ||
| 1439 | #define ASSERT_REG_POSITION(field_name, position) \ | 1471 | #define ASSERT_REG_POSITION(field_name, position) \ |
| @@ -1499,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB); | |||
| 1499 | ASSERT_REG_POSITION(vb_element_base, 0x50D); | 1531 | ASSERT_REG_POSITION(vb_element_base, 0x50D); |
| 1500 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); | 1532 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); |
| 1501 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | 1533 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); |
| 1534 | ASSERT_REG_POSITION(samplecnt_enable, 0x545); | ||
| 1502 | ASSERT_REG_POSITION(point_size, 0x546); | 1535 | ASSERT_REG_POSITION(point_size, 0x546); |
| 1503 | ASSERT_REG_POSITION(point_sprite_enable, 0x548); | 1536 | ASSERT_REG_POSITION(point_sprite_enable, 0x548); |
| 1537 | ASSERT_REG_POSITION(counter_reset, 0x54C); | ||
| 1504 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1538 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1505 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1539 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1506 | ASSERT_REG_POSITION(condition, 0x554); | 1540 | ASSERT_REG_POSITION(condition, 0x554); |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4419ab735..7d7137109 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -24,7 +24,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | |||
| 24 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | 24 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 25 | : system{system}, renderer{renderer}, is_async{is_async} { | 25 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 26 | auto& rasterizer{renderer.Rasterizer()}; | 26 | auto& rasterizer{renderer.Rasterizer()}; |
| 27 | memory_manager = std::make_unique<Tegra::MemoryManager>(system); | 27 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 28 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 28 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 29 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 29 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 30 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); | 30 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f1d50be3e..11848fbce 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -9,12 +9,13 @@ | |||
| 9 | #include "core/hle/kernel/process.h" | 9 | #include "core/hle/kernel/process.h" |
| 10 | #include "core/hle/kernel/vm_manager.h" | 10 | #include "core/hle/kernel/vm_manager.h" |
| 11 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 12 | #include "video_core/gpu.h" | ||
| 13 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 13 | #include "video_core/rasterizer_interface.h" | ||
| 14 | 14 | ||
| 15 | namespace Tegra { | 15 | namespace Tegra { |
| 16 | 16 | ||
| 17 | MemoryManager::MemoryManager(Core::System& system) : system{system} { | 17 | MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) |
| 18 | : rasterizer{rasterizer}, system{system} { | ||
| 18 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); | 19 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); |
| 19 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), | 20 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), |
| 20 | Common::PageType::Unmapped); | 21 | Common::PageType::Unmapped); |
| @@ -83,8 +84,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
| 83 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); | 84 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); |
| 84 | ASSERT(cpu_addr); | 85 | ASSERT(cpu_addr); |
| 85 | 86 | ||
| 86 | system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); | 87 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); |
| 87 | |||
| 88 | UnmapRange(gpu_addr, aligned_size); | 88 | UnmapRange(gpu_addr, aligned_size); |
| 89 | ASSERT(system.CurrentProcess() | 89 | ASSERT(system.CurrentProcess() |
| 90 | ->VMManager() | 90 | ->VMManager() |
| @@ -242,7 +242,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s | |||
| 242 | switch (page_table.attributes[page_index]) { | 242 | switch (page_table.attributes[page_index]) { |
| 243 | case Common::PageType::Memory: { | 243 | case Common::PageType::Memory: { |
| 244 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | 244 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; |
| 245 | system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); | 245 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); |
| 246 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 246 | std::memcpy(dest_buffer, src_ptr, copy_amount); |
| 247 | break; | 247 | break; |
| 248 | } | 248 | } |
| @@ -292,7 +292,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const | |||
| 292 | switch (page_table.attributes[page_index]) { | 292 | switch (page_table.attributes[page_index]) { |
| 293 | case Common::PageType::Memory: { | 293 | case Common::PageType::Memory: { |
| 294 | u8* dest_ptr{page_table.pointers[page_index] + page_offset}; | 294 | u8* dest_ptr{page_table.pointers[page_index] + page_offset}; |
| 295 | system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); | 295 | rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); |
| 296 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 296 | std::memcpy(dest_ptr, src_buffer, copy_amount); |
| 297 | break; | 297 | break; |
| 298 | } | 298 | } |
| @@ -340,7 +340,7 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std:: | |||
| 340 | switch (page_table.attributes[page_index]) { | 340 | switch (page_table.attributes[page_index]) { |
| 341 | case Common::PageType::Memory: { | 341 | case Common::PageType::Memory: { |
| 342 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | 342 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; |
| 343 | system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount); | 343 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); |
| 344 | WriteBlock(dest_addr, src_ptr, copy_amount); | 344 | WriteBlock(dest_addr, src_ptr, copy_amount); |
| 345 | break; | 345 | break; |
| 346 | } | 346 | } |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 393447eb4..aea010087 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -10,6 +10,10 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/page_table.h" | 11 | #include "common/page_table.h" |
| 12 | 12 | ||
| 13 | namespace VideoCore { | ||
| 14 | class RasterizerInterface; | ||
| 15 | } | ||
| 16 | |||
| 13 | namespace Core { | 17 | namespace Core { |
| 14 | class System; | 18 | class System; |
| 15 | } | 19 | } |
| @@ -47,7 +51,7 @@ struct VirtualMemoryArea { | |||
| 47 | 51 | ||
| 48 | class MemoryManager final { | 52 | class MemoryManager final { |
| 49 | public: | 53 | public: |
| 50 | explicit MemoryManager(Core::System& system); | 54 | explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); |
| 51 | ~MemoryManager(); | 55 | ~MemoryManager(); |
| 52 | 56 | ||
| 53 | GPUVAddr AllocateSpace(u64 size, u64 align); | 57 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| @@ -172,6 +176,7 @@ private: | |||
| 172 | 176 | ||
| 173 | Common::PageTable page_table{page_bits}; | 177 | Common::PageTable page_table{page_bits}; |
| 174 | VMAMap vma_map; | 178 | VMAMap vma_map; |
| 179 | VideoCore::RasterizerInterface& rasterizer; | ||
| 175 | 180 | ||
| 176 | Core::System& system; | 181 | Core::System& system; |
| 177 | }; | 182 | }; |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..e66054ed0 --- /dev/null +++ b/src/video_core/query_cache.h | |||
| @@ -0,0 +1,359 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <cstring> | ||
| 10 | #include <iterator> | ||
| 11 | #include <memory> | ||
| 12 | #include <mutex> | ||
| 13 | #include <optional> | ||
| 14 | #include <unordered_map> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/gpu.h" | ||
| 21 | #include "video_core/memory_manager.h" | ||
| 22 | #include "video_core/rasterizer_interface.h" | ||
| 23 | |||
| 24 | namespace VideoCommon { | ||
| 25 | |||
| 26 | template <class QueryCache, class HostCounter> | ||
| 27 | class CounterStreamBase { | ||
| 28 | public: | ||
| 29 | explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) | ||
| 30 | : cache{cache}, type{type} {} | ||
| 31 | |||
| 32 | /// Updates the state of the stream, enabling or disabling as needed. | ||
| 33 | void Update(bool enabled) { | ||
| 34 | if (enabled) { | ||
| 35 | Enable(); | ||
| 36 | } else { | ||
| 37 | Disable(); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Resets the stream to zero. It doesn't disable the query after resetting. | ||
| 42 | void Reset() { | ||
| 43 | if (current) { | ||
| 44 | current->EndQuery(); | ||
| 45 | |||
| 46 | // Immediately start a new query to avoid disabling its state. | ||
| 47 | current = cache.Counter(nullptr, type); | ||
| 48 | } | ||
| 49 | last = nullptr; | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Returns the current counter slicing as needed. | ||
| 53 | std::shared_ptr<HostCounter> Current() { | ||
| 54 | if (!current) { | ||
| 55 | return nullptr; | ||
| 56 | } | ||
| 57 | current->EndQuery(); | ||
| 58 | last = std::move(current); | ||
| 59 | current = cache.Counter(last, type); | ||
| 60 | return last; | ||
| 61 | } | ||
| 62 | |||
| 63 | /// Returns true when the counter stream is enabled. | ||
| 64 | bool IsEnabled() const { | ||
| 65 | return current != nullptr; | ||
| 66 | } | ||
| 67 | |||
| 68 | private: | ||
| 69 | /// Enables the stream. | ||
| 70 | void Enable() { | ||
| 71 | if (current) { | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | current = cache.Counter(last, type); | ||
| 75 | } | ||
| 76 | |||
| 77 | // Disables the stream. | ||
| 78 | void Disable() { | ||
| 79 | if (current) { | ||
| 80 | current->EndQuery(); | ||
| 81 | } | ||
| 82 | last = std::exchange(current, nullptr); | ||
| 83 | } | ||
| 84 | |||
| 85 | QueryCache& cache; | ||
| 86 | const VideoCore::QueryType type; | ||
| 87 | |||
| 88 | std::shared_ptr<HostCounter> current; | ||
| 89 | std::shared_ptr<HostCounter> last; | ||
| 90 | }; | ||
| 91 | |||
| 92 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, | ||
| 93 | class QueryPool> | ||
| 94 | class QueryCacheBase { | ||
| 95 | public: | ||
| 96 | explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||
| 97 | : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ | ||
| 98 | static_cast<QueryCache&>(*this), | ||
| 99 | VideoCore::QueryType::SamplesPassed}}} {} | ||
| 100 | |||
| 101 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 102 | std::unique_lock lock{mutex}; | ||
| 103 | FlushAndRemoveRegion(addr, size); | ||
| 104 | } | ||
| 105 | |||
| 106 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 107 | std::unique_lock lock{mutex}; | ||
| 108 | FlushAndRemoveRegion(addr, size); | ||
| 109 | } | ||
| 110 | |||
| 111 | /** | ||
| 112 | * Records a query in GPU mapped memory, potentially marked with a timestamp. | ||
| 113 | * @param gpu_addr GPU address to flush to when the mapped memory is read. | ||
| 114 | * @param type Query type, e.g. SamplesPassed. | ||
| 115 | * @param timestamp Timestamp, when empty the flushed query is assumed to be short. | ||
| 116 | */ | ||
| 117 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | ||
| 118 | std::unique_lock lock{mutex}; | ||
| 119 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 120 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 121 | |||
| 122 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | ||
| 123 | if (!query) { | ||
| 124 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 125 | ASSERT_OR_EXECUTE(cpu_addr, return;); | ||
| 126 | |||
| 127 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | ||
| 128 | } | ||
| 129 | |||
| 130 | query->BindCounter(Stream(type).Current(), timestamp); | ||
| 131 | } | ||
| 132 | |||
| 133 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | ||
| 134 | void UpdateCounters() { | ||
| 135 | std::unique_lock lock{mutex}; | ||
| 136 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 137 | Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); | ||
| 138 | } | ||
| 139 | |||
| 140 | /// Resets a counter to zero. It doesn't disable the query after resetting. | ||
| 141 | void ResetCounter(VideoCore::QueryType type) { | ||
| 142 | std::unique_lock lock{mutex}; | ||
| 143 | Stream(type).Reset(); | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Disable all active streams. Expected to be called at the end of a command buffer. | ||
| 147 | void DisableStreams() { | ||
| 148 | std::unique_lock lock{mutex}; | ||
| 149 | for (auto& stream : streams) { | ||
| 150 | stream.Update(false); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | /// Returns a new host counter. | ||
| 155 | std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, | ||
| 156 | VideoCore::QueryType type) { | ||
| 157 | return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency), | ||
| 158 | type); | ||
| 159 | } | ||
| 160 | |||
| 161 | /// Returns the counter stream of the specified type. | ||
| 162 | CounterStream& Stream(VideoCore::QueryType type) { | ||
| 163 | return streams[static_cast<std::size_t>(type)]; | ||
| 164 | } | ||
| 165 | |||
| 166 | /// Returns the counter stream of the specified type. | ||
| 167 | const CounterStream& Stream(VideoCore::QueryType type) const { | ||
| 168 | return streams[static_cast<std::size_t>(type)]; | ||
| 169 | } | ||
| 170 | |||
| 171 | protected: | ||
| 172 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | ||
| 173 | |||
| 174 | private: | ||
| 175 | /// Flushes a memory range to guest memory and removes it from the cache. | ||
| 176 | void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | ||
| 177 | const u64 addr_begin = static_cast<u64>(addr); | ||
| 178 | const u64 addr_end = addr_begin + static_cast<u64>(size); | ||
| 179 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | ||
| 180 | const u64 cache_begin = query.GetCacheAddr(); | ||
| 181 | const u64 cache_end = cache_begin + query.SizeInBytes(); | ||
| 182 | return cache_begin < addr_end && addr_begin < cache_end; | ||
| 183 | }; | ||
| 184 | |||
| 185 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 186 | for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 187 | const auto& it = cached_queries.find(page); | ||
| 188 | if (it == std::end(cached_queries)) { | ||
| 189 | continue; | ||
| 190 | } | ||
| 191 | auto& contents = it->second; | ||
| 192 | for (auto& query : contents) { | ||
| 193 | if (!in_range(query)) { | ||
| 194 | continue; | ||
| 195 | } | ||
| 196 | rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); | ||
| 197 | query.Flush(); | ||
| 198 | } | ||
| 199 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | ||
| 200 | std::end(contents)); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. | ||
| 205 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { | ||
| 206 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); | ||
| 207 | const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; | ||
| 208 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, | ||
| 209 | host_ptr); | ||
| 210 | } | ||
| 211 | |||
| 212 | /// Tries to a get a cached query. Returns nullptr on failure. | ||
| 213 | CachedQuery* TryGet(CacheAddr addr) { | ||
| 214 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | ||
| 215 | const auto it = cached_queries.find(page); | ||
| 216 | if (it == std::end(cached_queries)) { | ||
| 217 | return nullptr; | ||
| 218 | } | ||
| 219 | auto& contents = it->second; | ||
| 220 | const auto found = | ||
| 221 | std::find_if(std::begin(contents), std::end(contents), | ||
| 222 | [addr](auto& query) { return query.GetCacheAddr() == addr; }); | ||
| 223 | return found != std::end(contents) ? &*found : nullptr; | ||
| 224 | } | ||
| 225 | |||
| 226 | static constexpr std::uintptr_t PAGE_SIZE = 4096; | ||
| 227 | static constexpr unsigned PAGE_SHIFT = 12; | ||
| 228 | |||
| 229 | Core::System& system; | ||
| 230 | VideoCore::RasterizerInterface& rasterizer; | ||
| 231 | |||
| 232 | std::recursive_mutex mutex; | ||
| 233 | |||
| 234 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | ||
| 235 | |||
| 236 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | ||
| 237 | }; | ||
| 238 | |||
| 239 | template <class QueryCache, class HostCounter> | ||
| 240 | class HostCounterBase { | ||
| 241 | public: | ||
| 242 | explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_) | ||
| 243 | : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { | ||
| 244 | // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. | ||
| 245 | constexpr u64 depth_threshold = 96; | ||
| 246 | if (depth > depth_threshold) { | ||
| 247 | depth = 0; | ||
| 248 | base_result = dependency->Query(); | ||
| 249 | dependency = nullptr; | ||
| 250 | } | ||
| 251 | } | ||
| 252 | virtual ~HostCounterBase() = default; | ||
| 253 | |||
| 254 | /// Returns the current value of the query. | ||
| 255 | u64 Query() { | ||
| 256 | if (result) { | ||
| 257 | return *result; | ||
| 258 | } | ||
| 259 | |||
| 260 | u64 value = BlockingQuery() + base_result; | ||
| 261 | if (dependency) { | ||
| 262 | value += dependency->Query(); | ||
| 263 | dependency = nullptr; | ||
| 264 | } | ||
| 265 | |||
| 266 | result = value; | ||
| 267 | return *result; | ||
| 268 | } | ||
| 269 | |||
| 270 | /// Returns true when flushing this query will potentially wait. | ||
| 271 | bool WaitPending() const noexcept { | ||
| 272 | return result.has_value(); | ||
| 273 | } | ||
| 274 | |||
| 275 | u64 Depth() const noexcept { | ||
| 276 | return depth; | ||
| 277 | } | ||
| 278 | |||
| 279 | protected: | ||
| 280 | /// Returns the value of query from the backend API blocking as needed. | ||
| 281 | virtual u64 BlockingQuery() const = 0; | ||
| 282 | |||
| 283 | private: | ||
| 284 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. | ||
| 285 | std::optional<u64> result; ///< Filled with the already returned value. | ||
| 286 | u64 depth; ///< Number of nested dependencies. | ||
| 287 | u64 base_result = 0; ///< Equivalent to nested dependencies value. | ||
| 288 | }; | ||
| 289 | |||
| 290 | template <class HostCounter> | ||
| 291 | class CachedQueryBase { | ||
| 292 | public: | ||
| 293 | explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) | ||
| 294 | : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} | ||
| 295 | virtual ~CachedQueryBase() = default; | ||
| 296 | |||
| 297 | CachedQueryBase(CachedQueryBase&&) noexcept = default; | ||
| 298 | CachedQueryBase(const CachedQueryBase&) = delete; | ||
| 299 | |||
| 300 | CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default; | ||
| 301 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; | ||
| 302 | |||
| 303 | /// Flushes the query to guest memory. | ||
| 304 | virtual void Flush() { | ||
| 305 | // When counter is nullptr it means that it's just been reseted. We are supposed to write a | ||
| 306 | // zero in these cases. | ||
| 307 | const u64 value = counter ? counter->Query() : 0; | ||
| 308 | std::memcpy(host_ptr, &value, sizeof(u64)); | ||
| 309 | |||
| 310 | if (timestamp) { | ||
| 311 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | ||
| 312 | } | ||
| 313 | } | ||
| 314 | |||
| 315 | /// Binds a counter to this query. | ||
| 316 | void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | ||
| 317 | if (counter) { | ||
| 318 | // If there's an old counter set it means the query is being rewritten by the game. | ||
| 319 | // To avoid losing the data forever, flush here. | ||
| 320 | Flush(); | ||
| 321 | } | ||
| 322 | counter = std::move(counter_); | ||
| 323 | timestamp = timestamp_; | ||
| 324 | } | ||
| 325 | |||
| 326 | VAddr CpuAddr() const noexcept { | ||
| 327 | return cpu_addr; | ||
| 328 | } | ||
| 329 | |||
| 330 | CacheAddr GetCacheAddr() const noexcept { | ||
| 331 | return ToCacheAddr(host_ptr); | ||
| 332 | } | ||
| 333 | |||
| 334 | u64 SizeInBytes() const noexcept { | ||
| 335 | return SizeInBytes(timestamp.has_value()); | ||
| 336 | } | ||
| 337 | |||
| 338 | static constexpr u64 SizeInBytes(bool with_timestamp) noexcept { | ||
| 339 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | ||
| 340 | } | ||
| 341 | |||
| 342 | protected: | ||
| 343 | /// Returns true when querying the counter may potentially block. | ||
| 344 | bool WaitPending() const noexcept { | ||
| 345 | return counter && counter->WaitPending(); | ||
| 346 | } | ||
| 347 | |||
| 348 | private: | ||
| 349 | static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. | ||
| 350 | static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. | ||
| 351 | static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. | ||
| 352 | |||
| 353 | VAddr cpu_addr; ///< Guest CPU address. | ||
| 354 | u8* host_ptr; ///< Writable host pointer. | ||
| 355 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | ||
| 356 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | ||
| 357 | }; | ||
| 358 | |||
| 359 | } // namespace VideoCommon | ||
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a8fc66711..f18eaf4bc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <atomic> | 7 | #include <atomic> |
| 8 | #include <functional> | 8 | #include <functional> |
| 9 | #include <optional> | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/fermi_2d.h" | 11 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/gpu.h" | 12 | #include "video_core/gpu.h" |
| @@ -17,6 +18,11 @@ class MemoryManager; | |||
| 17 | 18 | ||
| 18 | namespace VideoCore { | 19 | namespace VideoCore { |
| 19 | 20 | ||
| 21 | enum class QueryType { | ||
| 22 | SamplesPassed, | ||
| 23 | }; | ||
| 24 | constexpr std::size_t NumQueryTypes = 1; | ||
| 25 | |||
| 20 | enum class LoadCallbackStage { | 26 | enum class LoadCallbackStage { |
| 21 | Prepare, | 27 | Prepare, |
| 22 | Decompile, | 28 | Decompile, |
| @@ -38,6 +44,12 @@ public: | |||
| 38 | /// Dispatches a compute shader invocation | 44 | /// Dispatches a compute shader invocation |
| 39 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | 45 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; |
| 40 | 46 | ||
| 47 | /// Resets the counter of a query | ||
| 48 | virtual void ResetCounter(QueryType type) = 0; | ||
| 49 | |||
| 50 | /// Records a GPU query and caches it | ||
| 51 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | ||
| 52 | |||
| 41 | /// Notify rasterizer that all caches should be flushed to Switch memory | 53 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 42 | virtual void FlushAll() = 0; | 54 | virtual void FlushAll() = 0; |
| 43 | 55 | ||
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp new file mode 100644 index 000000000..f12e9f55f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <glad/glad.h> | ||
| 13 | |||
| 14 | #include "common/assert.h" | ||
| 15 | #include "core/core.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/memory_manager.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_query_cache.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | namespace { | ||
| 24 | |||
| 25 | constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; | ||
| 26 | |||
| 27 | constexpr GLenum GetTarget(VideoCore::QueryType type) { | ||
| 28 | return QueryTargets[static_cast<std::size_t>(type)]; | ||
| 29 | } | ||
| 30 | |||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) | ||
| 34 | : VideoCommon::QueryCacheBase< | ||
| 35 | QueryCache, CachedQuery, CounterStream, HostCounter, | ||
| 36 | std::vector<OGLQuery>>{system, | ||
| 37 | static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)}, | ||
| 38 | gl_rasterizer{gl_rasterizer} {} | ||
| 39 | |||
| 40 | QueryCache::~QueryCache() = default; | ||
| 41 | |||
| 42 | OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { | ||
| 43 | auto& reserve = query_pools[static_cast<std::size_t>(type)]; | ||
| 44 | OGLQuery query; | ||
| 45 | if (reserve.empty()) { | ||
| 46 | query.Create(GetTarget(type)); | ||
| 47 | return query; | ||
| 48 | } | ||
| 49 | |||
| 50 | query = std::move(reserve.back()); | ||
| 51 | reserve.pop_back(); | ||
| 52 | return query; | ||
| 53 | } | ||
| 54 | |||
| 55 | void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { | ||
| 56 | query_pools[static_cast<std::size_t>(type)].push_back(std::move(query)); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool QueryCache::AnyCommandQueued() const noexcept { | ||
| 60 | return gl_rasterizer.AnyCommandQueued(); | ||
| 61 | } | ||
| 62 | |||
| 63 | HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, | ||
| 64 | VideoCore::QueryType type) | ||
| 65 | : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache}, | ||
| 66 | type{type}, query{cache.AllocateQuery(type)} { | ||
| 67 | glBeginQuery(GetTarget(type), query.handle); | ||
| 68 | } | ||
| 69 | |||
| 70 | HostCounter::~HostCounter() { | ||
| 71 | cache.Reserve(type, std::move(query)); | ||
| 72 | } | ||
| 73 | |||
| 74 | void HostCounter::EndQuery() { | ||
| 75 | if (!cache.AnyCommandQueued()) { | ||
| 76 | // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not | ||
| 77 | // having any of these causes a lock. glFlush is considered a command, so we can safely wait | ||
| 78 | // for this. Insert to the OpenGL command stream a flush. | ||
| 79 | glFlush(); | ||
| 80 | } | ||
| 81 | glEndQuery(GetTarget(type)); | ||
| 82 | } | ||
| 83 | |||
| 84 | u64 HostCounter::BlockingQuery() const { | ||
| 85 | GLint64 value; | ||
| 86 | glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); | ||
| 87 | return static_cast<u64>(value); | ||
| 88 | } | ||
| 89 | |||
| 90 | CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) | ||
| 91 | : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} | ||
| 92 | |||
| 93 | CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept | ||
| 94 | : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} | ||
| 95 | |||
| 96 | CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { | ||
| 97 | VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); | ||
| 98 | cache = rhs.cache; | ||
| 99 | type = rhs.type; | ||
| 100 | return *this; | ||
| 101 | } | ||
| 102 | |||
| 103 | void CachedQuery::Flush() { | ||
| 104 | // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. | ||
| 105 | // To avoid this disable and re-enable keeping the dependency stream. | ||
| 106 | // But we only have to do this if we have pending waits to be done. | ||
| 107 | auto& stream = cache->Stream(type); | ||
| 108 | const bool slice_counter = WaitPending() && stream.IsEnabled(); | ||
| 109 | if (slice_counter) { | ||
| 110 | stream.Update(false); | ||
| 111 | } | ||
| 112 | |||
| 113 | VideoCommon::CachedQueryBase<HostCounter>::Flush(); | ||
| 114 | |||
| 115 | if (slice_counter) { | ||
| 116 | stream.Update(true); | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h new file mode 100644 index 000000000..d8e7052a1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/query_cache.h" | ||
| 13 | #include "video_core/rasterizer_interface.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 15 | |||
| 16 | namespace Core { | ||
| 17 | class System; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace OpenGL { | ||
| 21 | |||
| 22 | class CachedQuery; | ||
| 23 | class HostCounter; | ||
| 24 | class QueryCache; | ||
| 25 | class RasterizerOpenGL; | ||
| 26 | |||
| 27 | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | ||
| 28 | |||
| 29 | class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, | ||
| 30 | HostCounter, std::vector<OGLQuery>> { | ||
| 31 | public: | ||
| 32 | explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); | ||
| 33 | ~QueryCache(); | ||
| 34 | |||
| 35 | OGLQuery AllocateQuery(VideoCore::QueryType type); | ||
| 36 | |||
| 37 | void Reserve(VideoCore::QueryType type, OGLQuery&& query); | ||
| 38 | |||
| 39 | bool AnyCommandQueued() const noexcept; | ||
| 40 | |||
| 41 | private: | ||
| 42 | RasterizerOpenGL& gl_rasterizer; | ||
| 43 | }; | ||
| 44 | |||
| 45 | class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { | ||
| 46 | public: | ||
| 47 | explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, | ||
| 48 | VideoCore::QueryType type); | ||
| 49 | ~HostCounter(); | ||
| 50 | |||
| 51 | void EndQuery(); | ||
| 52 | |||
| 53 | private: | ||
| 54 | u64 BlockingQuery() const override; | ||
| 55 | |||
| 56 | QueryCache& cache; | ||
| 57 | const VideoCore::QueryType type; | ||
| 58 | OGLQuery query; | ||
| 59 | }; | ||
| 60 | |||
| 61 | class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { | ||
| 62 | public: | ||
| 63 | explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, | ||
| 64 | u8* host_ptr); | ||
| 65 | CachedQuery(CachedQuery&& rhs) noexcept; | ||
| 66 | CachedQuery(const CachedQuery&) = delete; | ||
| 67 | |||
| 68 | CachedQuery& operator=(CachedQuery&& rhs) noexcept; | ||
| 69 | CachedQuery& operator=(const CachedQuery&) = delete; | ||
| 70 | |||
| 71 | void Flush() override; | ||
| 72 | |||
| 73 | private: | ||
| 74 | QueryCache* cache; | ||
| 75 | VideoCore::QueryType type; | ||
| 76 | }; | ||
| 77 | |||
| 78 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 048d43b89..e1965fb21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 26 | #include "video_core/engines/shader_type.h" | 26 | #include "video_core/engines/shader_type.h" |
| 27 | #include "video_core/memory_manager.h" | 27 | #include "video_core/memory_manager.h" |
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 31 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | |||
| 92 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 93 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 93 | ScreenInfo& info) | 94 | ScreenInfo& info) |
| 94 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, | 95 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, |
| 95 | shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, | 96 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, |
| 96 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { | 97 | screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { |
| 97 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 98 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 98 | state.draw.shader_program = 0; | 99 | state.draw.shader_program = 0; |
| 99 | state.Apply(); | 100 | state.Apply(); |
| @@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() { | |||
| 541 | } else if (use_stencil) { | 542 | } else if (use_stencil) { |
| 542 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 543 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 543 | } | 544 | } |
| 545 | |||
| 546 | ++num_queued_commands; | ||
| 544 | } | 547 | } |
| 545 | 548 | ||
| 546 | void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | 549 | void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { |
| 547 | MICROPROFILE_SCOPE(OpenGL_Drawing); | 550 | MICROPROFILE_SCOPE(OpenGL_Drawing); |
| 548 | auto& gpu = system.GPU().Maxwell3D(); | 551 | auto& gpu = system.GPU().Maxwell3D(); |
| 552 | const auto& regs = gpu.regs; | ||
| 553 | |||
| 554 | query_cache.UpdateCounters(); | ||
| 549 | 555 | ||
| 550 | SyncRasterizeEnable(state); | 556 | SyncRasterizeEnable(state); |
| 551 | SyncColorMask(); | 557 | SyncColorMask(); |
| @@ -611,7 +617,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 611 | 617 | ||
| 612 | // Setup shaders and their used resources. | 618 | // Setup shaders and their used resources. |
| 613 | texture_cache.GuardSamplers(true); | 619 | texture_cache.GuardSamplers(true); |
| 614 | const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); | 620 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); |
| 615 | SetupShaders(primitive_mode); | 621 | SetupShaders(primitive_mode); |
| 616 | texture_cache.GuardSamplers(false); | 622 | texture_cache.GuardSamplers(false); |
| 617 | 623 | ||
| @@ -638,22 +644,44 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 638 | glTextureBarrier(); | 644 | glTextureBarrier(); |
| 639 | } | 645 | } |
| 640 | 646 | ||
| 647 | ++num_queued_commands; | ||
| 648 | |||
| 641 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); | 649 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); |
| 642 | const GLsizei num_instances = | 650 | const GLsizei num_instances = |
| 643 | static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); | 651 | static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); |
| 644 | if (is_indexed) { | 652 | if (is_indexed) { |
| 645 | const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); | ||
| 646 | const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); | 653 | const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); |
| 647 | const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); | 654 | const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); |
| 648 | glDrawElementsInstancedBaseVertexBaseInstance( | 655 | const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); |
| 649 | primitive_mode, num_vertices, index_format, | 656 | const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); |
| 650 | reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex, | 657 | if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { |
| 651 | base_instance); | 658 | glDrawElements(primitive_mode, num_vertices, format, offset); |
| 659 | } else if (num_instances == 1 && base_instance == 0) { | ||
| 660 | glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex); | ||
| 661 | } else if (base_vertex == 0 && base_instance == 0) { | ||
| 662 | glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances); | ||
| 663 | } else if (base_vertex == 0) { | ||
| 664 | glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset, | ||
| 665 | num_instances, base_instance); | ||
| 666 | } else if (base_instance == 0) { | ||
| 667 | glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset, | ||
| 668 | num_instances, base_vertex); | ||
| 669 | } else { | ||
| 670 | glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format, | ||
| 671 | offset, num_instances, base_vertex, | ||
| 672 | base_instance); | ||
| 673 | } | ||
| 652 | } else { | 674 | } else { |
| 653 | const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); | 675 | const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); |
| 654 | const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); | 676 | const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); |
| 655 | glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances, | 677 | if (num_instances == 1 && base_instance == 0) { |
| 656 | base_instance); | 678 | glDrawArrays(primitive_mode, base_vertex, num_vertices); |
| 679 | } else if (base_instance == 0) { | ||
| 680 | glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances); | ||
| 681 | } else { | ||
| 682 | glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, | ||
| 683 | num_instances, base_instance); | ||
| 684 | } | ||
| 657 | } | 685 | } |
| 658 | } | 686 | } |
| 659 | 687 | ||
| @@ -697,6 +725,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 697 | state.ApplyProgramPipeline(); | 725 | state.ApplyProgramPipeline(); |
| 698 | 726 | ||
| 699 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 727 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 728 | ++num_queued_commands; | ||
| 729 | } | ||
| 730 | |||
| 731 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | ||
| 732 | query_cache.ResetCounter(type); | ||
| 733 | } | ||
| 734 | |||
| 735 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | ||
| 736 | std::optional<u64> timestamp) { | ||
| 737 | query_cache.Query(gpu_addr, type, timestamp); | ||
| 700 | } | 738 | } |
| 701 | 739 | ||
| 702 | void RasterizerOpenGL::FlushAll() {} | 740 | void RasterizerOpenGL::FlushAll() {} |
| @@ -708,6 +746,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 708 | } | 746 | } |
| 709 | texture_cache.FlushRegion(addr, size); | 747 | texture_cache.FlushRegion(addr, size); |
| 710 | buffer_cache.FlushRegion(addr, size); | 748 | buffer_cache.FlushRegion(addr, size); |
| 749 | query_cache.FlushRegion(addr, size); | ||
| 711 | } | 750 | } |
| 712 | 751 | ||
| 713 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 752 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -718,6 +757,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 718 | texture_cache.InvalidateRegion(addr, size); | 757 | texture_cache.InvalidateRegion(addr, size); |
| 719 | shader_cache.InvalidateRegion(addr, size); | 758 | shader_cache.InvalidateRegion(addr, size); |
| 720 | buffer_cache.InvalidateRegion(addr, size); | 759 | buffer_cache.InvalidateRegion(addr, size); |
| 760 | query_cache.InvalidateRegion(addr, size); | ||
| 721 | } | 761 | } |
| 722 | 762 | ||
| 723 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 763 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -728,10 +768,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 728 | } | 768 | } |
| 729 | 769 | ||
| 730 | void RasterizerOpenGL::FlushCommands() { | 770 | void RasterizerOpenGL::FlushCommands() { |
| 771 | // Only flush when we have commands queued to OpenGL. | ||
| 772 | if (num_queued_commands == 0) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | num_queued_commands = 0; | ||
| 731 | glFlush(); | 776 | glFlush(); |
| 732 | } | 777 | } |
| 733 | 778 | ||
| 734 | void RasterizerOpenGL::TickFrame() { | 779 | void RasterizerOpenGL::TickFrame() { |
| 780 | // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. | ||
| 781 | num_queued_commands = 0; | ||
| 782 | |||
| 735 | buffer_cache.TickFrame(); | 783 | buffer_cache.TickFrame(); |
| 736 | } | 784 | } |
| 737 | 785 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bc28a3bcf..68abe9a21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_query_cache.h" | ||
| 27 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 28 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| @@ -60,6 +61,8 @@ public: | |||
| 60 | void Draw(bool is_indexed, bool is_instanced) override; | 61 | void Draw(bool is_indexed, bool is_instanced) override; |
| 61 | void Clear() override; | 62 | void Clear() override; |
| 62 | void DispatchCompute(GPUVAddr code_addr) override; | 63 | void DispatchCompute(GPUVAddr code_addr) override; |
| 64 | void ResetCounter(VideoCore::QueryType type) override; | ||
| 65 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||
| 63 | void FlushAll() override; | 66 | void FlushAll() override; |
| 64 | void FlushRegion(CacheAddr addr, u64 size) override; | 67 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 65 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 68 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| @@ -74,6 +77,11 @@ public: | |||
| 74 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 77 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 75 | const VideoCore::DiskResourceLoadCallback& callback) override; | 78 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 76 | 79 | ||
| 80 | /// Returns true when there are commands queued to the OpenGL server. | ||
| 81 | bool AnyCommandQueued() const { | ||
| 82 | return num_queued_commands > 0; | ||
| 83 | } | ||
| 84 | |||
| 77 | private: | 85 | private: |
| 78 | /// Configures the color and depth framebuffer states. | 86 | /// Configures the color and depth framebuffer states. |
| 79 | void ConfigureFramebuffers(); | 87 | void ConfigureFramebuffers(); |
| @@ -176,10 +184,23 @@ private: | |||
| 176 | /// Syncs the alpha test state to match the guest state | 184 | /// Syncs the alpha test state to match the guest state |
| 177 | void SyncAlphaTest(); | 185 | void SyncAlphaTest(); |
| 178 | 186 | ||
| 179 | /// Check for extensions that are not strictly required | 187 | /// Check for extensions that are not strictly required but are needed for correct emulation |
| 180 | /// but are needed for correct emulation | ||
| 181 | void CheckExtensions(); | 188 | void CheckExtensions(); |
| 182 | 189 | ||
| 190 | std::size_t CalculateVertexArraysSize() const; | ||
| 191 | |||
| 192 | std::size_t CalculateIndexBufferSize() const; | ||
| 193 | |||
| 194 | /// Updates and returns a vertex array object representing current vertex format | ||
| 195 | GLuint SetupVertexFormat(); | ||
| 196 | |||
| 197 | void SetupVertexBuffer(GLuint vao); | ||
| 198 | void SetupVertexInstances(GLuint vao); | ||
| 199 | |||
| 200 | GLintptr SetupIndexBuffer(); | ||
| 201 | |||
| 202 | void SetupShaders(GLenum primitive_mode); | ||
| 203 | |||
| 183 | const Device device; | 204 | const Device device; |
| 184 | OpenGLState state; | 205 | OpenGLState state; |
| 185 | 206 | ||
| @@ -187,6 +208,7 @@ private: | |||
| 187 | ShaderCacheOpenGL shader_cache; | 208 | ShaderCacheOpenGL shader_cache; |
| 188 | SamplerCacheOpenGL sampler_cache; | 209 | SamplerCacheOpenGL sampler_cache; |
| 189 | FramebufferCacheOpenGL framebuffer_cache; | 210 | FramebufferCacheOpenGL framebuffer_cache; |
| 211 | QueryCache query_cache; | ||
| 190 | 212 | ||
| 191 | Core::System& system; | 213 | Core::System& system; |
| 192 | ScreenInfo& screen_info; | 214 | ScreenInfo& screen_info; |
| @@ -204,19 +226,8 @@ private: | |||
| 204 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 226 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 205 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 227 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 206 | 228 | ||
| 207 | std::size_t CalculateVertexArraysSize() const; | 229 | /// Number of commands queued to the OpenGL driver. Reset on flush. |
| 208 | 230 | std::size_t num_queued_commands = 0; | |
| 209 | std::size_t CalculateIndexBufferSize() const; | ||
| 210 | |||
| 211 | /// Updates and returns a vertex array object representing current vertex format | ||
| 212 | GLuint SetupVertexFormat(); | ||
| 213 | |||
| 214 | void SetupVertexBuffer(GLuint vao); | ||
| 215 | void SetupVertexInstances(GLuint vao); | ||
| 216 | |||
| 217 | GLintptr SetupIndexBuffer(); | ||
| 218 | |||
| 219 | void SetupShaders(GLenum primitive_mode); | ||
| 220 | }; | 231 | }; |
| 221 | 232 | ||
| 222 | } // namespace OpenGL | 233 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 5c96c1d46..f0ddfb276 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -207,4 +207,21 @@ void OGLFramebuffer::Release() { | |||
| 207 | handle = 0; | 207 | handle = 0; |
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | void OGLQuery::Create(GLenum target) { | ||
| 211 | if (handle != 0) | ||
| 212 | return; | ||
| 213 | |||
| 214 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | ||
| 215 | glCreateQueries(target, 1, &handle); | ||
| 216 | } | ||
| 217 | |||
| 218 | void OGLQuery::Release() { | ||
| 219 | if (handle == 0) | ||
| 220 | return; | ||
| 221 | |||
| 222 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 223 | glDeleteQueries(1, &handle); | ||
| 224 | handle = 0; | ||
| 225 | } | ||
| 226 | |||
| 210 | } // namespace OpenGL | 227 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 3a85a1d4c..514d1d165 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -266,4 +266,29 @@ public: | |||
| 266 | GLuint handle = 0; | 266 | GLuint handle = 0; |
| 267 | }; | 267 | }; |
| 268 | 268 | ||
| 269 | class OGLQuery : private NonCopyable { | ||
| 270 | public: | ||
| 271 | OGLQuery() = default; | ||
| 272 | |||
| 273 | OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 274 | |||
| 275 | ~OGLQuery() { | ||
| 276 | Release(); | ||
| 277 | } | ||
| 278 | |||
| 279 | OGLQuery& operator=(OGLQuery&& o) noexcept { | ||
| 280 | Release(); | ||
| 281 | handle = std::exchange(o.handle, 0); | ||
| 282 | return *this; | ||
| 283 | } | ||
| 284 | |||
| 285 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 286 | void Create(GLenum target); | ||
| 287 | |||
| 288 | /// Deletes the internal OpenGL resource | ||
| 289 | void Release(); | ||
| 290 | |||
| 291 | GLuint handle = 0; | ||
| 292 | }; | ||
| 293 | |||
| 269 | } // namespace OpenGL | 294 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9840f26e5..588a6835f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 104 | features.depthBiasClamp = true; | 104 | features.depthBiasClamp = true; |
| 105 | features.geometryShader = true; | 105 | features.geometryShader = true; |
| 106 | features.tessellationShader = true; | 106 | features.tessellationShader = true; |
| 107 | features.occlusionQueryPrecise = true; | ||
| 107 | features.fragmentStoresAndAtomics = true; | 108 | features.fragmentStoresAndAtomics = true; |
| 108 | features.shaderImageGatherExtended = true; | 109 | features.shaderImageGatherExtended = true; |
| 109 | features.shaderStorageImageWriteWithoutFormat = true; | 110 | features.shaderStorageImageWriteWithoutFormat = true; |
| @@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 117 | bit8_storage.uniformAndStorageBuffer8BitAccess = true; | 118 | bit8_storage.uniformAndStorageBuffer8BitAccess = true; |
| 118 | SetNext(next, bit8_storage); | 119 | SetNext(next, bit8_storage); |
| 119 | 120 | ||
| 121 | vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; | ||
| 122 | host_query_reset.hostQueryReset = true; | ||
| 123 | SetNext(next, host_query_reset); | ||
| 124 | |||
| 120 | vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; | 125 | vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; |
| 121 | if (is_float16_supported) { | 126 | if (is_float16_supported) { |
| 122 | float16_int8.shaderFloat16 = true; | 127 | float16_int8.shaderFloat16 = true; |
| @@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
| 273 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | 278 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, |
| 274 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | 279 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, |
| 275 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | 280 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |
| 281 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | ||
| 276 | }; | 282 | }; |
| 277 | std::bitset<required_extensions.size()> available_extensions{}; | 283 | std::bitset<required_extensions.size()> available_extensions{}; |
| 278 | 284 | ||
| @@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
| 340 | std::make_pair(features.depthBiasClamp, "depthBiasClamp"), | 346 | std::make_pair(features.depthBiasClamp, "depthBiasClamp"), |
| 341 | std::make_pair(features.geometryShader, "geometryShader"), | 347 | std::make_pair(features.geometryShader, "geometryShader"), |
| 342 | std::make_pair(features.tessellationShader, "tessellationShader"), | 348 | std::make_pair(features.tessellationShader, "tessellationShader"), |
| 349 | std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), | ||
| 343 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), | 350 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), |
| 344 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), | 351 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), |
| 345 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, | 352 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, |
| @@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 376 | } | 383 | } |
| 377 | }; | 384 | }; |
| 378 | 385 | ||
| 379 | extensions.reserve(13); | 386 | extensions.reserve(14); |
| 380 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 387 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 381 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | 388 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); |
| 382 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | 389 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); |
| @@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 384 | extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); | 391 | extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); |
| 385 | extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); | 392 | extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); |
| 386 | extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); | 393 | extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); |
| 394 | extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); | ||
| 387 | 395 | ||
| 388 | [[maybe_unused]] const bool nsight = | 396 | [[maybe_unused]] const bool nsight = |
| 389 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 397 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp new file mode 100644 index 000000000..ffbf60dda --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <cstdint> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 16 | |||
| 17 | namespace Vulkan { | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; | ||
| 22 | |||
| 23 | constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { | ||
| 24 | return QUERY_TARGETS[static_cast<std::size_t>(type)]; | ||
| 25 | } | ||
| 26 | |||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} | ||
| 30 | |||
| 31 | QueryPool::~QueryPool() = default; | ||
| 32 | |||
| 33 | void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { | ||
| 34 | device = &device_; | ||
| 35 | type = type_; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { | ||
| 39 | std::size_t index; | ||
| 40 | do { | ||
| 41 | index = CommitResource(fence); | ||
| 42 | } while (usage[index]); | ||
| 43 | usage[index] = true; | ||
| 44 | |||
| 45 | return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; | ||
| 46 | } | ||
| 47 | |||
| 48 | void QueryPool::Allocate(std::size_t begin, std::size_t end) { | ||
| 49 | usage.resize(end); | ||
| 50 | |||
| 51 | const auto dev = device->GetLogical(); | ||
| 52 | const u32 size = static_cast<u32>(end - begin); | ||
| 53 | const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); | ||
| 54 | pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); | ||
| 55 | } | ||
| 56 | |||
| 57 | void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { | ||
| 58 | const auto it = | ||
| 59 | std::find_if(std::begin(pools), std::end(pools), | ||
| 60 | [query_pool = query.first](auto& pool) { return query_pool == *pool; }); | ||
| 61 | ASSERT(it != std::end(pools)); | ||
| 62 | |||
| 63 | const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); | ||
| 64 | usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; | ||
| 65 | } | ||
| 66 | |||
| 67 | VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 68 | const VKDevice& device, VKScheduler& scheduler) | ||
| 69 | : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, | ||
| 70 | QueryPool>{system, rasterizer}, | ||
| 71 | device{device}, scheduler{scheduler} { | ||
| 72 | for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { | ||
| 73 | query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | VKQueryCache::~VKQueryCache() = default; | ||
| 78 | |||
| 79 | std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { | ||
| 80 | return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); | ||
| 81 | } | ||
| 82 | |||
| 83 | void VKQueryCache::Reserve(VideoCore::QueryType type, | ||
| 84 | std::pair<vk::QueryPool, std::uint32_t> query) { | ||
| 85 | query_pools[static_cast<std::size_t>(type)].Reserve(query); | ||
| 86 | } | ||
| 87 | |||
| 88 | HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, | ||
| 89 | VideoCore::QueryType type) | ||
| 90 | : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, | ||
| 91 | type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { | ||
| 92 | const auto dev = cache.Device().GetLogical(); | ||
| 93 | cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { | ||
| 94 | dev.resetQueryPoolEXT(query.first, query.second, 1, dld); | ||
| 95 | cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); | ||
| 96 | }); | ||
| 97 | } | ||
| 98 | |||
| 99 | HostCounter::~HostCounter() { | ||
| 100 | cache.Reserve(type, query); | ||
| 101 | } | ||
| 102 | |||
| 103 | void HostCounter::EndQuery() { | ||
| 104 | cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { | ||
| 105 | cmdbuf.endQuery(query.first, query.second, dld); | ||
| 106 | }); | ||
| 107 | } | ||
| 108 | |||
| 109 | u64 HostCounter::BlockingQuery() const { | ||
| 110 | if (ticks >= cache.Scheduler().Ticks()) { | ||
| 111 | cache.Scheduler().Flush(); | ||
| 112 | } | ||
| 113 | |||
| 114 | const auto dev = cache.Device().GetLogical(); | ||
| 115 | const auto& dld = cache.Device().GetDispatchLoader(); | ||
| 116 | u64 value; | ||
| 117 | dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), | ||
| 118 | vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); | ||
| 119 | return value; | ||
| 120 | } | ||
| 121 | |||
| 122 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h new file mode 100644 index 000000000..c3092ee96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <memory> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/query_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 17 | |||
| 18 | namespace VideoCore { | ||
| 19 | class RasterizerInterface; | ||
| 20 | } | ||
| 21 | |||
| 22 | namespace Vulkan { | ||
| 23 | |||
| 24 | class CachedQuery; | ||
| 25 | class HostCounter; | ||
| 26 | class VKDevice; | ||
| 27 | class VKQueryCache; | ||
| 28 | class VKScheduler; | ||
| 29 | |||
| 30 | using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; | ||
| 31 | |||
| 32 | class QueryPool final : public VKFencedPool { | ||
| 33 | public: | ||
| 34 | explicit QueryPool(); | ||
| 35 | ~QueryPool() override; | ||
| 36 | |||
| 37 | void Initialize(const VKDevice& device, VideoCore::QueryType type); | ||
| 38 | |||
| 39 | std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); | ||
| 40 | |||
| 41 | void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); | ||
| 42 | |||
| 43 | protected: | ||
| 44 | void Allocate(std::size_t begin, std::size_t end) override; | ||
| 45 | |||
| 46 | private: | ||
| 47 | static constexpr std::size_t GROW_STEP = 512; | ||
| 48 | |||
| 49 | const VKDevice* device = nullptr; | ||
| 50 | VideoCore::QueryType type = {}; | ||
| 51 | |||
| 52 | std::vector<UniqueQueryPool> pools; | ||
| 53 | std::vector<bool> usage; | ||
| 54 | }; | ||
| 55 | |||
| 56 | class VKQueryCache final | ||
| 57 | : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, | ||
| 58 | QueryPool> { | ||
| 59 | public: | ||
| 60 | explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 61 | const VKDevice& device, VKScheduler& scheduler); | ||
| 62 | ~VKQueryCache(); | ||
| 63 | |||
| 64 | std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); | ||
| 65 | |||
| 66 | void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); | ||
| 67 | |||
| 68 | const VKDevice& Device() const noexcept { | ||
| 69 | return device; | ||
| 70 | } | ||
| 71 | |||
| 72 | VKScheduler& Scheduler() const noexcept { | ||
| 73 | return scheduler; | ||
| 74 | } | ||
| 75 | |||
| 76 | private: | ||
| 77 | const VKDevice& device; | ||
| 78 | VKScheduler& scheduler; | ||
| 79 | }; | ||
| 80 | |||
| 81 | class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { | ||
| 82 | public: | ||
| 83 | explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, | ||
| 84 | VideoCore::QueryType type); | ||
| 85 | ~HostCounter(); | ||
| 86 | |||
| 87 | void EndQuery(); | ||
| 88 | |||
| 89 | private: | ||
| 90 | u64 BlockingQuery() const override; | ||
| 91 | |||
| 92 | VKQueryCache& cache; | ||
| 93 | const VideoCore::QueryType type; | ||
| 94 | const std::pair<vk::QueryPool, std::uint32_t> query; | ||
| 95 | const u64 ticks; | ||
| 96 | }; | ||
| 97 | |||
| 98 | class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { | ||
| 99 | public: | ||
| 100 | explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) | ||
| 101 | : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} | ||
| 102 | }; | ||
| 103 | |||
| 104 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bfeaf98ac..31c078f6a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||
| 289 | staging_pool), | 289 | staging_pool), |
| 290 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | 290 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), |
| 291 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | 291 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |
| 292 | sampler_cache(device) {} | 292 | sampler_cache(device), query_cache(system, *this, device, scheduler) { |
| 293 | scheduler.SetQueryCache(query_cache); | ||
| 294 | } | ||
| 293 | 295 | ||
| 294 | RasterizerVulkan::~RasterizerVulkan() = default; | 296 | RasterizerVulkan::~RasterizerVulkan() = default; |
| 295 | 297 | ||
| @@ -298,6 +300,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 298 | 300 | ||
| 299 | FlushWork(); | 301 | FlushWork(); |
| 300 | 302 | ||
| 303 | query_cache.UpdateCounters(); | ||
| 304 | |||
| 301 | const auto& gpu = system.GPU().Maxwell3D(); | 305 | const auto& gpu = system.GPU().Maxwell3D(); |
| 302 | GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; | 306 | GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; |
| 303 | 307 | ||
| @@ -352,6 +356,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 352 | void RasterizerVulkan::Clear() { | 356 | void RasterizerVulkan::Clear() { |
| 353 | MICROPROFILE_SCOPE(Vulkan_Clearing); | 357 | MICROPROFILE_SCOPE(Vulkan_Clearing); |
| 354 | 358 | ||
| 359 | query_cache.UpdateCounters(); | ||
| 360 | |||
| 355 | const auto& gpu = system.GPU().Maxwell3D(); | 361 | const auto& gpu = system.GPU().Maxwell3D(); |
| 356 | if (!system.GPU().Maxwell3D().ShouldExecute()) { | 362 | if (!system.GPU().Maxwell3D().ShouldExecute()) { |
| 357 | return; | 363 | return; |
| @@ -419,6 +425,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 419 | sampled_views.clear(); | 425 | sampled_views.clear(); |
| 420 | image_views.clear(); | 426 | image_views.clear(); |
| 421 | 427 | ||
| 428 | query_cache.UpdateCounters(); | ||
| 429 | |||
| 422 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 430 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 423 | const ComputePipelineCacheKey key{ | 431 | const ComputePipelineCacheKey key{ |
| 424 | code_addr, | 432 | code_addr, |
| @@ -461,17 +469,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 461 | }); | 469 | }); |
| 462 | } | 470 | } |
| 463 | 471 | ||
| 472 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | ||
| 473 | query_cache.ResetCounter(type); | ||
| 474 | } | ||
| 475 | |||
| 476 | void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | ||
| 477 | std::optional<u64> timestamp) { | ||
| 478 | query_cache.Query(gpu_addr, type, timestamp); | ||
| 479 | } | ||
| 480 | |||
| 464 | void RasterizerVulkan::FlushAll() {} | 481 | void RasterizerVulkan::FlushAll() {} |
| 465 | 482 | ||
| 466 | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | 483 | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { |
| 467 | texture_cache.FlushRegion(addr, size); | 484 | texture_cache.FlushRegion(addr, size); |
| 468 | buffer_cache.FlushRegion(addr, size); | 485 | buffer_cache.FlushRegion(addr, size); |
| 486 | query_cache.FlushRegion(addr, size); | ||
| 469 | } | 487 | } |
| 470 | 488 | ||
| 471 | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | 489 | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { |
| 472 | texture_cache.InvalidateRegion(addr, size); | 490 | texture_cache.InvalidateRegion(addr, size); |
| 473 | pipeline_cache.InvalidateRegion(addr, size); | 491 | pipeline_cache.InvalidateRegion(addr, size); |
| 474 | buffer_cache.InvalidateRegion(addr, size); | 492 | buffer_cache.InvalidateRegion(addr, size); |
| 493 | query_cache.InvalidateRegion(addr, size); | ||
| 475 | } | 494 | } |
| 476 | 495 | ||
| 477 | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 496 | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ff74de164..138903d60 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 24 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 25 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 25 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 28 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 28 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 29 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 29 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | 30 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" |
| @@ -96,7 +97,7 @@ struct ImageView { | |||
| 96 | vk::ImageLayout* layout = nullptr; | 97 | vk::ImageLayout* layout = nullptr; |
| 97 | }; | 98 | }; |
| 98 | 99 | ||
| 99 | class RasterizerVulkan : public VideoCore::RasterizerAccelerated { | 100 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| 100 | public: | 101 | public: |
| 101 | explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, | 102 | explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, |
| 102 | VKScreenInfo& screen_info, const VKDevice& device, | 103 | VKScreenInfo& screen_info, const VKDevice& device, |
| @@ -107,6 +108,8 @@ public: | |||
| 107 | void Draw(bool is_indexed, bool is_instanced) override; | 108 | void Draw(bool is_indexed, bool is_instanced) override; |
| 108 | void Clear() override; | 109 | void Clear() override; |
| 109 | void DispatchCompute(GPUVAddr code_addr) override; | 110 | void DispatchCompute(GPUVAddr code_addr) override; |
| 111 | void ResetCounter(VideoCore::QueryType type) override; | ||
| 112 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||
| 110 | void FlushAll() override; | 113 | void FlushAll() override; |
| 111 | void FlushRegion(CacheAddr addr, u64 size) override; | 114 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 112 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 115 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| @@ -244,6 +247,7 @@ private: | |||
| 244 | VKPipelineCache pipeline_cache; | 247 | VKPipelineCache pipeline_cache; |
| 245 | VKBufferCache buffer_cache; | 248 | VKBufferCache buffer_cache; |
| 246 | VKSamplerCache sampler_cache; | 249 | VKSamplerCache sampler_cache; |
| 250 | VKQueryCache query_cache; | ||
| 247 | 251 | ||
| 248 | std::array<View, Maxwell::NumRenderTargets> color_attachments; | 252 | std::array<View, Maxwell::NumRenderTargets> color_attachments; |
| 249 | View zeta_attachment; | 253 | View zeta_attachment; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index d66133ad1..92bd6c344 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "video_core/renderer_vulkan/declarations.h" | 7 | #include "video_core/renderer_vulkan/declarations.h" |
| 8 | #include "video_core/renderer_vulkan/vk_device.h" | 8 | #include "video_core/renderer_vulkan/vk_device.h" |
| 9 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 10 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 11 | 12 | ||
| @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { | |||
| 139 | } | 140 | } |
| 140 | 141 | ||
| 141 | void VKScheduler::AllocateNewContext() { | 142 | void VKScheduler::AllocateNewContext() { |
| 143 | ++ticks; | ||
| 144 | |||
| 142 | std::unique_lock lock{mutex}; | 145 | std::unique_lock lock{mutex}; |
| 143 | current_fence = next_fence; | 146 | current_fence = next_fence; |
| 144 | next_fence = &resource_manager.CommitFence(); | 147 | next_fence = &resource_manager.CommitFence(); |
| @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() { | |||
| 146 | current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); | 149 | current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); |
| 147 | current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, | 150 | current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, |
| 148 | device.GetDispatchLoader()); | 151 | device.GetDispatchLoader()); |
| 152 | // Enable counters once again. These are disabled when a command buffer is finished. | ||
| 153 | if (query_cache) { | ||
| 154 | query_cache->UpdateCounters(); | ||
| 155 | } | ||
| 149 | } | 156 | } |
| 150 | 157 | ||
| 151 | void VKScheduler::InvalidateState() { | 158 | void VKScheduler::InvalidateState() { |
| @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() { | |||
| 159 | } | 166 | } |
| 160 | 167 | ||
| 161 | void VKScheduler::EndPendingOperations() { | 168 | void VKScheduler::EndPendingOperations() { |
| 169 | query_cache->DisableStreams(); | ||
| 162 | EndRenderPass(); | 170 | EndRenderPass(); |
| 163 | } | 171 | } |
| 164 | 172 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index bcdffbba0..62fd7858b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <atomic> | ||
| 7 | #include <condition_variable> | 8 | #include <condition_variable> |
| 8 | #include <memory> | 9 | #include <memory> |
| 9 | #include <optional> | 10 | #include <optional> |
| @@ -18,6 +19,7 @@ namespace Vulkan { | |||
| 18 | 19 | ||
| 19 | class VKDevice; | 20 | class VKDevice; |
| 20 | class VKFence; | 21 | class VKFence; |
| 22 | class VKQueryCache; | ||
| 21 | class VKResourceManager; | 23 | class VKResourceManager; |
| 22 | 24 | ||
| 23 | class VKFenceView { | 25 | class VKFenceView { |
| @@ -67,6 +69,11 @@ public: | |||
| 67 | /// Binds a pipeline to the current execution context. | 69 | /// Binds a pipeline to the current execution context. |
| 68 | void BindGraphicsPipeline(vk::Pipeline pipeline); | 70 | void BindGraphicsPipeline(vk::Pipeline pipeline); |
| 69 | 71 | ||
| 72 | /// Assigns the query cache. | ||
| 73 | void SetQueryCache(VKQueryCache& query_cache_) { | ||
| 74 | query_cache = &query_cache_; | ||
| 75 | } | ||
| 76 | |||
| 70 | /// Returns true when viewports have been set in the current command buffer. | 77 | /// Returns true when viewports have been set in the current command buffer. |
| 71 | bool TouchViewports() { | 78 | bool TouchViewports() { |
| 72 | return std::exchange(state.viewports, true); | 79 | return std::exchange(state.viewports, true); |
| @@ -112,6 +119,11 @@ public: | |||
| 112 | return current_fence; | 119 | return current_fence; |
| 113 | } | 120 | } |
| 114 | 121 | ||
| 122 | /// Returns the current command buffer tick. | ||
| 123 | u64 Ticks() const { | ||
| 124 | return ticks; | ||
| 125 | } | ||
| 126 | |||
| 115 | private: | 127 | private: |
| 116 | class Command { | 128 | class Command { |
| 117 | public: | 129 | public: |
| @@ -205,6 +217,8 @@ private: | |||
| 205 | 217 | ||
| 206 | const VKDevice& device; | 218 | const VKDevice& device; |
| 207 | VKResourceManager& resource_manager; | 219 | VKResourceManager& resource_manager; |
| 220 | VKQueryCache* query_cache = nullptr; | ||
| 221 | |||
| 208 | vk::CommandBuffer current_cmdbuf; | 222 | vk::CommandBuffer current_cmdbuf; |
| 209 | VKFence* current_fence = nullptr; | 223 | VKFence* current_fence = nullptr; |
| 210 | VKFence* next_fence = nullptr; | 224 | VKFence* next_fence = nullptr; |
| @@ -227,6 +241,7 @@ private: | |||
| 227 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; | 241 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; |
| 228 | std::mutex mutex; | 242 | std::mutex mutex; |
| 229 | std::condition_variable cv; | 243 | std::condition_variable cv; |
| 244 | std::atomic<u64> ticks = 0; | ||
| 230 | bool quit = false; | 245 | bool quit = false; |
| 231 | }; | 246 | }; |
| 232 | 247 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 24a658dce..f64f5da28 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -275,12 +275,14 @@ public: | |||
| 275 | AddCapability(spv::Capability::ImageGatherExtended); | 275 | AddCapability(spv::Capability::ImageGatherExtended); |
| 276 | AddCapability(spv::Capability::SampledBuffer); | 276 | AddCapability(spv::Capability::SampledBuffer); |
| 277 | AddCapability(spv::Capability::StorageImageWriteWithoutFormat); | 277 | AddCapability(spv::Capability::StorageImageWriteWithoutFormat); |
| 278 | AddCapability(spv::Capability::DrawParameters); | ||
| 278 | AddCapability(spv::Capability::SubgroupBallotKHR); | 279 | AddCapability(spv::Capability::SubgroupBallotKHR); |
| 279 | AddCapability(spv::Capability::SubgroupVoteKHR); | 280 | AddCapability(spv::Capability::SubgroupVoteKHR); |
| 280 | AddExtension("SPV_KHR_shader_ballot"); | 281 | AddExtension("SPV_KHR_shader_ballot"); |
| 281 | AddExtension("SPV_KHR_subgroup_vote"); | 282 | AddExtension("SPV_KHR_subgroup_vote"); |
| 282 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | 283 | AddExtension("SPV_KHR_storage_buffer_storage_class"); |
| 283 | AddExtension("SPV_KHR_variable_pointers"); | 284 | AddExtension("SPV_KHR_variable_pointers"); |
| 285 | AddExtension("SPV_KHR_shader_draw_parameters"); | ||
| 284 | 286 | ||
| 285 | if (ir.UsesViewportIndex()) { | 287 | if (ir.UsesViewportIndex()) { |
| 286 | AddCapability(spv::Capability::MultiViewport); | 288 | AddCapability(spv::Capability::MultiViewport); |
| @@ -492,9 +494,11 @@ private: | |||
| 492 | interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); | 494 | interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); |
| 493 | 495 | ||
| 494 | // Declare input attributes | 496 | // Declare input attributes |
| 495 | vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_uint, "vertex_index"); | 497 | vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); |
| 496 | instance_index = | 498 | instance_index = |
| 497 | DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_uint, "instance_index"); | 499 | DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); |
| 500 | base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); | ||
| 501 | base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); | ||
| 498 | } | 502 | } |
| 499 | 503 | ||
| 500 | void DeclareTessControl() { | 504 | void DeclareTessControl() { |
| @@ -1068,9 +1072,12 @@ private: | |||
| 1068 | return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), | 1072 | return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), |
| 1069 | Type::Float}; | 1073 | Type::Float}; |
| 1070 | case 2: | 1074 | case 2: |
| 1071 | return {OpLoad(t_uint, instance_index), Type::Uint}; | 1075 | return { |
| 1076 | OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), | ||
| 1077 | Type::Int}; | ||
| 1072 | case 3: | 1078 | case 3: |
| 1073 | return {OpLoad(t_uint, vertex_index), Type::Uint}; | 1079 | return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), |
| 1080 | Type::Int}; | ||
| 1074 | } | 1081 | } |
| 1075 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | 1082 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); |
| 1076 | return {Constant(t_uint, 0U), Type::Uint}; | 1083 | return {Constant(t_uint, 0U), Type::Uint}; |
| @@ -2542,6 +2549,8 @@ private: | |||
| 2542 | 2549 | ||
| 2543 | Id instance_index{}; | 2550 | Id instance_index{}; |
| 2544 | Id vertex_index{}; | 2551 | Id vertex_index{}; |
| 2552 | Id base_instance{}; | ||
| 2553 | Id base_vertex{}; | ||
| 2545 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; | 2554 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; |
| 2546 | Id frag_depth{}; | 2555 | Id frag_depth{}; |
| 2547 | Id frag_coord{}; | 2556 | Id frag_coord{}; |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 0eeb75559..6ead42070 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -83,14 +83,14 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 83 | 83 | ||
| 84 | const bool input_signed = instr.conversion.is_input_signed; | 84 | const bool input_signed = instr.conversion.is_input_signed; |
| 85 | 85 | ||
| 86 | if (instr.conversion.src_size == Register::Size::Byte) { | 86 | if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { |
| 87 | const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; | 87 | ASSERT(instr.conversion.src_size == Register::Size::Byte || |
| 88 | if (offset > 0) { | 88 | instr.conversion.src_size == Register::Size::Short); |
| 89 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | 89 | if (instr.conversion.src_size == Register::Size::Short) { |
| 90 | std::move(value), Immediate(offset)); | 90 | ASSERT(offset == 0 || offset == 2); |
| 91 | } | 91 | } |
| 92 | } else { | 92 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, |
| 93 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | 93 | std::move(value), Immediate(offset * 8)); |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | 96 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index cd94693c1..6209fff75 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -630,6 +630,7 @@ void Config::ReadRendererValues() { | |||
| 630 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); | 630 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); |
| 631 | Settings::values.resolution_factor = | 631 | Settings::values.resolution_factor = |
| 632 | ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); | 632 | ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); |
| 633 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); | ||
| 633 | Settings::values.use_frame_limit = | 634 | Settings::values.use_frame_limit = |
| 634 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); | 635 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); |
| 635 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | 636 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); |
| @@ -1064,6 +1065,7 @@ void Config::SaveRendererValues() { | |||
| 1064 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); | 1065 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); |
| 1065 | WriteSetting(QStringLiteral("resolution_factor"), | 1066 | WriteSetting(QStringLiteral("resolution_factor"), |
| 1066 | static_cast<double>(Settings::values.resolution_factor), 1.0); | 1067 | static_cast<double>(Settings::values.resolution_factor), 1.0); |
| 1068 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); | ||
| 1067 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); | 1069 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); |
| 1068 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); | 1070 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); |
| 1069 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, | 1071 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index f57a24e36..ea899c080 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -97,6 +97,7 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 97 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); | 97 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); |
| 98 | ui->resolution_factor_combobox->setCurrentIndex( | 98 | ui->resolution_factor_combobox->setCurrentIndex( |
| 99 | static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); | 99 | static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); |
| 100 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); | ||
| 100 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 101 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 101 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 102 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| 102 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | 103 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); |
| @@ -114,6 +115,7 @@ void ConfigureGraphics::ApplyConfiguration() { | |||
| 114 | Settings::values.vulkan_device = vulkan_device; | 115 | Settings::values.vulkan_device = vulkan_device; |
| 115 | Settings::values.resolution_factor = | 116 | Settings::values.resolution_factor = |
| 116 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); | 117 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); |
| 118 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); | ||
| 117 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 119 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 118 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | 120 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); |
| 119 | Settings::values.use_asynchronous_gpu_emulation = | 121 | Settings::values.use_asynchronous_gpu_emulation = |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index e24372204..db60426ab 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -139,6 +139,41 @@ | |||
| 139 | </layout> | 139 | </layout> |
| 140 | </item> | 140 | </item> |
| 141 | <item> | 141 | <item> |
| 142 | <layout class="QHBoxLayout" name="horizontalLayout_6"> | ||
| 143 | <item> | ||
| 144 | <widget class="QLabel" name="ar_label"> | ||
| 145 | <property name="text"> | ||
| 146 | <string>Aspect Ratio:</string> | ||
| 147 | </property> | ||
| 148 | </widget> | ||
| 149 | </item> | ||
| 150 | <item> | ||
| 151 | <widget class="QComboBox" name="aspect_ratio_combobox"> | ||
| 152 | <item> | ||
| 153 | <property name="text"> | ||
| 154 | <string>Default (16:9)</string> | ||
| 155 | </property> | ||
| 156 | </item> | ||
| 157 | <item> | ||
| 158 | <property name="text"> | ||
| 159 | <string>Force 4:3</string> | ||
| 160 | </property> | ||
| 161 | </item> | ||
| 162 | <item> | ||
| 163 | <property name="text"> | ||
| 164 | <string>Force 21:9</string> | ||
| 165 | </property> | ||
| 166 | </item> | ||
| 167 | <item> | ||
| 168 | <property name="text"> | ||
| 169 | <string>Stretch to Window</string> | ||
| 170 | </property> | ||
| 171 | </item> | ||
| 172 | </widget> | ||
| 173 | </item> | ||
| 174 | </layout> | ||
| 175 | </item> | ||
| 176 | <item> | ||
| 142 | <layout class="QHBoxLayout" name="horizontalLayout_3"> | 177 | <layout class="QHBoxLayout" name="horizontalLayout_3"> |
| 143 | <item> | 178 | <item> |
| 144 | <widget class="QLabel" name="bg_label"> | 179 | <widget class="QLabel" name="bg_label"> |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index b01a36023..96f1ce3af 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -379,6 +379,8 @@ void Config::ReadValues() { | |||
| 379 | 379 | ||
| 380 | Settings::values.resolution_factor = | 380 | Settings::values.resolution_factor = |
| 381 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | 381 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); |
| 382 | Settings::values.aspect_ratio = | ||
| 383 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | ||
| 382 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); | 384 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); |
| 383 | Settings::values.frame_limit = | 385 | Settings::values.frame_limit = |
| 384 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); | 386 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 00fd88279..8a2b658cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -122,6 +122,10 @@ use_shader_jit = | |||
| 122 | # factor for the Switch resolution | 122 | # factor for the Switch resolution |
| 123 | resolution_factor = | 123 | resolution_factor = |
| 124 | 124 | ||
| 125 | # Aspect ratio | ||
| 126 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | ||
| 127 | aspect_ratio = | ||
| 128 | |||
| 125 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. | 129 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. |
| 126 | # 0 (default): Off, 1: On | 130 | # 0 (default): Off, 1: On |
| 127 | use_vsync = | 131 | use_vsync = |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 84ab4d687..0ac93b62a 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -118,6 +118,8 @@ void Config::ReadValues() { | |||
| 118 | // Renderer | 118 | // Renderer |
| 119 | Settings::values.resolution_factor = | 119 | Settings::values.resolution_factor = |
| 120 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | 120 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); |
| 121 | Settings::values.aspect_ratio = | ||
| 122 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | ||
| 121 | Settings::values.use_frame_limit = false; | 123 | Settings::values.use_frame_limit = false; |
| 122 | Settings::values.frame_limit = 100; | 124 | Settings::values.frame_limit = 100; |
| 123 | Settings::values.use_disk_shader_cache = | 125 | Settings::values.use_disk_shader_cache = |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index 9a3e86d68..8d93f7b88 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -26,6 +26,10 @@ use_shader_jit = | |||
| 26 | # factor for the Switch resolution | 26 | # factor for the Switch resolution |
| 27 | resolution_factor = | 27 | resolution_factor = |
| 28 | 28 | ||
| 29 | # Aspect ratio | ||
| 30 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | ||
| 31 | aspect_ratio = | ||
| 32 | |||
| 29 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. | 33 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. |
| 30 | # 0 (default): Off, 1: On | 34 | # 0 (default): Off, 1: On |
| 31 | use_vsync = | 35 | use_vsync = |