diff options
| author | 2020-04-27 00:18:46 -0400 | |
|---|---|---|
| committer | 2020-04-27 00:18:46 -0400 | |
| commit | 6c7d8073be9ab0ce92d346742989800895beeffe (patch) | |
| tree | 298da9383d7f883102643f0ab146dda72d9f5358 /src | |
| parent | Merge pull request #3795 from vitor-k/fix-folder (diff) | |
| parent | Clang Format. (diff) | |
| download | yuzu-6c7d8073be9ab0ce92d346742989800895beeffe.tar.gz yuzu-6c7d8073be9ab0ce92d346742989800895beeffe.tar.xz yuzu-6c7d8073be9ab0ce92d346742989800895beeffe.zip | |
Merge pull request #3742 from FernandoS27/command-list
Optimize GPU Command Lists and Introduce Fast GPU Time Option
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/settings.h | 1 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 74 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 3 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 55 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 10 | ||||
| -rw-r--r-- | src/yuzu/configuration/config.cpp | 3 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics_advanced.cpp | 2 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics_advanced.ui | 7 | ||||
| -rw-r--r-- | src/yuzu_cmd/config.cpp | 2 | ||||
| -rw-r--r-- | src/yuzu_tester/config.cpp | 2 |
20 files changed, 215 insertions, 15 deletions
diff --git a/src/core/settings.h b/src/core/settings.h index 7d09253f5..163900f0b 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -446,6 +446,7 @@ struct Values { | |||
| 446 | bool use_asynchronous_gpu_emulation; | 446 | bool use_asynchronous_gpu_emulation; |
| 447 | bool use_vsync; | 447 | bool use_vsync; |
| 448 | bool force_30fps_mode; | 448 | bool force_30fps_mode; |
| 449 | bool use_fast_gpu_time; | ||
| 449 | 450 | ||
| 450 | float bg_red; | 451 | float bg_red; |
| 451 | float bg_green; | 452 | float bg_green; |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 324dafdcd..16311f05e 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -71,16 +71,22 @@ bool DmaPusher::Step() { | |||
| 71 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | 71 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), |
| 72 | command_list_header.size * sizeof(u32)); | 72 | command_list_header.size * sizeof(u32)); |
| 73 | 73 | ||
| 74 | for (const CommandHeader& command_header : command_headers) { | 74 | for (std::size_t index = 0; index < command_headers.size();) { |
| 75 | 75 | const CommandHeader& command_header = command_headers[index]; | |
| 76 | // now, see if we're in the middle of a command | 76 | |
| 77 | if (dma_state.length_pending) { | 77 | if (dma_state.method_count) { |
| 78 | // Second word of long non-inc methods command - method count | ||
| 79 | dma_state.length_pending = 0; | ||
| 80 | dma_state.method_count = command_header.method_count_; | ||
| 81 | } else if (dma_state.method_count) { | ||
| 82 | // Data word of methods command | 78 | // Data word of methods command |
| 83 | CallMethod(command_header.argument); | 79 | if (dma_state.non_incrementing) { |
| 80 | const u32 max_write = static_cast<u32>( | ||
| 81 | std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) - | ||
| 82 | index); | ||
| 83 | CallMultiMethod(&command_header.argument, max_write); | ||
| 84 | dma_state.method_count -= max_write; | ||
| 85 | index += max_write; | ||
| 86 | continue; | ||
| 87 | } else { | ||
| 88 | CallMethod(command_header.argument); | ||
| 89 | } | ||
| 84 | 90 | ||
| 85 | if (!dma_state.non_incrementing) { | 91 | if (!dma_state.non_incrementing) { |
| 86 | dma_state.method++; | 92 | dma_state.method++; |
| @@ -120,6 +126,7 @@ bool DmaPusher::Step() { | |||
| 120 | break; | 126 | break; |
| 121 | } | 127 | } |
| 122 | } | 128 | } |
| 129 | index++; | ||
| 123 | } | 130 | } |
| 124 | 131 | ||
| 125 | if (!non_main) { | 132 | if (!non_main) { |
| @@ -140,4 +147,9 @@ void DmaPusher::CallMethod(u32 argument) const { | |||
| 140 | gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); | 147 | gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); |
| 141 | } | 148 | } |
| 142 | 149 | ||
| 150 | void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | ||
| 151 | gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, | ||
| 152 | dma_state.method_count); | ||
| 153 | } | ||
| 154 | |||
| 143 | } // namespace Tegra | 155 | } // namespace Tegra |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index d6188614a..6cef71306 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -75,6 +75,7 @@ private: | |||
| 75 | void SetState(const CommandHeader& command_header); | 75 | void SetState(const CommandHeader& command_header); |
| 76 | 76 | ||
| 77 | void CallMethod(u32 argument) const; | 77 | void CallMethod(u32 argument) const; |
| 78 | void CallMultiMethod(const u32* base_start, u32 num_methods) const; | ||
| 78 | 79 | ||
| 79 | std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once | 80 | std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once |
| 80 | 81 | ||
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index bace6affb..8a47614d2 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -28,6 +28,12 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 28 | } | 28 | } |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { | ||
| 32 | for (std::size_t i = 0; i < amount; i++) { | ||
| 33 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 31 | static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { | 37 | static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { |
| 32 | const u32 line_a = src_2 - src_1; | 38 | const u32 line_a = src_2 - src_1; |
| 33 | const u32 line_b = dst_2 - dst_1; | 39 | const u32 line_b = dst_2 - dst_1; |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index dba342c70..939a5966d 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -39,6 +39,9 @@ public: | |||
| 39 | /// Write the value to the register identified by method. | 39 | /// Write the value to the register identified by method. |
| 40 | void CallMethod(const GPU::MethodCall& method_call); | 40 | void CallMethod(const GPU::MethodCall& method_call); |
| 41 | 41 | ||
| 42 | /// Write multiple values to the register identified by method. | ||
| 43 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | ||
| 44 | |||
| 42 | enum class Origin : u32 { | 45 | enum class Origin : u32 { |
| 43 | Center = 0, | 46 | Center = 0, |
| 44 | Corner = 1, | 47 | Corner = 1, |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 368c75a66..00a12175f 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -51,6 +51,13 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 51 | } | 51 | } |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||
| 55 | u32 methods_pending) { | ||
| 56 | for (std::size_t i = 0; i < amount; i++) { | ||
| 57 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 54 | Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { | 61 | Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { |
| 55 | const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); | 62 | const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); |
| 56 | ASSERT(cbuf_mask[regs.tex_cb_index]); | 63 | ASSERT(cbuf_mask[regs.tex_cb_index]); |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index eeb79c56f..fe55fdfd0 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -202,6 +202,9 @@ public: | |||
| 202 | /// Write the value to the register identified by method. | 202 | /// Write the value to the register identified by method. |
| 203 | void CallMethod(const GPU::MethodCall& method_call); | 203 | void CallMethod(const GPU::MethodCall& method_call); |
| 204 | 204 | ||
| 205 | /// Write multiple values to the register identified by method. | ||
| 206 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | ||
| 207 | |||
| 205 | Texture::FullTextureInfo GetTexture(std::size_t offset) const; | 208 | Texture::FullTextureInfo GetTexture(std::size_t offset) const; |
| 206 | 209 | ||
| 207 | /// Given a texture handle, returns the TSC and TIC entries. | 210 | /// Given a texture handle, returns the TSC and TIC entries. |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 597872e43..586ff15dc 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -41,4 +41,11 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 41 | } | 41 | } |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||
| 45 | u32 methods_pending) { | ||
| 46 | for (std::size_t i = 0; i < amount; i++) { | ||
| 47 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 44 | } // namespace Tegra::Engines | 51 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 396fb6e86..bb26fb030 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -40,6 +40,9 @@ public: | |||
| 40 | /// Write the value to the register identified by method. | 40 | /// Write the value to the register identified by method. |
| 41 | void CallMethod(const GPU::MethodCall& method_call); | 41 | void CallMethod(const GPU::MethodCall& method_call); |
| 42 | 42 | ||
| 43 | /// Write multiple values to the register identified by method. | ||
| 44 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | ||
| 45 | |||
| 43 | struct Regs { | 46 | struct Regs { |
| 44 | static constexpr size_t NUM_REGS = 0x7F; | 47 | static constexpr size_t NUM_REGS = 0x7F; |
| 45 | 48 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2824ed707..39e3b66a2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -280,6 +280,58 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 280 | } | 280 | } |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||
| 284 | u32 methods_pending) { | ||
| 285 | // Methods after 0xE00 are special, they're actually triggers for some microcode that was | ||
| 286 | // uploaded to the GPU during initialization. | ||
| 287 | if (method >= MacroRegistersStart) { | ||
| 288 | // We're trying to execute a macro | ||
| 289 | if (executing_macro == 0) { | ||
| 290 | // A macro call must begin by writing the macro method's register, not its argument. | ||
| 291 | ASSERT_MSG((method % 2) == 0, | ||
| 292 | "Can't start macro execution by writing to the ARGS register"); | ||
| 293 | executing_macro = method; | ||
| 294 | } | ||
| 295 | |||
| 296 | for (std::size_t i = 0; i < amount; i++) { | ||
| 297 | macro_params.push_back(base_start[i]); | ||
| 298 | } | ||
| 299 | |||
| 300 | // Call the macro when there are no more parameters in the command buffer | ||
| 301 | if (amount == methods_pending) { | ||
| 302 | CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); | ||
| 303 | macro_params.clear(); | ||
| 304 | } | ||
| 305 | return; | ||
| 306 | } | ||
| 307 | switch (method) { | ||
| 308 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): | ||
| 309 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): | ||
| 310 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): | ||
| 311 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]): | ||
| 312 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]): | ||
| 313 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]): | ||
| 314 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]): | ||
| 315 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]): | ||
| 316 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]): | ||
| 317 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]): | ||
| 318 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]): | ||
| 319 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]): | ||
| 320 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]): | ||
| 321 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): | ||
| 322 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): | ||
| 323 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { | ||
| 324 | ProcessCBMultiData(method, base_start, amount); | ||
| 325 | break; | ||
| 326 | } | ||
| 327 | default: { | ||
| 328 | for (std::size_t i = 0; i < amount; i++) { | ||
| 329 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | } | ||
| 333 | } | ||
| 334 | |||
| 283 | void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { | 335 | void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { |
| 284 | if (mme_draw.current_mode == MMEDrawMode::Undefined) { | 336 | if (mme_draw.current_mode == MMEDrawMode::Undefined) { |
| 285 | if (mme_draw.gl_begin_consume) { | 337 | if (mme_draw.gl_begin_consume) { |
| @@ -570,6 +622,28 @@ void Maxwell3D::StartCBData(u32 method) { | |||
| 570 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); | 622 | ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); |
| 571 | } | 623 | } |
| 572 | 624 | ||
| 625 | void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) { | ||
| 626 | if (cb_data_state.current != method) { | ||
| 627 | if (cb_data_state.current != null_cb_data) { | ||
| 628 | FinishCBData(); | ||
| 629 | } | ||
| 630 | constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); | ||
| 631 | cb_data_state.start_pos = regs.const_buffer.cb_pos; | ||
| 632 | cb_data_state.id = method - first_cb_data; | ||
| 633 | cb_data_state.current = method; | ||
| 634 | cb_data_state.counter = 0; | ||
| 635 | } | ||
| 636 | const std::size_t id = cb_data_state.id; | ||
| 637 | const std::size_t size = amount; | ||
| 638 | std::size_t i = 0; | ||
| 639 | for (; i < size; i++) { | ||
| 640 | cb_data_state.buffer[id][cb_data_state.counter] = start_base[i]; | ||
| 641 | cb_data_state.counter++; | ||
| 642 | } | ||
| 643 | // Increment the current buffer position. | ||
| 644 | regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount; | ||
| 645 | } | ||
| 646 | |||
| 573 | void Maxwell3D::FinishCBData() { | 647 | void Maxwell3D::FinishCBData() { |
| 574 | // Write the input value to the current const buffer at the current position. | 648 | // Write the input value to the current const buffer at the current position. |
| 575 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); | 649 | const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7bbc6600b..3dfba8197 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1359,6 +1359,9 @@ public: | |||
| 1359 | /// Write the value to the register identified by method. | 1359 | /// Write the value to the register identified by method. |
| 1360 | void CallMethod(const GPU::MethodCall& method_call); | 1360 | void CallMethod(const GPU::MethodCall& method_call); |
| 1361 | 1361 | ||
| 1362 | /// Write multiple values to the register identified by method. | ||
| 1363 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | ||
| 1364 | |||
| 1362 | /// Write the value to the register identified by method. | 1365 | /// Write the value to the register identified by method. |
| 1363 | void CallMethodFromMME(const GPU::MethodCall& method_call); | 1366 | void CallMethodFromMME(const GPU::MethodCall& method_call); |
| 1364 | 1367 | ||
| @@ -1512,6 +1515,7 @@ private: | |||
| 1512 | /// Handles a write to the CB_DATA[i] register. | 1515 | /// Handles a write to the CB_DATA[i] register. |
| 1513 | void StartCBData(u32 method); | 1516 | void StartCBData(u32 method); |
| 1514 | void ProcessCBData(u32 value); | 1517 | void ProcessCBData(u32 value); |
| 1518 | void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount); | ||
| 1515 | void FinishCBData(); | 1519 | void FinishCBData(); |
| 1516 | 1520 | ||
| 1517 | /// Handles a write to the CB_BIND register. | 1521 | /// Handles a write to the CB_BIND register. |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3bfed6ab8..6630005b0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -36,6 +36,13 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | |||
| 36 | #undef MAXWELLDMA_REG_INDEX | 36 | #undef MAXWELLDMA_REG_INDEX |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||
| 40 | u32 methods_pending) { | ||
| 41 | for (std::size_t i = 0; i < amount; i++) { | ||
| 42 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 39 | void MaxwellDMA::HandleCopy() { | 46 | void MaxwellDMA::HandleCopy() { |
| 40 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); | 47 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); |
| 41 | 48 | ||
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 4f40d1d1f..c43ed8194 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -35,6 +35,9 @@ public: | |||
| 35 | /// Write the value to the register identified by method. | 35 | /// Write the value to the register identified by method. |
| 36 | void CallMethod(const GPU::MethodCall& method_call); | 36 | void CallMethod(const GPU::MethodCall& method_call); |
| 37 | 37 | ||
| 38 | /// Write multiple values to the register identified by method. | ||
| 39 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | ||
| 40 | |||
| 38 | struct Regs { | 41 | struct Regs { |
| 39 | static constexpr std::size_t NUM_REGS = 0x1D6; | 42 | static constexpr std::size_t NUM_REGS = 0x1D6; |
| 40 | 43 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 3b7572d61..b87fd873d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "core/core_timing_util.h" | 9 | #include "core/core_timing_util.h" |
| 10 | #include "core/frontend/emu_window.h" | 10 | #include "core/frontend/emu_window.h" |
| 11 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 12 | #include "core/settings.h" | ||
| 12 | #include "video_core/engines/fermi_2d.h" | 13 | #include "video_core/engines/fermi_2d.h" |
| 13 | #include "video_core/engines/kepler_compute.h" | 14 | #include "video_core/engines/kepler_compute.h" |
| 14 | #include "video_core/engines/kepler_memory.h" | 15 | #include "video_core/engines/kepler_memory.h" |
| @@ -154,7 +155,10 @@ u64 GPU::GetTicks() const { | |||
| 154 | constexpr u64 gpu_ticks_den = 625; | 155 | constexpr u64 gpu_ticks_den = 625; |
| 155 | 156 | ||
| 156 | const u64 cpu_ticks = system.CoreTiming().GetTicks(); | 157 | const u64 cpu_ticks = system.CoreTiming().GetTicks(); |
| 157 | const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); | 158 | u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); |
| 159 | if (Settings::values.use_fast_gpu_time) { | ||
| 160 | nanoseconds /= 256; | ||
| 161 | } | ||
| 158 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | 162 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; |
| 159 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | 163 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; |
| 160 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | 164 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; |
| @@ -209,16 +213,32 @@ void GPU::CallMethod(const MethodCall& method_call) { | |||
| 209 | 213 | ||
| 210 | ASSERT(method_call.subchannel < bound_engines.size()); | 214 | ASSERT(method_call.subchannel < bound_engines.size()); |
| 211 | 215 | ||
| 212 | if (ExecuteMethodOnEngine(method_call)) { | 216 | if (ExecuteMethodOnEngine(method_call.method)) { |
| 213 | CallEngineMethod(method_call); | 217 | CallEngineMethod(method_call); |
| 214 | } else { | 218 | } else { |
| 215 | CallPullerMethod(method_call); | 219 | CallPullerMethod(method_call); |
| 216 | } | 220 | } |
| 217 | } | 221 | } |
| 218 | 222 | ||
| 219 | bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) { | 223 | void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, |
| 220 | const auto method = static_cast<BufferMethods>(method_call.method); | 224 | u32 methods_pending) { |
| 221 | return method >= BufferMethods::NonPullerMethods; | 225 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); |
| 226 | |||
| 227 | ASSERT(subchannel < bound_engines.size()); | ||
| 228 | |||
| 229 | if (ExecuteMethodOnEngine(method)) { | ||
| 230 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); | ||
| 231 | } else { | ||
| 232 | for (std::size_t i = 0; i < amount; i++) { | ||
| 233 | CallPullerMethod( | ||
| 234 | {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)}); | ||
| 235 | } | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | bool GPU::ExecuteMethodOnEngine(u32 method) { | ||
| 240 | const auto buffer_method = static_cast<BufferMethods>(method); | ||
| 241 | return buffer_method >= BufferMethods::NonPullerMethods; | ||
| 222 | } | 242 | } |
| 223 | 243 | ||
| 224 | void GPU::CallPullerMethod(const MethodCall& method_call) { | 244 | void GPU::CallPullerMethod(const MethodCall& method_call) { |
| @@ -298,6 +318,31 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { | |||
| 298 | } | 318 | } |
| 299 | } | 319 | } |
| 300 | 320 | ||
| 321 | void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 322 | u32 methods_pending) { | ||
| 323 | const EngineID engine = bound_engines[subchannel]; | ||
| 324 | |||
| 325 | switch (engine) { | ||
| 326 | case EngineID::FERMI_TWOD_A: | ||
| 327 | fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 328 | break; | ||
| 329 | case EngineID::MAXWELL_B: | ||
| 330 | maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 331 | break; | ||
| 332 | case EngineID::KEPLER_COMPUTE_B: | ||
| 333 | kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 334 | break; | ||
| 335 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 336 | maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 337 | break; | ||
| 338 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 339 | kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 340 | break; | ||
| 341 | default: | ||
| 342 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 343 | } | ||
| 344 | } | ||
| 345 | |||
| 301 | void GPU::ProcessBindMethod(const MethodCall& method_call) { | 346 | void GPU::ProcessBindMethod(const MethodCall& method_call) { |
| 302 | // Bind the current subchannel to the desired engine id. | 347 | // Bind the current subchannel to the desired engine id. |
| 303 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | 348 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 5e3eb94e9..dd51c95b7 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -155,6 +155,10 @@ public: | |||
| 155 | /// Calls a GPU method. | 155 | /// Calls a GPU method. |
| 156 | void CallMethod(const MethodCall& method_call); | 156 | void CallMethod(const MethodCall& method_call); |
| 157 | 157 | ||
| 158 | /// Calls a GPU multivalue method. | ||
| 159 | void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 160 | u32 methods_pending); | ||
| 161 | |||
| 158 | /// Flush all current written commands into the host GPU for execution. | 162 | /// Flush all current written commands into the host GPU for execution. |
| 159 | void FlushCommands(); | 163 | void FlushCommands(); |
| 160 | /// Synchronizes CPU writes with Host GPU memory. | 164 | /// Synchronizes CPU writes with Host GPU memory. |
| @@ -309,8 +313,12 @@ private: | |||
| 309 | /// Calls a GPU engine method. | 313 | /// Calls a GPU engine method. |
| 310 | void CallEngineMethod(const MethodCall& method_call); | 314 | void CallEngineMethod(const MethodCall& method_call); |
| 311 | 315 | ||
| 316 | /// Calls a GPU engine multivalue method. | ||
| 317 | void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 318 | u32 methods_pending); | ||
| 319 | |||
| 312 | /// Determines where the method should be executed. | 320 | /// Determines where the method should be executed. |
| 313 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | 321 | bool ExecuteMethodOnEngine(u32 method); |
| 314 | 322 | ||
| 315 | protected: | 323 | protected: |
| 316 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 324 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 6182ca0e0..a44eed047 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -643,6 +643,8 @@ void Config::ReadRendererValues() { | |||
| 643 | Settings::values.use_asynchronous_gpu_emulation = | 643 | Settings::values.use_asynchronous_gpu_emulation = |
| 644 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); | 644 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); |
| 645 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); | 645 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); |
| 646 | Settings::values.use_fast_gpu_time = | ||
| 647 | ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); | ||
| 646 | Settings::values.force_30fps_mode = | 648 | Settings::values.force_30fps_mode = |
| 647 | ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool(); | 649 | ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool(); |
| 648 | 650 | ||
| @@ -1084,6 +1086,7 @@ void Config::SaveRendererValues() { | |||
| 1084 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), | 1086 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), |
| 1085 | Settings::values.use_asynchronous_gpu_emulation, false); | 1087 | Settings::values.use_asynchronous_gpu_emulation, false); |
| 1086 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | 1088 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); |
| 1089 | WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); | ||
| 1087 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); | 1090 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); |
| 1088 | 1091 | ||
| 1089 | // Cast to double because Qt's written float values are not human-readable | 1092 | // Cast to double because Qt's written float values are not human-readable |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 0a3f47339..5bb2ae555 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -22,6 +22,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 22 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); | 22 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); |
| 23 | ui->use_vsync->setEnabled(runtime_lock); | 23 | ui->use_vsync->setEnabled(runtime_lock); |
| 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); | 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); |
| 25 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); | ||
| 25 | ui->force_30fps_mode->setEnabled(runtime_lock); | 26 | ui->force_30fps_mode->setEnabled(runtime_lock); |
| 26 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); | 27 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); |
| 27 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); | 28 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); |
| @@ -32,6 +33,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { | |||
| 32 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); | 33 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); |
| 33 | Settings::values.gpu_accuracy = gpu_accuracy; | 34 | Settings::values.gpu_accuracy = gpu_accuracy; |
| 34 | Settings::values.use_vsync = ui->use_vsync->isChecked(); | 35 | Settings::values.use_vsync = ui->use_vsync->isChecked(); |
| 36 | Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); | ||
| 35 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | 37 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); |
| 36 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); | 38 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); |
| 37 | } | 39 | } |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 0c7b383e0..770b80c50 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -70,6 +70,13 @@ | |||
| 70 | </widget> | 70 | </widget> |
| 71 | </item> | 71 | </item> |
| 72 | <item> | 72 | <item> |
| 73 | <widget class="QCheckBox" name="use_fast_gpu_time"> | ||
| 74 | <property name="text"> | ||
| 75 | <string>Use Fast GPU Time</string> | ||
| 76 | </property> | ||
| 77 | </widget> | ||
| 78 | </item> | ||
| 79 | <item> | ||
| 73 | <layout class="QHBoxLayout" name="horizontalLayout_1"> | 80 | <layout class="QHBoxLayout" name="horizontalLayout_1"> |
| 74 | <item> | 81 | <item> |
| 75 | <widget class="QLabel" name="af_label"> | 82 | <widget class="QLabel" name="af_label"> |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index d1ac354bf..8476a5a16 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -394,6 +394,8 @@ void Config::ReadValues() { | |||
| 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 395 | Settings::values.use_vsync = | 395 | Settings::values.use_vsync = |
| 396 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); | 396 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); |
| 397 | Settings::values.use_fast_gpu_time = | ||
| 398 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); | ||
| 397 | 399 | ||
| 398 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); | 400 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); |
| 399 | Settings::values.bg_green = | 401 | Settings::values.bg_green = |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index c0325cc3c..3be58b15d 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -130,6 +130,8 @@ void Config::ReadValues() { | |||
| 130 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); | 130 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); |
| 131 | Settings::values.use_asynchronous_gpu_emulation = | 131 | Settings::values.use_asynchronous_gpu_emulation = |
| 132 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 132 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 133 | Settings::values.use_fast_gpu_time = | ||
| 134 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); | ||
| 133 | 135 | ||
| 134 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); | 136 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); |
| 135 | Settings::values.bg_green = | 137 | Settings::values.bg_green = |