diff options
| author | 2020-05-05 17:12:42 -0400 | |
|---|---|---|
| committer | 2020-05-05 17:12:42 -0400 | |
| commit | 41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa (patch) | |
| tree | 64c61fda0aaa076cd54c46e8c271e67888c79c61 | |
| parent | Merge pull request #3881 from lioncash/mem-warning (diff) | |
| parent | Update src/video_core/gpu.cpp (diff) | |
| download | yuzu-41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa.tar.gz yuzu-41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa.tar.xz yuzu-41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa.zip | |
Merge pull request #3815 from FernandoS27/command-list-2
GPU: More optimizations to GPU Command List Processing and DMA Copy Optimizations
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 11 | ||||
| -rw-r--r-- | src/video_core/engines/engine_interface.h | 22 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 8 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 8 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 8 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 10 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 50 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 8 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/macro_interpreter.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 4 |
18 files changed, 198 insertions, 66 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ff53282c9..d23c53843 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -8,6 +8,7 @@ add_library(video_core STATIC | |||
| 8 | dma_pusher.h | 8 | dma_pusher.h |
| 9 | engines/const_buffer_engine_interface.h | 9 | engines/const_buffer_engine_interface.h |
| 10 | engines/const_buffer_info.h | 10 | engines/const_buffer_info.h |
| 11 | engines/engine_interface.h | ||
| 11 | engines/engine_upload.cpp | 12 | engines/engine_upload.cpp |
| 12 | engines/engine_upload.h | 13 | engines/engine_upload.h |
| 13 | engines/fermi_2d.cpp | 14 | engines/fermi_2d.cpp |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 16311f05e..bdc023d54 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -27,6 +27,8 @@ void DmaPusher::DispatchCalls() { | |||
| 27 | 27 | ||
| 28 | dma_pushbuffer_subindex = 0; | 28 | dma_pushbuffer_subindex = 0; |
| 29 | 29 | ||
| 30 | dma_state.is_last_call = true; | ||
| 31 | |||
| 30 | while (system.IsPoweredOn()) { | 32 | while (system.IsPoweredOn()) { |
| 31 | if (!Step()) { | 33 | if (!Step()) { |
| 32 | break; | 34 | break; |
| @@ -82,9 +84,11 @@ bool DmaPusher::Step() { | |||
| 82 | index); | 84 | index); |
| 83 | CallMultiMethod(&command_header.argument, max_write); | 85 | CallMultiMethod(&command_header.argument, max_write); |
| 84 | dma_state.method_count -= max_write; | 86 | dma_state.method_count -= max_write; |
| 87 | dma_state.is_last_call = true; | ||
| 85 | index += max_write; | 88 | index += max_write; |
| 86 | continue; | 89 | continue; |
| 87 | } else { | 90 | } else { |
| 91 | dma_state.is_last_call = dma_state.method_count <= 1; | ||
| 88 | CallMethod(command_header.argument); | 92 | CallMethod(command_header.argument); |
| 89 | } | 93 | } |
| 90 | 94 | ||
| @@ -144,12 +148,22 @@ void DmaPusher::SetState(const CommandHeader& command_header) { | |||
| 144 | } | 148 | } |
| 145 | 149 | ||
| 146 | void DmaPusher::CallMethod(u32 argument) const { | 150 | void DmaPusher::CallMethod(u32 argument) const { |
| 147 | gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); | 151 | if (dma_state.method < non_puller_methods) { |
| 152 | gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); | ||
| 153 | } else { | ||
| 154 | subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, | ||
| 155 | dma_state.is_last_call); | ||
| 156 | } | ||
| 148 | } | 157 | } |
| 149 | 158 | ||
| 150 | void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | 159 | void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { |
| 151 | gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, | 160 | if (dma_state.method < non_puller_methods) { |
| 152 | dma_state.method_count); | 161 | gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, |
| 162 | dma_state.method_count); | ||
| 163 | } else { | ||
| 164 | subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start, | ||
| 165 | num_methods, dma_state.method_count); | ||
| 166 | } | ||
| 153 | } | 167 | } |
| 154 | 168 | ||
| 155 | } // namespace Tegra | 169 | } // namespace Tegra |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 6cef71306..e8b714e94 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -4,11 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <vector> | 8 | #include <vector> |
| 8 | #include <queue> | 9 | #include <queue> |
| 9 | 10 | ||
| 10 | #include "common/bit_field.h" | 11 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_interface.h" | ||
| 12 | 14 | ||
| 13 | namespace Core { | 15 | namespace Core { |
| 14 | class System; | 16 | class System; |
| @@ -69,7 +71,13 @@ public: | |||
| 69 | 71 | ||
| 70 | void DispatchCalls(); | 72 | void DispatchCalls(); |
| 71 | 73 | ||
| 74 | void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) { | ||
| 75 | subchannels[subchannel_id] = engine; | ||
| 76 | } | ||
| 77 | |||
| 72 | private: | 78 | private: |
| 79 | static constexpr u32 non_puller_methods = 0x40; | ||
| 80 | static constexpr u32 max_subchannels = 8; | ||
| 73 | bool Step(); | 81 | bool Step(); |
| 74 | 82 | ||
| 75 | void SetState(const CommandHeader& command_header); | 83 | void SetState(const CommandHeader& command_header); |
| @@ -88,6 +96,7 @@ private: | |||
| 88 | u32 method_count; ///< Current method count | 96 | u32 method_count; ///< Current method count |
| 89 | u32 length_pending; ///< Large NI command length pending | 97 | u32 length_pending; ///< Large NI command length pending |
| 90 | bool non_incrementing; ///< Current command's NI flag | 98 | bool non_incrementing; ///< Current command's NI flag |
| 99 | bool is_last_call; | ||
| 91 | }; | 100 | }; |
| 92 | 101 | ||
| 93 | DmaState dma_state{}; | 102 | DmaState dma_state{}; |
| @@ -96,6 +105,8 @@ private: | |||
| 96 | GPUVAddr dma_mget{}; ///< main pushbuffer last read address | 105 | GPUVAddr dma_mget{}; ///< main pushbuffer last read address |
| 97 | bool ib_enable{true}; ///< IB mode enabled | 106 | bool ib_enable{true}; ///< IB mode enabled |
| 98 | 107 | ||
| 108 | std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{}; | ||
| 109 | |||
| 99 | GPU& gpu; | 110 | GPU& gpu; |
| 100 | Core::System& system; | 111 | Core::System& system; |
| 101 | }; | 112 | }; |
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h new file mode 100644 index 000000000..18a9db7e6 --- /dev/null +++ b/src/video_core/engines/engine_interface.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Tegra::Engines { | ||
| 11 | |||
| 12 | class EngineInterface { | ||
| 13 | public: | ||
| 14 | /// Write the value to the register identified by method. | ||
| 15 | virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0; | ||
| 16 | |||
| 17 | /// Write multiple values to the register identified by method. | ||
| 18 | virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||
| 19 | u32 methods_pending) = 0; | ||
| 20 | }; | ||
| 21 | |||
| 22 | } // namespace Tegra::Engines | ||
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 8a47614d2..ff10ff40d 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -12,13 +12,13 @@ namespace Tegra::Engines { | |||
| 12 | 12 | ||
| 13 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | 13 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
| 14 | 14 | ||
| 15 | void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | 15 | void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 16 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 16 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 17 | "Invalid Fermi2D register, increase the size of the Regs structure"); | 17 | "Invalid Fermi2D register, increase the size of the Regs structure"); |
| 18 | 18 | ||
| 19 | regs.reg_array[method_call.method] = method_call.argument; | 19 | regs.reg_array[method] = method_argument; |
| 20 | 20 | ||
| 21 | switch (method_call.method) { | 21 | switch (method) { |
| 22 | // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, | 22 | // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, |
| 23 | // so trigger on the second 32-bit write. | 23 | // so trigger on the second 32-bit write. |
| 24 | case FERMI2D_REG_INDEX(blit_src_y) + 1: { | 24 | case FERMI2D_REG_INDEX(blit_src_y) + 1: { |
| @@ -30,7 +30,7 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 30 | 30 | ||
| 31 | void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { | 31 | void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { |
| 32 | for (std::size_t i = 0; i < amount; i++) { | 32 | for (std::size_t i = 0; i < amount; i++) { |
| 33 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | 33 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
| 34 | } | 34 | } |
| 35 | } | 35 | } |
| 36 | 36 | ||
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 939a5966d..8f37d053f 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/math_util.h" | 12 | #include "common/math_util.h" |
| 13 | #include "video_core/engines/engine_interface.h" | ||
| 13 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 14 | 15 | ||
| 15 | namespace Tegra { | 16 | namespace Tegra { |
| @@ -31,16 +32,17 @@ namespace Tegra::Engines { | |||
| 31 | #define FERMI2D_REG_INDEX(field_name) \ | 32 | #define FERMI2D_REG_INDEX(field_name) \ |
| 32 | (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) | 33 | (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) |
| 33 | 34 | ||
| 34 | class Fermi2D final { | 35 | class Fermi2D final : public EngineInterface { |
| 35 | public: | 36 | public: |
| 36 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); | 37 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); |
| 37 | ~Fermi2D() = default; | 38 | ~Fermi2D() = default; |
| 38 | 39 | ||
| 39 | /// Write the value to the register identified by method. | 40 | /// Write the value to the register identified by method. |
| 40 | void CallMethod(const GPU::MethodCall& method_call); | 41 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; |
| 41 | 42 | ||
| 42 | /// Write multiple values to the register identified by method. | 43 | /// Write multiple values to the register identified by method. |
| 43 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | 44 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 45 | u32 methods_pending) override; | ||
| 44 | 46 | ||
| 45 | enum class Origin : u32 { | 47 | enum class Origin : u32 { |
| 46 | Center = 0, | 48 | Center = 0, |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 00a12175f..f6237fc6a 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -24,20 +24,19 @@ KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterfac | |||
| 24 | 24 | ||
| 25 | KeplerCompute::~KeplerCompute() = default; | 25 | KeplerCompute::~KeplerCompute() = default; |
| 26 | 26 | ||
| 27 | void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | 27 | void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 28 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 28 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 29 | "Invalid KeplerCompute register, increase the size of the Regs structure"); | 29 | "Invalid KeplerCompute register, increase the size of the Regs structure"); |
| 30 | 30 | ||
| 31 | regs.reg_array[method_call.method] = method_call.argument; | 31 | regs.reg_array[method] = method_argument; |
| 32 | 32 | ||
| 33 | switch (method_call.method) { | 33 | switch (method) { |
| 34 | case KEPLER_COMPUTE_REG_INDEX(exec_upload): { | 34 | case KEPLER_COMPUTE_REG_INDEX(exec_upload): { |
| 35 | upload_state.ProcessExec(regs.exec_upload.linear != 0); | 35 | upload_state.ProcessExec(regs.exec_upload.linear != 0); |
| 36 | break; | 36 | break; |
| 37 | } | 37 | } |
| 38 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | 38 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { |
| 39 | const bool is_last_call = method_call.IsLastCall(); | 39 | upload_state.ProcessData(method_argument, is_last_call); |
| 40 | upload_state.ProcessData(method_call.argument, is_last_call); | ||
| 41 | if (is_last_call) { | 40 | if (is_last_call) { |
| 42 | system.GPU().Maxwell3D().OnMemoryWrite(); | 41 | system.GPU().Maxwell3D().OnMemoryWrite(); |
| 43 | } | 42 | } |
| @@ -54,7 +53,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 54 | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 53 | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 55 | u32 methods_pending) { | 54 | u32 methods_pending) { |
| 56 | for (std::size_t i = 0; i < amount; i++) { | 55 | for (std::size_t i = 0; i < amount; i++) { |
| 57 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | 56 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
| 58 | } | 57 | } |
| 59 | } | 58 | } |
| 60 | 59 | ||
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index fe55fdfd0..18ceedfaf 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/const_buffer_engine_interface.h" | 13 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 14 | #include "video_core/engines/engine_interface.h" | ||
| 14 | #include "video_core/engines/engine_upload.h" | 15 | #include "video_core/engines/engine_upload.h" |
| 15 | #include "video_core/engines/shader_type.h" | 16 | #include "video_core/engines/shader_type.h" |
| 16 | #include "video_core/gpu.h" | 17 | #include "video_core/gpu.h" |
| @@ -39,7 +40,7 @@ namespace Tegra::Engines { | |||
| 39 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ | 40 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 40 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | 41 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 41 | 42 | ||
| 42 | class KeplerCompute final : public ConstBufferEngineInterface { | 43 | class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { |
| 43 | public: | 44 | public: |
| 44 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 45 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 45 | MemoryManager& memory_manager); | 46 | MemoryManager& memory_manager); |
| @@ -200,10 +201,11 @@ public: | |||
| 200 | "KeplerCompute LaunchParams has wrong size"); | 201 | "KeplerCompute LaunchParams has wrong size"); |
| 201 | 202 | ||
| 202 | /// Write the value to the register identified by method. | 203 | /// Write the value to the register identified by method. |
| 203 | void CallMethod(const GPU::MethodCall& method_call); | 204 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; |
| 204 | 205 | ||
| 205 | /// Write multiple values to the register identified by method. | 206 | /// Write multiple values to the register identified by method. |
| 206 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | 207 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 208 | u32 methods_pending) override; | ||
| 207 | 209 | ||
| 208 | Texture::FullTextureInfo GetTexture(std::size_t offset) const; | 210 | Texture::FullTextureInfo GetTexture(std::size_t offset) const; |
| 209 | 211 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 586ff15dc..dc71b2eec 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -19,20 +19,19 @@ KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) | |||
| 19 | 19 | ||
| 20 | KeplerMemory::~KeplerMemory() = default; | 20 | KeplerMemory::~KeplerMemory() = default; |
| 21 | 21 | ||
| 22 | void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | 22 | void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 23 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 23 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 24 | "Invalid KeplerMemory register, increase the size of the Regs structure"); | 24 | "Invalid KeplerMemory register, increase the size of the Regs structure"); |
| 25 | 25 | ||
| 26 | regs.reg_array[method_call.method] = method_call.argument; | 26 | regs.reg_array[method] = method_argument; |
| 27 | 27 | ||
| 28 | switch (method_call.method) { | 28 | switch (method) { |
| 29 | case KEPLERMEMORY_REG_INDEX(exec): { | 29 | case KEPLERMEMORY_REG_INDEX(exec): { |
| 30 | upload_state.ProcessExec(regs.exec.linear != 0); | 30 | upload_state.ProcessExec(regs.exec.linear != 0); |
| 31 | break; | 31 | break; |
| 32 | } | 32 | } |
| 33 | case KEPLERMEMORY_REG_INDEX(data): { | 33 | case KEPLERMEMORY_REG_INDEX(data): { |
| 34 | const bool is_last_call = method_call.IsLastCall(); | 34 | upload_state.ProcessData(method_argument, is_last_call); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | ||
| 36 | if (is_last_call) { | 35 | if (is_last_call) { |
| 37 | system.GPU().Maxwell3D().OnMemoryWrite(); | 36 | system.GPU().Maxwell3D().OnMemoryWrite(); |
| 38 | } | 37 | } |
| @@ -44,7 +43,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 44 | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 43 | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 45 | u32 methods_pending) { | 44 | u32 methods_pending) { |
| 46 | for (std::size_t i = 0; i < amount; i++) { | 45 | for (std::size_t i = 0; i < amount; i++) { |
| 47 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | 46 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
| 48 | } | 47 | } |
| 49 | } | 48 | } |
| 50 | 49 | ||
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index bb26fb030..5b7f71a00 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_interface.h" | ||
| 13 | #include "video_core/engines/engine_upload.h" | 14 | #include "video_core/engines/engine_upload.h" |
| 14 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 15 | 16 | ||
| @@ -32,16 +33,17 @@ namespace Tegra::Engines { | |||
| 32 | #define KEPLERMEMORY_REG_INDEX(field_name) \ | 33 | #define KEPLERMEMORY_REG_INDEX(field_name) \ |
| 33 | (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) | 34 | (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) |
| 34 | 35 | ||
| 35 | class KeplerMemory final { | 36 | class KeplerMemory final : public EngineInterface { |
| 36 | public: | 37 | public: |
| 37 | KeplerMemory(Core::System& system, MemoryManager& memory_manager); | 38 | KeplerMemory(Core::System& system, MemoryManager& memory_manager); |
| 38 | ~KeplerMemory(); | 39 | ~KeplerMemory(); |
| 39 | 40 | ||
| 40 | /// Write the value to the register identified by method. | 41 | /// Write the value to the register identified by method. |
| 41 | void CallMethod(const GPU::MethodCall& method_call); | 42 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; |
| 42 | 43 | ||
| 43 | /// Write multiple values to the register identified by method. | 44 | /// Write multiple values to the register identified by method. |
| 44 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | 45 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 46 | u32 methods_pending) override; | ||
| 45 | 47 | ||
| 46 | struct Regs { | 48 | struct Regs { |
| 47 | static constexpr size_t NUM_REGS = 0x7F; | 49 | static constexpr size_t NUM_REGS = 0x7F; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7db055ea0..33936e209 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -125,12 +125,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 | |||
| 125 | } | 125 | } |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | 128 | void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 129 | const u32 method = method_call.method; | ||
| 130 | |||
| 131 | if (method == cb_data_state.current) { | 129 | if (method == cb_data_state.current) { |
| 132 | regs.reg_array[method] = method_call.argument; | 130 | regs.reg_array[method] = method_argument; |
| 133 | ProcessCBData(method_call.argument); | 131 | ProcessCBData(method_argument); |
| 134 | return; | 132 | return; |
| 135 | } else if (cb_data_state.current != null_cb_data) { | 133 | } else if (cb_data_state.current != null_cb_data) { |
| 136 | FinishCBData(); | 134 | FinishCBData(); |
| @@ -153,10 +151,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 153 | executing_macro = method; | 151 | executing_macro = method; |
| 154 | } | 152 | } |
| 155 | 153 | ||
| 156 | macro_params.push_back(method_call.argument); | 154 | macro_params.push_back(method_argument); |
| 157 | 155 | ||
| 158 | // Call the macro when there are no more parameters in the command buffer | 156 | // Call the macro when there are no more parameters in the command buffer |
| 159 | if (method_call.IsLastCall()) { | 157 | if (is_last_call) { |
| 160 | CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); | 158 | CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); |
| 161 | macro_params.clear(); | 159 | macro_params.clear(); |
| 162 | } | 160 | } |
| @@ -166,7 +164,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 166 | ASSERT_MSG(method < Regs::NUM_REGS, | 164 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 167 | "Invalid Maxwell3D register, increase the size of the Regs structure"); | 165 | "Invalid Maxwell3D register, increase the size of the Regs structure"); |
| 168 | 166 | ||
| 169 | u32 arg = method_call.argument; | 167 | u32 arg = method_argument; |
| 170 | // Keep track of the register value in shadow_state when requested. | 168 | // Keep track of the register value in shadow_state when requested. |
| 171 | if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || | 169 | if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || |
| 172 | shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { | 170 | shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { |
| @@ -189,7 +187,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 189 | break; | 187 | break; |
| 190 | } | 188 | } |
| 191 | case MAXWELL3D_REG_INDEX(shadow_ram_control): { | 189 | case MAXWELL3D_REG_INDEX(shadow_ram_control): { |
| 192 | shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument); | 190 | shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument); |
| 193 | break; | 191 | break; |
| 194 | } | 192 | } |
| 195 | case MAXWELL3D_REG_INDEX(macros.data): { | 193 | case MAXWELL3D_REG_INDEX(macros.data): { |
| @@ -272,7 +270,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 272 | break; | 270 | break; |
| 273 | } | 271 | } |
| 274 | case MAXWELL3D_REG_INDEX(data_upload): { | 272 | case MAXWELL3D_REG_INDEX(data_upload): { |
| 275 | const bool is_last_call = method_call.IsLastCall(); | ||
| 276 | upload_state.ProcessData(arg, is_last_call); | 273 | upload_state.ProcessData(arg, is_last_call); |
| 277 | if (is_last_call) { | 274 | if (is_last_call) { |
| 278 | OnMemoryWrite(); | 275 | OnMemoryWrite(); |
| @@ -330,7 +327,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | |||
| 330 | } | 327 | } |
| 331 | default: { | 328 | default: { |
| 332 | for (std::size_t i = 0; i < amount; i++) { | 329 | for (std::size_t i = 0; i < amount; i++) { |
| 333 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | 330 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
| 334 | } | 331 | } |
| 335 | } | 332 | } |
| 336 | } | 333 | } |
| @@ -360,16 +357,15 @@ void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { | |||
| 360 | StepInstance(expected_mode, count); | 357 | StepInstance(expected_mode, count); |
| 361 | } | 358 | } |
| 362 | 359 | ||
| 363 | void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { | 360 | void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { |
| 364 | const u32 method = method_call.method; | ||
| 365 | if (mme_inline[method]) { | 361 | if (mme_inline[method]) { |
| 366 | regs.reg_array[method] = method_call.argument; | 362 | regs.reg_array[method] = method_argument; |
| 367 | if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || | 363 | if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || |
| 368 | method == MAXWELL3D_REG_INDEX(index_array.count)) { | 364 | method == MAXWELL3D_REG_INDEX(index_array.count)) { |
| 369 | const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) | 365 | const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) |
| 370 | ? MMEDrawMode::Array | 366 | ? MMEDrawMode::Array |
| 371 | : MMEDrawMode::Indexed; | 367 | : MMEDrawMode::Indexed; |
| 372 | StepInstance(expected_mode, method_call.argument); | 368 | StepInstance(expected_mode, method_argument); |
| 373 | } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { | 369 | } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { |
| 374 | mme_draw.instance_mode = | 370 | mme_draw.instance_mode = |
| 375 | (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); | 371 | (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); |
| @@ -381,7 +377,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { | |||
| 381 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { | 377 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { |
| 382 | FlushMMEInlineDraw(); | 378 | FlushMMEInlineDraw(); |
| 383 | } | 379 | } |
| 384 | CallMethod(method_call); | 380 | CallMethod(method, method_argument, true); |
| 385 | } | 381 | } |
| 386 | } | 382 | } |
| 387 | 383 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 864924ff3..1a5df05ce 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "common/math_util.h" | 19 | #include "common/math_util.h" |
| 20 | #include "video_core/engines/const_buffer_engine_interface.h" | 20 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 21 | #include "video_core/engines/const_buffer_info.h" | 21 | #include "video_core/engines/const_buffer_info.h" |
| 22 | #include "video_core/engines/engine_interface.h" | ||
| 22 | #include "video_core/engines/engine_upload.h" | 23 | #include "video_core/engines/engine_upload.h" |
| 23 | #include "video_core/engines/shader_type.h" | 24 | #include "video_core/engines/shader_type.h" |
| 24 | #include "video_core/gpu.h" | 25 | #include "video_core/gpu.h" |
| @@ -48,7 +49,7 @@ namespace Tegra::Engines { | |||
| 48 | #define MAXWELL3D_REG_INDEX(field_name) \ | 49 | #define MAXWELL3D_REG_INDEX(field_name) \ |
| 49 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) | 50 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) |
| 50 | 51 | ||
| 51 | class Maxwell3D final : public ConstBufferEngineInterface { | 52 | class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { |
| 52 | public: | 53 | public: |
| 53 | explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 54 | explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 54 | MemoryManager& memory_manager); | 55 | MemoryManager& memory_manager); |
| @@ -1360,13 +1361,14 @@ public: | |||
| 1360 | u32 GetRegisterValue(u32 method) const; | 1361 | u32 GetRegisterValue(u32 method) const; |
| 1361 | 1362 | ||
| 1362 | /// Write the value to the register identified by method. | 1363 | /// Write the value to the register identified by method. |
| 1363 | void CallMethod(const GPU::MethodCall& method_call); | 1364 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; |
| 1364 | 1365 | ||
| 1365 | /// Write multiple values to the register identified by method. | 1366 | /// Write multiple values to the register identified by method. |
| 1366 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | 1367 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 1368 | u32 methods_pending) override; | ||
| 1367 | 1369 | ||
| 1368 | /// Write the value to the register identified by method. | 1370 | /// Write the value to the register identified by method. |
| 1369 | void CallMethodFromMME(const GPU::MethodCall& method_call); | 1371 | void CallMethodFromMME(u32 method, u32 method_argument); |
| 1370 | 1372 | ||
| 1371 | void FlushMMEInlineDraw(); | 1373 | void FlushMMEInlineDraw(); |
| 1372 | 1374 | ||
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 6630005b0..01d7df405 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -17,16 +17,16 @@ namespace Tegra::Engines { | |||
| 17 | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) | 17 | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) |
| 18 | : system{system}, memory_manager{memory_manager} {} | 18 | : system{system}, memory_manager{memory_manager} {} |
| 19 | 19 | ||
| 20 | void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | 20 | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 21 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 21 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 22 | "Invalid MaxwellDMA register, increase the size of the Regs structure"); | 22 | "Invalid MaxwellDMA register, increase the size of the Regs structure"); |
| 23 | 23 | ||
| 24 | regs.reg_array[method_call.method] = method_call.argument; | 24 | regs.reg_array[method] = method_argument; |
| 25 | 25 | ||
| 26 | #define MAXWELLDMA_REG_INDEX(field_name) \ | 26 | #define MAXWELLDMA_REG_INDEX(field_name) \ |
| 27 | (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) | 27 | (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) |
| 28 | 28 | ||
| 29 | switch (method_call.method) { | 29 | switch (method) { |
| 30 | case MAXWELLDMA_REG_INDEX(exec): { | 30 | case MAXWELLDMA_REG_INDEX(exec): { |
| 31 | HandleCopy(); | 31 | HandleCopy(); |
| 32 | break; | 32 | break; |
| @@ -39,7 +39,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | |||
| 39 | void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 39 | void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 40 | u32 methods_pending) { | 40 | u32 methods_pending) { |
| 41 | for (std::size_t i = 0; i < amount; i++) { | 41 | for (std::size_t i = 0; i < amount; i++) { |
| 42 | CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); | 42 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | 45 | ||
| @@ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() { | |||
| 90 | ASSERT(regs.exec.enable_2d == 1); | 90 | ASSERT(regs.exec.enable_2d == 1); |
| 91 | 91 | ||
| 92 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 92 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| 93 | |||
| 93 | ASSERT(regs.src_params.BlockDepth() == 0); | 94 | ASSERT(regs.src_params.BlockDepth() == 0); |
| 95 | // Optimized path for micro copies. | ||
| 96 | if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) { | ||
| 97 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | ||
| 98 | const std::size_t src_size = Texture::GetGOBSize(); | ||
| 99 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; | ||
| 100 | u32 pos_x = regs.src_params.pos_x; | ||
| 101 | u32 pos_y = regs.src_params.pos_y; | ||
| 102 | const u64 offset = | ||
| 103 | Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y, | ||
| 104 | regs.src_params.BlockDepth(), bytes_per_pixel); | ||
| 105 | const u32 x_in_gob = 64 / bytes_per_pixel; | ||
| 106 | pos_x = pos_x % x_in_gob; | ||
| 107 | pos_y = pos_y % 8; | ||
| 108 | |||
| 109 | if (read_buffer.size() < src_size) { | ||
| 110 | read_buffer.resize(src_size); | ||
| 111 | } | ||
| 112 | |||
| 113 | if (write_buffer.size() < dst_size) { | ||
| 114 | write_buffer.resize(dst_size); | ||
| 115 | } | ||
| 116 | |||
| 117 | if (Settings::IsGPULevelExtreme()) { | ||
| 118 | memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size); | ||
| 119 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 120 | } else { | ||
| 121 | memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size); | ||
| 122 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 123 | } | ||
| 124 | |||
| 125 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | ||
| 126 | regs.src_params.size_x, bytes_per_pixel, read_buffer.data(), | ||
| 127 | write_buffer.data(), regs.src_params.BlockHeight(), pos_x, | ||
| 128 | pos_y); | ||
| 129 | |||
| 130 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | ||
| 131 | |||
| 132 | return; | ||
| 133 | } | ||
| 94 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | 134 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. |
| 95 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | 135 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; |
| 96 | const std::size_t src_size = Texture::CalculateSize( | 136 | const std::size_t src_size = Texture::CalculateSize( |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c43ed8194..502dd8509 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_interface.h" | ||
| 13 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 14 | 15 | ||
| 15 | namespace Core { | 16 | namespace Core { |
| @@ -27,16 +28,17 @@ namespace Tegra::Engines { | |||
| 27 | * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml | 28 | * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml |
| 28 | */ | 29 | */ |
| 29 | 30 | ||
| 30 | class MaxwellDMA final { | 31 | class MaxwellDMA final : public EngineInterface { |
| 31 | public: | 32 | public: |
| 32 | explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); | 33 | explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); |
| 33 | ~MaxwellDMA() = default; | 34 | ~MaxwellDMA() = default; |
| 34 | 35 | ||
| 35 | /// Write the value to the register identified by method. | 36 | /// Write the value to the register identified by method. |
| 36 | void CallMethod(const GPU::MethodCall& method_call); | 37 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; |
| 37 | 38 | ||
| 38 | /// Write multiple values to the register identified by method. | 39 | /// Write multiple values to the register identified by method. |
| 39 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); | 40 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 41 | u32 methods_pending) override; | ||
| 40 | 42 | ||
| 41 | struct Regs { | 43 | struct Regs { |
| 42 | static constexpr std::size_t NUM_REGS = 0x1D6; | 44 | static constexpr std::size_t NUM_REGS = 0x1D6; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b87fd873d..8eb017f65 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -299,19 +299,21 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { | |||
| 299 | 299 | ||
| 300 | switch (engine) { | 300 | switch (engine) { |
| 301 | case EngineID::FERMI_TWOD_A: | 301 | case EngineID::FERMI_TWOD_A: |
| 302 | fermi_2d->CallMethod(method_call); | 302 | fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); |
| 303 | break; | 303 | break; |
| 304 | case EngineID::MAXWELL_B: | 304 | case EngineID::MAXWELL_B: |
| 305 | maxwell_3d->CallMethod(method_call); | 305 | maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); |
| 306 | break; | 306 | break; |
| 307 | case EngineID::KEPLER_COMPUTE_B: | 307 | case EngineID::KEPLER_COMPUTE_B: |
| 308 | kepler_compute->CallMethod(method_call); | 308 | kepler_compute->CallMethod(method_call.method, method_call.argument, |
| 309 | method_call.IsLastCall()); | ||
| 309 | break; | 310 | break; |
| 310 | case EngineID::MAXWELL_DMA_COPY_A: | 311 | case EngineID::MAXWELL_DMA_COPY_A: |
| 311 | maxwell_dma->CallMethod(method_call); | 312 | maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); |
| 312 | break; | 313 | break; |
| 313 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | 314 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: |
| 314 | kepler_memory->CallMethod(method_call); | 315 | kepler_memory->CallMethod(method_call.method, method_call.argument, |
| 316 | method_call.IsLastCall()); | ||
| 315 | break; | 317 | break; |
| 316 | default: | 318 | default: |
| 317 | UNIMPLEMENTED_MSG("Unimplemented engine"); | 319 | UNIMPLEMENTED_MSG("Unimplemented engine"); |
| @@ -347,7 +349,27 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { | |||
| 347 | // Bind the current subchannel to the desired engine id. | 349 | // Bind the current subchannel to the desired engine id. |
| 348 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | 350 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, |
| 349 | method_call.argument); | 351 | method_call.argument); |
| 350 | bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); | 352 | const auto engine_id = static_cast<EngineID>(method_call.argument); |
| 353 | bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id); | ||
| 354 | switch (engine_id) { | ||
| 355 | case EngineID::FERMI_TWOD_A: | ||
| 356 | dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); | ||
| 357 | break; | ||
| 358 | case EngineID::MAXWELL_B: | ||
| 359 | dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); | ||
| 360 | break; | ||
| 361 | case EngineID::KEPLER_COMPUTE_B: | ||
| 362 | dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); | ||
| 363 | break; | ||
| 364 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 365 | dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); | ||
| 366 | break; | ||
| 367 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 368 | dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); | ||
| 369 | break; | ||
| 370 | default: | ||
| 371 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id)); | ||
| 372 | } | ||
| 351 | } | 373 | } |
| 352 | 374 | ||
| 353 | void GPU::ProcessSemaphoreTriggerMethod() { | 375 | void GPU::ProcessSemaphoreTriggerMethod() { |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 42031d80a..947364928 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -328,7 +328,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) { | |||
| 328 | } | 328 | } |
| 329 | 329 | ||
| 330 | void MacroInterpreter::Send(u32 value) { | 330 | void MacroInterpreter::Send(u32 value) { |
| 331 | maxwell3d.CallMethodFromMME({method_address.address, value}); | 331 | maxwell3d.CallMethodFromMME(method_address.address, value); |
| 332 | // Increment the method address by the method increment. | 332 | // Increment the method address by the method increment. |
| 333 | method_address.address.Assign(method_address.address.Value() + | 333 | method_address.address.Assign(method_address.address.Value() + |
| 334 | method_address.increment.Value()); | 334 | method_address.increment.Value()); |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index fae8638ec..548e4c3fe 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -382,4 +382,18 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 382 | } | 382 | } |
| 383 | } | 383 | } |
| 384 | 384 | ||
| 385 | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||
| 386 | u32 bytes_per_pixel) { | ||
| 387 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||
| 388 | const u32 gobs_in_block = 1 << block_height; | ||
| 389 | const u32 y_blocks = gob_size_y << block_height; | ||
| 390 | const u32 x_per_gob = gob_size_x / bytes_per_pixel; | ||
| 391 | const u32 x_blocks = div_ceil(width, x_per_gob); | ||
| 392 | const u32 block_size = gob_size * gobs_in_block; | ||
| 393 | const u32 stride = block_size * x_blocks; | ||
| 394 | const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; | ||
| 395 | const u32 relative_y = dst_y % y_blocks; | ||
| 396 | return base + (relative_y / gob_size_y) * gob_size; | ||
| 397 | } | ||
| 398 | |||
| 385 | } // namespace Tegra::Texture | 399 | } // namespace Tegra::Texture |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 9f2d6d308..06f3ebf87 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -59,4 +59,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 59 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | 59 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, |
| 60 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); | 60 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); |
| 61 | 61 | ||
| 62 | /// Obtains the offset of the gob for positions 'dst_x' & 'dst_y' | ||
| 63 | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||
| 64 | u32 bytes_per_pixel); | ||
| 65 | |||
| 62 | } // namespace Tegra::Texture | 66 | } // namespace Tegra::Texture |