diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 81 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 36 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 17 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 12 |
9 files changed, 176 insertions, 13 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 502feddba..c73ebb1f4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -164,6 +164,8 @@ public: | |||
| 164 | /// Pop asynchronous downloads | 164 | /// Pop asynchronous downloads |
| 165 | void PopAsyncFlushes(); | 165 | void PopAsyncFlushes(); |
| 166 | 166 | ||
| 167 | [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 168 | |||
| 167 | /// Return true when a CPU region is modified from the GPU | 169 | /// Return true when a CPU region is modified from the GPU |
| 168 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 170 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 169 | 171 | ||
| @@ -431,6 +433,83 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | |||
| 431 | } | 433 | } |
| 432 | 434 | ||
| 433 | template <class P> | 435 | template <class P> |
| 436 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | ||
| 437 | const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); | ||
| 438 | const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); | ||
| 439 | if (!cpu_src_address || !cpu_dest_address) { | ||
| 440 | return false; | ||
| 441 | } | ||
| 442 | const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); | ||
| 443 | const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); | ||
| 444 | if (!(source_dirty || dest_dirty)) { | ||
| 445 | return false; | ||
| 446 | } | ||
| 447 | |||
| 448 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | ||
| 449 | common_ranges.subtract(subtract_interval); | ||
| 450 | |||
| 451 | BufferId buffer_a; | ||
| 452 | BufferId buffer_b; | ||
| 453 | do { | ||
| 454 | has_deleted_buffers = false; | ||
| 455 | buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount)); | ||
| 456 | buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount)); | ||
| 457 | } while (has_deleted_buffers); | ||
| 458 | auto& src_buffer = slot_buffers[buffer_a]; | ||
| 459 | auto& dest_buffer = slot_buffers[buffer_b]; | ||
| 460 | SynchronizeBuffer(src_buffer, *cpu_src_address, amount); | ||
| 461 | SynchronizeBuffer(dest_buffer, *cpu_dest_address, amount); | ||
| 462 | std::array copies{BufferCopy{ | ||
| 463 | .src_offset = src_buffer.Offset(*cpu_src_address), | ||
| 464 | .dst_offset = dest_buffer.Offset(*cpu_dest_address), | ||
| 465 | .size = amount, | ||
| 466 | }}; | ||
| 467 | |||
| 468 | auto mirror = [&](VAddr base_address, u64 size) { | ||
| 469 | VAddr diff = base_address - *cpu_src_address; | ||
| 470 | VAddr new_base_address = *cpu_dest_address + diff; | ||
| 471 | const IntervalType add_interval{new_base_address, new_base_address + size}; | ||
| 472 | common_ranges.add(add_interval); | ||
| 473 | }; | ||
| 474 | |||
| 475 | const VAddr start_address = *cpu_src_address; | ||
| 476 | const VAddr end_address = start_address + amount; | ||
| 477 | const IntervalType search_interval{start_address - amount, 1}; | ||
| 478 | auto it = common_ranges.lower_bound(search_interval); | ||
| 479 | if (it == common_ranges.end()) { | ||
| 480 | it = common_ranges.begin(); | ||
| 481 | } | ||
| 482 | while (it != common_ranges.end()) { | ||
| 483 | VAddr inter_addr_end = it->upper(); | ||
| 484 | VAddr inter_addr = it->lower(); | ||
| 485 | if (inter_addr >= end_address) { | ||
| 486 | break; | ||
| 487 | } | ||
| 488 | if (inter_addr_end <= start_address) { | ||
| 489 | it++; | ||
| 490 | continue; | ||
| 491 | } | ||
| 492 | if (inter_addr_end > end_address) { | ||
| 493 | inter_addr_end = end_address; | ||
| 494 | } | ||
| 495 | if (inter_addr < start_address) { | ||
| 496 | inter_addr = start_address; | ||
| 497 | } | ||
| 498 | mirror(inter_addr, inter_addr_end - inter_addr); | ||
| 499 | it++; | ||
| 500 | } | ||
| 501 | |||
| 502 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | ||
| 503 | if (source_dirty) { | ||
| 504 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | ||
| 505 | } | ||
| 506 | std::vector<u8> tmp_buffer(amount); | ||
| 507 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | ||
| 508 | cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); | ||
| 509 | return true; | ||
| 510 | } | ||
| 511 | |||
| 512 | template <class P> | ||
| 434 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 513 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 435 | u32 size) { | 514 | u32 size) { |
| 436 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 515 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| @@ -951,7 +1030,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 951 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); | 1030 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); |
| 952 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); | 1031 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); |
| 953 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1032 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 954 | const u32 draw_size = index_array.count * index_array.FormatSizeInBytes(); | 1033 | const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); |
| 955 | const u32 size = std::min(address_size, draw_size); | 1034 | const u32 size = std::min(address_size, draw_size); |
| 956 | if (size == 0 || !cpu_addr) { | 1035 | if (size == 0 || !cpu_addr) { |
| 957 | index_buffer = NULL_BINDING; | 1036 | index_buffer = NULL_BINDING; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2ee980bab..24481952b 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -21,6 +21,10 @@ MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) | |||
| 21 | 21 | ||
| 22 | MaxwellDMA::~MaxwellDMA() = default; | 22 | MaxwellDMA::~MaxwellDMA() = default; |
| 23 | 23 | ||
| 24 | void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||
| 25 | rasterizer = rasterizer_; | ||
| 26 | } | ||
| 27 | |||
| 24 | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | 28 | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 25 | ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); | 29 | ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); |
| 26 | 30 | ||
| @@ -44,7 +48,6 @@ void MaxwellDMA::Launch() { | |||
| 44 | 48 | ||
| 45 | // TODO(Subv): Perform more research and implement all features of this engine. | 49 | // TODO(Subv): Perform more research and implement all features of this engine. |
| 46 | const LaunchDMA& launch = regs.launch_dma; | 50 | const LaunchDMA& launch = regs.launch_dma; |
| 47 | ASSERT(launch.remap_enable == 0); | ||
| 48 | ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); | 51 | ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); |
| 49 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); | 52 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); |
| 50 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); | 53 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); |
| @@ -77,11 +80,29 @@ void MaxwellDMA::CopyPitchToPitch() { | |||
| 77 | // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D | 80 | // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D |
| 78 | // buffer of length `line_length_in`. | 81 | // buffer of length `line_length_in`. |
| 79 | // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). | 82 | // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). |
| 83 | auto& accelerate = rasterizer->AccessAccelerateDMA(); | ||
| 80 | if (!regs.launch_dma.multi_line_enable) { | 84 | if (!regs.launch_dma.multi_line_enable) { |
| 81 | memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in); | 85 | const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 && |
| 86 | regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; | ||
| 87 | // TODO: allow multisized components. | ||
| 88 | if (is_buffer_clear) { | ||
| 89 | ASSERT(regs.remap_const.component_size_minus_one == 3); | ||
| 90 | std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); | ||
| 91 | memory_manager.WriteBlock(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()), | ||
| 92 | regs.line_length_in * sizeof(u32)); | ||
| 93 | return; | ||
| 94 | } | ||
| 95 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 96 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | ||
| 97 | std::vector<u8> tmp_buffer(regs.line_length_in); | ||
| 98 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); | ||
| 99 | memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); | ||
| 100 | } | ||
| 82 | return; | 101 | return; |
| 83 | } | 102 | } |
| 84 | 103 | ||
| 104 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 105 | |||
| 85 | // Perform a line-by-line copy. | 106 | // Perform a line-by-line copy. |
| 86 | // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. | 107 | // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. |
| 87 | // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. | 108 | // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. |
| @@ -105,6 +126,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 105 | } | 126 | } |
| 106 | 127 | ||
| 107 | // Deswizzle the input and copy it over. | 128 | // Deswizzle the input and copy it over. |
| 129 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 108 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; | 130 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; |
| 109 | const Parameters& src_params = regs.src_params; | 131 | const Parameters& src_params = regs.src_params; |
| 110 | const u32 width = src_params.width; | 132 | const u32 width = src_params.width; |
| @@ -134,6 +156,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 134 | 156 | ||
| 135 | void MaxwellDMA::CopyPitchToBlockLinear() { | 157 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 136 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); | 158 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); |
| 159 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 137 | 160 | ||
| 138 | const auto& dst_params = regs.dst_params; | 161 | const auto& dst_params = regs.dst_params; |
| 139 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; | 162 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; |
| @@ -156,13 +179,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 156 | write_buffer.resize(dst_size); | 179 | write_buffer.resize(dst_size); |
| 157 | } | 180 | } |
| 158 | 181 | ||
| 159 | if (Settings::IsGPULevelExtreme()) { | 182 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |
| 160 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 183 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 161 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 162 | } else { | ||
| 163 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); | ||
| 164 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | ||
| 165 | } | ||
| 166 | 184 | ||
| 167 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 185 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 168 | if (regs.dst_params.block_size.depth > 0) { | 186 | if (regs.dst_params.block_size.depth > 0) { |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c77f02a22..4ed0d0996 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -21,8 +21,18 @@ namespace Tegra { | |||
| 21 | class MemoryManager; | 21 | class MemoryManager; |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | namespace VideoCore { | ||
| 25 | class RasterizerInterface; | ||
| 26 | } | ||
| 27 | |||
| 24 | namespace Tegra::Engines { | 28 | namespace Tegra::Engines { |
| 25 | 29 | ||
| 30 | class AccelerateDMAInterface { | ||
| 31 | public: | ||
| 32 | /// Write the value to the register identified by method. | ||
| 33 | virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; | ||
| 34 | }; | ||
| 35 | |||
| 26 | /** | 36 | /** |
| 27 | * This engine is known as gk104_copy. Documentation can be found in: | 37 | * This engine is known as gk104_copy. Documentation can be found in: |
| 28 | * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h | 38 | * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h |
| @@ -187,6 +197,8 @@ public: | |||
| 187 | }; | 197 | }; |
| 188 | static_assert(sizeof(RemapConst) == 12); | 198 | static_assert(sizeof(RemapConst) == 12); |
| 189 | 199 | ||
| 200 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||
| 201 | |||
| 190 | explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); | 202 | explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); |
| 191 | ~MaxwellDMA() override; | 203 | ~MaxwellDMA() override; |
| 192 | 204 | ||
| @@ -213,6 +225,7 @@ private: | |||
| 213 | Core::System& system; | 225 | Core::System& system; |
| 214 | 226 | ||
| 215 | MemoryManager& memory_manager; | 227 | MemoryManager& memory_manager; |
| 228 | VideoCore::RasterizerInterface* rasterizer; | ||
| 216 | 229 | ||
| 217 | std::vector<u8> read_buffer; | 230 | std::vector<u8> read_buffer; |
| 218 | std::vector<u8> write_buffer; | 231 | std::vector<u8> write_buffer; |
| @@ -240,7 +253,9 @@ private: | |||
| 240 | u32 pitch_out; | 253 | u32 pitch_out; |
| 241 | u32 line_length_in; | 254 | u32 line_length_in; |
| 242 | u32 line_count; | 255 | u32 line_count; |
| 243 | u32 reserved06[0xb8]; | 256 | u32 reserved06[0xb6]; |
| 257 | u32 remap_consta_value; | ||
| 258 | u32 remap_constb_value; | ||
| 244 | RemapConst remap_const; | 259 | RemapConst remap_const; |
| 245 | Parameters dst_params; | 260 | Parameters dst_params; |
| 246 | u32 reserved07[0x1]; | 261 | u32 reserved07[0x1]; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index f317ddc2b..ff024f530 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -50,6 +50,7 @@ void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | |||
| 50 | maxwell_3d->BindRasterizer(rasterizer); | 50 | maxwell_3d->BindRasterizer(rasterizer); |
| 51 | fermi_2d->BindRasterizer(rasterizer); | 51 | fermi_2d->BindRasterizer(rasterizer); |
| 52 | kepler_compute->BindRasterizer(rasterizer); | 52 | kepler_compute->BindRasterizer(rasterizer); |
| 53 | maxwell_dma->BindRasterizer(rasterizer); | ||
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | Engines::Maxwell3D& GPU::Maxwell3D() { | 56 | Engines::Maxwell3D& GPU::Maxwell3D() { |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 67aef6000..8d4239afc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <stop_token> | 10 | #include <stop_token> |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/fermi_2d.h" | 12 | #include "video_core/engines/fermi_2d.h" |
| 13 | #include "video_core/engines/maxwell_dma.h" | ||
| 13 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 14 | #include "video_core/guest_driver.h" | 15 | #include "video_core/guest_driver.h" |
| 15 | 16 | ||
| @@ -119,6 +120,8 @@ public: | |||
| 119 | return false; | 120 | return false; |
| 120 | } | 121 | } |
| 121 | 122 | ||
| 123 | [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0; | ||
| 124 | |||
| 122 | /// Attempt to use a faster method to display the framebuffer to screen | 125 | /// Attempt to use a faster method to display the framebuffer to screen |
| 123 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | 126 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 124 | VAddr framebuffer_addr, u32 pixel_stride) { | 127 | VAddr framebuffer_addr, u32 pixel_stride) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a4ed8f68f..82c84127a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -171,7 +171,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 171 | buffer_cache_runtime(device), | 171 | buffer_cache_runtime(device), |
| 172 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 172 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 173 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 173 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |
| 174 | query_cache(*this, maxwell3d, gpu_memory), | 174 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), |
| 175 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 175 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 176 | async_shaders(emu_window_) { | 176 | async_shaders(emu_window_) { |
| 177 | if (device.UseAsynchronousShaders()) { | 177 | if (device.UseAsynchronousShaders()) { |
| @@ -701,6 +701,10 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | |||
| 701 | return true; | 701 | return true; |
| 702 | } | 702 | } |
| 703 | 703 | ||
| 704 | Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { | ||
| 705 | return accelerate_dma; | ||
| 706 | } | ||
| 707 | |||
| 704 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 708 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 705 | VAddr framebuffer_addr, u32 pixel_stride) { | 709 | VAddr framebuffer_addr, u32 pixel_stride) { |
| 706 | if (framebuffer_addr == 0) { | 710 | if (framebuffer_addr == 0) { |
| @@ -1396,4 +1400,11 @@ void RasterizerOpenGL::EndTransformFeedback() { | |||
| 1396 | glEndTransformFeedback(); | 1400 | glEndTransformFeedback(); |
| 1397 | } | 1401 | } |
| 1398 | 1402 | ||
| 1403 | AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} | ||
| 1404 | |||
| 1405 | bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | ||
| 1406 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 1407 | return buffer_cache.DMACopy(src_address, dest_address, amount); | ||
| 1408 | } | ||
| 1409 | |||
| 1399 | } // namespace OpenGL | 1410 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d8df71962..ccee9ba33 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "video_core/engines/const_buffer_info.h" | 20 | #include "video_core/engines/const_buffer_info.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/engines/maxwell_dma.h" | ||
| 22 | #include "video_core/rasterizer_accelerated.h" | 23 | #include "video_core/rasterizer_accelerated.h" |
| 23 | #include "video_core/rasterizer_interface.h" | 24 | #include "video_core/rasterizer_interface.h" |
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 25 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| @@ -58,6 +59,16 @@ struct BindlessSSBO { | |||
| 58 | }; | 59 | }; |
| 59 | static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); | 60 | static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); |
| 60 | 61 | ||
| 62 | class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { | ||
| 63 | public: | ||
| 64 | explicit AccelerateDMA(BufferCache& buffer_cache); | ||
| 65 | |||
| 66 | bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override; | ||
| 67 | |||
| 68 | private: | ||
| 69 | BufferCache& buffer_cache; | ||
| 70 | }; | ||
| 71 | |||
| 61 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | 72 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { |
| 62 | public: | 73 | public: |
| 63 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 74 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| @@ -94,6 +105,7 @@ public: | |||
| 94 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 105 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 95 | const Tegra::Engines::Fermi2D::Surface& dst, | 106 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 96 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 107 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 108 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||
| 97 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 109 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 98 | u32 pixel_stride) override; | 110 | u32 pixel_stride) override; |
| 99 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 111 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| @@ -234,6 +246,7 @@ private: | |||
| 234 | BufferCache buffer_cache; | 246 | BufferCache buffer_cache; |
| 235 | ShaderCacheOpenGL shader_cache; | 247 | ShaderCacheOpenGL shader_cache; |
| 236 | QueryCache query_cache; | 248 | QueryCache query_cache; |
| 249 | AccelerateDMA accelerate_dma; | ||
| 237 | FenceManagerOpenGL fence_manager; | 250 | FenceManagerOpenGL fence_manager; |
| 238 | 251 | ||
| 239 | VideoCommon::Shader::AsyncShaders async_shaders; | 252 | VideoCommon::Shader::AsyncShaders async_shaders; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9ea4b6653..e378a5679 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -251,7 +251,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 251 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 251 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 252 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 252 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 253 | descriptor_pool, update_descriptor_queue), | 253 | descriptor_pool, update_descriptor_queue), |
| 254 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, | 254 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, |
| 255 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 255 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 256 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 256 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |
| 257 | scheduler.SetQueryCache(query_cache); | 257 | scheduler.SetQueryCache(query_cache); |
| @@ -660,6 +660,10 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | |||
| 660 | return true; | 660 | return true; |
| 661 | } | 661 | } |
| 662 | 662 | ||
| 663 | Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { | ||
| 664 | return accelerate_dma; | ||
| 665 | } | ||
| 666 | |||
| 663 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 667 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 664 | VAddr framebuffer_addr, u32 pixel_stride) { | 668 | VAddr framebuffer_addr, u32 pixel_stride) { |
| 665 | if (!framebuffer_addr) { | 669 | if (!framebuffer_addr) { |
| @@ -698,6 +702,13 @@ void RasterizerVulkan::FlushWork() { | |||
| 698 | draw_counter = 0; | 702 | draw_counter = 0; |
| 699 | } | 703 | } |
| 700 | 704 | ||
| 705 | AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} | ||
| 706 | |||
| 707 | bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | ||
| 708 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 709 | return buffer_cache.DMACopy(src_address, dest_address, amount); | ||
| 710 | } | ||
| 711 | |||
| 701 | void RasterizerVulkan::SetupShaderDescriptors( | 712 | void RasterizerVulkan::SetupShaderDescriptors( |
| 702 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { | 713 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { |
| 703 | image_view_indices.clear(); | 714 | image_view_indices.clear(); |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 5450ccfb5..cb562518d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -49,6 +49,16 @@ struct VKScreenInfo; | |||
| 49 | 49 | ||
| 50 | class StateTracker; | 50 | class StateTracker; |
| 51 | 51 | ||
| 52 | class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { | ||
| 53 | public: | ||
| 54 | explicit AccelerateDMA(BufferCache& buffer_cache); | ||
| 55 | |||
| 56 | bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; | ||
| 57 | |||
| 58 | private: | ||
| 59 | BufferCache& buffer_cache; | ||
| 60 | }; | ||
| 61 | |||
| 52 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 62 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| 53 | public: | 63 | public: |
| 54 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 64 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| @@ -86,6 +96,7 @@ public: | |||
| 86 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 96 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 87 | const Tegra::Engines::Fermi2D::Surface& dst, | 97 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 88 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 98 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 99 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||
| 89 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 100 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 90 | u32 pixel_stride) override; | 101 | u32 pixel_stride) override; |
| 91 | 102 | ||
| @@ -186,6 +197,7 @@ private: | |||
| 186 | BufferCache buffer_cache; | 197 | BufferCache buffer_cache; |
| 187 | VKPipelineCache pipeline_cache; | 198 | VKPipelineCache pipeline_cache; |
| 188 | VKQueryCache query_cache; | 199 | VKQueryCache query_cache; |
| 200 | AccelerateDMA accelerate_dma; | ||
| 189 | VKFenceManager fence_manager; | 201 | VKFenceManager fence_manager; |
| 190 | 202 | ||
| 191 | vk::Event wfi_event; | 203 | vk::Event wfi_event; |