diff options
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 5 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.h | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 1 | ||||
| -rw-r--r-- | src/video_core/framebuffer_config.h | 20 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 1220 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 217 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 3 | ||||
| -rw-r--r-- | src/video_core/query_cache.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader_environment.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/shader_environment.h | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_view_info.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 5 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 8 |
19 files changed, 875 insertions, 632 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 789000294..4ee8c5733 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -48,8 +48,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 48 | addr, offset, width, height, stride, format); | 48 | addr, offset, width, height, stride, format); |
| 49 | 49 | ||
| 50 | const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); | 50 | const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); |
| 51 | const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, | 51 | const auto transform_flags = static_cast<Tegra::FramebufferConfig::TransformFlags>(transform); |
| 52 | stride, pixel_format, transform, crop_rect}; | 52 | const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, |
| 53 | stride, pixel_format, transform_flags, crop_rect}; | ||
| 53 | 54 | ||
| 54 | system.GetPerfStats().EndSystemFrame(); | 55 | system.GetPerfStats().EndSystemFrame(); |
| 55 | system.GPU().SwapBuffers(&framebuffer); | 56 | system.GPU().SwapBuffers(&framebuffer); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index c0a380088..54ac105d5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -13,6 +13,14 @@ | |||
| 13 | #include "video_core/memory_manager.h" | 13 | #include "video_core/memory_manager.h" |
| 14 | 14 | ||
| 15 | namespace Service::Nvidia::Devices { | 15 | namespace Service::Nvidia::Devices { |
| 16 | namespace { | ||
| 17 | Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) { | ||
| 18 | Tegra::GPU::FenceAction result{}; | ||
| 19 | result.op.Assign(op); | ||
| 20 | result.syncpoint_id.Assign(syncpoint_id); | ||
| 21 | return {result.raw}; | ||
| 22 | } | ||
| 23 | } // namespace | ||
| 16 | 24 | ||
| 17 | nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_, | 25 | nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_, |
| 18 | SyncpointManager& syncpoint_manager_) | 26 | SyncpointManager& syncpoint_manager_) |
| @@ -187,7 +195,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) { | |||
| 187 | {fence.value}, | 195 | {fence.value}, |
| 188 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | 196 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, |
| 189 | Tegra::SubmissionMode::Increasing), | 197 | Tegra::SubmissionMode::Increasing), |
| 190 | Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), | 198 | BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id), |
| 191 | }; | 199 | }; |
| 192 | } | 200 | } |
| 193 | 201 | ||
| @@ -200,8 +208,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, | |||
| 200 | for (u32 count = 0; count < add_increment; ++count) { | 208 | for (u32 count = 0; count < add_increment; ++count) { |
| 201 | result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | 209 | result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, |
| 202 | Tegra::SubmissionMode::Increasing)); | 210 | Tegra::SubmissionMode::Increasing)); |
| 203 | result.emplace_back( | 211 | result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id)); |
| 204 | Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id)); | ||
| 205 | } | 212 | } |
| 206 | 213 | ||
| 207 | return result; | 214 | return result; |
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 8b86ad050..a8c4b4415 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "command_classes/vic.h" | 24 | #include "command_classes/vic.h" |
| 25 | #include "video_core/cdma_pusher.h" | 25 | #include "video_core/cdma_pusher.h" |
| 26 | #include "video_core/command_classes/nvdec_common.h" | 26 | #include "video_core/command_classes/nvdec_common.h" |
| 27 | #include "video_core/command_classes/sync_manager.h" | ||
| 27 | #include "video_core/engines/maxwell_3d.h" | 28 | #include "video_core/engines/maxwell_3d.h" |
| 28 | #include "video_core/gpu.h" | 29 | #include "video_core/gpu.h" |
| 29 | #include "video_core/memory_manager.h" | 30 | #include "video_core/memory_manager.h" |
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 1bada44dd..87b49d6ea 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h | |||
| @@ -9,13 +9,13 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/command_classes/sync_manager.h" | ||
| 13 | 12 | ||
| 14 | namespace Tegra { | 13 | namespace Tegra { |
| 15 | 14 | ||
| 16 | class GPU; | 15 | class GPU; |
| 17 | class Host1x; | 16 | class Host1x; |
| 18 | class Nvdec; | 17 | class Nvdec; |
| 18 | class SyncptIncrManager; | ||
| 19 | class Vic; | 19 | class Vic; |
| 20 | 20 | ||
| 21 | enum class ChSubmissionMode : u32 { | 21 | enum class ChSubmissionMode : u32 { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7f4ca6282..f22342dfb 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bitset> | 8 | #include <bitset> |
| 9 | #include <cmath> | ||
| 9 | #include <limits> | 10 | #include <limits> |
| 10 | #include <optional> | 11 | #include <optional> |
| 11 | #include <type_traits> | 12 | #include <type_traits> |
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index b86c3a757..b1d455e30 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h | |||
| @@ -4,8 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | namespace Tegra { | 7 | #include "common/common_types.h" |
| 8 | #include "common/math_util.h" | ||
| 8 | 9 | ||
| 10 | namespace Tegra { | ||
| 9 | /** | 11 | /** |
| 10 | * Struct describing framebuffer configuration | 12 | * Struct describing framebuffer configuration |
| 11 | */ | 13 | */ |
| @@ -16,6 +18,21 @@ struct FramebufferConfig { | |||
| 16 | B8G8R8A8_UNORM = 5, | 18 | B8G8R8A8_UNORM = 5, |
| 17 | }; | 19 | }; |
| 18 | 20 | ||
| 21 | enum class TransformFlags : u32 { | ||
| 22 | /// No transform flags are set | ||
| 23 | Unset = 0x00, | ||
| 24 | /// Flip source image horizontally (around the vertical axis) | ||
| 25 | FlipH = 0x01, | ||
| 26 | /// Flip source image vertically (around the horizontal axis) | ||
| 27 | FlipV = 0x02, | ||
| 28 | /// Rotate source image 90 degrees clockwise | ||
| 29 | Rotate90 = 0x04, | ||
| 30 | /// Rotate source image 180 degrees | ||
| 31 | Rotate180 = 0x03, | ||
| 32 | /// Rotate source image 270 degrees clockwise | ||
| 33 | Rotate270 = 0x07, | ||
| 34 | }; | ||
| 35 | |||
| 19 | VAddr address{}; | 36 | VAddr address{}; |
| 20 | u32 offset{}; | 37 | u32 offset{}; |
| 21 | u32 width{}; | 38 | u32 width{}; |
| @@ -23,7 +40,6 @@ struct FramebufferConfig { | |||
| 23 | u32 stride{}; | 40 | u32 stride{}; |
| 24 | PixelFormat pixel_format{}; | 41 | PixelFormat pixel_format{}; |
| 25 | 42 | ||
| 26 | using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; | ||
| 27 | TransformFlags transform_flags{}; | 43 | TransformFlags transform_flags{}; |
| 28 | Common::Rectangle<int> crop_rect; | 44 | Common::Rectangle<int> crop_rect; |
| 29 | }; | 45 | }; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 2ae3639b5..520675873 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -2,540 +2,920 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include <atomic> | ||
| 5 | #include <chrono> | 7 | #include <chrono> |
| 8 | #include <condition_variable> | ||
| 9 | #include <list> | ||
| 10 | #include <memory> | ||
| 6 | 11 | ||
| 7 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 8 | #include "common/microprofile.h" | 13 | #include "common/microprofile.h" |
| 9 | #include "common/settings.h" | 14 | #include "common/settings.h" |
| 10 | #include "core/core.h" | 15 | #include "core/core.h" |
| 11 | #include "core/core_timing.h" | 16 | #include "core/core_timing.h" |
| 12 | #include "core/core_timing_util.h" | ||
| 13 | #include "core/frontend/emu_window.h" | 17 | #include "core/frontend/emu_window.h" |
| 14 | #include "core/hardware_interrupt_manager.h" | 18 | #include "core/hardware_interrupt_manager.h" |
| 15 | #include "core/memory.h" | 19 | #include "core/hle/service/nvdrv/nvdata.h" |
| 20 | #include "core/hle/service/nvflinger/buffer_queue.h" | ||
| 16 | #include "core/perf_stats.h" | 21 | #include "core/perf_stats.h" |
| 22 | #include "video_core/cdma_pusher.h" | ||
| 23 | #include "video_core/dma_pusher.h" | ||
| 17 | #include "video_core/engines/fermi_2d.h" | 24 | #include "video_core/engines/fermi_2d.h" |
| 18 | #include "video_core/engines/kepler_compute.h" | 25 | #include "video_core/engines/kepler_compute.h" |
| 19 | #include "video_core/engines/kepler_memory.h" | 26 | #include "video_core/engines/kepler_memory.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 27 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/maxwell_dma.h" | 28 | #include "video_core/engines/maxwell_dma.h" |
| 22 | #include "video_core/gpu.h" | 29 | #include "video_core/gpu.h" |
| 30 | #include "video_core/gpu_thread.h" | ||
| 23 | #include "video_core/memory_manager.h" | 31 | #include "video_core/memory_manager.h" |
| 24 | #include "video_core/renderer_base.h" | 32 | #include "video_core/renderer_base.h" |
| 25 | #include "video_core/shader_notify.h" | 33 | #include "video_core/shader_notify.h" |
| 26 | #include "video_core/video_core.h" | ||
| 27 | 34 | ||
| 28 | namespace Tegra { | 35 | namespace Tegra { |
| 29 | 36 | ||
| 30 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | 37 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); |
| 31 | 38 | ||
| 32 | GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) | 39 | struct GPU::Impl { |
| 33 | : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, | 40 | explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) |
| 34 | dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_}, | 41 | : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>( |
| 35 | maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, | 42 | system)}, |
| 36 | fermi_2d{std::make_unique<Engines::Fermi2D>()}, | 43 | dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_}, |
| 37 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, | 44 | maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, |
| 38 | maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, | 45 | fermi_2d{std::make_unique<Engines::Fermi2D>()}, |
| 39 | kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, | 46 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, |
| 40 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, | 47 | maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, |
| 41 | gpu_thread{system_, is_async_} {} | 48 | kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, |
| 49 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, | ||
| 50 | gpu_thread{system_, is_async_} {} | ||
| 51 | |||
| 52 | ~Impl() = default; | ||
| 53 | |||
| 54 | /// Binds a renderer to the GPU. | ||
| 55 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | ||
| 56 | renderer = std::move(renderer_); | ||
| 57 | rasterizer = renderer->ReadRasterizer(); | ||
| 58 | |||
| 59 | memory_manager->BindRasterizer(rasterizer); | ||
| 60 | maxwell_3d->BindRasterizer(rasterizer); | ||
| 61 | fermi_2d->BindRasterizer(rasterizer); | ||
| 62 | kepler_compute->BindRasterizer(rasterizer); | ||
| 63 | maxwell_dma->BindRasterizer(rasterizer); | ||
| 64 | } | ||
| 65 | |||
| 66 | /// Calls a GPU method. | ||
| 67 | void CallMethod(const GPU::MethodCall& method_call) { | ||
| 68 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, | ||
| 69 | method_call.subchannel); | ||
| 70 | |||
| 71 | ASSERT(method_call.subchannel < bound_engines.size()); | ||
| 72 | |||
| 73 | if (ExecuteMethodOnEngine(method_call.method)) { | ||
| 74 | CallEngineMethod(method_call); | ||
| 75 | } else { | ||
| 76 | CallPullerMethod(method_call); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 80 | /// Calls a GPU multivalue method. | ||
| 81 | void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 82 | u32 methods_pending) { | ||
| 83 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); | ||
| 84 | |||
| 85 | ASSERT(subchannel < bound_engines.size()); | ||
| 86 | |||
| 87 | if (ExecuteMethodOnEngine(method)) { | ||
| 88 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); | ||
| 89 | } else { | ||
| 90 | for (std::size_t i = 0; i < amount; i++) { | ||
| 91 | CallPullerMethod(GPU::MethodCall{ | ||
| 92 | method, | ||
| 93 | base_start[i], | ||
| 94 | subchannel, | ||
| 95 | methods_pending - static_cast<u32>(i), | ||
| 96 | }); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | /// Flush all current written commands into the host GPU for execution. | ||
| 102 | void FlushCommands() { | ||
| 103 | rasterizer->FlushCommands(); | ||
| 104 | } | ||
| 105 | |||
| 106 | /// Synchronizes CPU writes with Host GPU memory. | ||
| 107 | void SyncGuestHost() { | ||
| 108 | rasterizer->SyncGuestHost(); | ||
| 109 | } | ||
| 110 | |||
| 111 | /// Signal the ending of command list. | ||
| 112 | void OnCommandListEnd() { | ||
| 113 | if (is_async) { | ||
| 114 | // This command only applies to asynchronous GPU mode | ||
| 115 | gpu_thread.OnCommandListEnd(); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | /// Request a host GPU memory flush from the CPU. | ||
| 120 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) { | ||
| 121 | std::unique_lock lck{flush_request_mutex}; | ||
| 122 | const u64 fence = ++last_flush_fence; | ||
| 123 | flush_requests.emplace_back(fence, addr, size); | ||
| 124 | return fence; | ||
| 125 | } | ||
| 126 | |||
| 127 | /// Obtains current flush request fence id. | ||
| 128 | [[nodiscard]] u64 CurrentFlushRequestFence() const { | ||
| 129 | return current_flush_fence.load(std::memory_order_relaxed); | ||
| 130 | } | ||
| 131 | |||
| 132 | /// Tick pending requests within the GPU. | ||
| 133 | void TickWork() { | ||
| 134 | std::unique_lock lck{flush_request_mutex}; | ||
| 135 | while (!flush_requests.empty()) { | ||
| 136 | auto& request = flush_requests.front(); | ||
| 137 | const u64 fence = request.fence; | ||
| 138 | const VAddr addr = request.addr; | ||
| 139 | const std::size_t size = request.size; | ||
| 140 | flush_requests.pop_front(); | ||
| 141 | flush_request_mutex.unlock(); | ||
| 142 | rasterizer->FlushRegion(addr, size); | ||
| 143 | current_flush_fence.store(fence); | ||
| 144 | flush_request_mutex.lock(); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | /// Returns a reference to the Maxwell3D GPU engine. | ||
| 149 | [[nodiscard]] Engines::Maxwell3D& Maxwell3D() { | ||
| 150 | return *maxwell_3d; | ||
| 151 | } | ||
| 152 | |||
| 153 | /// Returns a const reference to the Maxwell3D GPU engine. | ||
| 154 | [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const { | ||
| 155 | return *maxwell_3d; | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 159 | [[nodiscard]] Engines::KeplerCompute& KeplerCompute() { | ||
| 160 | return *kepler_compute; | ||
| 161 | } | ||
| 162 | |||
| 163 | /// Returns a const reference to the KeplerCompute GPU engine. | ||
| 164 | [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const { | ||
| 165 | return *kepler_compute; | ||
| 166 | } | ||
| 167 | |||
| 168 | /// Returns a reference to the GPU memory manager. | ||
| 169 | [[nodiscard]] Tegra::MemoryManager& MemoryManager() { | ||
| 170 | return *memory_manager; | ||
| 171 | } | ||
| 172 | |||
| 173 | /// Returns a const reference to the GPU memory manager. | ||
| 174 | [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const { | ||
| 175 | return *memory_manager; | ||
| 176 | } | ||
| 177 | |||
| 178 | /// Returns a reference to the GPU DMA pusher. | ||
| 179 | [[nodiscard]] Tegra::DmaPusher& DmaPusher() { | ||
| 180 | return *dma_pusher; | ||
| 181 | } | ||
| 182 | |||
| 183 | /// Returns a const reference to the GPU DMA pusher. | ||
| 184 | [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const { | ||
| 185 | return *dma_pusher; | ||
| 186 | } | ||
| 187 | |||
| 188 | /// Returns a reference to the GPU CDMA pusher. | ||
| 189 | [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() { | ||
| 190 | return *cdma_pusher; | ||
| 191 | } | ||
| 192 | |||
| 193 | /// Returns a const reference to the GPU CDMA pusher. | ||
| 194 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const { | ||
| 195 | return *cdma_pusher; | ||
| 196 | } | ||
| 197 | |||
| 198 | /// Returns a reference to the underlying renderer. | ||
| 199 | [[nodiscard]] VideoCore::RendererBase& Renderer() { | ||
| 200 | return *renderer; | ||
| 201 | } | ||
| 202 | |||
| 203 | /// Returns a const reference to the underlying renderer. | ||
| 204 | [[nodiscard]] const VideoCore::RendererBase& Renderer() const { | ||
| 205 | return *renderer; | ||
| 206 | } | ||
| 207 | |||
| 208 | /// Returns a reference to the shader notifier. | ||
| 209 | [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { | ||
| 210 | return *shader_notify; | ||
| 211 | } | ||
| 212 | |||
| 213 | /// Returns a const reference to the shader notifier. | ||
| 214 | [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { | ||
| 215 | return *shader_notify; | ||
| 216 | } | ||
| 217 | |||
| 218 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||
| 219 | void WaitFence(u32 syncpoint_id, u32 value) { | ||
| 220 | // A synchronous GPU is always in sync | ||
| 221 | if (!is_async) { | ||
| 222 | return; | ||
| 223 | } | ||
| 224 | if (syncpoint_id == UINT32_MAX) { | ||
| 225 | // TODO: Research what this does. | ||
| 226 | LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); | ||
| 227 | return; | ||
| 228 | } | ||
| 229 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 230 | std::unique_lock lock{sync_mutex}; | ||
| 231 | sync_cv.wait(lock, [=, this] { | ||
| 232 | if (shutting_down.load(std::memory_order_relaxed)) { | ||
| 233 | // We're shutting down, ensure no threads continue to wait for the next syncpoint | ||
| 234 | return true; | ||
| 235 | } | ||
| 236 | return syncpoints.at(syncpoint_id).load() >= value; | ||
| 237 | }); | ||
| 238 | } | ||
| 239 | |||
| 240 | void IncrementSyncPoint(u32 syncpoint_id) { | ||
| 241 | auto& syncpoint = syncpoints.at(syncpoint_id); | ||
| 242 | syncpoint++; | ||
| 243 | std::lock_guard lock{sync_mutex}; | ||
| 244 | sync_cv.notify_all(); | ||
| 245 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 246 | if (!interrupt.empty()) { | ||
| 247 | u32 value = syncpoint.load(); | ||
| 248 | auto it = interrupt.begin(); | ||
| 249 | while (it != interrupt.end()) { | ||
| 250 | if (value >= *it) { | ||
| 251 | TriggerCpuInterrupt(syncpoint_id, *it); | ||
| 252 | it = interrupt.erase(it); | ||
| 253 | continue; | ||
| 254 | } | ||
| 255 | it++; | ||
| 256 | } | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { | ||
| 261 | return syncpoints.at(syncpoint_id).load(); | ||
| 262 | } | ||
| 263 | |||
| 264 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { | ||
| 265 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 266 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | ||
| 267 | [value](u32 in_value) { return in_value == value; }); | ||
| 268 | if (contains) { | ||
| 269 | return; | ||
| 270 | } | ||
| 271 | interrupt.emplace_back(value); | ||
| 272 | } | ||
| 273 | |||
| 274 | [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { | ||
| 275 | std::lock_guard lock{sync_mutex}; | ||
| 276 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 277 | const auto iter = | ||
| 278 | std::find_if(interrupt.begin(), interrupt.end(), | ||
| 279 | [value](u32 interrupt_value) { return value == interrupt_value; }); | ||
| 280 | |||
| 281 | if (iter == interrupt.end()) { | ||
| 282 | return false; | ||
| 283 | } | ||
| 284 | interrupt.erase(iter); | ||
| 285 | return true; | ||
| 286 | } | ||
| 287 | |||
| 288 | [[nodiscard]] u64 GetTicks() const { | ||
| 289 | // These values were reverse engineered by fincs from NVN | ||
| 290 | // The gpu clock is reported in units of 384/625 nanoseconds | ||
| 291 | constexpr u64 gpu_ticks_num = 384; | ||
| 292 | constexpr u64 gpu_ticks_den = 625; | ||
| 293 | |||
| 294 | u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); | ||
| 295 | if (Settings::values.use_fast_gpu_time.GetValue()) { | ||
| 296 | nanoseconds /= 256; | ||
| 297 | } | ||
| 298 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | ||
| 299 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | ||
| 300 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | ||
| 301 | } | ||
| 302 | |||
| 303 | [[nodiscard]] std::unique_lock<std::mutex> LockSync() { | ||
| 304 | return std::unique_lock{sync_mutex}; | ||
| 305 | } | ||
| 306 | |||
| 307 | [[nodiscard]] bool IsAsync() const { | ||
| 308 | return is_async; | ||
| 309 | } | ||
| 310 | |||
| 311 | [[nodiscard]] bool UseNvdec() const { | ||
| 312 | return use_nvdec; | ||
| 313 | } | ||
| 314 | |||
| 315 | void RendererFrameEndNotify() { | ||
| 316 | system.GetPerfStats().EndGameFrame(); | ||
| 317 | } | ||
| 318 | |||
| 319 | /// Performs any additional setup necessary in order to begin GPU emulation. | ||
| 320 | /// This can be used to launch any necessary threads and register any necessary | ||
| 321 | /// core timing events. | ||
| 322 | void Start() { | ||
| 323 | gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); | ||
| 324 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); | ||
| 325 | cpu_context->MakeCurrent(); | ||
| 326 | } | ||
| 327 | |||
| 328 | /// Obtain the CPU Context | ||
| 329 | void ObtainContext() { | ||
| 330 | cpu_context->MakeCurrent(); | ||
| 331 | } | ||
| 332 | |||
| 333 | /// Release the CPU Context | ||
| 334 | void ReleaseContext() { | ||
| 335 | cpu_context->DoneCurrent(); | ||
| 336 | } | ||
| 337 | |||
| 338 | /// Push GPU command entries to be processed | ||
| 339 | void PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 340 | gpu_thread.SubmitList(std::move(entries)); | ||
| 341 | } | ||
| 342 | |||
| 343 | /// Push GPU command buffer entries to be processed | ||
| 344 | void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | ||
| 345 | if (!use_nvdec) { | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | |||
| 349 | if (!cdma_pusher) { | ||
| 350 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu); | ||
| 351 | } | ||
| 352 | |||
| 353 | // SubmitCommandBuffer would make the nvdec operations async; this is not currently working | ||
| 354 | // TODO(ameerj): RE proper async nvdec operation | ||
| 355 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); | ||
| 356 | |||
| 357 | cdma_pusher->ProcessEntries(std::move(entries)); | ||
| 358 | } | ||
| 359 | |||
| 360 | /// Frees the CDMAPusher instance to free up resources | ||
| 361 | void ClearCdmaInstance() { | ||
| 362 | cdma_pusher.reset(); | ||
| 363 | } | ||
| 364 | |||
| 365 | /// Swap buffers (render frame) | ||
| 366 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||
| 367 | gpu_thread.SwapBuffers(framebuffer); | ||
| 368 | } | ||
| 369 | |||
| 370 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | ||
| 371 | void FlushRegion(VAddr addr, u64 size) { | ||
| 372 | gpu_thread.FlushRegion(addr, size); | ||
| 373 | } | ||
| 374 | |||
| 375 | /// Notify rasterizer that any caches of the specified region should be invalidated | ||
| 376 | void InvalidateRegion(VAddr addr, u64 size) { | ||
| 377 | gpu_thread.InvalidateRegion(addr, size); | ||
| 378 | } | ||
| 379 | |||
| 380 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||
| 381 | void FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 382 | gpu_thread.FlushAndInvalidateRegion(addr, size); | ||
| 383 | } | ||
| 384 | |||
| 385 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const { | ||
| 386 | auto& interrupt_manager = system.InterruptManager(); | ||
| 387 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 388 | } | ||
| 389 | |||
| 390 | void ProcessBindMethod(const GPU::MethodCall& method_call) { | ||
| 391 | // Bind the current subchannel to the desired engine id. | ||
| 392 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 393 | method_call.argument); | ||
| 394 | const auto engine_id = static_cast<EngineID>(method_call.argument); | ||
| 395 | bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id); | ||
| 396 | switch (engine_id) { | ||
| 397 | case EngineID::FERMI_TWOD_A: | ||
| 398 | dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); | ||
| 399 | break; | ||
| 400 | case EngineID::MAXWELL_B: | ||
| 401 | dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); | ||
| 402 | break; | ||
| 403 | case EngineID::KEPLER_COMPUTE_B: | ||
| 404 | dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); | ||
| 405 | break; | ||
| 406 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 407 | dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); | ||
| 408 | break; | ||
| 409 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 410 | dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); | ||
| 411 | break; | ||
| 412 | default: | ||
| 413 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); | ||
| 414 | } | ||
| 415 | } | ||
| 416 | |||
| 417 | void ProcessFenceActionMethod() { | ||
| 418 | switch (regs.fence_action.op) { | ||
| 419 | case GPU::FenceOperation::Acquire: | ||
| 420 | WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 421 | break; | ||
| 422 | case GPU::FenceOperation::Increment: | ||
| 423 | IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||
| 424 | break; | ||
| 425 | default: | ||
| 426 | UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); | ||
| 427 | } | ||
| 428 | } | ||
| 429 | |||
| 430 | void ProcessWaitForInterruptMethod() { | ||
| 431 | // TODO(bunnei) ImplementMe | ||
| 432 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 433 | } | ||
| 434 | |||
| 435 | void ProcessSemaphoreTriggerMethod() { | ||
| 436 | const auto semaphoreOperationMask = 0xF; | ||
| 437 | const auto op = | ||
| 438 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||
| 439 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 440 | struct Block { | ||
| 441 | u32 sequence; | ||
| 442 | u32 zeros = 0; | ||
| 443 | u64 timestamp; | ||
| 444 | }; | ||
| 445 | |||
| 446 | Block block{}; | ||
| 447 | block.sequence = regs.semaphore_sequence; | ||
| 448 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 449 | // CoreTiming | ||
| 450 | block.timestamp = GetTicks(); | ||
| 451 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, | ||
| 452 | sizeof(block)); | ||
| 453 | } else { | ||
| 454 | const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||
| 455 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 456 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 457 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 458 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 459 | // Nothing to do in this case | ||
| 460 | } else { | ||
| 461 | regs.acquire_source = true; | ||
| 462 | regs.acquire_value = regs.semaphore_sequence; | ||
| 463 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 464 | regs.acquire_active = true; | ||
| 465 | regs.acquire_mode = false; | ||
| 466 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 467 | regs.acquire_active = true; | ||
| 468 | regs.acquire_mode = true; | ||
| 469 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 470 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 471 | // semaphore_sequence, gives a non-0 result | ||
| 472 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 473 | } else { | ||
| 474 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 475 | } | ||
| 476 | } | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | void ProcessSemaphoreRelease() { | ||
| 481 | memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), | ||
| 482 | regs.semaphore_release); | ||
| 483 | } | ||
| 484 | |||
| 485 | void ProcessSemaphoreAcquire() { | ||
| 486 | const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||
| 487 | const auto value = regs.semaphore_acquire; | ||
| 488 | if (word != value) { | ||
| 489 | regs.acquire_active = true; | ||
| 490 | regs.acquire_value = value; | ||
| 491 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 492 | regs.acquire_mode = false; | ||
| 493 | regs.acquire_source = false; | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | /// Calls a GPU puller method. | ||
| 498 | void CallPullerMethod(const GPU::MethodCall& method_call) { | ||
| 499 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 500 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 501 | |||
| 502 | switch (method) { | ||
| 503 | case BufferMethods::BindObject: { | ||
| 504 | ProcessBindMethod(method_call); | ||
| 505 | break; | ||
| 506 | } | ||
| 507 | case BufferMethods::Nop: | ||
| 508 | case BufferMethods::SemaphoreAddressHigh: | ||
| 509 | case BufferMethods::SemaphoreAddressLow: | ||
| 510 | case BufferMethods::SemaphoreSequence: | ||
| 511 | case BufferMethods::UnkCacheFlush: | ||
| 512 | case BufferMethods::WrcacheFlush: | ||
| 513 | case BufferMethods::FenceValue: | ||
| 514 | break; | ||
| 515 | case BufferMethods::RefCnt: | ||
| 516 | rasterizer->SignalReference(); | ||
| 517 | break; | ||
| 518 | case BufferMethods::FenceAction: | ||
| 519 | ProcessFenceActionMethod(); | ||
| 520 | break; | ||
| 521 | case BufferMethods::WaitForInterrupt: | ||
| 522 | ProcessWaitForInterruptMethod(); | ||
| 523 | break; | ||
| 524 | case BufferMethods::SemaphoreTrigger: { | ||
| 525 | ProcessSemaphoreTriggerMethod(); | ||
| 526 | break; | ||
| 527 | } | ||
| 528 | case BufferMethods::NotifyIntr: { | ||
| 529 | // TODO(Kmather73): Research and implement this method. | ||
| 530 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 531 | break; | ||
| 532 | } | ||
| 533 | case BufferMethods::Unk28: { | ||
| 534 | // TODO(Kmather73): Research and implement this method. | ||
| 535 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 536 | break; | ||
| 537 | } | ||
| 538 | case BufferMethods::SemaphoreAcquire: { | ||
| 539 | ProcessSemaphoreAcquire(); | ||
| 540 | break; | ||
| 541 | } | ||
| 542 | case BufferMethods::SemaphoreRelease: { | ||
| 543 | ProcessSemaphoreRelease(); | ||
| 544 | break; | ||
| 545 | } | ||
| 546 | case BufferMethods::Yield: { | ||
| 547 | // TODO(Kmather73): Research and implement this method. | ||
| 548 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 549 | break; | ||
| 550 | } | ||
| 551 | default: | ||
| 552 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); | ||
| 553 | break; | ||
| 554 | } | ||
| 555 | } | ||
| 556 | |||
| 557 | /// Calls a GPU engine method. | ||
| 558 | void CallEngineMethod(const GPU::MethodCall& method_call) { | ||
| 559 | const EngineID engine = bound_engines[method_call.subchannel]; | ||
| 560 | |||
| 561 | switch (engine) { | ||
| 562 | case EngineID::FERMI_TWOD_A: | ||
| 563 | fermi_2d->CallMethod(method_call.method, method_call.argument, | ||
| 564 | method_call.IsLastCall()); | ||
| 565 | break; | ||
| 566 | case EngineID::MAXWELL_B: | ||
| 567 | maxwell_3d->CallMethod(method_call.method, method_call.argument, | ||
| 568 | method_call.IsLastCall()); | ||
| 569 | break; | ||
| 570 | case EngineID::KEPLER_COMPUTE_B: | ||
| 571 | kepler_compute->CallMethod(method_call.method, method_call.argument, | ||
| 572 | method_call.IsLastCall()); | ||
| 573 | break; | ||
| 574 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 575 | maxwell_dma->CallMethod(method_call.method, method_call.argument, | ||
| 576 | method_call.IsLastCall()); | ||
| 577 | break; | ||
| 578 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 579 | kepler_memory->CallMethod(method_call.method, method_call.argument, | ||
| 580 | method_call.IsLastCall()); | ||
| 581 | break; | ||
| 582 | default: | ||
| 583 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 584 | } | ||
| 585 | } | ||
| 586 | |||
| 587 | /// Calls a GPU engine multivalue method. | ||
| 588 | void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 589 | u32 methods_pending) { | ||
| 590 | const EngineID engine = bound_engines[subchannel]; | ||
| 591 | |||
| 592 | switch (engine) { | ||
| 593 | case EngineID::FERMI_TWOD_A: | ||
| 594 | fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 595 | break; | ||
| 596 | case EngineID::MAXWELL_B: | ||
| 597 | maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 598 | break; | ||
| 599 | case EngineID::KEPLER_COMPUTE_B: | ||
| 600 | kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 601 | break; | ||
| 602 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 603 | maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 604 | break; | ||
| 605 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 606 | kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 607 | break; | ||
| 608 | default: | ||
| 609 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 610 | } | ||
| 611 | } | ||
| 612 | |||
| 613 | /// Determines where the method should be executed. | ||
| 614 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) { | ||
| 615 | const auto buffer_method = static_cast<BufferMethods>(method); | ||
| 616 | return buffer_method >= BufferMethods::NonPullerMethods; | ||
| 617 | } | ||
| 618 | |||
| 619 | struct Regs { | ||
| 620 | static constexpr size_t NUM_REGS = 0x40; | ||
| 621 | |||
| 622 | union { | ||
| 623 | struct { | ||
| 624 | INSERT_PADDING_WORDS_NOINIT(0x4); | ||
| 625 | struct { | ||
| 626 | u32 address_high; | ||
| 627 | u32 address_low; | ||
| 628 | |||
| 629 | [[nodiscard]] GPUVAddr SemaphoreAddress() const { | ||
| 630 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 631 | address_low); | ||
| 632 | } | ||
| 633 | } semaphore_address; | ||
| 634 | |||
| 635 | u32 semaphore_sequence; | ||
| 636 | u32 semaphore_trigger; | ||
| 637 | INSERT_PADDING_WORDS_NOINIT(0xC); | ||
| 638 | |||
| 639 | // The pusher and the puller share the reference counter, the pusher only has read | ||
| 640 | // access | ||
| 641 | u32 reference_count; | ||
| 642 | INSERT_PADDING_WORDS_NOINIT(0x5); | ||
| 643 | |||
| 644 | u32 semaphore_acquire; | ||
| 645 | u32 semaphore_release; | ||
| 646 | u32 fence_value; | ||
| 647 | GPU::FenceAction fence_action; | ||
| 648 | INSERT_PADDING_WORDS_NOINIT(0xE2); | ||
| 649 | |||
| 650 | // Puller state | ||
| 651 | u32 acquire_mode; | ||
| 652 | u32 acquire_source; | ||
| 653 | u32 acquire_active; | ||
| 654 | u32 acquire_timeout; | ||
| 655 | u32 acquire_value; | ||
| 656 | }; | ||
| 657 | std::array<u32, NUM_REGS> reg_array; | ||
| 658 | }; | ||
| 659 | } regs{}; | ||
| 660 | |||
| 661 | GPU& gpu; | ||
| 662 | Core::System& system; | ||
| 663 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | ||
| 664 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | ||
| 665 | std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | ||
| 666 | std::unique_ptr<VideoCore::RendererBase> renderer; | ||
| 667 | VideoCore::RasterizerInterface* rasterizer = nullptr; | ||
| 668 | const bool use_nvdec; | ||
| 669 | |||
| 670 | /// Mapping of command subchannels to their bound engine ids | ||
| 671 | std::array<EngineID, 8> bound_engines{}; | ||
| 672 | /// 3D engine | ||
| 673 | std::unique_ptr<Engines::Maxwell3D> maxwell_3d; | ||
| 674 | /// 2D engine | ||
| 675 | std::unique_ptr<Engines::Fermi2D> fermi_2d; | ||
| 676 | /// Compute engine | ||
| 677 | std::unique_ptr<Engines::KeplerCompute> kepler_compute; | ||
| 678 | /// DMA engine | ||
| 679 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | ||
| 680 | /// Inline memory engine | ||
| 681 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | ||
| 682 | /// Shader build notifier | ||
| 683 | std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | ||
| 684 | /// When true, we are about to shut down emulation session, so terminate outstanding tasks | ||
| 685 | std::atomic_bool shutting_down{}; | ||
| 686 | |||
| 687 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||
| 688 | |||
| 689 | std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||
| 690 | |||
| 691 | std::mutex sync_mutex; | ||
| 692 | std::mutex device_mutex; | ||
| 693 | |||
| 694 | std::condition_variable sync_cv; | ||
| 695 | |||
| 696 | struct FlushRequest { | ||
| 697 | explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) | ||
| 698 | : fence{fence_}, addr{addr_}, size{size_} {} | ||
| 699 | u64 fence; | ||
| 700 | VAddr addr; | ||
| 701 | std::size_t size; | ||
| 702 | }; | ||
| 703 | |||
| 704 | std::list<FlushRequest> flush_requests; | ||
| 705 | std::atomic<u64> current_flush_fence{}; | ||
| 706 | u64 last_flush_fence{}; | ||
| 707 | std::mutex flush_request_mutex; | ||
| 708 | |||
| 709 | const bool is_async; | ||
| 710 | |||
| 711 | VideoCommon::GPUThread::ThreadManager gpu_thread; | ||
| 712 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | ||
| 713 | |||
| 714 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 715 | static_assert(offsetof(Regs, field_name) == position * 4, \ | ||
| 716 | "Field " #field_name " has invalid position") | ||
| 717 | |||
| 718 | ASSERT_REG_POSITION(semaphore_address, 0x4); | ||
| 719 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 720 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 721 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 722 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 723 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 724 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 725 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 726 | |||
| 727 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 728 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 729 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 730 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 731 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 732 | |||
| 733 | #undef ASSERT_REG_POSITION | ||
| 734 | |||
| 735 | enum class GpuSemaphoreOperation { | ||
| 736 | AcquireEqual = 0x1, | ||
| 737 | WriteLong = 0x2, | ||
| 738 | AcquireGequal = 0x4, | ||
| 739 | AcquireMask = 0x8, | ||
| 740 | }; | ||
| 741 | }; | ||
| 742 | |||
| 743 | GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) | ||
| 744 | : impl{std::make_unique<Impl>(*this, system, is_async, use_nvdec)} {} | ||
| 42 | 745 | ||
| 43 | GPU::~GPU() = default; | 746 | GPU::~GPU() = default; |
| 44 | 747 | ||
| 45 | void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | 748 | void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) { |
| 46 | renderer = std::move(renderer_); | 749 | impl->BindRenderer(std::move(renderer)); |
| 47 | rasterizer = renderer->ReadRasterizer(); | 750 | } |
| 48 | 751 | ||
| 49 | memory_manager->BindRasterizer(rasterizer); | 752 | void GPU::CallMethod(const MethodCall& method_call) { |
| 50 | maxwell_3d->BindRasterizer(rasterizer); | 753 | impl->CallMethod(method_call); |
| 51 | fermi_2d->BindRasterizer(rasterizer); | ||
| 52 | kepler_compute->BindRasterizer(rasterizer); | ||
| 53 | maxwell_dma->BindRasterizer(rasterizer); | ||
| 54 | } | 754 | } |
| 55 | 755 | ||
| 56 | Engines::Maxwell3D& GPU::Maxwell3D() { | 756 | void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, |
| 57 | return *maxwell_3d; | 757 | u32 methods_pending) { |
| 758 | impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending); | ||
| 58 | } | 759 | } |
| 59 | 760 | ||
| 60 | const Engines::Maxwell3D& GPU::Maxwell3D() const { | 761 | void GPU::FlushCommands() { |
| 61 | return *maxwell_3d; | 762 | impl->FlushCommands(); |
| 62 | } | 763 | } |
| 63 | 764 | ||
| 64 | Engines::KeplerCompute& GPU::KeplerCompute() { | 765 | void GPU::SyncGuestHost() { |
| 65 | return *kepler_compute; | 766 | impl->SyncGuestHost(); |
| 66 | } | 767 | } |
| 67 | 768 | ||
| 68 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | 769 | void GPU::OnCommandListEnd() { |
| 69 | return *kepler_compute; | 770 | impl->OnCommandListEnd(); |
| 70 | } | 771 | } |
| 71 | 772 | ||
| 72 | MemoryManager& GPU::MemoryManager() { | 773 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { |
| 73 | return *memory_manager; | 774 | return impl->RequestFlush(addr, size); |
| 74 | } | 775 | } |
| 75 | 776 | ||
| 76 | const MemoryManager& GPU::MemoryManager() const { | 777 | u64 GPU::CurrentFlushRequestFence() const { |
| 77 | return *memory_manager; | 778 | return impl->CurrentFlushRequestFence(); |
| 78 | } | 779 | } |
| 79 | 780 | ||
| 80 | DmaPusher& GPU::DmaPusher() { | 781 | void GPU::TickWork() { |
| 81 | return *dma_pusher; | 782 | impl->TickWork(); |
| 82 | } | 783 | } |
| 83 | 784 | ||
| 84 | Tegra::CDmaPusher& GPU::CDmaPusher() { | 785 | Engines::Maxwell3D& GPU::Maxwell3D() { |
| 85 | return *cdma_pusher; | 786 | return impl->Maxwell3D(); |
| 86 | } | 787 | } |
| 87 | 788 | ||
| 88 | const DmaPusher& GPU::DmaPusher() const { | 789 | const Engines::Maxwell3D& GPU::Maxwell3D() const { |
| 89 | return *dma_pusher; | 790 | return impl->Maxwell3D(); |
| 90 | } | 791 | } |
| 91 | 792 | ||
| 92 | const Tegra::CDmaPusher& GPU::CDmaPusher() const { | 793 | Engines::KeplerCompute& GPU::KeplerCompute() { |
| 93 | return *cdma_pusher; | 794 | return impl->KeplerCompute(); |
| 94 | } | 795 | } |
| 95 | 796 | ||
| 96 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { | 797 | const Engines::KeplerCompute& GPU::KeplerCompute() const { |
| 97 | // Synced GPU, is always in sync | 798 | return impl->KeplerCompute(); |
| 98 | if (!is_async) { | ||
| 99 | return; | ||
| 100 | } | ||
| 101 | if (syncpoint_id == UINT32_MAX) { | ||
| 102 | // TODO: Research what this does. | ||
| 103 | LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); | ||
| 104 | return; | ||
| 105 | } | ||
| 106 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 107 | std::unique_lock lock{sync_mutex}; | ||
| 108 | sync_cv.wait(lock, [=, this] { | ||
| 109 | if (shutting_down.load(std::memory_order_relaxed)) { | ||
| 110 | // We're shutting down, ensure no threads continue to wait for the next syncpoint | ||
| 111 | return true; | ||
| 112 | } | ||
| 113 | return syncpoints.at(syncpoint_id).load() >= value; | ||
| 114 | }); | ||
| 115 | } | ||
| 116 | |||
| 117 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | ||
| 118 | auto& syncpoint = syncpoints.at(syncpoint_id); | ||
| 119 | syncpoint++; | ||
| 120 | std::lock_guard lock{sync_mutex}; | ||
| 121 | sync_cv.notify_all(); | ||
| 122 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 123 | if (!interrupt.empty()) { | ||
| 124 | u32 value = syncpoint.load(); | ||
| 125 | auto it = interrupt.begin(); | ||
| 126 | while (it != interrupt.end()) { | ||
| 127 | if (value >= *it) { | ||
| 128 | TriggerCpuInterrupt(syncpoint_id, *it); | ||
| 129 | it = interrupt.erase(it); | ||
| 130 | continue; | ||
| 131 | } | ||
| 132 | it++; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | 799 | } |
| 136 | 800 | ||
| 137 | u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { | 801 | Tegra::MemoryManager& GPU::MemoryManager() { |
| 138 | return syncpoints.at(syncpoint_id).load(); | 802 | return impl->MemoryManager(); |
| 139 | } | 803 | } |
| 140 | 804 | ||
| 141 | void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | 805 | const Tegra::MemoryManager& GPU::MemoryManager() const { |
| 142 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | 806 | return impl->MemoryManager(); |
| 143 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | ||
| 144 | [value](u32 in_value) { return in_value == value; }); | ||
| 145 | if (contains) { | ||
| 146 | return; | ||
| 147 | } | ||
| 148 | interrupt.emplace_back(value); | ||
| 149 | } | 807 | } |
| 150 | 808 | ||
| 151 | bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | 809 | Tegra::DmaPusher& GPU::DmaPusher() { |
| 152 | std::lock_guard lock{sync_mutex}; | 810 | return impl->DmaPusher(); |
| 153 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | 811 | } |
| 154 | const auto iter = | ||
| 155 | std::find_if(interrupt.begin(), interrupt.end(), | ||
| 156 | [value](u32 interrupt_value) { return value == interrupt_value; }); | ||
| 157 | 812 | ||
| 158 | if (iter == interrupt.end()) { | 813 | const Tegra::DmaPusher& GPU::DmaPusher() const { |
| 159 | return false; | 814 | return impl->DmaPusher(); |
| 160 | } | ||
| 161 | interrupt.erase(iter); | ||
| 162 | return true; | ||
| 163 | } | 815 | } |
| 164 | 816 | ||
| 165 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | 817 | Tegra::CDmaPusher& GPU::CDmaPusher() { |
| 166 | std::unique_lock lck{flush_request_mutex}; | 818 | return impl->CDmaPusher(); |
| 167 | const u64 fence = ++last_flush_fence; | ||
| 168 | flush_requests.emplace_back(fence, addr, size); | ||
| 169 | return fence; | ||
| 170 | } | 819 | } |
| 171 | 820 | ||
| 172 | void GPU::TickWork() { | 821 | const Tegra::CDmaPusher& GPU::CDmaPusher() const { |
| 173 | std::unique_lock lck{flush_request_mutex}; | 822 | return impl->CDmaPusher(); |
| 174 | while (!flush_requests.empty()) { | ||
| 175 | auto& request = flush_requests.front(); | ||
| 176 | const u64 fence = request.fence; | ||
| 177 | const VAddr addr = request.addr; | ||
| 178 | const std::size_t size = request.size; | ||
| 179 | flush_requests.pop_front(); | ||
| 180 | flush_request_mutex.unlock(); | ||
| 181 | rasterizer->FlushRegion(addr, size); | ||
| 182 | current_flush_fence.store(fence); | ||
| 183 | flush_request_mutex.lock(); | ||
| 184 | } | ||
| 185 | } | 823 | } |
| 186 | 824 | ||
| 187 | u64 GPU::GetTicks() const { | 825 | VideoCore::RendererBase& GPU::Renderer() { |
| 188 | // This values were reversed engineered by fincs from NVN | 826 | return impl->Renderer(); |
| 189 | // The gpu clock is reported in units of 385/625 nanoseconds | 827 | } |
| 190 | constexpr u64 gpu_ticks_num = 384; | ||
| 191 | constexpr u64 gpu_ticks_den = 625; | ||
| 192 | 828 | ||
| 193 | u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); | 829 | const VideoCore::RendererBase& GPU::Renderer() const { |
| 194 | if (Settings::values.use_fast_gpu_time.GetValue()) { | 830 | return impl->Renderer(); |
| 195 | nanoseconds /= 256; | ||
| 196 | } | ||
| 197 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | ||
| 198 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | ||
| 199 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | ||
| 200 | } | 831 | } |
| 201 | 832 | ||
| 202 | void GPU::RendererFrameEndNotify() { | 833 | VideoCore::ShaderNotify& GPU::ShaderNotify() { |
| 203 | system.GetPerfStats().EndGameFrame(); | 834 | return impl->ShaderNotify(); |
| 204 | } | 835 | } |
| 205 | 836 | ||
| 206 | void GPU::FlushCommands() { | 837 | const VideoCore::ShaderNotify& GPU::ShaderNotify() const { |
| 207 | rasterizer->FlushCommands(); | 838 | return impl->ShaderNotify(); |
| 208 | } | 839 | } |
| 209 | 840 | ||
| 210 | void GPU::SyncGuestHost() { | 841 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { |
| 211 | rasterizer->SyncGuestHost(); | 842 | impl->WaitFence(syncpoint_id, value); |
| 212 | } | 843 | } |
| 213 | 844 | ||
| 214 | enum class GpuSemaphoreOperation { | 845 | void GPU::IncrementSyncPoint(u32 syncpoint_id) { |
| 215 | AcquireEqual = 0x1, | 846 | impl->IncrementSyncPoint(syncpoint_id); |
| 216 | WriteLong = 0x2, | 847 | } |
| 217 | AcquireGequal = 0x4, | ||
| 218 | AcquireMask = 0x8, | ||
| 219 | }; | ||
| 220 | 848 | ||
| 221 | void GPU::CallMethod(const MethodCall& method_call) { | 849 | u32 GPU::GetSyncpointValue(u32 syncpoint_id) const { |
| 222 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, | 850 | return impl->GetSyncpointValue(syncpoint_id); |
| 223 | method_call.subchannel); | 851 | } |
| 224 | 852 | ||
| 225 | ASSERT(method_call.subchannel < bound_engines.size()); | 853 | void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { |
| 854 | impl->RegisterSyncptInterrupt(syncpoint_id, value); | ||
| 855 | } | ||
| 226 | 856 | ||
| 227 | if (ExecuteMethodOnEngine(method_call.method)) { | 857 | bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { |
| 228 | CallEngineMethod(method_call); | 858 | return impl->CancelSyncptInterrupt(syncpoint_id, value); |
| 229 | } else { | ||
| 230 | CallPullerMethod(method_call); | ||
| 231 | } | ||
| 232 | } | 859 | } |
| 233 | 860 | ||
| 234 | void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | 861 | u64 GPU::GetTicks() const { |
| 235 | u32 methods_pending) { | 862 | return impl->GetTicks(); |
| 236 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); | ||
| 237 | |||
| 238 | ASSERT(subchannel < bound_engines.size()); | ||
| 239 | |||
| 240 | if (ExecuteMethodOnEngine(method)) { | ||
| 241 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); | ||
| 242 | } else { | ||
| 243 | for (std::size_t i = 0; i < amount; i++) { | ||
| 244 | CallPullerMethod(MethodCall{ | ||
| 245 | method, | ||
| 246 | base_start[i], | ||
| 247 | subchannel, | ||
| 248 | methods_pending - static_cast<u32>(i), | ||
| 249 | }); | ||
| 250 | } | ||
| 251 | } | ||
| 252 | } | 863 | } |
| 253 | 864 | ||
| 254 | bool GPU::ExecuteMethodOnEngine(u32 method) { | 865 | std::unique_lock<std::mutex> GPU::LockSync() { |
| 255 | const auto buffer_method = static_cast<BufferMethods>(method); | 866 | return impl->LockSync(); |
| 256 | return buffer_method >= BufferMethods::NonPullerMethods; | 867 | } |
| 257 | } | ||
| 258 | |||
| 259 | void GPU::CallPullerMethod(const MethodCall& method_call) { | ||
| 260 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 261 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 262 | |||
| 263 | switch (method) { | ||
| 264 | case BufferMethods::BindObject: { | ||
| 265 | ProcessBindMethod(method_call); | ||
| 266 | break; | ||
| 267 | } | ||
| 268 | case BufferMethods::Nop: | ||
| 269 | case BufferMethods::SemaphoreAddressHigh: | ||
| 270 | case BufferMethods::SemaphoreAddressLow: | ||
| 271 | case BufferMethods::SemaphoreSequence: | ||
| 272 | case BufferMethods::UnkCacheFlush: | ||
| 273 | case BufferMethods::WrcacheFlush: | ||
| 274 | case BufferMethods::FenceValue: | ||
| 275 | break; | ||
| 276 | case BufferMethods::RefCnt: | ||
| 277 | rasterizer->SignalReference(); | ||
| 278 | break; | ||
| 279 | case BufferMethods::FenceAction: | ||
| 280 | ProcessFenceActionMethod(); | ||
| 281 | break; | ||
| 282 | case BufferMethods::WaitForInterrupt: | ||
| 283 | ProcessWaitForInterruptMethod(); | ||
| 284 | break; | ||
| 285 | case BufferMethods::SemaphoreTrigger: { | ||
| 286 | ProcessSemaphoreTriggerMethod(); | ||
| 287 | break; | ||
| 288 | } | ||
| 289 | case BufferMethods::NotifyIntr: { | ||
| 290 | // TODO(Kmather73): Research and implement this method. | ||
| 291 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 292 | break; | ||
| 293 | } | ||
| 294 | case BufferMethods::Unk28: { | ||
| 295 | // TODO(Kmather73): Research and implement this method. | ||
| 296 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | case BufferMethods::SemaphoreAcquire: { | ||
| 300 | ProcessSemaphoreAcquire(); | ||
| 301 | break; | ||
| 302 | } | ||
| 303 | case BufferMethods::SemaphoreRelease: { | ||
| 304 | ProcessSemaphoreRelease(); | ||
| 305 | break; | ||
| 306 | } | ||
| 307 | case BufferMethods::Yield: { | ||
| 308 | // TODO(Kmather73): Research and implement this method. | ||
| 309 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 310 | break; | ||
| 311 | } | ||
| 312 | default: | ||
| 313 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); | ||
| 314 | break; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | |||
| 318 | void GPU::CallEngineMethod(const MethodCall& method_call) { | ||
| 319 | const EngineID engine = bound_engines[method_call.subchannel]; | ||
| 320 | |||
| 321 | switch (engine) { | ||
| 322 | case EngineID::FERMI_TWOD_A: | ||
| 323 | fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||
| 324 | break; | ||
| 325 | case EngineID::MAXWELL_B: | ||
| 326 | maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||
| 327 | break; | ||
| 328 | case EngineID::KEPLER_COMPUTE_B: | ||
| 329 | kepler_compute->CallMethod(method_call.method, method_call.argument, | ||
| 330 | method_call.IsLastCall()); | ||
| 331 | break; | ||
| 332 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 333 | maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||
| 334 | break; | ||
| 335 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 336 | kepler_memory->CallMethod(method_call.method, method_call.argument, | ||
| 337 | method_call.IsLastCall()); | ||
| 338 | break; | ||
| 339 | default: | ||
| 340 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 345 | u32 methods_pending) { | ||
| 346 | const EngineID engine = bound_engines[subchannel]; | ||
| 347 | |||
| 348 | switch (engine) { | ||
| 349 | case EngineID::FERMI_TWOD_A: | ||
| 350 | fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 351 | break; | ||
| 352 | case EngineID::MAXWELL_B: | ||
| 353 | maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 354 | break; | ||
| 355 | case EngineID::KEPLER_COMPUTE_B: | ||
| 356 | kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 357 | break; | ||
| 358 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 359 | maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 360 | break; | ||
| 361 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 362 | kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 363 | break; | ||
| 364 | default: | ||
| 365 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 366 | } | ||
| 367 | } | ||
| 368 | |||
| 369 | void GPU::ProcessBindMethod(const MethodCall& method_call) { | ||
| 370 | // Bind the current subchannel to the desired engine id. | ||
| 371 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 372 | method_call.argument); | ||
| 373 | const auto engine_id = static_cast<EngineID>(method_call.argument); | ||
| 374 | bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id); | ||
| 375 | switch (engine_id) { | ||
| 376 | case EngineID::FERMI_TWOD_A: | ||
| 377 | dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); | ||
| 378 | break; | ||
| 379 | case EngineID::MAXWELL_B: | ||
| 380 | dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); | ||
| 381 | break; | ||
| 382 | case EngineID::KEPLER_COMPUTE_B: | ||
| 383 | dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); | ||
| 384 | break; | ||
| 385 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 386 | dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); | ||
| 387 | break; | ||
| 388 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 389 | dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); | ||
| 390 | break; | ||
| 391 | default: | ||
| 392 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); | ||
| 393 | } | ||
| 394 | } | ||
| 395 | |||
| 396 | void GPU::ProcessFenceActionMethod() { | ||
| 397 | switch (regs.fence_action.op) { | ||
| 398 | case FenceOperation::Acquire: | ||
| 399 | WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 400 | break; | ||
| 401 | case FenceOperation::Increment: | ||
| 402 | IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||
| 403 | break; | ||
| 404 | default: | ||
| 405 | UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); | ||
| 406 | } | ||
| 407 | } | ||
| 408 | |||
| 409 | void GPU::ProcessWaitForInterruptMethod() { | ||
| 410 | // TODO(bunnei) ImplementMe | ||
| 411 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 412 | } | ||
| 413 | |||
| 414 | void GPU::ProcessSemaphoreTriggerMethod() { | ||
| 415 | const auto semaphoreOperationMask = 0xF; | ||
| 416 | const auto op = | ||
| 417 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||
| 418 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 419 | struct Block { | ||
| 420 | u32 sequence; | ||
| 421 | u32 zeros = 0; | ||
| 422 | u64 timestamp; | ||
| 423 | }; | ||
| 424 | 868 | ||
| 425 | Block block{}; | 869 | bool GPU::IsAsync() const { |
| 426 | block.sequence = regs.semaphore_sequence; | 870 | return impl->IsAsync(); |
| 427 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 428 | // CoreTiming | ||
| 429 | block.timestamp = GetTicks(); | ||
| 430 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, | ||
| 431 | sizeof(block)); | ||
| 432 | } else { | ||
| 433 | const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||
| 434 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 435 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 436 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 437 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 438 | // Nothing to do in this case | ||
| 439 | } else { | ||
| 440 | regs.acquire_source = true; | ||
| 441 | regs.acquire_value = regs.semaphore_sequence; | ||
| 442 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 443 | regs.acquire_active = true; | ||
| 444 | regs.acquire_mode = false; | ||
| 445 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 446 | regs.acquire_active = true; | ||
| 447 | regs.acquire_mode = true; | ||
| 448 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 449 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 450 | // semaphore_sequence, gives a non-0 result | ||
| 451 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 452 | } else { | ||
| 453 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 454 | } | ||
| 455 | } | ||
| 456 | } | ||
| 457 | } | 871 | } |
| 458 | 872 | ||
| 459 | void GPU::ProcessSemaphoreRelease() { | 873 | bool GPU::UseNvdec() const { |
| 460 | memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); | 874 | return impl->UseNvdec(); |
| 461 | } | 875 | } |
| 462 | 876 | ||
| 463 | void GPU::ProcessSemaphoreAcquire() { | 877 | void GPU::RendererFrameEndNotify() { |
| 464 | const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); | 878 | impl->RendererFrameEndNotify(); |
| 465 | const auto value = regs.semaphore_acquire; | ||
| 466 | if (word != value) { | ||
| 467 | regs.acquire_active = true; | ||
| 468 | regs.acquire_value = value; | ||
| 469 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 470 | regs.acquire_mode = false; | ||
| 471 | regs.acquire_source = false; | ||
| 472 | } | ||
| 473 | } | 879 | } |
| 474 | 880 | ||
| 475 | void GPU::Start() { | 881 | void GPU::Start() { |
| 476 | gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); | 882 | impl->Start(); |
| 477 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); | ||
| 478 | cpu_context->MakeCurrent(); | ||
| 479 | } | 883 | } |
| 480 | 884 | ||
| 481 | void GPU::ObtainContext() { | 885 | void GPU::ObtainContext() { |
| 482 | cpu_context->MakeCurrent(); | 886 | impl->ObtainContext(); |
| 483 | } | 887 | } |
| 484 | 888 | ||
| 485 | void GPU::ReleaseContext() { | 889 | void GPU::ReleaseContext() { |
| 486 | cpu_context->DoneCurrent(); | 890 | impl->ReleaseContext(); |
| 487 | } | 891 | } |
| 488 | 892 | ||
| 489 | void GPU::PushGPUEntries(Tegra::CommandList&& entries) { | 893 | void GPU::PushGPUEntries(Tegra::CommandList&& entries) { |
| 490 | gpu_thread.SubmitList(std::move(entries)); | 894 | impl->PushGPUEntries(std::move(entries)); |
| 491 | } | 895 | } |
| 492 | 896 | ||
| 493 | void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | 897 | void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { |
| 494 | if (!use_nvdec) { | 898 | impl->PushCommandBuffer(entries); |
| 495 | return; | ||
| 496 | } | ||
| 497 | |||
| 498 | if (!cdma_pusher) { | ||
| 499 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); | ||
| 500 | } | ||
| 501 | |||
| 502 | // SubmitCommandBuffer would make the nvdec operations async, this is not currently working | ||
| 503 | // TODO(ameerj): RE proper async nvdec operation | ||
| 504 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); | ||
| 505 | |||
| 506 | cdma_pusher->ProcessEntries(std::move(entries)); | ||
| 507 | } | 899 | } |
| 508 | 900 | ||
| 509 | void GPU::ClearCdmaInstance() { | 901 | void GPU::ClearCdmaInstance() { |
| 510 | cdma_pusher.reset(); | 902 | impl->ClearCdmaInstance(); |
| 511 | } | 903 | } |
| 512 | 904 | ||
| 513 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 905 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 514 | gpu_thread.SwapBuffers(framebuffer); | 906 | impl->SwapBuffers(framebuffer); |
| 515 | } | 907 | } |
| 516 | 908 | ||
| 517 | void GPU::FlushRegion(VAddr addr, u64 size) { | 909 | void GPU::FlushRegion(VAddr addr, u64 size) { |
| 518 | gpu_thread.FlushRegion(addr, size); | 910 | impl->FlushRegion(addr, size); |
| 519 | } | 911 | } |
| 520 | 912 | ||
| 521 | void GPU::InvalidateRegion(VAddr addr, u64 size) { | 913 | void GPU::InvalidateRegion(VAddr addr, u64 size) { |
| 522 | gpu_thread.InvalidateRegion(addr, size); | 914 | impl->InvalidateRegion(addr, size); |
| 523 | } | 915 | } |
| 524 | 916 | ||
| 525 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 917 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 526 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 918 | impl->FlushAndInvalidateRegion(addr, size); |
| 527 | } | ||
| 528 | |||
| 529 | void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 530 | auto& interrupt_manager = system.InterruptManager(); | ||
| 531 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 532 | } | ||
| 533 | |||
| 534 | void GPU::OnCommandListEnd() { | ||
| 535 | if (is_async) { | ||
| 536 | // This command only applies to asynchronous GPU mode | ||
| 537 | gpu_thread.OnCommandListEnd(); | ||
| 538 | } | ||
| 539 | } | 919 | } |
| 540 | 920 | ||
| 541 | } // namespace Tegra | 921 | } // namespace Tegra |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e6a02a71b..39b304823 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -4,19 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | #include <condition_variable> | ||
| 10 | #include <list> | ||
| 11 | #include <memory> | 7 | #include <memory> |
| 12 | #include <mutex> | 8 | #include <mutex> |
| 9 | |||
| 10 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 14 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 15 | #include "core/hle/service/nvflinger/buffer_queue.h" | ||
| 16 | #include "video_core/cdma_pusher.h" | 12 | #include "video_core/cdma_pusher.h" |
| 17 | #include "video_core/dma_pusher.h" | ||
| 18 | #include "video_core/framebuffer_config.h" | 13 | #include "video_core/framebuffer_config.h" |
| 19 | #include "video_core/gpu_thread.h" | ||
| 20 | 14 | ||
| 21 | using CacheAddr = std::uintptr_t; | 15 | using CacheAddr = std::uintptr_t; |
| 22 | [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { | 16 | [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { |
| @@ -40,6 +34,9 @@ class ShaderNotify; | |||
| 40 | } // namespace VideoCore | 34 | } // namespace VideoCore |
| 41 | 35 | ||
| 42 | namespace Tegra { | 36 | namespace Tegra { |
| 37 | class DmaPusher; | ||
| 38 | class CDmaPusher; | ||
| 39 | struct CommandList; | ||
| 43 | 40 | ||
| 44 | enum class RenderTargetFormat : u32 { | 41 | enum class RenderTargetFormat : u32 { |
| 45 | NONE = 0x0, | 42 | NONE = 0x0, |
| @@ -138,7 +135,18 @@ public: | |||
| 138 | } | 135 | } |
| 139 | }; | 136 | }; |
| 140 | 137 | ||
| 141 | explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); | 138 | enum class FenceOperation : u32 { |
| 139 | Acquire = 0, | ||
| 140 | Increment = 1, | ||
| 141 | }; | ||
| 142 | |||
| 143 | union FenceAction { | ||
| 144 | u32 raw; | ||
| 145 | BitField<0, 1, FenceOperation> op; | ||
| 146 | BitField<8, 24, u32> syncpoint_id; | ||
| 147 | }; | ||
| 148 | |||
| 149 | explicit GPU(Core::System& system, bool is_async, bool use_nvdec); | ||
| 142 | ~GPU(); | 150 | ~GPU(); |
| 143 | 151 | ||
| 144 | /// Binds a renderer to the GPU. | 152 | /// Binds a renderer to the GPU. |
| @@ -162,9 +170,7 @@ public: | |||
| 162 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 170 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); |
| 163 | 171 | ||
| 164 | /// Obtains current flush request fence id. | 172 | /// Obtains current flush request fence id. |
| 165 | [[nodiscard]] u64 CurrentFlushRequestFence() const { | 173 | [[nodiscard]] u64 CurrentFlushRequestFence() const; |
| 166 | return current_flush_fence.load(std::memory_order_relaxed); | ||
| 167 | } | ||
| 168 | 174 | ||
| 169 | /// Tick pending requests within the GPU. | 175 | /// Tick pending requests within the GPU. |
| 170 | void TickWork(); | 176 | void TickWork(); |
| @@ -200,24 +206,16 @@ public: | |||
| 200 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; | 206 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; |
| 201 | 207 | ||
| 202 | /// Returns a reference to the underlying renderer. | 208 | /// Returns a reference to the underlying renderer. |
| 203 | [[nodiscard]] VideoCore::RendererBase& Renderer() { | 209 | [[nodiscard]] VideoCore::RendererBase& Renderer(); |
| 204 | return *renderer; | ||
| 205 | } | ||
| 206 | 210 | ||
| 207 | /// Returns a const reference to the underlying renderer. | 211 | /// Returns a const reference to the underlying renderer. |
| 208 | [[nodiscard]] const VideoCore::RendererBase& Renderer() const { | 212 | [[nodiscard]] const VideoCore::RendererBase& Renderer() const; |
| 209 | return *renderer; | ||
| 210 | } | ||
| 211 | 213 | ||
| 212 | /// Returns a reference to the shader notifier. | 214 | /// Returns a reference to the shader notifier. |
| 213 | [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { | 215 | [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify(); |
| 214 | return *shader_notify; | ||
| 215 | } | ||
| 216 | 216 | ||
| 217 | /// Returns a const reference to the shader notifier. | 217 | /// Returns a const reference to the shader notifier. |
| 218 | [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { | 218 | [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const; |
| 219 | return *shader_notify; | ||
| 220 | } | ||
| 221 | 219 | ||
| 222 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | 220 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |
| 223 | void WaitFence(u32 syncpoint_id, u32 value); | 221 | void WaitFence(u32 syncpoint_id, u32 value); |
| @@ -232,80 +230,14 @@ public: | |||
| 232 | 230 | ||
| 233 | [[nodiscard]] u64 GetTicks() const; | 231 | [[nodiscard]] u64 GetTicks() const; |
| 234 | 232 | ||
| 235 | [[nodiscard]] std::unique_lock<std::mutex> LockSync() { | 233 | [[nodiscard]] std::unique_lock<std::mutex> LockSync(); |
| 236 | return std::unique_lock{sync_mutex}; | ||
| 237 | } | ||
| 238 | 234 | ||
| 239 | [[nodiscard]] bool IsAsync() const { | 235 | [[nodiscard]] bool IsAsync() const; |
| 240 | return is_async; | ||
| 241 | } | ||
| 242 | 236 | ||
| 243 | [[nodiscard]] bool UseNvdec() const { | 237 | [[nodiscard]] bool UseNvdec() const; |
| 244 | return use_nvdec; | ||
| 245 | } | ||
| 246 | 238 | ||
| 247 | void RendererFrameEndNotify(); | 239 | void RendererFrameEndNotify(); |
| 248 | 240 | ||
| 249 | enum class FenceOperation : u32 { | ||
| 250 | Acquire = 0, | ||
| 251 | Increment = 1, | ||
| 252 | }; | ||
| 253 | |||
| 254 | union FenceAction { | ||
| 255 | u32 raw; | ||
| 256 | BitField<0, 1, FenceOperation> op; | ||
| 257 | BitField<8, 24, u32> syncpoint_id; | ||
| 258 | |||
| 259 | [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { | ||
| 260 | FenceAction result{}; | ||
| 261 | result.op.Assign(op); | ||
| 262 | result.syncpoint_id.Assign(syncpoint_id); | ||
| 263 | return {result.raw}; | ||
| 264 | } | ||
| 265 | }; | ||
| 266 | |||
| 267 | struct Regs { | ||
| 268 | static constexpr size_t NUM_REGS = 0x40; | ||
| 269 | |||
| 270 | union { | ||
| 271 | struct { | ||
| 272 | INSERT_PADDING_WORDS_NOINIT(0x4); | ||
| 273 | struct { | ||
| 274 | u32 address_high; | ||
| 275 | u32 address_low; | ||
| 276 | |||
| 277 | [[nodiscard]] GPUVAddr SemaphoreAddress() const { | ||
| 278 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 279 | address_low); | ||
| 280 | } | ||
| 281 | } semaphore_address; | ||
| 282 | |||
| 283 | u32 semaphore_sequence; | ||
| 284 | u32 semaphore_trigger; | ||
| 285 | INSERT_PADDING_WORDS_NOINIT(0xC); | ||
| 286 | |||
| 287 | // The pusher and the puller share the reference counter, the pusher only has read | ||
| 288 | // access | ||
| 289 | u32 reference_count; | ||
| 290 | INSERT_PADDING_WORDS_NOINIT(0x5); | ||
| 291 | |||
| 292 | u32 semaphore_acquire; | ||
| 293 | u32 semaphore_release; | ||
| 294 | u32 fence_value; | ||
| 295 | FenceAction fence_action; | ||
| 296 | INSERT_PADDING_WORDS_NOINIT(0xE2); | ||
| 297 | |||
| 298 | // Puller state | ||
| 299 | u32 acquire_mode; | ||
| 300 | u32 acquire_source; | ||
| 301 | u32 acquire_active; | ||
| 302 | u32 acquire_timeout; | ||
| 303 | u32 acquire_value; | ||
| 304 | }; | ||
| 305 | std::array<u32, NUM_REGS> reg_array; | ||
| 306 | }; | ||
| 307 | } regs{}; | ||
| 308 | |||
| 309 | /// Performs any additional setup necessary in order to begin GPU emulation. | 241 | /// Performs any additional setup necessary in order to begin GPU emulation. |
| 310 | /// This can be used to launch any necessary threads and register any necessary | 242 | /// This can be used to launch any necessary threads and register any necessary |
| 311 | /// core timing events. | 243 | /// core timing events. |
| @@ -338,104 +270,9 @@ public: | |||
| 338 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 270 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 339 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 271 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 340 | 272 | ||
| 341 | protected: | ||
| 342 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const; | ||
| 343 | |||
| 344 | private: | ||
| 345 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 346 | void ProcessFenceActionMethod(); | ||
| 347 | void ProcessWaitForInterruptMethod(); | ||
| 348 | void ProcessSemaphoreTriggerMethod(); | ||
| 349 | void ProcessSemaphoreRelease(); | ||
| 350 | void ProcessSemaphoreAcquire(); | ||
| 351 | |||
| 352 | /// Calls a GPU puller method. | ||
| 353 | void CallPullerMethod(const MethodCall& method_call); | ||
| 354 | |||
| 355 | /// Calls a GPU engine method. | ||
| 356 | void CallEngineMethod(const MethodCall& method_call); | ||
| 357 | |||
| 358 | /// Calls a GPU engine multivalue method. | ||
| 359 | void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 360 | u32 methods_pending); | ||
| 361 | |||
| 362 | /// Determines where the method should be executed. | ||
| 363 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); | ||
| 364 | |||
| 365 | protected: | ||
| 366 | Core::System& system; | ||
| 367 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | ||
| 368 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | ||
| 369 | std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | ||
| 370 | std::unique_ptr<VideoCore::RendererBase> renderer; | ||
| 371 | VideoCore::RasterizerInterface* rasterizer = nullptr; | ||
| 372 | const bool use_nvdec; | ||
| 373 | |||
| 374 | private: | 273 | private: |
| 375 | /// Mapping of command subchannels to their bound engine ids | 274 | struct Impl; |
| 376 | std::array<EngineID, 8> bound_engines = {}; | 275 | std::unique_ptr<Impl> impl; |
| 377 | /// 3D engine | ||
| 378 | std::unique_ptr<Engines::Maxwell3D> maxwell_3d; | ||
| 379 | /// 2D engine | ||
| 380 | std::unique_ptr<Engines::Fermi2D> fermi_2d; | ||
| 381 | /// Compute engine | ||
| 382 | std::unique_ptr<Engines::KeplerCompute> kepler_compute; | ||
| 383 | /// DMA engine | ||
| 384 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | ||
| 385 | /// Inline memory engine | ||
| 386 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | ||
| 387 | /// Shader build notifier | ||
| 388 | std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | ||
| 389 | /// When true, we are about to shut down emulation session, so terminate outstanding tasks | ||
| 390 | std::atomic_bool shutting_down{}; | ||
| 391 | |||
| 392 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||
| 393 | |||
| 394 | std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||
| 395 | |||
| 396 | std::mutex sync_mutex; | ||
| 397 | std::mutex device_mutex; | ||
| 398 | |||
| 399 | std::condition_variable sync_cv; | ||
| 400 | |||
| 401 | struct FlushRequest { | ||
| 402 | explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) | ||
| 403 | : fence{fence_}, addr{addr_}, size{size_} {} | ||
| 404 | u64 fence; | ||
| 405 | VAddr addr; | ||
| 406 | std::size_t size; | ||
| 407 | }; | ||
| 408 | |||
| 409 | std::list<FlushRequest> flush_requests; | ||
| 410 | std::atomic<u64> current_flush_fence{}; | ||
| 411 | u64 last_flush_fence{}; | ||
| 412 | std::mutex flush_request_mutex; | ||
| 413 | |||
| 414 | const bool is_async; | ||
| 415 | |||
| 416 | VideoCommon::GPUThread::ThreadManager gpu_thread; | ||
| 417 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | ||
| 418 | }; | 276 | }; |
| 419 | 277 | ||
| 420 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 421 | static_assert(offsetof(GPU::Regs, field_name) == position * 4, \ | ||
| 422 | "Field " #field_name " has invalid position") | ||
| 423 | |||
| 424 | ASSERT_REG_POSITION(semaphore_address, 0x4); | ||
| 425 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 426 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 427 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 428 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 429 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 430 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 431 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 432 | |||
| 433 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 434 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 435 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 436 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 437 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 438 | |||
| 439 | #undef ASSERT_REG_POSITION | ||
| 440 | |||
| 441 | } // namespace Tegra | 278 | } // namespace Tegra |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 91bada925..00984188e 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -130,9 +130,6 @@ public: | |||
| 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 131 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 131 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 132 | 132 | ||
| 133 | // Stops the GPU execution and waits for the GPU to finish working | ||
| 134 | void ShutDown(); | ||
| 135 | |||
| 136 | void OnCommandListEnd(); | 133 | void OnCommandListEnd(); |
| 137 | 134 | ||
| 138 | private: | 135 | private: |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index aac851253..73231061a 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <array> | 8 | #include <array> |
| 9 | #include <cstring> | 9 | #include <cstring> |
| 10 | #include <iterator> | 10 | #include <iterator> |
| 11 | #include <list> | ||
| 11 | #include <memory> | 12 | #include <memory> |
| 12 | #include <mutex> | 13 | #include <mutex> |
| 13 | #include <optional> | 14 | #include <optional> |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 54dae2c41..8c3ca3d82 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/surface.h" | 20 | #include "video_core/surface.h" |
| 21 | #include "video_core/texture_cache/formatter.h" | 21 | #include "video_core/texture_cache/formatter.h" |
| 22 | #include "video_core/texture_cache/samples_helper.h" | 22 | #include "video_core/texture_cache/samples_helper.h" |
| 23 | #include "video_core/texture_cache/util.h" | ||
| 23 | 24 | ||
| 24 | namespace OpenGL { | 25 | namespace OpenGL { |
| 25 | namespace { | 26 | namespace { |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c498a8a8f..1ca2c90be 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "shader_recompiler/shader_info.h" | 12 | #include "shader_recompiler/shader_info.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/util_shaders.h" | 14 | #include "video_core/renderer_opengl/util_shaders.h" |
| 15 | #include "video_core/texture_cache/image_view_base.h" | ||
| 15 | #include "video_core/texture_cache/texture_cache_base.h" | 16 | #include "video_core/texture_cache/texture_cache_base.h" |
| 16 | 17 | ||
| 17 | namespace OpenGL { | 18 | namespace OpenGL { |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 3b87640b5..06c5fb867 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 22 | #include "video_core/texture_cache/formatter.h" | 22 | #include "video_core/texture_cache/formatter.h" |
| 23 | #include "video_core/texture_cache/samples_helper.h" | 23 | #include "video_core/texture_cache/samples_helper.h" |
| 24 | #include "video_core/texture_cache/util.h" | ||
| 24 | #include "video_core/vulkan_common/vulkan_device.h" | 25 | #include "video_core/vulkan_common/vulkan_device.h" |
| 25 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 26 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 26 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 27 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 6d5a68bfe..b09c468e4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -4,11 +4,11 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <compare> | ||
| 8 | #include <span> | 7 | #include <span> |
| 9 | 8 | ||
| 10 | #include "shader_recompiler/shader_info.h" | 9 | #include "shader_recompiler/shader_info.h" |
| 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 11 | #include "video_core/texture_cache/image_view_base.h" | ||
| 12 | #include "video_core/texture_cache/texture_cache_base.h" | 12 | #include "video_core/texture_cache/texture_cache_base.h" |
| 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 81a878bb2..05850afd0 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "common/fs/fs.h" | 16 | #include "common/fs/fs.h" |
| 17 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| 18 | #include "shader_recompiler/environment.h" | 18 | #include "shader_recompiler/environment.h" |
| 19 | #include "video_core/engines/kepler_compute.h" | ||
| 19 | #include "video_core/memory_manager.h" | 20 | #include "video_core/memory_manager.h" |
| 20 | #include "video_core/shader_environment.h" | 21 | #include "video_core/shader_environment.h" |
| 21 | #include "video_core/textures/texture.h" | 22 | #include "video_core/textures/texture.h" |
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 2079979db..6640e53d0 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h | |||
| @@ -5,13 +5,13 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | ||
| 9 | #include <filesystem> | 8 | #include <filesystem> |
| 10 | #include <iosfwd> | 9 | #include <iosfwd> |
| 11 | #include <limits> | 10 | #include <limits> |
| 12 | #include <memory> | 11 | #include <memory> |
| 13 | #include <optional> | 12 | #include <optional> |
| 14 | #include <span> | 13 | #include <span> |
| 14 | #include <stop_token> | ||
| 15 | #include <type_traits> | 15 | #include <type_traits> |
| 16 | #include <unordered_map> | 16 | #include <unordered_map> |
| 17 | #include <vector> | 17 | #include <vector> |
| @@ -19,9 +19,7 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "common/unique_function.h" | 20 | #include "common/unique_function.h" |
| 21 | #include "shader_recompiler/environment.h" | 21 | #include "shader_recompiler/environment.h" |
| 22 | #include "video_core/engines/kepler_compute.h" | ||
| 23 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 24 | #include "video_core/textures/texture.h" | ||
| 25 | 23 | ||
| 26 | namespace Tegra { | 24 | namespace Tegra { |
| 27 | class Memorymanager; | 25 | class Memorymanager; |
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp index 6527e14c8..e751f26c7 100644 --- a/src/video_core/texture_cache/image_view_info.cpp +++ b/src/video_core/texture_cache/image_view_info.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "video_core/texture_cache/image_view_info.h" | 8 | #include "video_core/texture_cache/image_view_info.h" |
| 9 | #include "video_core/texture_cache/texture_cache_base.h" | 9 | #include "video_core/texture_cache/texture_cache_base.h" |
| 10 | #include "video_core/texture_cache/types.h" | 10 | #include "video_core/texture_cache/types.h" |
| 11 | #include "video_core/texture_cache/util.h" | ||
| 11 | #include "video_core/textures/texture.h" | 12 | #include "video_core/textures/texture.h" |
| 12 | 13 | ||
| 13 | namespace VideoCommon { | 14 | namespace VideoCommon { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 24b809242..329df2e49 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -4,10 +4,15 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <unordered_set> | ||
| 8 | |||
| 7 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 8 | #include "video_core/dirty_flags.h" | 10 | #include "video_core/dirty_flags.h" |
| 11 | #include "video_core/engines/kepler_compute.h" | ||
| 12 | #include "video_core/texture_cache/image_view_base.h" | ||
| 9 | #include "video_core/texture_cache/samples_helper.h" | 13 | #include "video_core/texture_cache/samples_helper.h" |
| 10 | #include "video_core/texture_cache/texture_cache_base.h" | 14 | #include "video_core/texture_cache/texture_cache_base.h" |
| 15 | #include "video_core/texture_cache/util.h" | ||
| 11 | 16 | ||
| 12 | namespace VideoCommon { | 17 | namespace VideoCommon { |
| 13 | 18 | ||
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index d7528ed24..2d1893c1c 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -4,13 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <mutex> | 7 | #include <mutex> |
| 9 | #include <span> | 8 | #include <span> |
| 10 | #include <type_traits> | 9 | #include <type_traits> |
| 11 | #include <unordered_map> | 10 | #include <unordered_map> |
| 12 | #include <unordered_set> | ||
| 13 | #include <vector> | 11 | #include <vector> |
| 12 | #include <queue> | ||
| 14 | 13 | ||
| 15 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 16 | #include "common/literals.h" | 15 | #include "common/literals.h" |
| @@ -18,10 +17,6 @@ | |||
| 18 | #include "video_core/compatible_formats.h" | 17 | #include "video_core/compatible_formats.h" |
| 19 | #include "video_core/delayed_destruction_ring.h" | 18 | #include "video_core/delayed_destruction_ring.h" |
| 20 | #include "video_core/engines/fermi_2d.h" | 19 | #include "video_core/engines/fermi_2d.h" |
| 21 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | ||
| 23 | #include "video_core/memory_manager.h" | ||
| 24 | #include "video_core/rasterizer_interface.h" | ||
| 25 | #include "video_core/surface.h" | 20 | #include "video_core/surface.h" |
| 26 | #include "video_core/texture_cache/descriptor_table.h" | 21 | #include "video_core/texture_cache/descriptor_table.h" |
| 27 | #include "video_core/texture_cache/image_base.h" | 22 | #include "video_core/texture_cache/image_base.h" |
| @@ -30,7 +25,6 @@ | |||
| 30 | #include "video_core/texture_cache/render_targets.h" | 25 | #include "video_core/texture_cache/render_targets.h" |
| 31 | #include "video_core/texture_cache/slot_vector.h" | 26 | #include "video_core/texture_cache/slot_vector.h" |
| 32 | #include "video_core/texture_cache/types.h" | 27 | #include "video_core/texture_cache/types.h" |
| 33 | #include "video_core/texture_cache/util.h" | ||
| 34 | #include "video_core/textures/texture.h" | 28 | #include "video_core/textures/texture.h" |
| 35 | 29 | ||
| 36 | namespace VideoCommon { | 30 | namespace VideoCommon { |