diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 80 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 49 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 25 |
4 files changed, 149 insertions, 53 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index f2f96ac33..105b85a92 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/cityhash.h" | ||
| 5 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/memory.h" | 8 | #include "core/memory.h" |
| @@ -12,6 +13,20 @@ | |||
| 12 | 13 | ||
| 13 | namespace Tegra { | 14 | namespace Tegra { |
| 14 | 15 | ||
| 16 | void CommandList::RefreshIntegrityChecks(GPU& gpu) { | ||
| 17 | command_list_hashes.resize(command_lists.size()); | ||
| 18 | |||
| 19 | for (std::size_t index = 0; index < command_lists.size(); ++index) { | ||
| 20 | const CommandListHeader command_list_header = command_lists[index]; | ||
| 21 | std::vector<CommandHeader> command_headers(command_list_header.size); | ||
| 22 | gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(), | ||
| 23 | command_list_header.size * sizeof(u32)); | ||
| 24 | command_list_hashes[index] = | ||
| 25 | Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), | ||
| 26 | command_list_header.size * sizeof(u32)); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 15 | DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} | 30 | DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} |
| 16 | 31 | ||
| 17 | DmaPusher::~DmaPusher() = default; | 32 | DmaPusher::~DmaPusher() = default; |
| @@ -45,32 +60,51 @@ bool DmaPusher::Step() { | |||
| 45 | return false; | 60 | return false; |
| 46 | } | 61 | } |
| 47 | 62 | ||
| 48 | const CommandList& command_list{dma_pushbuffer.front()}; | 63 | CommandList& command_list{dma_pushbuffer.front()}; |
| 49 | ASSERT_OR_EXECUTE(!command_list.empty(), { | ||
| 50 | // Somehow the command_list is empty, in order to avoid a crash | ||
| 51 | // We ignore it and assume its size is 0. | ||
| 52 | dma_pushbuffer.pop(); | ||
| 53 | dma_pushbuffer_subindex = 0; | ||
| 54 | return true; | ||
| 55 | }); | ||
| 56 | const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; | ||
| 57 | const GPUVAddr dma_get = command_list_header.addr; | ||
| 58 | |||
| 59 | if (dma_pushbuffer_subindex >= command_list.size()) { | ||
| 60 | // We've gone through the current list, remove it from the queue | ||
| 61 | dma_pushbuffer.pop(); | ||
| 62 | dma_pushbuffer_subindex = 0; | ||
| 63 | } | ||
| 64 | 64 | ||
| 65 | if (command_list_header.size == 0) { | 65 | ASSERT_OR_EXECUTE( |
| 66 | return true; | 66 | command_list.command_lists.size() || command_list.prefetch_command_list.size(), { |
| 67 | } | 67 | // Somehow the command_list is empty, in order to avoid a crash |
| 68 | // We ignore it and assume its size is 0. | ||
| 69 | dma_pushbuffer.pop(); | ||
| 70 | dma_pushbuffer_subindex = 0; | ||
| 71 | return true; | ||
| 72 | }); | ||
| 68 | 73 | ||
| 69 | // Push buffer non-empty, read a word | 74 | if (command_list.prefetch_command_list.size()) { |
| 70 | command_headers.resize(command_list_header.size); | 75 | // Prefetched command list from nvdrv, used for things like synchronization |
| 71 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | 76 | command_headers = std::move(command_list.prefetch_command_list); |
| 72 | command_list_header.size * sizeof(u32)); | 77 | dma_pushbuffer.pop(); |
| 78 | } else { | ||
| 79 | const CommandListHeader command_list_header{ | ||
| 80 | command_list.command_lists[dma_pushbuffer_subindex]}; | ||
| 81 | const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++]; | ||
| 82 | const GPUVAddr dma_get = command_list_header.addr; | ||
| 83 | |||
| 84 | if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { | ||
| 85 | // We've gone through the current list, remove it from the queue | ||
| 86 | dma_pushbuffer.pop(); | ||
| 87 | dma_pushbuffer_subindex = 0; | ||
| 88 | } | ||
| 73 | 89 | ||
| 90 | if (command_list_header.size == 0) { | ||
| 91 | return true; | ||
| 92 | } | ||
| 93 | |||
| 94 | // Push buffer non-empty, read a word | ||
| 95 | command_headers.resize(command_list_header.size); | ||
| 96 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | ||
| 97 | command_list_header.size * sizeof(u32)); | ||
| 98 | |||
| 99 | // Integrity check | ||
| 100 | const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), | ||
| 101 | command_list_header.size * sizeof(u32)); | ||
| 102 | if (new_hash != next_hash) { | ||
| 103 | LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get); | ||
| 104 | dma_pushbuffer.pop(); | ||
| 105 | return true; | ||
| 106 | } | ||
| 107 | } | ||
| 74 | for (std::size_t index = 0; index < command_headers.size();) { | 108 | for (std::size_t index = 0; index < command_headers.size();) { |
| 75 | const CommandHeader& command_header = command_headers[index]; | 109 | const CommandHeader& command_header = command_headers[index]; |
| 76 | 110 | ||
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index efa90d170..8496ba2da 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -27,6 +27,31 @@ enum class SubmissionMode : u32 { | |||
| 27 | IncreaseOnce = 5 | 27 | IncreaseOnce = 5 |
| 28 | }; | 28 | }; |
| 29 | 29 | ||
| 30 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 31 | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. | ||
| 32 | // So the values you see in docs might be multiplied by 4. | ||
| 33 | enum class BufferMethods : u32 { | ||
| 34 | BindObject = 0x0, | ||
| 35 | Nop = 0x2, | ||
| 36 | SemaphoreAddressHigh = 0x4, | ||
| 37 | SemaphoreAddressLow = 0x5, | ||
| 38 | SemaphoreSequence = 0x6, | ||
| 39 | SemaphoreTrigger = 0x7, | ||
| 40 | NotifyIntr = 0x8, | ||
| 41 | WrcacheFlush = 0x9, | ||
| 42 | Unk28 = 0xA, | ||
| 43 | UnkCacheFlush = 0xB, | ||
| 44 | RefCnt = 0x14, | ||
| 45 | SemaphoreAcquire = 0x1A, | ||
| 46 | SemaphoreRelease = 0x1B, | ||
| 47 | FenceValue = 0x1C, | ||
| 48 | FenceAction = 0x1D, | ||
| 49 | WaitForInterrupt = 0x1E, | ||
| 50 | Unk7c = 0x1F, | ||
| 51 | Yield = 0x20, | ||
| 52 | NonPullerMethods = 0x40, | ||
| 53 | }; | ||
| 54 | |||
| 30 | struct CommandListHeader { | 55 | struct CommandListHeader { |
| 31 | union { | 56 | union { |
| 32 | u64 raw; | 57 | u64 raw; |
| @@ -49,9 +74,29 @@ union CommandHeader { | |||
| 49 | static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); | 74 | static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); |
| 50 | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); | 75 | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
| 51 | 76 | ||
| 77 | static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, | ||
| 78 | SubmissionMode mode) { | ||
| 79 | CommandHeader result{}; | ||
| 80 | result.method.Assign(static_cast<u32>(method)); | ||
| 81 | result.arg_count.Assign(arg_count); | ||
| 82 | result.mode.Assign(mode); | ||
| 83 | return result; | ||
| 84 | } | ||
| 85 | |||
| 52 | class GPU; | 86 | class GPU; |
| 53 | 87 | ||
| 54 | using CommandList = std::vector<Tegra::CommandListHeader>; | 88 | struct CommandList final { |
| 89 | CommandList() = default; | ||
| 90 | explicit CommandList(std::size_t size) : command_lists(size) {} | ||
| 91 | explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list) | ||
| 92 | : prefetch_command_list{std::move(prefetch_command_list)} {} | ||
| 93 | |||
| 94 | void RefreshIntegrityChecks(GPU& gpu); | ||
| 95 | |||
| 96 | std::vector<Tegra::CommandListHeader> command_lists; | ||
| 97 | std::vector<u64> command_list_hashes; | ||
| 98 | std::vector<Tegra::CommandHeader> prefetch_command_list; | ||
| 99 | }; | ||
| 55 | 100 | ||
| 56 | /** | 101 | /** |
| 57 | * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the | 102 | * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the |
| @@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; | |||
| 60 | * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for | 105 | * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for |
| 61 | * details on this implementation. | 106 | * details on this implementation. |
| 62 | */ | 107 | */ |
| 63 | class DmaPusher { | 108 | class DmaPusher final { |
| 64 | public: | 109 | public: |
| 65 | explicit DmaPusher(Core::System& system, GPU& gpu); | 110 | explicit DmaPusher(Core::System& system, GPU& gpu); |
| 66 | ~DmaPusher(); | 111 | ~DmaPusher(); |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 171f78183..ebd149c3a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -194,30 +194,6 @@ void GPU::SyncGuestHost() { | |||
| 194 | void GPU::OnCommandListEnd() { | 194 | void GPU::OnCommandListEnd() { |
| 195 | renderer->Rasterizer().ReleaseFences(); | 195 | renderer->Rasterizer().ReleaseFences(); |
| 196 | } | 196 | } |
| 197 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 198 | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. | ||
| 199 | // So the values you see in docs might be multiplied by 4. | ||
| 200 | enum class BufferMethods { | ||
| 201 | BindObject = 0x0, | ||
| 202 | Nop = 0x2, | ||
| 203 | SemaphoreAddressHigh = 0x4, | ||
| 204 | SemaphoreAddressLow = 0x5, | ||
| 205 | SemaphoreSequence = 0x6, | ||
| 206 | SemaphoreTrigger = 0x7, | ||
| 207 | NotifyIntr = 0x8, | ||
| 208 | WrcacheFlush = 0x9, | ||
| 209 | Unk28 = 0xA, | ||
| 210 | UnkCacheFlush = 0xB, | ||
| 211 | RefCnt = 0x14, | ||
| 212 | SemaphoreAcquire = 0x1A, | ||
| 213 | SemaphoreRelease = 0x1B, | ||
| 214 | FenceValue = 0x1C, | ||
| 215 | FenceAction = 0x1D, | ||
| 216 | Unk78 = 0x1E, | ||
| 217 | Unk7c = 0x1F, | ||
| 218 | Yield = 0x20, | ||
| 219 | NonPullerMethods = 0x40, | ||
| 220 | }; | ||
| 221 | 197 | ||
| 222 | enum class GpuSemaphoreOperation { | 198 | enum class GpuSemaphoreOperation { |
| 223 | AcquireEqual = 0x1, | 199 | AcquireEqual = 0x1, |
| @@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 277 | case BufferMethods::UnkCacheFlush: | 253 | case BufferMethods::UnkCacheFlush: |
| 278 | case BufferMethods::WrcacheFlush: | 254 | case BufferMethods::WrcacheFlush: |
| 279 | case BufferMethods::FenceValue: | 255 | case BufferMethods::FenceValue: |
| 256 | break; | ||
| 280 | case BufferMethods::FenceAction: | 257 | case BufferMethods::FenceAction: |
| 258 | ProcessFenceActionMethod(); | ||
| 259 | break; | ||
| 260 | case BufferMethods::WaitForInterrupt: | ||
| 261 | ProcessWaitForInterruptMethod(); | ||
| 281 | break; | 262 | break; |
| 282 | case BufferMethods::SemaphoreTrigger: { | 263 | case BufferMethods::SemaphoreTrigger: { |
| 283 | ProcessSemaphoreTriggerMethod(); | 264 | ProcessSemaphoreTriggerMethod(); |
| @@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { | |||
| 391 | } | 372 | } |
| 392 | } | 373 | } |
| 393 | 374 | ||
| 375 | void GPU::ProcessFenceActionMethod() { | ||
| 376 | switch (regs.fence_action.op) { | ||
| 377 | case FenceOperation::Acquire: | ||
| 378 | WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 379 | break; | ||
| 380 | case FenceOperation::Increment: | ||
| 381 | IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||
| 382 | break; | ||
| 383 | default: | ||
| 384 | UNIMPLEMENTED_MSG("Unimplemented operation {}", | ||
| 385 | static_cast<u32>(regs.fence_action.op.Value())); | ||
| 386 | } | ||
| 387 | } | ||
| 388 | |||
| 389 | void GPU::ProcessWaitForInterruptMethod() { | ||
| 390 | // TODO(bunnei) ImplementMe | ||
| 391 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void GPU::ProcessSemaphoreTriggerMethod() { | 394 | void GPU::ProcessSemaphoreTriggerMethod() { |
| 395 | const auto semaphoreOperationMask = 0xF; | 395 | const auto semaphoreOperationMask = 0xF; |
| 396 | const auto op = | 396 | const auto op = |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b8c613b11..5444b49f3 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -263,6 +263,24 @@ public: | |||
| 263 | return use_nvdec; | 263 | return use_nvdec; |
| 264 | } | 264 | } |
| 265 | 265 | ||
| 266 | enum class FenceOperation : u32 { | ||
| 267 | Acquire = 0, | ||
| 268 | Increment = 1, | ||
| 269 | }; | ||
| 270 | |||
| 271 | union FenceAction { | ||
| 272 | u32 raw; | ||
| 273 | BitField<0, 1, FenceOperation> op; | ||
| 274 | BitField<8, 24, u32> syncpoint_id; | ||
| 275 | |||
| 276 | static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) { | ||
| 277 | FenceAction result{}; | ||
| 278 | result.op.Assign(op); | ||
| 279 | result.syncpoint_id.Assign(syncpoint_id); | ||
| 280 | return {result.raw}; | ||
| 281 | } | ||
| 282 | }; | ||
| 283 | |||
| 266 | struct Regs { | 284 | struct Regs { |
| 267 | static constexpr size_t NUM_REGS = 0x40; | 285 | static constexpr size_t NUM_REGS = 0x40; |
| 268 | 286 | ||
| @@ -291,10 +309,7 @@ public: | |||
| 291 | u32 semaphore_acquire; | 309 | u32 semaphore_acquire; |
| 292 | u32 semaphore_release; | 310 | u32 semaphore_release; |
| 293 | u32 fence_value; | 311 | u32 fence_value; |
| 294 | union { | 312 | FenceAction fence_action; |
| 295 | BitField<4, 4, u32> operation; | ||
| 296 | BitField<8, 8, u32> id; | ||
| 297 | } fence_action; | ||
| 298 | INSERT_UNION_PADDING_WORDS(0xE2); | 313 | INSERT_UNION_PADDING_WORDS(0xE2); |
| 299 | 314 | ||
| 300 | // Puller state | 315 | // Puller state |
| @@ -342,6 +357,8 @@ protected: | |||
| 342 | 357 | ||
| 343 | private: | 358 | private: |
| 344 | void ProcessBindMethod(const MethodCall& method_call); | 359 | void ProcessBindMethod(const MethodCall& method_call); |
| 360 | void ProcessFenceActionMethod(); | ||
| 361 | void ProcessWaitForInterruptMethod(); | ||
| 345 | void ProcessSemaphoreTriggerMethod(); | 362 | void ProcessSemaphoreTriggerMethod(); |
| 346 | void ProcessSemaphoreRelease(); | 363 | void ProcessSemaphoreRelease(); |
| 347 | void ProcessSemaphoreAcquire(); | 364 | void ProcessSemaphoreAcquire(); |