diff options
69 files changed, 1188 insertions, 472 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index abdc74428..a1ace89cb 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -81,6 +81,7 @@ set(HASH_FILES | |||
| 81 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" | 81 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" |
| 82 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 82 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 83 | "${VIDEO_CORE}/shader/decode/video.cpp" | 83 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 84 | "${VIDEO_CORE}/shader/decode/warp.cpp" | ||
| 84 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 85 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 85 | "${VIDEO_CORE}/shader/control_flow.cpp" | 86 | "${VIDEO_CORE}/shader/control_flow.cpp" |
| 86 | "${VIDEO_CORE}/shader/control_flow.h" | 87 | "${VIDEO_CORE}/shader/control_flow.h" |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2b4266f29..01abdb3bb 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -55,6 +55,7 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 55 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" | 55 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" |
| 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 57 | "${VIDEO_CORE}/shader/decode/video.cpp" | 57 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 58 | "${VIDEO_CORE}/shader/decode/warp.cpp" | ||
| 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 59 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 59 | "${VIDEO_CORE}/shader/control_flow.cpp" | 60 | "${VIDEO_CORE}/shader/control_flow.cpp" |
| 60 | "${VIDEO_CORE}/shader/control_flow.h" | 61 | "${VIDEO_CORE}/shader/control_flow.h" |
diff --git a/src/common/alignment.h b/src/common/alignment.h index 617b14d9b..88d5d3a65 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #pragma once | 3 | #pragma once |
| 4 | 4 | ||
| 5 | #include <cstddef> | 5 | #include <cstddef> |
| 6 | #include <memory> | ||
| 6 | #include <type_traits> | 7 | #include <type_traits> |
| 7 | 8 | ||
| 8 | namespace Common { | 9 | namespace Common { |
| @@ -37,4 +38,63 @@ constexpr bool IsWordAligned(T value) { | |||
| 37 | return (value & 0b11) == 0; | 38 | return (value & 0b11) == 0; |
| 38 | } | 39 | } |
| 39 | 40 | ||
/**
 * Stateless allocator that hands out storage aligned to `Align` bytes.
 *
 * Storage is obtained from, and returned to, the aligned overloads of the global
 * `operator new` / `operator delete` (declared in <new>). The class models the standard
 * Allocator requirements so it can be plugged into standard containers, e.g.
 * `std::vector<T, AlignmentAllocator<T, 256>>`.
 *
 * @tparam T     Element type the allocator provides storage for.
 * @tparam Align Alignment, in bytes, of every returned allocation. Must be a power of two.
 */
template <typename T, std::size_t Align = 16>
class AlignmentAllocator {
    // Aligned operator new requires a valid (power-of-two) alignment value.
    static_assert(Align != 0 && (Align & (Align - 1)) == 0, "Align must be a power of two");

public:
    using value_type = T;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;

    using pointer = T*;
    using const_pointer = const T*;

    using reference = T&;
    using const_reference = const T&;

    /// The allocator carries no state, so any two instances are interchangeable.
    using is_always_equal = std::true_type;

    /// Yields the same allocator family for a different element type.
    template <typename T2>
    struct rebind {
        using other = AlignmentAllocator<T2, Align>;
    };

    AlignmentAllocator() noexcept = default;

    /// Converting constructor required by the Allocator requirements; containers that
    /// allocate internal node types build their rebound allocator through it.
    template <typename T2>
    AlignmentAllocator(const AlignmentAllocator<T2, Align>&) noexcept {}

    /// Returns the real address of `r`, even if `T` overloads unary `operator&`.
    pointer address(reference r) const noexcept {
        return std::addressof(r);
    }

    /// Const overload of address().
    const_pointer address(const_reference r) const noexcept {
        return std::addressof(r);
    }

    /**
     * Allocates uninitialized storage for `n` objects of type `T`, aligned to `Align` bytes.
     *
     * @param n Number of objects (not bytes) to reserve room for. Expected to be at most
     *          max_size(), so that the byte count below cannot wrap around.
     * @returns Pointer to the first element of the storage.
     */
    pointer allocate(size_type n) {
        // The request is expressed in elements; operator new expects bytes.
        return static_cast<pointer>(::operator new (n * sizeof(value_type),
                                                    std::align_val_t{Align}));
    }

    /// Releases storage previously obtained from allocate(). The element count is unused.
    void deallocate(pointer p, size_type) {
        ::operator delete (p, std::align_val_t{Align});
    }

    /**
     * Constructs a `T` in the storage at `p` from `args`.
     *
     * Arguments are perfectly forwarded so that rvalues are moved instead of copied, which
     * also makes the allocator usable with move-only element types.
     */
    template <typename... Args>
    void construct(pointer p, Args&&... args) {
        // static_cast<Args&&> is what std::forward<Args> does; spelled out to avoid
        // depending on <utility>.
        ::new (static_cast<void*>(p)) value_type(static_cast<Args&&>(args)...);
    }

    /// Runs the destructor of the object at `p` without releasing its storage.
    void destroy(pointer p) {
        p->~value_type();
    }

    /// Largest element count for which `n * sizeof(T)` still fits in `size_type`.
    size_type max_size() const noexcept {
        return static_cast<size_type>(-1) / sizeof(value_type);
    }

    bool operator!=(const AlignmentAllocator<T, Align>& other) const noexcept {
        return !(*this == other);
    }

    // Returns true if and only if storage allocated from *this
    // can be deallocated from other, and vice versa.
    // Always returns true for stateless allocators.
    bool operator==(const AlignmentAllocator<T, Align>&) const noexcept {
        return true;
    }
};
| 99 | |||
| 40 | } // namespace Common | 100 | } // namespace Common |
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index 879957dcb..d8ad54030 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/hle/kernel/physical_memory.h" | ||
| 11 | 12 | ||
| 12 | namespace Kernel { | 13 | namespace Kernel { |
| 13 | 14 | ||
| @@ -77,7 +78,7 @@ struct CodeSet final { | |||
| 77 | } | 78 | } |
| 78 | 79 | ||
| 79 | /// The overall data that backs this code set. | 80 | /// The overall data that backs this code set. |
| 80 | std::vector<u8> memory; | 81 | Kernel::PhysicalMemory memory; |
| 81 | 82 | ||
| 82 | /// The segments that comprise this code set. | 83 | /// The segments that comprise this code set. |
| 83 | std::array<Segment, 3> segments; | 84 | std::array<Segment, 3> segments; |
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h new file mode 100644 index 000000000..090565310 --- /dev/null +++ b/src/core/hle/kernel/physical_memory.h | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/alignment.h" | ||
| 8 | |||
| 9 | namespace Kernel { | ||
| 10 | |||
| 11 | // This encapsulation serves 2 purposes: | ||
| 12 | // - First, to encapsulate host physical memory under a single type and set a | ||
| 13 | // standard for managing it. | ||
| 14 | // - Second, to ensure all host backing memory used is aligned to 256 bytes due | ||
| 15 | // to strict alignment restrictions on GPU memory. | ||
| 16 | |||
| 17 | using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; | ||
| 18 | |||
| 19 | } // namespace Kernel | ||
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 92169a97b..e80a12ac3 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -247,7 +247,7 @@ VAddr Process::CreateTLSRegion() { | |||
| 247 | ASSERT(region_address.Succeeded()); | 247 | ASSERT(region_address.Succeeded()); |
| 248 | 248 | ||
| 249 | const auto map_result = vm_manager.MapMemoryBlock( | 249 | const auto map_result = vm_manager.MapMemoryBlock( |
| 250 | *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0, | 250 | *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0, |
| 251 | Memory::PAGE_SIZE, MemoryState::ThreadLocal); | 251 | Memory::PAGE_SIZE, MemoryState::ThreadLocal); |
| 252 | ASSERT(map_result.Succeeded()); | 252 | ASSERT(map_result.Succeeded()); |
| 253 | 253 | ||
| @@ -277,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) { | |||
| 277 | } | 277 | } |
| 278 | 278 | ||
| 279 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { | 279 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { |
| 280 | const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory)); | 280 | const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); |
| 281 | 281 | ||
| 282 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, | 282 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, |
| 283 | MemoryState memory_state) { | 283 | MemoryState memory_state) { |
| @@ -327,7 +327,7 @@ void Process::AllocateMainThreadStack(u64 stack_size) { | |||
| 327 | // Allocate and map the main thread stack | 327 | // Allocate and map the main thread stack |
| 328 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; | 328 | const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; |
| 329 | vm_manager | 329 | vm_manager |
| 330 | .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size), | 330 | .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size), |
| 331 | 0, main_thread_stack_size, MemoryState::Stack) | 331 | 0, main_thread_stack_size, MemoryState::Stack) |
| 332 | .Unwrap(); | 332 | .Unwrap(); |
| 333 | } | 333 | } |
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index f15c5ee36..a815c4eea 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp | |||
| @@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 28 | shared_memory->other_permissions = other_permissions; | 28 | shared_memory->other_permissions = other_permissions; |
| 29 | 29 | ||
| 30 | if (address == 0) { | 30 | if (address == 0) { |
| 31 | shared_memory->backing_block = std::make_shared<std::vector<u8>>(size); | 31 | shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size); |
| 32 | shared_memory->backing_block_offset = 0; | 32 | shared_memory->backing_block_offset = 0; |
| 33 | 33 | ||
| 34 | // Refresh the address mappings for the current process. | 34 | // Refresh the address mappings for the current process. |
| @@ -59,8 +59,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | SharedPtr<SharedMemory> SharedMemory::CreateForApplet( | 61 | SharedPtr<SharedMemory> SharedMemory::CreateForApplet( |
| 62 | KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size, | 62 | KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, |
| 63 | MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { | 63 | u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { |
| 64 | SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); | 64 | SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); |
| 65 | 65 | ||
| 66 | shared_memory->owner_process = nullptr; | 66 | shared_memory->owner_process = nullptr; |
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index c2b6155e1..01ca6dcd2 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/hle/kernel/object.h" | 12 | #include "core/hle/kernel/object.h" |
| 13 | #include "core/hle/kernel/physical_memory.h" | ||
| 13 | #include "core/hle/kernel/process.h" | 14 | #include "core/hle/kernel/process.h" |
| 14 | #include "core/hle/result.h" | 15 | #include "core/hle/result.h" |
| 15 | 16 | ||
| @@ -62,12 +63,10 @@ public: | |||
| 62 | * block. | 63 | * block. |
| 63 | * @param name Optional object name, used for debugging purposes. | 64 | * @param name Optional object name, used for debugging purposes. |
| 64 | */ | 65 | */ |
| 65 | static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel, | 66 | static SharedPtr<SharedMemory> CreateForApplet( |
| 66 | std::shared_ptr<std::vector<u8>> heap_block, | 67 | KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, |
| 67 | std::size_t offset, u64 size, | 68 | u64 size, MemoryPermission permissions, MemoryPermission other_permissions, |
| 68 | MemoryPermission permissions, | 69 | std::string name = "Unknown Applet"); |
| 69 | MemoryPermission other_permissions, | ||
| 70 | std::string name = "Unknown Applet"); | ||
| 71 | 70 | ||
| 72 | std::string GetTypeName() const override { | 71 | std::string GetTypeName() const override { |
| 73 | return "SharedMemory"; | 72 | return "SharedMemory"; |
| @@ -135,7 +134,7 @@ private: | |||
| 135 | ~SharedMemory() override; | 134 | ~SharedMemory() override; |
| 136 | 135 | ||
| 137 | /// Backing memory for this shared memory block. | 136 | /// Backing memory for this shared memory block. |
| 138 | std::shared_ptr<std::vector<u8>> backing_block; | 137 | std::shared_ptr<PhysicalMemory> backing_block; |
| 139 | /// Offset into the backing block for this shared memory. | 138 | /// Offset into the backing block for this shared memory. |
| 140 | std::size_t backing_block_offset = 0; | 139 | std::size_t backing_block_offset = 0; |
| 141 | /// Size of the memory block. Page-aligned. | 140 | /// Size of the memory block. Page-aligned. |
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index 26c4e5e67..1113c815e 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp | |||
| @@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p | |||
| 47 | return ERR_INVALID_STATE; | 47 | return ERR_INVALID_STATE; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | backing_block = std::make_shared<std::vector<u8>>(size); | 50 | backing_block = std::make_shared<PhysicalMemory>(size); |
| 51 | 51 | ||
| 52 | const auto map_state = owner_permissions == MemoryPermission::None | 52 | const auto map_state = owner_permissions == MemoryPermission::None |
| 53 | ? MemoryState::TransferMemoryIsolated | 53 | ? MemoryState::TransferMemoryIsolated |
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h index a140b1e2b..6be9dc094 100644 --- a/src/core/hle/kernel/transfer_memory.h +++ b/src/core/hle/kernel/transfer_memory.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "core/hle/kernel/object.h" | 10 | #include "core/hle/kernel/object.h" |
| 11 | #include "core/hle/kernel/physical_memory.h" | ||
| 11 | 12 | ||
| 12 | union ResultCode; | 13 | union ResultCode; |
| 13 | 14 | ||
| @@ -82,7 +83,7 @@ private: | |||
| 82 | ~TransferMemory() override; | 83 | ~TransferMemory() override; |
| 83 | 84 | ||
| 84 | /// Memory block backing this instance. | 85 | /// Memory block backing this instance. |
| 85 | std::shared_ptr<std::vector<u8>> backing_block; | 86 | std::shared_ptr<PhysicalMemory> backing_block; |
| 86 | 87 | ||
| 87 | /// The base address for the memory managed by this instance. | 88 | /// The base address for the memory managed by this instance. |
| 88 | VAddr base_address = 0; | 89 | VAddr base_address = 0; |
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 4f45fb03b..40cea1e7c 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <iterator> | 6 | #include <iterator> |
| 7 | #include <utility> | 7 | #include <utility> |
| 8 | #include "common/alignment.h" | ||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 10 | #include "common/memory_hook.h" | 11 | #include "common/memory_hook.h" |
| @@ -103,7 +104,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const { | |||
| 103 | } | 104 | } |
| 104 | 105 | ||
| 105 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | 106 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, |
| 106 | std::shared_ptr<std::vector<u8>> block, | 107 | std::shared_ptr<PhysicalMemory> block, |
| 107 | std::size_t offset, u64 size, | 108 | std::size_t offset, u64 size, |
| 108 | MemoryState state, VMAPermission perm) { | 109 | MemoryState state, VMAPermission perm) { |
| 109 | ASSERT(block != nullptr); | 110 | ASSERT(block != nullptr); |
| @@ -260,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { | |||
| 260 | 261 | ||
| 261 | if (heap_memory == nullptr) { | 262 | if (heap_memory == nullptr) { |
| 262 | // Initialize heap | 263 | // Initialize heap |
| 263 | heap_memory = std::make_shared<std::vector<u8>>(size); | 264 | heap_memory = std::make_shared<PhysicalMemory>(size); |
| 264 | heap_end = heap_region_base + size; | 265 | heap_end = heap_region_base + size; |
| 265 | } else { | 266 | } else { |
| 266 | UnmapRange(heap_region_base, GetCurrentHeapSize()); | 267 | UnmapRange(heap_region_base, GetCurrentHeapSize()); |
| @@ -341,7 +342,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { | |||
| 341 | const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); | 342 | const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); |
| 342 | if (vma.state == MemoryState::Unmapped) { | 343 | if (vma.state == MemoryState::Unmapped) { |
| 343 | const auto map_res = | 344 | const auto map_res = |
| 344 | MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0, | 345 | MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0, |
| 345 | map_size, MemoryState::Heap, VMAPermission::ReadWrite); | 346 | map_size, MemoryState::Heap, VMAPermission::ReadWrite); |
| 346 | result = map_res.Code(); | 347 | result = map_res.Code(); |
| 347 | if (result.IsError()) { | 348 | if (result.IsError()) { |
| @@ -442,7 +443,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { | |||
| 442 | if (result.IsError()) { | 443 | if (result.IsError()) { |
| 443 | for (const auto [map_address, map_size] : unmapped_regions) { | 444 | for (const auto [map_address, map_size] : unmapped_regions) { |
| 444 | const auto remap_res = | 445 | const auto remap_res = |
| 445 | MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0, | 446 | MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0, |
| 446 | map_size, MemoryState::Heap, VMAPermission::None); | 447 | map_size, MemoryState::Heap, VMAPermission::None); |
| 447 | ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); | 448 | ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); |
| 448 | } | 449 | } |
| @@ -593,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem | |||
| 593 | ASSERT_MSG(vma_offset + size <= vma->second.size, | 594 | ASSERT_MSG(vma_offset + size <= vma->second.size, |
| 594 | "Shared memory exceeds bounds of mapped block"); | 595 | "Shared memory exceeds bounds of mapped block"); |
| 595 | 596 | ||
| 596 | const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block; | 597 | const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block; |
| 597 | const std::size_t backing_block_offset = vma->second.offset + vma_offset; | 598 | const std::size_t backing_block_offset = vma->second.offset + vma_offset; |
| 598 | 599 | ||
| 599 | CASCADE_RESULT(auto new_vma, | 600 | CASCADE_RESULT(auto new_vma, |
| @@ -606,7 +607,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem | |||
| 606 | return RESULT_SUCCESS; | 607 | return RESULT_SUCCESS; |
| 607 | } | 608 | } |
| 608 | 609 | ||
| 609 | void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { | 610 | void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) { |
| 610 | // If this ever proves to have a noticeable performance impact, allow users of the function to | 611 | // If this ever proves to have a noticeable performance impact, allow users of the function to |
| 611 | // specify a specific range of addresses to limit the scan to. | 612 | // specify a specific range of addresses to limit the scan to. |
| 612 | for (const auto& p : vma_map) { | 613 | for (const auto& p : vma_map) { |
| @@ -764,7 +765,7 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre | |||
| 764 | right.backing_block->begin() + right.offset + right.size); | 765 | right.backing_block->begin() + right.offset + right.size); |
| 765 | } else { | 766 | } else { |
| 766 | // Slow case: make a new memory block for left and right. | 767 | // Slow case: make a new memory block for left and right. |
| 767 | auto new_memory = std::make_shared<std::vector<u8>>(); | 768 | auto new_memory = std::make_shared<PhysicalMemory>(); |
| 768 | new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, | 769 | new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, |
| 769 | left.backing_block->begin() + left.offset + left.size); | 770 | left.backing_block->begin() + left.offset + left.size); |
| 770 | new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, | 771 | new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, |
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 0aecb7499..b18cde619 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/memory_hook.h" | 12 | #include "common/memory_hook.h" |
| 13 | #include "common/page_table.h" | 13 | #include "common/page_table.h" |
| 14 | #include "core/hle/kernel/physical_memory.h" | ||
| 14 | #include "core/hle/result.h" | 15 | #include "core/hle/result.h" |
| 15 | #include "core/memory.h" | 16 | #include "core/memory.h" |
| 16 | 17 | ||
| @@ -290,7 +291,7 @@ struct VirtualMemoryArea { | |||
| 290 | 291 | ||
| 291 | // Settings for type = AllocatedMemoryBlock | 292 | // Settings for type = AllocatedMemoryBlock |
| 292 | /// Memory block backing this VMA. | 293 | /// Memory block backing this VMA. |
| 293 | std::shared_ptr<std::vector<u8>> backing_block = nullptr; | 294 | std::shared_ptr<PhysicalMemory> backing_block = nullptr; |
| 294 | /// Offset into the backing_memory the mapping starts from. | 295 | /// Offset into the backing_memory the mapping starts from. |
| 295 | std::size_t offset = 0; | 296 | std::size_t offset = 0; |
| 296 | 297 | ||
| @@ -348,7 +349,7 @@ public: | |||
| 348 | * @param size Size of the mapping. | 349 | * @param size Size of the mapping. |
| 349 | * @param state MemoryState tag to attach to the VMA. | 350 | * @param state MemoryState tag to attach to the VMA. |
| 350 | */ | 351 | */ |
| 351 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, | 352 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block, |
| 352 | std::size_t offset, u64 size, MemoryState state, | 353 | std::size_t offset, u64 size, MemoryState state, |
| 353 | VMAPermission perm = VMAPermission::ReadWrite); | 354 | VMAPermission perm = VMAPermission::ReadWrite); |
| 354 | 355 | ||
| @@ -547,7 +548,7 @@ public: | |||
| 547 | * Scans all VMAs and updates the page table range of any that use the given vector as backing | 548 | * Scans all VMAs and updates the page table range of any that use the given vector as backing |
| 548 | * memory. This should be called after any operation that causes reallocation of the vector. | 549 | * memory. This should be called after any operation that causes reallocation of the vector. |
| 549 | */ | 550 | */ |
| 550 | void RefreshMemoryBlockMappings(const std::vector<u8>* block); | 551 | void RefreshMemoryBlockMappings(const PhysicalMemory* block); |
| 551 | 552 | ||
| 552 | /// Dumps the address space layout to the log, for debugging | 553 | /// Dumps the address space layout to the log, for debugging |
| 553 | void LogLayout() const; | 554 | void LogLayout() const; |
| @@ -777,7 +778,7 @@ private: | |||
| 777 | // the entire virtual address space extents that bound the allocations, including any holes. | 778 | // the entire virtual address space extents that bound the allocations, including any holes. |
| 778 | // This makes deallocation and reallocation of holes fast and keeps process memory contiguous | 779 | // This makes deallocation and reallocation of holes fast and keeps process memory contiguous |
| 779 | // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. | 780 | // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. |
| 780 | std::shared_ptr<std::vector<u8>> heap_memory; | 781 | std::shared_ptr<PhysicalMemory> heap_memory; |
| 781 | 782 | ||
| 782 | // The end of the currently allocated heap. This is not an inclusive | 783 | // The end of the currently allocated heap. This is not an inclusive |
| 783 | // end of the range. This is essentially 'base_address + current_size'. | 784 | // end of the range. This is essentially 'base_address + current_size'. |
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp index ad176f89d..2a522136d 100644 --- a/src/core/hle/service/ns/pl_u.cpp +++ b/src/core/hle/service/ns/pl_u.cpp | |||
| @@ -77,7 +77,7 @@ enum class LoadState : u32 { | |||
| 77 | Done = 1, | 77 | Done = 1, |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
| 80 | static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, | 80 | static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output, |
| 81 | std::size_t& offset) { | 81 | std::size_t& offset) { |
| 82 | ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, | 82 | ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, |
| 83 | "Shared fonts exceeds 17mb!"); | 83 | "Shared fonts exceeds 17mb!"); |
| @@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou | |||
| 94 | offset += transformed_font.size() * sizeof(u32); | 94 | offset += transformed_font.size() * sizeof(u32); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output, | 97 | static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output, |
| 98 | std::size_t& offset) { | 98 | std::size_t& offset) { |
| 99 | ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); | 99 | ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); |
| 100 | const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; | 100 | const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; |
| @@ -121,7 +121,7 @@ struct PL_U::Impl { | |||
| 121 | return shared_font_regions.at(index); | 121 | return shared_font_regions.at(index); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | void BuildSharedFontsRawRegions(const std::vector<u8>& input) { | 124 | void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) { |
| 125 | // As we can derive the xor key we can just populate the offsets | 125 | // As we can derive the xor key we can just populate the offsets |
| 126 | // based on the shared memory dump | 126 | // based on the shared memory dump |
| 127 | unsigned cur_offset = 0; | 127 | unsigned cur_offset = 0; |
| @@ -144,7 +144,7 @@ struct PL_U::Impl { | |||
| 144 | Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; | 144 | Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; |
| 145 | 145 | ||
| 146 | /// Backing memory for the shared font data | 146 | /// Backing memory for the shared font data |
| 147 | std::shared_ptr<std::vector<u8>> shared_font; | 147 | std::shared_ptr<Kernel::PhysicalMemory> shared_font; |
| 148 | 148 | ||
| 149 | // Automatically populated based on shared_fonts dump or system archives. | 149 | // Automatically populated based on shared_fonts dump or system archives. |
| 150 | std::vector<FontRegion> shared_font_regions; | 150 | std::vector<FontRegion> shared_font_regions; |
| @@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} { | |||
| 166 | // Rebuild shared fonts from data ncas | 166 | // Rebuild shared fonts from data ncas |
| 167 | if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), | 167 | if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), |
| 168 | FileSys::ContentRecordType::Data)) { | 168 | FileSys::ContentRecordType::Data)) { |
| 169 | impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE); | 169 | impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE); |
| 170 | for (auto font : SHARED_FONTS) { | 170 | for (auto font : SHARED_FONTS) { |
| 171 | const auto nca = | 171 | const auto nca = |
| 172 | nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); | 172 | nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); |
| @@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} { | |||
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | } else { | 209 | } else { |
| 210 | impl->shared_font = std::make_shared<std::vector<u8>>( | 210 | impl->shared_font = std::make_shared<Kernel::PhysicalMemory>( |
| 211 | SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size | 211 | SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size |
| 212 | 212 | ||
| 213 | const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); | 213 | const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); |
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 6d4b02375..f1795fdd6 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp | |||
| @@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) { | |||
| 295 | } | 295 | } |
| 296 | } | 296 | } |
| 297 | 297 | ||
| 298 | std::vector<u8> program_image(total_image_size); | 298 | Kernel::PhysicalMemory program_image(total_image_size); |
| 299 | std::size_t current_image_position = 0; | 299 | std::size_t current_image_position = 0; |
| 300 | 300 | ||
| 301 | Kernel::CodeSet codeset; | 301 | Kernel::CodeSet codeset; |
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 70051c13a..474b55cb1 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp | |||
| @@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) { | |||
| 69 | 69 | ||
| 70 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); | 70 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); |
| 71 | Kernel::CodeSet codeset; | 71 | Kernel::CodeSet codeset; |
| 72 | std::vector<u8> program_image; | 72 | Kernel::PhysicalMemory program_image; |
| 73 | 73 | ||
| 74 | const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, | 74 | const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, |
| 75 | const std::vector<u8>& data, u32 offset) { | 75 | const std::vector<u8>& data, u32 offset) { |
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 6a0ca389b..e92e2e06e 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp | |||
| @@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data, | |||
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | // Build program image | 145 | // Build program image |
| 146 | std::vector<u8> program_image(PageAlignSize(nro_header.file_size)); | 146 | Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size)); |
| 147 | std::memcpy(program_image.data(), data.data(), program_image.size()); | 147 | std::memcpy(program_image.data(), data.data(), program_image.size()); |
| 148 | if (program_image.size() != PageAlignSize(nro_header.file_size)) { | 148 | if (program_image.size() != PageAlignSize(nro_header.file_size)) { |
| 149 | return {}; | 149 | return {}; |
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 29311404a..70c90109f 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp | |||
| @@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, | |||
| 89 | 89 | ||
| 90 | // Build program image | 90 | // Build program image |
| 91 | Kernel::CodeSet codeset; | 91 | Kernel::CodeSet codeset; |
| 92 | std::vector<u8> program_image; | 92 | Kernel::PhysicalMemory program_image; |
| 93 | for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { | 93 | for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { |
| 94 | std::vector<u8> data = | 94 | std::vector<u8> data = |
| 95 | file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); | 95 | file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 7c18c27b3..e2f85c5f1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | add_library(video_core STATIC | 1 | add_library(video_core STATIC |
| 2 | buffer_cache.h | 2 | buffer_cache/buffer_block.h |
| 3 | buffer_cache/buffer_cache.h | ||
| 4 | buffer_cache/map_interval.h | ||
| 3 | dma_pusher.cpp | 5 | dma_pusher.cpp |
| 4 | dma_pusher.h | 6 | dma_pusher.h |
| 5 | debug_utils/debug_utils.cpp | 7 | debug_utils/debug_utils.cpp |
| @@ -100,6 +102,7 @@ add_library(video_core STATIC | |||
| 100 | shader/decode/integer_set.cpp | 102 | shader/decode/integer_set.cpp |
| 101 | shader/decode/half_set.cpp | 103 | shader/decode/half_set.cpp |
| 102 | shader/decode/video.cpp | 104 | shader/decode/video.cpp |
| 105 | shader/decode/warp.cpp | ||
| 103 | shader/decode/xmad.cpp | 106 | shader/decode/xmad.cpp |
| 104 | shader/decode/other.cpp | 107 | shader/decode/other.cpp |
| 105 | shader/control_flow.cpp | 108 | shader/control_flow.cpp |
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h deleted file mode 100644 index 6f868b8b4..000000000 --- a/src/video_core/buffer_cache.h +++ /dev/null | |||
| @@ -1,299 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/rasterizer_cache.h" | ||
| 20 | |||
| 21 | namespace VideoCore { | ||
| 22 | class RasterizerInterface; | ||
| 23 | } | ||
| 24 | |||
| 25 | namespace VideoCommon { | ||
| 26 | |||
| 27 | template <typename BufferStorageType> | ||
| 28 | class CachedBuffer final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) | ||
| 31 | : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} | ||
| 32 | ~CachedBuffer() override = default; | ||
| 33 | |||
| 34 | VAddr GetCpuAddr() const override { | ||
| 35 | return cpu_addr; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::size_t GetSizeInBytes() const override { | ||
| 39 | return size; | ||
| 40 | } | ||
| 41 | |||
| 42 | u8* GetWritableHostPtr() const { | ||
| 43 | return host_ptr; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::size_t GetSize() const { | ||
| 47 | return size; | ||
| 48 | } | ||
| 49 | |||
| 50 | std::size_t GetCapacity() const { | ||
| 51 | return capacity; | ||
| 52 | } | ||
| 53 | |||
| 54 | bool IsInternalized() const { | ||
| 55 | return is_internal; | ||
| 56 | } | ||
| 57 | |||
| 58 | const BufferStorageType& GetBuffer() const { | ||
| 59 | return buffer; | ||
| 60 | } | ||
| 61 | |||
| 62 | void SetSize(std::size_t new_size) { | ||
| 63 | size = new_size; | ||
| 64 | } | ||
| 65 | |||
| 66 | void SetInternalState(bool is_internal_) { | ||
| 67 | is_internal = is_internal_; | ||
| 68 | } | ||
| 69 | |||
| 70 | BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { | ||
| 71 | capacity = new_capacity; | ||
| 72 | std::swap(buffer, buffer_); | ||
| 73 | return buffer_; | ||
| 74 | } | ||
| 75 | |||
| 76 | private: | ||
| 77 | u8* host_ptr{}; | ||
| 78 | VAddr cpu_addr{}; | ||
| 79 | std::size_t size{}; | ||
| 80 | std::size_t capacity{}; | ||
| 81 | bool is_internal{}; | ||
| 82 | BufferStorageType buffer; | ||
| 83 | }; | ||
| 84 | |||
| 85 | template <typename BufferStorageType, typename BufferType, typename StreamBuffer> | ||
| 86 | class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { | ||
| 87 | public: | ||
| 88 | using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; | ||
| 89 | using BufferInfo = std::pair<const BufferType*, u64>; | ||
| 90 | |||
| 91 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||
| 92 | std::unique_ptr<StreamBuffer> stream_buffer) | ||
| 93 | : RasterizerCache<Buffer>{rasterizer}, system{system}, | ||
| 94 | stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ | ||
| 95 | this->stream_buffer->GetHandle()} {} | ||
| 96 | ~BufferCache() = default; | ||
| 97 | |||
| 98 | void Unregister(const Buffer& entry) override { | ||
| 99 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 100 | if (entry->IsInternalized()) { | ||
| 101 | internalized_entries.erase(entry->GetCacheAddr()); | ||
| 102 | } | ||
| 103 | ReserveBuffer(entry); | ||
| 104 | RasterizerCache<Buffer>::Unregister(entry); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TickFrame() { | ||
| 108 | marked_for_destruction_index = | ||
| 109 | (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); | ||
| 110 | MarkedForDestruction().clear(); | ||
| 111 | } | ||
| 112 | |||
| 113 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 114 | bool internalize = false, bool is_written = false) { | ||
| 115 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 116 | |||
| 117 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 118 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 119 | if (!host_ptr) { | ||
| 120 | return {GetEmptyBuffer(size), 0}; | ||
| 121 | } | ||
| 122 | const auto cache_addr = ToCacheAddr(host_ptr); | ||
| 123 | |||
| 124 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 125 | // TODO: Figure out which size is the best for given games. | ||
| 126 | constexpr std::size_t max_stream_size = 0x800; | ||
| 127 | if (!internalize && size < max_stream_size && | ||
| 128 | internalized_entries.find(cache_addr) == internalized_entries.end()) { | ||
| 129 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 130 | } | ||
| 131 | |||
| 132 | auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); | ||
| 133 | if (!entry) { | ||
| 134 | return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); | ||
| 135 | } | ||
| 136 | |||
| 137 | if (entry->GetSize() < size) { | ||
| 138 | IncreaseBufferSize(entry, size); | ||
| 139 | } | ||
| 140 | if (is_written) { | ||
| 141 | entry->MarkAsModified(true, *this); | ||
| 142 | } | ||
| 143 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | ||
| 147 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 148 | std::size_t alignment = 4) { | ||
| 149 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 150 | return StreamBufferUpload(raw_pointer, size, alignment); | ||
| 151 | } | ||
| 152 | |||
| 153 | void Map(std::size_t max_size) { | ||
| 154 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | ||
| 155 | buffer_offset = buffer_offset_base; | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Finishes the upload stream, returns true on bindings invalidation. | ||
| 159 | bool Unmap() { | ||
| 160 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | ||
| 161 | return std::exchange(invalidated, false); | ||
| 162 | } | ||
| 163 | |||
| 164 | virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; | ||
| 165 | |||
| 166 | protected: | ||
| 167 | void FlushObjectInner(const Buffer& entry) override { | ||
| 168 | DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); | ||
| 169 | } | ||
| 170 | |||
| 171 | virtual BufferStorageType CreateBuffer(std::size_t size) = 0; | ||
| 172 | |||
| 173 | virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; | ||
| 174 | |||
| 175 | virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 176 | std::size_t size, const u8* data) = 0; | ||
| 177 | |||
| 178 | virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 179 | std::size_t size, u8* data) = 0; | ||
| 180 | |||
| 181 | virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, | ||
| 182 | std::size_t src_offset, std::size_t dst_offset, | ||
| 183 | std::size_t size) = 0; | ||
| 184 | |||
| 185 | private: | ||
| 186 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | ||
| 187 | std::size_t alignment) { | ||
| 188 | AlignBuffer(alignment); | ||
| 189 | const std::size_t uploaded_offset = buffer_offset; | ||
| 190 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 191 | |||
| 192 | buffer_ptr += size; | ||
| 193 | buffer_offset += size; | ||
| 194 | return {&stream_buffer_handle, uploaded_offset}; | ||
| 195 | } | ||
| 196 | |||
| 197 | BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, | ||
| 198 | bool internalize, bool is_written) { | ||
| 199 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 200 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 201 | ASSERT(cpu_addr); | ||
| 202 | |||
| 203 | auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); | ||
| 204 | entry->SetSize(size); | ||
| 205 | entry->SetInternalState(internalize); | ||
| 206 | RasterizerCache<Buffer>::Register(entry); | ||
| 207 | |||
| 208 | if (internalize) { | ||
| 209 | internalized_entries.emplace(ToCacheAddr(host_ptr)); | ||
| 210 | } | ||
| 211 | if (is_written) { | ||
| 212 | entry->MarkAsModified(true, *this); | ||
| 213 | } | ||
| 214 | |||
| 215 | if (entry->GetCapacity() < size) { | ||
| 216 | MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); | ||
| 217 | } | ||
| 218 | |||
| 219 | UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); | ||
| 220 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 221 | } | ||
| 222 | |||
| 223 | void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { | ||
| 224 | const std::size_t old_size = entry->GetSize(); | ||
| 225 | if (entry->GetCapacity() < new_size) { | ||
| 226 | const auto& old_buffer = entry->GetBuffer(); | ||
| 227 | auto new_buffer = CreateBuffer(new_size); | ||
| 228 | |||
| 229 | // Copy bits from the old buffer to the new buffer. | ||
| 230 | CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); | ||
| 231 | MarkedForDestruction().push_back( | ||
| 232 | entry->ExchangeBuffer(std::move(new_buffer), new_size)); | ||
| 233 | |||
| 234 | // This buffer could have been used | ||
| 235 | invalidated = true; | ||
| 236 | } | ||
| 237 | // Upload the new bits. | ||
| 238 | const std::size_t size_diff = new_size - old_size; | ||
| 239 | UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); | ||
| 240 | |||
| 241 | // Update entry's size in the object and in the cache. | ||
| 242 | Unregister(entry); | ||
| 243 | |||
| 244 | entry->SetSize(new_size); | ||
| 245 | RasterizerCache<Buffer>::Register(entry); | ||
| 246 | } | ||
| 247 | |||
| 248 | Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { | ||
| 249 | if (auto entry = TryGetReservedBuffer(host_ptr)) { | ||
| 250 | return entry; | ||
| 251 | } | ||
| 252 | return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); | ||
| 253 | } | ||
| 254 | |||
| 255 | Buffer TryGetReservedBuffer(u8* host_ptr) { | ||
| 256 | const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); | ||
| 257 | if (it == buffer_reserve.end()) { | ||
| 258 | return {}; | ||
| 259 | } | ||
| 260 | auto& reserve = it->second; | ||
| 261 | auto entry = reserve.back(); | ||
| 262 | reserve.pop_back(); | ||
| 263 | return entry; | ||
| 264 | } | ||
| 265 | |||
| 266 | void ReserveBuffer(Buffer entry) { | ||
| 267 | buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); | ||
| 268 | } | ||
| 269 | |||
| 270 | void AlignBuffer(std::size_t alignment) { | ||
| 271 | // Align the offset, not the mapped pointer | ||
| 272 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | ||
| 273 | buffer_ptr += offset_aligned - buffer_offset; | ||
| 274 | buffer_offset = offset_aligned; | ||
| 275 | } | ||
| 276 | |||
| 277 | std::vector<BufferStorageType>& MarkedForDestruction() { | ||
| 278 | return marked_for_destruction_ring_buffer[marked_for_destruction_index]; | ||
| 279 | } | ||
| 280 | |||
| 281 | Core::System& system; | ||
| 282 | |||
| 283 | std::unique_ptr<StreamBuffer> stream_buffer; | ||
| 284 | BufferType stream_buffer_handle{}; | ||
| 285 | |||
| 286 | bool invalidated = false; | ||
| 287 | |||
| 288 | u8* buffer_ptr = nullptr; | ||
| 289 | u64 buffer_offset = 0; | ||
| 290 | u64 buffer_offset_base = 0; | ||
| 291 | |||
| 292 | std::size_t marked_for_destruction_index = 0; | ||
| 293 | std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; | ||
| 294 | |||
| 295 | std::unordered_set<CacheAddr> internalized_entries; | ||
| 296 | std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; | ||
| 297 | }; | ||
| 298 | |||
| 299 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h new file mode 100644 index 000000000..4b9193182 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_block.h | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_set> | ||
| 8 | #include <utility> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/gpu.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | class BufferBlock { | ||
| 17 | public: | ||
| 18 | bool Overlaps(const CacheAddr start, const CacheAddr end) const { | ||
| 19 | return (cache_addr < end) && (cache_addr_end > start); | ||
| 20 | } | ||
| 21 | |||
| 22 | bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { | ||
| 23 | return cache_addr <= other_start && other_end <= cache_addr_end; | ||
| 24 | } | ||
| 25 | |||
| 26 | u8* GetWritableHostPtr() const { | ||
| 27 | return FromCacheAddr(cache_addr); | ||
| 28 | } | ||
| 29 | |||
| 30 | u8* GetWritableHostPtr(std::size_t offset) const { | ||
| 31 | return FromCacheAddr(cache_addr + offset); | ||
| 32 | } | ||
| 33 | |||
| 34 | std::size_t GetOffset(const CacheAddr in_addr) { | ||
| 35 | return static_cast<std::size_t>(in_addr - cache_addr); | ||
| 36 | } | ||
| 37 | |||
| 38 | CacheAddr GetCacheAddr() const { | ||
| 39 | return cache_addr; | ||
| 40 | } | ||
| 41 | |||
| 42 | CacheAddr GetCacheAddrEnd() const { | ||
| 43 | return cache_addr_end; | ||
| 44 | } | ||
| 45 | |||
| 46 | void SetCacheAddr(const CacheAddr new_addr) { | ||
| 47 | cache_addr = new_addr; | ||
| 48 | cache_addr_end = new_addr + size; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::size_t GetSize() const { | ||
| 52 | return size; | ||
| 53 | } | ||
| 54 | |||
| 55 | void SetEpoch(u64 new_epoch) { | ||
| 56 | epoch = new_epoch; | ||
| 57 | } | ||
| 58 | |||
| 59 | u64 GetEpoch() { | ||
| 60 | return epoch; | ||
| 61 | } | ||
| 62 | |||
| 63 | protected: | ||
| 64 | explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { | ||
| 65 | SetCacheAddr(cache_addr); | ||
| 66 | } | ||
| 67 | ~BufferBlock() = default; | ||
| 68 | |||
| 69 | private: | ||
| 70 | CacheAddr cache_addr{}; | ||
| 71 | CacheAddr cache_addr_end{}; | ||
| 72 | std::size_t size{}; | ||
| 73 | u64 epoch{}; | ||
| 74 | }; | ||
| 75 | |||
| 76 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h new file mode 100644 index 000000000..2442ddfd6 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -0,0 +1,447 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/buffer_cache/buffer_block.h" | ||
| 19 | #include "video_core/buffer_cache/map_interval.h" | ||
| 20 | #include "video_core/memory_manager.h" | ||
| 21 | #include "video_core/rasterizer_interface.h" | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | using MapInterval = std::shared_ptr<MapIntervalBase>; | ||
| 26 | |||
| 27 | template <typename TBuffer, typename TBufferType, typename StreamBuffer> | ||
| 28 | class BufferCache { | ||
| 29 | public: | ||
| 30 | using BufferInfo = std::pair<const TBufferType*, u64>; | ||
| 31 | |||
| 32 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 33 | bool is_written = false) { | ||
| 34 | std::lock_guard lock{mutex}; | ||
| 35 | |||
| 36 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 37 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 38 | if (!host_ptr) { | ||
| 39 | return {GetEmptyBuffer(size), 0}; | ||
| 40 | } | ||
| 41 | const auto cache_addr = ToCacheAddr(host_ptr); | ||
| 42 | |||
| 43 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 44 | // TODO: Figure out which size is the best for given games. | ||
| 45 | constexpr std::size_t max_stream_size = 0x800; | ||
| 46 | if (size < max_stream_size) { | ||
| 47 | if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { | ||
| 48 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | auto block = GetBlock(cache_addr, size); | ||
| 53 | auto map = MapAddress(block, gpu_addr, cache_addr, size); | ||
| 54 | if (is_written) { | ||
| 55 | map->MarkAsModified(true, GetModifiedTicks()); | ||
| 56 | if (!map->IsWritten()) { | ||
| 57 | map->MarkAsWritten(true); | ||
| 58 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | ||
| 59 | } | ||
| 60 | } else { | ||
| 61 | if (map->IsWritten()) { | ||
| 62 | WriteBarrier(); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); | ||
| 67 | |||
| 68 | return {ToHandle(block), offset}; | ||
| 69 | } | ||
| 70 | |||
| 71 | /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | ||
| 72 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 73 | std::size_t alignment = 4) { | ||
| 74 | std::lock_guard lock{mutex}; | ||
| 75 | return StreamBufferUpload(raw_pointer, size, alignment); | ||
| 76 | } | ||
| 77 | |||
| 78 | void Map(std::size_t max_size) { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | ||
| 82 | buffer_offset = buffer_offset_base; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Finishes the upload stream, returns true on bindings invalidation. | ||
| 86 | bool Unmap() { | ||
| 87 | std::lock_guard lock{mutex}; | ||
| 88 | |||
| 89 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | ||
| 90 | return std::exchange(invalidated, false); | ||
| 91 | } | ||
| 92 | |||
| 93 | void TickFrame() { | ||
| 94 | ++epoch; | ||
| 95 | while (!pending_destruction.empty()) { | ||
| 96 | if (pending_destruction.front()->GetEpoch() + 1 > epoch) { | ||
| 97 | break; | ||
| 98 | } | ||
| 99 | pending_destruction.pop_front(); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | /// Write any cached resources overlapping the specified region back to memory | ||
| 104 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 105 | std::lock_guard lock{mutex}; | ||
| 106 | |||
| 107 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | ||
| 108 | std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { | ||
| 109 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 110 | }); | ||
| 111 | for (auto& object : objects) { | ||
| 112 | if (object->IsModified() && object->IsRegistered()) { | ||
| 113 | FlushMap(object); | ||
| 114 | } | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | /// Mark the specified region as being invalidated | ||
| 119 | void InvalidateRegion(CacheAddr addr, u64 size) { | ||
| 120 | std::lock_guard lock{mutex}; | ||
| 121 | |||
| 122 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | ||
| 123 | for (auto& object : objects) { | ||
| 124 | if (object->IsRegistered()) { | ||
| 125 | Unregister(object); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | } | ||
| 129 | |||
| 130 | virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; | ||
| 131 | |||
| 132 | protected: | ||
| 133 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||
| 134 | std::unique_ptr<StreamBuffer> stream_buffer) | ||
| 135 | : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, | ||
| 136 | stream_buffer_handle{this->stream_buffer->GetHandle()} {} | ||
| 137 | |||
| 138 | ~BufferCache() = default; | ||
| 139 | |||
| 140 | virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; | ||
| 141 | |||
| 142 | virtual void WriteBarrier() = 0; | ||
| 143 | |||
| 144 | virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; | ||
| 145 | |||
| 146 | virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, | ||
| 147 | const u8* data) = 0; | ||
| 148 | |||
| 149 | virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, | ||
| 150 | u8* data) = 0; | ||
| 151 | |||
| 152 | virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, | ||
| 153 | std::size_t dst_offset, std::size_t size) = 0; | ||
| 154 | |||
| 155 | /// Register an object into the cache | ||
| 156 | void Register(const MapInterval& new_map, bool inherit_written = false) { | ||
| 157 | const CacheAddr cache_ptr = new_map->GetStart(); | ||
| 158 | const std::optional<VAddr> cpu_addr = | ||
| 159 | system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); | ||
| 160 | if (!cache_ptr || !cpu_addr) { | ||
| 161 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", | ||
| 162 | new_map->GetGpuAddress()); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | const std::size_t size = new_map->GetEnd() - new_map->GetStart(); | ||
| 166 | new_map->SetCpuAddress(*cpu_addr); | ||
| 167 | new_map->MarkAsRegistered(true); | ||
| 168 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | ||
| 169 | mapped_addresses.insert({interval, new_map}); | ||
| 170 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | ||
| 171 | if (inherit_written) { | ||
| 172 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | ||
| 173 | new_map->MarkAsWritten(true); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | /// Unregisters an object from the cache | ||
| 178 | void Unregister(MapInterval& map) { | ||
| 179 | const std::size_t size = map->GetEnd() - map->GetStart(); | ||
| 180 | rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); | ||
| 181 | map->MarkAsRegistered(false); | ||
| 182 | if (map->IsWritten()) { | ||
| 183 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | ||
| 184 | } | ||
| 185 | const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; | ||
| 186 | mapped_addresses.erase(delete_interval); | ||
| 187 | } | ||
| 188 | |||
| 189 | private: | ||
| 190 | MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { | ||
| 191 | return std::make_shared<MapIntervalBase>(start, end, gpu_addr); | ||
| 192 | } | ||
| 193 | |||
| 194 | MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, | ||
| 195 | const CacheAddr cache_addr, const std::size_t size) { | ||
| 196 | |||
| 197 | std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); | ||
| 198 | if (overlaps.empty()) { | ||
| 199 | const CacheAddr cache_addr_end = cache_addr + size; | ||
| 200 | MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); | ||
| 201 | u8* host_ptr = FromCacheAddr(cache_addr); | ||
| 202 | UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); | ||
| 203 | Register(new_map); | ||
| 204 | return new_map; | ||
| 205 | } | ||
| 206 | |||
| 207 | const CacheAddr cache_addr_end = cache_addr + size; | ||
| 208 | if (overlaps.size() == 1) { | ||
| 209 | MapInterval& current_map = overlaps[0]; | ||
| 210 | if (current_map->IsInside(cache_addr, cache_addr_end)) { | ||
| 211 | return current_map; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | CacheAddr new_start = cache_addr; | ||
| 215 | CacheAddr new_end = cache_addr_end; | ||
| 216 | bool write_inheritance = false; | ||
| 217 | bool modified_inheritance = false; | ||
| 218 | // Calculate new buffer parameters | ||
| 219 | for (auto& overlap : overlaps) { | ||
| 220 | new_start = std::min(overlap->GetStart(), new_start); | ||
| 221 | new_end = std::max(overlap->GetEnd(), new_end); | ||
| 222 | write_inheritance |= overlap->IsWritten(); | ||
| 223 | modified_inheritance |= overlap->IsModified(); | ||
| 224 | } | ||
| 225 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; | ||
| 226 | for (auto& overlap : overlaps) { | ||
| 227 | Unregister(overlap); | ||
| 228 | } | ||
| 229 | UpdateBlock(block, new_start, new_end, overlaps); | ||
| 230 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); | ||
| 231 | if (modified_inheritance) { | ||
| 232 | new_map->MarkAsModified(true, GetModifiedTicks()); | ||
| 233 | } | ||
| 234 | Register(new_map, write_inheritance); | ||
| 235 | return new_map; | ||
| 236 | } | ||
| 237 | |||
| 238 | void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, | ||
| 239 | std::vector<MapInterval>& overlaps) { | ||
| 240 | const IntervalType base_interval{start, end}; | ||
| 241 | IntervalSet interval_set{}; | ||
| 242 | interval_set.add(base_interval); | ||
| 243 | for (auto& overlap : overlaps) { | ||
| 244 | const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; | ||
| 245 | interval_set.subtract(subtract); | ||
| 246 | } | ||
| 247 | for (auto& interval : interval_set) { | ||
| 248 | std::size_t size = interval.upper() - interval.lower(); | ||
| 249 | if (size > 0) { | ||
| 250 | u8* host_ptr = FromCacheAddr(interval.lower()); | ||
| 251 | UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); | ||
| 252 | } | ||
| 253 | } | ||
| 254 | } | ||
| 255 | |||
| 256 | std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { | ||
| 257 | if (size == 0) { | ||
| 258 | return {}; | ||
| 259 | } | ||
| 260 | |||
| 261 | std::vector<MapInterval> objects{}; | ||
| 262 | const IntervalType interval{addr, addr + size}; | ||
| 263 | for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { | ||
| 264 | objects.push_back(pair.second); | ||
| 265 | } | ||
| 266 | |||
| 267 | return objects; | ||
| 268 | } | ||
| 269 | |||
| 270 | /// Returns a ticks counter used for tracking when cached objects were last modified | ||
| 271 | u64 GetModifiedTicks() { | ||
| 272 | return ++modified_ticks; | ||
| 273 | } | ||
| 274 | |||
| 275 | void FlushMap(MapInterval map) { | ||
| 276 | std::size_t size = map->GetEnd() - map->GetStart(); | ||
| 277 | TBuffer block = blocks[map->GetStart() >> block_page_bits]; | ||
| 278 | u8* host_ptr = FromCacheAddr(map->GetStart()); | ||
| 279 | DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); | ||
| 280 | map->MarkAsModified(false, 0); | ||
| 281 | } | ||
| 282 | |||
| 283 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | ||
| 284 | std::size_t alignment) { | ||
| 285 | AlignBuffer(alignment); | ||
| 286 | const std::size_t uploaded_offset = buffer_offset; | ||
| 287 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 288 | |||
| 289 | buffer_ptr += size; | ||
| 290 | buffer_offset += size; | ||
| 291 | return {&stream_buffer_handle, uploaded_offset}; | ||
| 292 | } | ||
| 293 | |||
| 294 | void AlignBuffer(std::size_t alignment) { | ||
| 295 | // Align the offset, not the mapped pointer | ||
| 296 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | ||
| 297 | buffer_ptr += offset_aligned - buffer_offset; | ||
| 298 | buffer_offset = offset_aligned; | ||
| 299 | } | ||
| 300 | |||
| 301 | TBuffer EnlargeBlock(TBuffer buffer) { | ||
| 302 | const std::size_t old_size = buffer->GetSize(); | ||
| 303 | const std::size_t new_size = old_size + block_page_size; | ||
| 304 | const CacheAddr cache_addr = buffer->GetCacheAddr(); | ||
| 305 | TBuffer new_buffer = CreateBlock(cache_addr, new_size); | ||
| 306 | CopyBlock(buffer, new_buffer, 0, 0, old_size); | ||
| 307 | buffer->SetEpoch(epoch); | ||
| 308 | pending_destruction.push_back(buffer); | ||
| 309 | const CacheAddr cache_addr_end = cache_addr + new_size - 1; | ||
| 310 | u64 page_start = cache_addr >> block_page_bits; | ||
| 311 | const u64 page_end = cache_addr_end >> block_page_bits; | ||
| 312 | while (page_start <= page_end) { | ||
| 313 | blocks[page_start] = new_buffer; | ||
| 314 | ++page_start; | ||
| 315 | } | ||
| 316 | return new_buffer; | ||
| 317 | } | ||
| 318 | |||
| 319 | TBuffer MergeBlocks(TBuffer first, TBuffer second) { | ||
| 320 | const std::size_t size_1 = first->GetSize(); | ||
| 321 | const std::size_t size_2 = second->GetSize(); | ||
| 322 | const CacheAddr first_addr = first->GetCacheAddr(); | ||
| 323 | const CacheAddr second_addr = second->GetCacheAddr(); | ||
| 324 | const CacheAddr new_addr = std::min(first_addr, second_addr); | ||
| 325 | const std::size_t new_size = size_1 + size_2; | ||
| 326 | TBuffer new_buffer = CreateBlock(new_addr, new_size); | ||
| 327 | CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); | ||
| 328 | CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); | ||
| 329 | first->SetEpoch(epoch); | ||
| 330 | second->SetEpoch(epoch); | ||
| 331 | pending_destruction.push_back(first); | ||
| 332 | pending_destruction.push_back(second); | ||
| 333 | const CacheAddr cache_addr_end = new_addr + new_size - 1; | ||
| 334 | u64 page_start = new_addr >> block_page_bits; | ||
| 335 | const u64 page_end = cache_addr_end >> block_page_bits; | ||
| 336 | while (page_start <= page_end) { | ||
| 337 | blocks[page_start] = new_buffer; | ||
| 338 | ++page_start; | ||
| 339 | } | ||
| 340 | return new_buffer; | ||
| 341 | } | ||
| 342 | |||
| 343 | TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { | ||
| 344 | TBuffer found{}; | ||
| 345 | const CacheAddr cache_addr_end = cache_addr + size - 1; | ||
| 346 | u64 page_start = cache_addr >> block_page_bits; | ||
| 347 | const u64 page_end = cache_addr_end >> block_page_bits; | ||
| 348 | while (page_start <= page_end) { | ||
| 349 | auto it = blocks.find(page_start); | ||
| 350 | if (it == blocks.end()) { | ||
| 351 | if (found) { | ||
| 352 | found = EnlargeBlock(found); | ||
| 353 | } else { | ||
| 354 | const CacheAddr start_addr = (page_start << block_page_bits); | ||
| 355 | found = CreateBlock(start_addr, block_page_size); | ||
| 356 | blocks[page_start] = found; | ||
| 357 | } | ||
| 358 | } else { | ||
| 359 | if (found) { | ||
| 360 | if (found == it->second) { | ||
| 361 | ++page_start; | ||
| 362 | continue; | ||
| 363 | } | ||
| 364 | found = MergeBlocks(found, it->second); | ||
| 365 | } else { | ||
| 366 | found = it->second; | ||
| 367 | } | ||
| 368 | } | ||
| 369 | ++page_start; | ||
| 370 | } | ||
| 371 | return found; | ||
| 372 | } | ||
| 373 | |||
| 374 | void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { | ||
| 375 | u64 page_start = start >> write_page_bit; | ||
| 376 | const u64 page_end = end >> write_page_bit; | ||
| 377 | while (page_start <= page_end) { | ||
| 378 | auto it = written_pages.find(page_start); | ||
| 379 | if (it != written_pages.end()) { | ||
| 380 | it->second = it->second + 1; | ||
| 381 | } else { | ||
| 382 | written_pages[page_start] = 1; | ||
| 383 | } | ||
| 384 | page_start++; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | |||
| 388 | void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { | ||
| 389 | u64 page_start = start >> write_page_bit; | ||
| 390 | const u64 page_end = end >> write_page_bit; | ||
| 391 | while (page_start <= page_end) { | ||
| 392 | auto it = written_pages.find(page_start); | ||
| 393 | if (it != written_pages.end()) { | ||
| 394 | if (it->second > 1) { | ||
| 395 | it->second = it->second - 1; | ||
| 396 | } else { | ||
| 397 | written_pages.erase(it); | ||
| 398 | } | ||
| 399 | } | ||
| 400 | page_start++; | ||
| 401 | } | ||
| 402 | } | ||
| 403 | |||
| 404 | bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { | ||
| 405 | u64 page_start = start >> write_page_bit; | ||
| 406 | const u64 page_end = end >> write_page_bit; | ||
| 407 | while (page_start <= page_end) { | ||
| 408 | if (written_pages.count(page_start) > 0) { | ||
| 409 | return true; | ||
| 410 | } | ||
| 411 | page_start++; | ||
| 412 | } | ||
| 413 | return false; | ||
| 414 | } | ||
| 415 | |||
| 416 | VideoCore::RasterizerInterface& rasterizer; | ||
| 417 | Core::System& system; | ||
| 418 | std::unique_ptr<StreamBuffer> stream_buffer; | ||
| 419 | |||
| 420 | TBufferType stream_buffer_handle{}; | ||
| 421 | |||
| 422 | bool invalidated = false; | ||
| 423 | |||
| 424 | u8* buffer_ptr = nullptr; | ||
| 425 | u64 buffer_offset = 0; | ||
| 426 | u64 buffer_offset_base = 0; | ||
| 427 | |||
| 428 | using IntervalSet = boost::icl::interval_set<CacheAddr>; | ||
| 429 | using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; | ||
| 430 | using IntervalType = typename IntervalCache::interval_type; | ||
| 431 | IntervalCache mapped_addresses{}; | ||
| 432 | |||
| 433 | static constexpr u64 write_page_bit{11}; | ||
| 434 | std::unordered_map<u64, u32> written_pages{}; | ||
| 435 | |||
| 436 | static constexpr u64 block_page_bits{21}; | ||
| 437 | static constexpr u64 block_page_size{1 << block_page_bits}; | ||
| 438 | std::unordered_map<u64, TBuffer> blocks{}; | ||
| 439 | |||
| 440 | std::list<TBuffer> pending_destruction{}; | ||
| 441 | u64 epoch{}; | ||
| 442 | u64 modified_ticks{}; | ||
| 443 | |||
| 444 | std::recursive_mutex mutex; | ||
| 445 | }; | ||
| 446 | |||
| 447 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h new file mode 100644 index 000000000..3a104d5cd --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.h | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/gpu.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | class MapIntervalBase { | ||
| 13 | public: | ||
| 14 | MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) | ||
| 15 | : start{start}, end{end}, gpu_addr{gpu_addr} {} | ||
| 16 | |||
| 17 | void SetCpuAddress(VAddr new_cpu_addr) { | ||
| 18 | cpu_addr = new_cpu_addr; | ||
| 19 | } | ||
| 20 | |||
| 21 | VAddr GetCpuAddress() const { | ||
| 22 | return cpu_addr; | ||
| 23 | } | ||
| 24 | |||
| 25 | GPUVAddr GetGpuAddress() const { | ||
| 26 | return gpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { | ||
| 30 | return (start <= other_start && other_end <= end); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const MapIntervalBase& rhs) const { | ||
| 34 | return std::tie(start, end) == std::tie(rhs.start, rhs.end); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const MapIntervalBase& rhs) const { | ||
| 38 | return !operator==(rhs); | ||
| 39 | } | ||
| 40 | |||
| 41 | void MarkAsRegistered(const bool registered) { | ||
| 42 | is_registered = registered; | ||
| 43 | } | ||
| 44 | |||
| 45 | bool IsRegistered() const { | ||
| 46 | return is_registered; | ||
| 47 | } | ||
| 48 | |||
| 49 | CacheAddr GetStart() const { | ||
| 50 | return start; | ||
| 51 | } | ||
| 52 | |||
| 53 | CacheAddr GetEnd() const { | ||
| 54 | return end; | ||
| 55 | } | ||
| 56 | |||
| 57 | void MarkAsModified(const bool is_modified_, const u64 tick) { | ||
| 58 | is_modified = is_modified_; | ||
| 59 | ticks = tick; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool IsModified() const { | ||
| 63 | return is_modified; | ||
| 64 | } | ||
| 65 | |||
| 66 | u64 GetModificationTick() const { | ||
| 67 | return ticks; | ||
| 68 | } | ||
| 69 | |||
| 70 | void MarkAsWritten(const bool is_written_) { | ||
| 71 | is_written = is_written_; | ||
| 72 | } | ||
| 73 | |||
| 74 | bool IsWritten() const { | ||
| 75 | return is_written; | ||
| 76 | } | ||
| 77 | |||
| 78 | private: | ||
| 79 | CacheAddr start; | ||
| 80 | CacheAddr end; | ||
| 81 | GPUVAddr gpu_addr; | ||
| 82 | VAddr cpu_addr{}; | ||
| 83 | bool is_written{}; | ||
| 84 | bool is_modified{}; | ||
| 85 | bool is_registered{}; | ||
| 86 | u64 ticks{}; | ||
| 87 | }; | ||
| 88 | |||
| 89 | } // namespace VideoCommon | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index bd036cbe8..0094fd715 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -31,6 +31,7 @@ void DmaPusher::DispatchCalls() { | |||
| 31 | break; | 31 | break; |
| 32 | } | 32 | } |
| 33 | } | 33 | } |
| 34 | gpu.FlushCommands(); | ||
| 34 | } | 35 | } |
| 35 | 36 | ||
| 36 | bool DmaPusher::Step() { | 37 | bool DmaPusher::Step() { |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 0ee228e28..98a8b5337 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -10,8 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | namespace Tegra::Engines { | 11 | namespace Tegra::Engines { |
| 12 | 12 | ||
| 13 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) | 13 | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
| 14 | : rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 15 | 14 | ||
| 16 | void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | 15 | void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { |
| 17 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 16 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 05421d185..0901cf2fa 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -33,7 +33,7 @@ namespace Tegra::Engines { | |||
| 33 | 33 | ||
| 34 | class Fermi2D final { | 34 | class Fermi2D final { |
| 35 | public: | 35 | public: |
| 36 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); | 36 | explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); |
| 37 | ~Fermi2D() = default; | 37 | ~Fermi2D() = default; |
| 38 | 38 | ||
| 39 | /// Write the value to the register identified by method. | 39 | /// Write the value to the register identified by method. |
| @@ -145,7 +145,6 @@ public: | |||
| 145 | 145 | ||
| 146 | private: | 146 | private: |
| 147 | VideoCore::RasterizerInterface& rasterizer; | 147 | VideoCore::RasterizerInterface& rasterizer; |
| 148 | MemoryManager& memory_manager; | ||
| 149 | 148 | ||
| 150 | /// Performs the copy from the source surface to the destination surface as configured in the | 149 | /// Performs the copy from the source surface to the destination surface as configured in the |
| 151 | /// registers. | 150 | /// registers. |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 44279de00..fa4a7c5c1 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) | 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) |
| 18 | : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} | 18 | : system{system}, upload_state{memory_manager, regs.upload} {} |
| 19 | 19 | ||
| 20 | KeplerMemory::~KeplerMemory() = default; | 20 | KeplerMemory::~KeplerMemory() = default; |
| 21 | 21 | ||
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index f3bc675a9..e0e25c321 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -65,7 +65,6 @@ public: | |||
| 65 | 65 | ||
| 66 | private: | 66 | private: |
| 67 | Core::System& system; | 67 | Core::System& system; |
| 68 | MemoryManager& memory_manager; | ||
| 69 | Upload::State upload_state; | 68 | Upload::State upload_state; |
| 70 | }; | 69 | }; |
| 71 | 70 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 125c53360..f5158d219 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -249,16 +249,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | |||
| 249 | executing_macro = 0; | 249 | executing_macro = 0; |
| 250 | 250 | ||
| 251 | // Lookup the macro offset | 251 | // Lookup the macro offset |
| 252 | const u32 entry{(method - MacroRegistersStart) >> 1}; | 252 | const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); |
| 253 | const auto& search{macro_offsets.find(entry)}; | ||
| 254 | if (search == macro_offsets.end()) { | ||
| 255 | LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); | ||
| 256 | UNREACHABLE(); | ||
| 257 | return; | ||
| 258 | } | ||
| 259 | 253 | ||
| 260 | // Execute the current macro. | 254 | // Execute the current macro. |
| 261 | macro_interpreter.Execute(search->second, std::move(parameters)); | 255 | macro_interpreter.Execute(macro_positions[entry], std::move(parameters)); |
| 262 | } | 256 | } |
| 263 | 257 | ||
| 264 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | 258 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { |
| @@ -421,7 +415,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) { | |||
| 421 | } | 415 | } |
| 422 | 416 | ||
| 423 | void Maxwell3D::ProcessMacroBind(u32 data) { | 417 | void Maxwell3D::ProcessMacroBind(u32 data) { |
| 424 | macro_offsets[regs.macros.entry] = data; | 418 | macro_positions[regs.macros.entry++] = data; |
| 425 | } | 419 | } |
| 426 | 420 | ||
| 427 | void Maxwell3D::ProcessQueryGet() { | 421 | void Maxwell3D::ProcessQueryGet() { |
| @@ -524,7 +518,7 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 524 | void Maxwell3D::ProcessSyncPoint() { | 518 | void Maxwell3D::ProcessSyncPoint() { |
| 525 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 519 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 526 | const u32 increment = regs.sync_info.increment.Value(); | 520 | const u32 increment = regs.sync_info.increment.Value(); |
| 527 | const u32 cache_flush = regs.sync_info.unknown.Value(); | 521 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); |
| 528 | if (increment) { | 522 | if (increment) { |
| 529 | system.GPU().IncrementSyncPoint(sync_point); | 523 | system.GPU().IncrementSyncPoint(sync_point); |
| 530 | } | 524 | } |
| @@ -626,10 +620,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 626 | Texture::TICEntry tic_entry; | 620 | Texture::TICEntry tic_entry; |
| 627 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 621 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 628 | 622 | ||
| 629 | const auto r_type{tic_entry.r_type.Value()}; | 623 | [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()}; |
| 630 | const auto g_type{tic_entry.g_type.Value()}; | 624 | [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()}; |
| 631 | const auto b_type{tic_entry.b_type.Value()}; | 625 | [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()}; |
| 632 | const auto a_type{tic_entry.a_type.Value()}; | 626 | [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()}; |
| 633 | 627 | ||
| 634 | // TODO(Subv): Different data types for separate components are not supported | 628 | // TODO(Subv): Different data types for separate components are not supported |
| 635 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); | 629 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1ee982b76..0184342a0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1270,7 +1270,7 @@ private: | |||
| 1270 | MemoryManager& memory_manager; | 1270 | MemoryManager& memory_manager; |
| 1271 | 1271 | ||
| 1272 | /// Start offsets of each macro in macro_memory | 1272 | /// Start offsets of each macro in macro_memory |
| 1273 | std::unordered_map<u32, u32> macro_offsets; | 1273 | std::array<u32, 0x80> macro_positions = {}; |
| 1274 | 1274 | ||
| 1275 | /// Memory for macro code | 1275 | /// Memory for macro code |
| 1276 | MacroMemory macro_memory; | 1276 | MacroMemory macro_memory; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a28c04473..ad8453c5f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -5,18 +5,17 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/settings.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/engines/maxwell_dma.h" | 10 | #include "video_core/engines/maxwell_dma.h" |
| 10 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 12 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/textures/decoders.h" | 13 | #include "video_core/textures/decoders.h" |
| 14 | 14 | ||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 17 | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) |
| 18 | MemoryManager& memory_manager) | 18 | : system{system}, memory_manager{memory_manager} {} |
| 19 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 20 | 19 | ||
| 21 | void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | 20 | void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { |
| 22 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 21 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, |
| @@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() { | |||
| 84 | ASSERT(regs.exec.enable_2d == 1); | 83 | ASSERT(regs.exec.enable_2d == 1); |
| 85 | 84 | ||
| 86 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 85 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| 87 | ASSERT(regs.src_params.size_z == 1); | 86 | ASSERT(regs.src_params.BlockDepth() == 0); |
| 88 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | 87 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. |
| 89 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; | 88 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; |
| 90 | const std::size_t src_size = Texture::CalculateSize( | 89 | const std::size_t src_size = Texture::CalculateSize( |
| 91 | true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, | 90 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, |
| 92 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | 91 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); |
| 93 | 92 | ||
| 93 | const std::size_t src_layer_size = Texture::CalculateSize( | ||
| 94 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1, | ||
| 95 | regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||
| 96 | |||
| 94 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; | 97 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; |
| 95 | 98 | ||
| 96 | if (read_buffer.size() < src_size) { | 99 | if (read_buffer.size() < src_size) { |
| @@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() { | |||
| 104 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 107 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 105 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 108 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); |
| 106 | 109 | ||
| 107 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | 110 | Texture::UnswizzleSubrect( |
| 108 | regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), | 111 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, |
| 109 | write_buffer.data(), regs.src_params.BlockHeight(), | 112 | read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(), |
| 110 | regs.src_params.pos_x, regs.src_params.pos_y); | 113 | regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); |
| 111 | 114 | ||
| 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 115 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 113 | } else { | 116 | } else { |
| 114 | ASSERT(regs.dst_params.BlockDepth() == 0); | 117 | ASSERT(regs.dst_params.BlockDepth() == 0); |
| 115 | 118 | ||
| 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; | 119 | const u32 bytes_per_pixel = regs.src_pitch / regs.x_count; |
| 117 | 120 | ||
| 118 | const std::size_t dst_size = Texture::CalculateSize( | 121 | const std::size_t dst_size = Texture::CalculateSize( |
| 119 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, | 122 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, |
| 120 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | 123 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 121 | 124 | ||
| 122 | const std::size_t dst_layer_size = Texture::CalculateSize( | 125 | const std::size_t dst_layer_size = Texture::CalculateSize( |
| 123 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, | 126 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, |
| 124 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | 127 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 125 | 128 | ||
| 126 | const std::size_t src_size = regs.src_pitch * regs.y_count; | 129 | const std::size_t src_size = regs.src_pitch * regs.y_count; |
| @@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() { | |||
| 133 | write_buffer.resize(dst_size); | 136 | write_buffer.resize(dst_size); |
| 134 | } | 137 | } |
| 135 | 138 | ||
| 136 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 139 | if (Settings::values.use_accurate_gpu_emulation) { |
| 137 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 140 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 141 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 142 | } else { | ||
| 143 | memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | ||
| 144 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 145 | } | ||
| 138 | 146 | ||
| 139 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 147 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 140 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | 148 | Texture::SwizzleSubrect( |
| 141 | src_bytes_per_pixel, | 149 | regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel, |
| 142 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, | 150 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(), |
| 143 | read_buffer.data(), regs.dst_params.BlockHeight()); | 151 | regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y); |
| 144 | 152 | ||
| 145 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 153 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 146 | } | 154 | } |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 17b015ca7..93808a9bb 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -20,10 +20,6 @@ namespace Tegra { | |||
| 20 | class MemoryManager; | 20 | class MemoryManager; |
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | namespace VideoCore { | ||
| 24 | class RasterizerInterface; | ||
| 25 | } | ||
| 26 | |||
| 27 | namespace Tegra::Engines { | 23 | namespace Tegra::Engines { |
| 28 | 24 | ||
| 29 | /** | 25 | /** |
| @@ -33,8 +29,7 @@ namespace Tegra::Engines { | |||
| 33 | 29 | ||
| 34 | class MaxwellDMA final { | 30 | class MaxwellDMA final { |
| 35 | public: | 31 | public: |
| 36 | explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 32 | explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); |
| 37 | MemoryManager& memory_manager); | ||
| 38 | ~MaxwellDMA() = default; | 33 | ~MaxwellDMA() = default; |
| 39 | 34 | ||
| 40 | /// Write the value to the register identified by method. | 35 | /// Write the value to the register identified by method. |
| @@ -180,8 +175,6 @@ public: | |||
| 180 | private: | 175 | private: |
| 181 | Core::System& system; | 176 | Core::System& system; |
| 182 | 177 | ||
| 183 | VideoCore::RasterizerInterface& rasterizer; | ||
| 184 | |||
| 185 | MemoryManager& memory_manager; | 178 | MemoryManager& memory_manager; |
| 186 | 179 | ||
| 187 | std::vector<u8> read_buffer; | 180 | std::vector<u8> read_buffer; |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 083ee3304..c3678b9ea 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -538,6 +538,12 @@ enum class PhysicalAttributeDirection : u64 { | |||
| 538 | Output = 1, | 538 | Output = 1, |
| 539 | }; | 539 | }; |
| 540 | 540 | ||
| 541 | enum class VoteOperation : u64 { | ||
| 542 | All = 0, // allThreadsNV | ||
| 543 | Any = 1, // anyThreadNV | ||
| 544 | Eq = 2, // allThreadsEqualNV | ||
| 545 | }; | ||
| 546 | |||
| 541 | union Instruction { | 547 | union Instruction { |
| 542 | Instruction& operator=(const Instruction& instr) { | 548 | Instruction& operator=(const Instruction& instr) { |
| 543 | value = instr.value; | 549 | value = instr.value; |
| @@ -565,6 +571,13 @@ union Instruction { | |||
| 565 | } nop; | 571 | } nop; |
| 566 | 572 | ||
| 567 | union { | 573 | union { |
| 574 | BitField<48, 2, VoteOperation> operation; | ||
| 575 | BitField<45, 3, u64> dest_pred; | ||
| 576 | BitField<39, 3, u64> value; | ||
| 577 | BitField<42, 1, u64> negate_value; | ||
| 578 | } vote; | ||
| 579 | |||
| 580 | union { | ||
| 568 | BitField<8, 8, Register> gpr; | 581 | BitField<8, 8, Register> gpr; |
| 569 | BitField<20, 24, s64> offset; | 582 | BitField<20, 24, s64> offset; |
| 570 | } gmem; | 583 | } gmem; |
| @@ -873,6 +886,7 @@ union Instruction { | |||
| 873 | union { | 886 | union { |
| 874 | BitField<0, 3, u64> pred0; | 887 | BitField<0, 3, u64> pred0; |
| 875 | BitField<3, 3, u64> pred3; | 888 | BitField<3, 3, u64> pred3; |
| 889 | BitField<6, 1, u64> neg_b; | ||
| 876 | BitField<7, 1, u64> abs_a; | 890 | BitField<7, 1, u64> abs_a; |
| 877 | BitField<39, 3, u64> pred39; | 891 | BitField<39, 3, u64> pred39; |
| 878 | BitField<42, 1, u64> neg_pred; | 892 | BitField<42, 1, u64> neg_pred; |
| @@ -1006,7 +1020,6 @@ union Instruction { | |||
| 1006 | } iset; | 1020 | } iset; |
| 1007 | 1021 | ||
| 1008 | union { | 1022 | union { |
| 1009 | BitField<41, 2, u64> selector; // i2i and i2f only | ||
| 1010 | BitField<45, 1, u64> negate_a; | 1023 | BitField<45, 1, u64> negate_a; |
| 1011 | BitField<49, 1, u64> abs_a; | 1024 | BitField<49, 1, u64> abs_a; |
| 1012 | BitField<10, 2, Register::Size> src_size; | 1025 | BitField<10, 2, Register::Size> src_size; |
| @@ -1023,8 +1036,6 @@ union Instruction { | |||
| 1023 | } f2i; | 1036 | } f2i; |
| 1024 | 1037 | ||
| 1025 | union { | 1038 | union { |
| 1026 | BitField<8, 2, Register::Size> src_size; | ||
| 1027 | BitField<10, 2, Register::Size> dst_size; | ||
| 1028 | BitField<39, 4, u64> rounding; | 1039 | BitField<39, 4, u64> rounding; |
| 1029 | // H0, H1 extract for F16 missing | 1040 | // H0, H1 extract for F16 missing |
| 1030 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value | 1041 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value |
| @@ -1034,6 +1045,13 @@ union Instruction { | |||
| 1034 | } | 1045 | } |
| 1035 | } f2f; | 1046 | } f2f; |
| 1036 | 1047 | ||
| 1048 | union { | ||
| 1049 | BitField<41, 2, u64> selector; | ||
| 1050 | } int_src; | ||
| 1051 | |||
| 1052 | union { | ||
| 1053 | BitField<41, 1, u64> selector; | ||
| 1054 | } float_src; | ||
| 1037 | } conversion; | 1055 | } conversion; |
| 1038 | 1056 | ||
| 1039 | union { | 1057 | union { |
| @@ -1489,6 +1507,7 @@ public: | |||
| 1489 | SYNC, | 1507 | SYNC, |
| 1490 | BRK, | 1508 | BRK, |
| 1491 | DEPBAR, | 1509 | DEPBAR, |
| 1510 | VOTE, | ||
| 1492 | BFE_C, | 1511 | BFE_C, |
| 1493 | BFE_R, | 1512 | BFE_R, |
| 1494 | BFE_IMM, | 1513 | BFE_IMM, |
| @@ -1651,6 +1670,7 @@ public: | |||
| 1651 | Hfma2, | 1670 | Hfma2, |
| 1652 | Flow, | 1671 | Flow, |
| 1653 | Synch, | 1672 | Synch, |
| 1673 | Warp, | ||
| 1654 | Memory, | 1674 | Memory, |
| 1655 | Texture, | 1675 | Texture, |
| 1656 | Image, | 1676 | Image, |
| @@ -1777,6 +1797,7 @@ private: | |||
| 1777 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), | 1797 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), |
| 1778 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | 1798 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), |
| 1779 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 1799 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 1800 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | ||
| 1780 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 1801 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 1781 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | 1802 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), |
| 1782 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | 1803 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 3006d8059..2c47541cb 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -23,9 +23,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | |||
| 23 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); | 23 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 24 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 24 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 25 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 25 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 26 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 26 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); |
| 27 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); | 27 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |
| 28 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); | 28 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); |
| 29 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | 29 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| @@ -108,6 +108,10 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | |||
| 108 | return true; | 108 | return true; |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | void GPU::FlushCommands() { | ||
| 112 | renderer.Rasterizer().FlushCommands(); | ||
| 113 | } | ||
| 114 | |||
| 111 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | 115 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { |
| 112 | ASSERT(format != RenderTargetFormat::NONE); | 116 | ASSERT(format != RenderTargetFormat::NONE); |
| 113 | 117 | ||
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 1a7f5bdf2..78bc0601a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -19,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) { | |||
| 19 | return reinterpret_cast<CacheAddr>(host_ptr); | 19 | return reinterpret_cast<CacheAddr>(host_ptr); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | inline u8* FromCacheAddr(CacheAddr cache_addr) { | ||
| 23 | return reinterpret_cast<u8*>(cache_addr); | ||
| 24 | } | ||
| 25 | |||
| 22 | namespace Core { | 26 | namespace Core { |
| 23 | class System; | 27 | class System; |
| 24 | } | 28 | } |
| @@ -149,6 +153,8 @@ public: | |||
| 149 | /// Calls a GPU method. | 153 | /// Calls a GPU method. |
| 150 | void CallMethod(const MethodCall& method_call); | 154 | void CallMethod(const MethodCall& method_call); |
| 151 | 155 | ||
| 156 | void FlushCommands(); | ||
| 157 | |||
| 152 | /// Returns a reference to the Maxwell3D GPU engine. | 158 | /// Returns a reference to the Maxwell3D GPU engine. |
| 153 | Engines::Maxwell3D& Maxwell3D(); | 159 | Engines::Maxwell3D& Maxwell3D(); |
| 154 | 160 | ||
| @@ -274,8 +280,8 @@ private: | |||
| 274 | 280 | ||
| 275 | protected: | 281 | protected: |
| 276 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 282 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 277 | VideoCore::RendererBase& renderer; | ||
| 278 | Core::System& system; | 283 | Core::System& system; |
| 284 | VideoCore::RendererBase& renderer; | ||
| 279 | 285 | ||
| 280 | private: | 286 | private: |
| 281 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 287 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 9881df0d5..6b3f2d50a 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -50,6 +50,9 @@ public: | |||
| 50 | /// and invalidated | 50 | /// and invalidated |
| 51 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 51 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 52 | 52 | ||
| 53 | /// Notify the rasterizer to send all written commands to the host GPU. | ||
| 54 | virtual void FlushCommands() = 0; | ||
| 55 | |||
| 53 | /// Notify rasterizer that a frame is about to finish | 56 | /// Notify rasterizer that a frame is about to finish |
| 54 | virtual void TickFrame() = 0; | 57 | virtual void TickFrame() = 0; |
| 55 | 58 | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2a9b523f5..f8a807c84 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -7,28 +7,41 @@ | |||
| 7 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 8 | 8 | ||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 12 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | 15 | ||
| 14 | namespace OpenGL { | 16 | namespace OpenGL { |
| 15 | 17 | ||
| 18 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | ||
| 19 | |||
| 20 | CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) | ||
| 21 | : VideoCommon::BufferBlock{cache_addr, size} { | ||
| 22 | gl_buffer.Create(); | ||
| 23 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | ||
| 24 | } | ||
| 25 | |||
| 26 | CachedBufferBlock::~CachedBufferBlock() = default; | ||
| 27 | |||
| 16 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 28 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 17 | std::size_t stream_size) | 29 | std::size_t stream_size) |
| 18 | : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ | 30 | : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{ |
| 19 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} | 31 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} |
| 20 | 32 | ||
| 21 | OGLBufferCache::~OGLBufferCache() = default; | 33 | OGLBufferCache::~OGLBufferCache() = default; |
| 22 | 34 | ||
| 23 | OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { | 35 | Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { |
| 24 | OGLBuffer buffer; | 36 | return std::make_shared<CachedBufferBlock>(cache_addr, size); |
| 25 | buffer.Create(); | 37 | } |
| 26 | glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 38 | |
| 27 | return buffer; | 39 | void OGLBufferCache::WriteBarrier() { |
| 40 | glMemoryBarrier(GL_ALL_BARRIER_BITS); | ||
| 28 | } | 41 | } |
| 29 | 42 | ||
| 30 | const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { | 43 | const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { |
| 31 | return &buffer.handle; | 44 | return buffer->GetHandle(); |
| 32 | } | 45 | } |
| 33 | 46 | ||
| 34 | const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { | 47 | const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| @@ -36,23 +49,24 @@ const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { | |||
| 36 | return &null_buffer; | 49 | return &null_buffer; |
| 37 | } | 50 | } |
| 38 | 51 | ||
| 39 | void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, | 52 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 40 | const u8* data) { | 53 | const u8* data) { |
| 41 | glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | 54 | glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), |
| 42 | static_cast<GLsizeiptr>(size), data); | 55 | static_cast<GLsizeiptr>(size), data); |
| 43 | } | 56 | } |
| 44 | 57 | ||
| 45 | void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, | 58 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 46 | std::size_t size, u8* data) { | 59 | u8* data) { |
| 47 | glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | 60 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); |
| 61 | glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), | ||
| 48 | static_cast<GLsizeiptr>(size), data); | 62 | static_cast<GLsizeiptr>(size), data); |
| 49 | } | 63 | } |
| 50 | 64 | ||
| 51 | void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, | 65 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 52 | std::size_t src_offset, std::size_t dst_offset, | 66 | std::size_t dst_offset, std::size_t size) { |
| 53 | std::size_t size) { | 67 | glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), |
| 54 | glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), | 68 | static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), |
| 55 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); | 69 | static_cast<GLsizeiptr>(size)); |
| 56 | } | 70 | } |
| 57 | 71 | ||
| 58 | } // namespace OpenGL | 72 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c8ac4038..022e7bfa9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache.h" | 10 | #include "video_core/buffer_cache/buffer_cache.h" |
| 11 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| @@ -21,7 +21,24 @@ namespace OpenGL { | |||
| 21 | class OGLStreamBuffer; | 21 | class OGLStreamBuffer; |
| 22 | class RasterizerOpenGL; | 22 | class RasterizerOpenGL; |
| 23 | 23 | ||
| 24 | class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { | 24 | class CachedBufferBlock; |
| 25 | |||
| 26 | using Buffer = std::shared_ptr<CachedBufferBlock>; | ||
| 27 | |||
| 28 | class CachedBufferBlock : public VideoCommon::BufferBlock { | ||
| 29 | public: | ||
| 30 | explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); | ||
| 31 | ~CachedBufferBlock(); | ||
| 32 | |||
| 33 | const GLuint* GetHandle() const { | ||
| 34 | return &gl_buffer.handle; | ||
| 35 | } | ||
| 36 | |||
| 37 | private: | ||
| 38 | OGLBuffer gl_buffer{}; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> { | ||
| 25 | public: | 42 | public: |
| 26 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 43 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 27 | std::size_t stream_size); | 44 | std::size_t stream_size); |
| @@ -30,18 +47,20 @@ public: | |||
| 30 | const GLuint* GetEmptyBuffer(std::size_t) override; | 47 | const GLuint* GetEmptyBuffer(std::size_t) override; |
| 31 | 48 | ||
| 32 | protected: | 49 | protected: |
| 33 | OGLBuffer CreateBuffer(std::size_t size) override; | 50 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; |
| 51 | |||
| 52 | void WriteBarrier() override; | ||
| 34 | 53 | ||
| 35 | const GLuint* ToHandle(const OGLBuffer& buffer) override; | 54 | const GLuint* ToHandle(const Buffer& buffer) override; |
| 36 | 55 | ||
| 37 | void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, | 56 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 38 | const u8* data) override; | 57 | const u8* data) override; |
| 39 | 58 | ||
| 40 | void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, | 59 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 41 | u8* data) override; | 60 | u8* data) override; |
| 42 | 61 | ||
| 43 | void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, | 62 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 44 | std::size_t dst_offset, std::size_t size) override; | 63 | std::size_t dst_offset, std::size_t size) override; |
| 45 | }; | 64 | }; |
| 46 | 65 | ||
| 47 | } // namespace OpenGL | 66 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 85424a4c9..03d434b28 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -27,6 +27,8 @@ Device::Device() { | |||
| 27 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 27 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 28 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 28 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 29 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 29 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 30 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | ||
| 31 | GLAD_GL_NV_shader_thread_shuffle; | ||
| 30 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | 32 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; |
| 31 | has_variable_aoffi = TestVariableAoffi(); | 33 | has_variable_aoffi = TestVariableAoffi(); |
| 32 | has_component_indexing_bug = TestComponentIndexingBug(); | 34 | has_component_indexing_bug = TestComponentIndexingBug(); |
| @@ -36,6 +38,7 @@ Device::Device(std::nullptr_t) { | |||
| 36 | uniform_buffer_alignment = 0; | 38 | uniform_buffer_alignment = 0; |
| 37 | max_vertex_attributes = 16; | 39 | max_vertex_attributes = 16; |
| 38 | max_varyings = 15; | 40 | max_varyings = 15; |
| 41 | has_warp_intrinsics = true; | ||
| 39 | has_vertex_viewport_layer = true; | 42 | has_vertex_viewport_layer = true; |
| 40 | has_variable_aoffi = true; | 43 | has_variable_aoffi = true; |
| 41 | has_component_indexing_bug = false; | 44 | has_component_indexing_bug = false; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index dc883722d..3ef7c6dd8 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -30,6 +30,10 @@ public: | |||
| 30 | return max_varyings; | 30 | return max_varyings; |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | bool HasWarpIntrinsics() const { | ||
| 34 | return has_warp_intrinsics; | ||
| 35 | } | ||
| 36 | |||
| 33 | bool HasVertexViewportLayer() const { | 37 | bool HasVertexViewportLayer() const { |
| 34 | return has_vertex_viewport_layer; | 38 | return has_vertex_viewport_layer; |
| 35 | } | 39 | } |
| @@ -50,6 +54,7 @@ private: | |||
| 50 | std::size_t shader_storage_alignment{}; | 54 | std::size_t shader_storage_alignment{}; |
| 51 | u32 max_vertex_attributes{}; | 55 | u32 max_vertex_attributes{}; |
| 52 | u32 max_varyings{}; | 56 | u32 max_varyings{}; |
| 57 | bool has_warp_intrinsics{}; | ||
| 53 | bool has_vertex_viewport_layer{}; | 58 | bool has_vertex_viewport_layer{}; |
| 54 | bool has_variable_aoffi{}; | 59 | bool has_variable_aoffi{}; |
| 55 | bool has_component_indexing_bug{}; | 60 | bool has_component_indexing_bug{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c28ae795c..bb09ecd52 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -708,8 +708,6 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 708 | return; | 708 | return; |
| 709 | } | 709 | } |
| 710 | 710 | ||
| 711 | const auto& regs = gpu.regs; | ||
| 712 | |||
| 713 | SyncColorMask(); | 711 | SyncColorMask(); |
| 714 | SyncFragmentColorClampState(); | 712 | SyncFragmentColorClampState(); |
| 715 | SyncMultiSampleState(); | 713 | SyncMultiSampleState(); |
| @@ -863,6 +861,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 863 | InvalidateRegion(addr, size); | 861 | InvalidateRegion(addr, size); |
| 864 | } | 862 | } |
| 865 | 863 | ||
| 864 | void RasterizerOpenGL::FlushCommands() { | ||
| 865 | glFlush(); | ||
| 866 | } | ||
| 867 | |||
| 866 | void RasterizerOpenGL::TickFrame() { | 868 | void RasterizerOpenGL::TickFrame() { |
| 867 | buffer_cache.TickFrame(); | 869 | buffer_cache.TickFrame(); |
| 868 | } | 870 | } |
| @@ -976,7 +978,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr | |||
| 976 | GPUVAddr gpu_addr, std::size_t size) { | 978 | GPUVAddr gpu_addr, std::size_t size) { |
| 977 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 979 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 978 | const auto [ssbo, buffer_offset] = | 980 | const auto [ssbo, buffer_offset] = |
| 979 | buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); | 981 | buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); |
| 980 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | 982 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); |
| 981 | } | 983 | } |
| 982 | 984 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8b123c48d..9d20a4fbf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -63,6 +63,7 @@ public: | |||
| 63 | void FlushRegion(CacheAddr addr, u64 size) override; | 63 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 66 | void FlushCommands() override; | ||
| 66 | void TickFrame() override; | 67 | void TickFrame() override; |
| 67 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 68 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 68 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 69 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1c90facc3..cf6a5cddf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -212,7 +212,9 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 213 | 213 | ||
| 214 | std::string source = "#version 430 core\n" | 214 | std::string source = "#version 430 core\n" |
| 215 | "#extension GL_ARB_separate_shader_objects : enable\n"; | 215 | "#extension GL_ARB_separate_shader_objects : enable\n" |
| 216 | "#extension GL_NV_gpu_shader5 : enable\n" | ||
| 217 | "#extension GL_NV_shader_thread_group : enable\n"; | ||
| 216 | if (entries.shader_viewport_layer_array) { | 218 | if (entries.shader_viewport_layer_array) { |
| 217 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | 219 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; |
| 218 | } | 220 | } |
| @@ -247,20 +249,24 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 247 | if (!texture_buffer_usage.test(i)) { | 249 | if (!texture_buffer_usage.test(i)) { |
| 248 | continue; | 250 | continue; |
| 249 | } | 251 | } |
| 250 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | 252 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i); |
| 253 | } | ||
| 254 | if (texture_buffer_usage.any()) { | ||
| 255 | source += '\n'; | ||
| 251 | } | 256 | } |
| 252 | 257 | ||
| 253 | if (program_type == ProgramType::Geometry) { | 258 | if (program_type == ProgramType::Geometry) { |
| 254 | const auto [glsl_topology, debug_name, max_vertices] = | 259 | const auto [glsl_topology, debug_name, max_vertices] = |
| 255 | GetPrimitiveDescription(primitive_mode); | 260 | GetPrimitiveDescription(primitive_mode); |
| 256 | 261 | ||
| 257 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | 262 | source += "layout (" + std::string(glsl_topology) + ") in;\n\n"; |
| 258 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 263 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 259 | } | 264 | } |
| 260 | if (program_type == ProgramType::Compute) { | 265 | if (program_type == ProgramType::Compute) { |
| 261 | source += "layout (local_size_variable) in;\n"; | 266 | source += "layout (local_size_variable) in;\n"; |
| 262 | } | 267 | } |
| 263 | 268 | ||
| 269 | source += '\n'; | ||
| 264 | source += code; | 270 | source += code; |
| 265 | 271 | ||
| 266 | OGLShader shader; | 272 | OGLShader shader; |
| @@ -289,7 +295,7 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 289 | 295 | ||
| 290 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, | 296 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 291 | GLShader::ProgramResult result) | 297 | GLShader::ProgramResult result) |
| 292 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, | 298 | : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, |
| 293 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 299 | unique_identifier{params.unique_identifier}, program_type{program_type}, |
| 294 | disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, | 300 | disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, |
| 295 | entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} | 301 | entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index a3106a0ff..2c8faf855 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -106,7 +106,6 @@ private: | |||
| 106 | 106 | ||
| 107 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; | 107 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; |
| 108 | 108 | ||
| 109 | u8* host_ptr{}; | ||
| 110 | VAddr cpu_addr{}; | 109 | VAddr cpu_addr{}; |
| 111 | u64 unique_identifier{}; | 110 | u64 unique_identifier{}; |
| 112 | ProgramType program_type{}; | 111 | ProgramType program_type{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ffe26b241..359d58cbe 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -565,7 +565,7 @@ private: | |||
| 565 | case Tegra::Shader::ImageType::Texture1D: | 565 | case Tegra::Shader::ImageType::Texture1D: |
| 566 | return "image1D"; | 566 | return "image1D"; |
| 567 | case Tegra::Shader::ImageType::TextureBuffer: | 567 | case Tegra::Shader::ImageType::TextureBuffer: |
| 568 | return "bufferImage"; | 568 | return "imageBuffer"; |
| 569 | case Tegra::Shader::ImageType::Texture1DArray: | 569 | case Tegra::Shader::ImageType::Texture1DArray: |
| 570 | return "image1DArray"; | 570 | return "image1DArray"; |
| 571 | case Tegra::Shader::ImageType::Texture2D: | 571 | case Tegra::Shader::ImageType::Texture2D: |
| @@ -1136,6 +1136,16 @@ private: | |||
| 1136 | Type::Float); | 1136 | Type::Float); |
| 1137 | } | 1137 | } |
| 1138 | 1138 | ||
| 1139 | std::string FCastHalf0(Operation operation) { | ||
| 1140 | const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1141 | return fmt::format("({})[0]", op_a); | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | std::string FCastHalf1(Operation operation) { | ||
| 1145 | const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1146 | return fmt::format("({})[1]", op_a); | ||
| 1147 | } | ||
| 1148 | |||
| 1139 | template <Type type> | 1149 | template <Type type> |
| 1140 | std::string Min(Operation operation) { | 1150 | std::string Min(Operation operation) { |
| 1141 | return GenerateBinaryCall(operation, "min", type, type, type); | 1151 | return GenerateBinaryCall(operation, "min", type, type, type); |
| @@ -1292,6 +1302,11 @@ private: | |||
| 1292 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | 1302 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); |
| 1293 | } | 1303 | } |
| 1294 | 1304 | ||
| 1305 | std::string HCastFloat(Operation operation) { | ||
| 1306 | const std::string op_a = VisitOperand(operation, 0, Type::Float); | ||
| 1307 | return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a); | ||
| 1308 | } | ||
| 1309 | |||
| 1295 | std::string HUnpack(Operation operation) { | 1310 | std::string HUnpack(Operation operation) { |
| 1296 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | 1311 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; |
| 1297 | const auto value = [&]() -> std::string { | 1312 | const auto value = [&]() -> std::string { |
| @@ -1720,6 +1735,48 @@ private: | |||
| 1720 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; | 1735 | return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; |
| 1721 | } | 1736 | } |
| 1722 | 1737 | ||
| 1738 | std::string BallotThread(Operation operation) { | ||
| 1739 | const std::string value = VisitOperand(operation, 0, Type::Bool); | ||
| 1740 | if (!device.HasWarpIntrinsics()) { | ||
| 1741 | LOG_ERROR(Render_OpenGL, | ||
| 1742 | "Nvidia warp intrinsics are not available and its required by a shader"); | ||
| 1743 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active | ||
| 1744 | // one. | ||
| 1745 | return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value); | ||
| 1746 | } | ||
| 1747 | return fmt::format("utof(ballotThreadNV({}))", value); | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | std::string Vote(Operation operation, const char* func) { | ||
| 1751 | const std::string value = VisitOperand(operation, 0, Type::Bool); | ||
| 1752 | if (!device.HasWarpIntrinsics()) { | ||
| 1753 | LOG_ERROR(Render_OpenGL, | ||
| 1754 | "Nvidia vote intrinsics are not available and its required by a shader"); | ||
| 1755 | // Stub with a warp size of one. | ||
| 1756 | return value; | ||
| 1757 | } | ||
| 1758 | return fmt::format("{}({})", func, value); | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | std::string VoteAll(Operation operation) { | ||
| 1762 | return Vote(operation, "allThreadsNV"); | ||
| 1763 | } | ||
| 1764 | |||
| 1765 | std::string VoteAny(Operation operation) { | ||
| 1766 | return Vote(operation, "anyThreadNV"); | ||
| 1767 | } | ||
| 1768 | |||
| 1769 | std::string VoteEqual(Operation operation) { | ||
| 1770 | if (!device.HasWarpIntrinsics()) { | ||
| 1771 | LOG_ERROR(Render_OpenGL, | ||
| 1772 | "Nvidia vote intrinsics are not available and its required by a shader"); | ||
| 1773 | // We must return true here since a stub for a theoretical warp size of 1 will always | ||
| 1774 | // return an equal result for all its votes. | ||
| 1775 | return "true"; | ||
| 1776 | } | ||
| 1777 | return Vote(operation, "allThreadsEqualNV"); | ||
| 1778 | } | ||
| 1779 | |||
| 1723 | static constexpr std::array operation_decompilers = { | 1780 | static constexpr std::array operation_decompilers = { |
| 1724 | &GLSLDecompiler::Assign, | 1781 | &GLSLDecompiler::Assign, |
| 1725 | 1782 | ||
| @@ -1732,6 +1789,8 @@ private: | |||
| 1732 | &GLSLDecompiler::Negate<Type::Float>, | 1789 | &GLSLDecompiler::Negate<Type::Float>, |
| 1733 | &GLSLDecompiler::Absolute<Type::Float>, | 1790 | &GLSLDecompiler::Absolute<Type::Float>, |
| 1734 | &GLSLDecompiler::FClamp, | 1791 | &GLSLDecompiler::FClamp, |
| 1792 | &GLSLDecompiler::FCastHalf0, | ||
| 1793 | &GLSLDecompiler::FCastHalf1, | ||
| 1735 | &GLSLDecompiler::Min<Type::Float>, | 1794 | &GLSLDecompiler::Min<Type::Float>, |
| 1736 | &GLSLDecompiler::Max<Type::Float>, | 1795 | &GLSLDecompiler::Max<Type::Float>, |
| 1737 | &GLSLDecompiler::FCos, | 1796 | &GLSLDecompiler::FCos, |
| @@ -1792,6 +1851,7 @@ private: | |||
| 1792 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | 1851 | &GLSLDecompiler::Absolute<Type::HalfFloat>, |
| 1793 | &GLSLDecompiler::HNegate, | 1852 | &GLSLDecompiler::HNegate, |
| 1794 | &GLSLDecompiler::HClamp, | 1853 | &GLSLDecompiler::HClamp, |
| 1854 | &GLSLDecompiler::HCastFloat, | ||
| 1795 | &GLSLDecompiler::HUnpack, | 1855 | &GLSLDecompiler::HUnpack, |
| 1796 | &GLSLDecompiler::HMergeF32, | 1856 | &GLSLDecompiler::HMergeF32, |
| 1797 | &GLSLDecompiler::HMergeH0, | 1857 | &GLSLDecompiler::HMergeH0, |
| @@ -1867,6 +1927,11 @@ private: | |||
| 1867 | &GLSLDecompiler::WorkGroupId<0>, | 1927 | &GLSLDecompiler::WorkGroupId<0>, |
| 1868 | &GLSLDecompiler::WorkGroupId<1>, | 1928 | &GLSLDecompiler::WorkGroupId<1>, |
| 1869 | &GLSLDecompiler::WorkGroupId<2>, | 1929 | &GLSLDecompiler::WorkGroupId<2>, |
| 1930 | |||
| 1931 | &GLSLDecompiler::BallotThread, | ||
| 1932 | &GLSLDecompiler::VoteAll, | ||
| 1933 | &GLSLDecompiler::VoteAny, | ||
| 1934 | &GLSLDecompiler::VoteEqual, | ||
| 1870 | }; | 1935 | }; |
| 1871 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 1936 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 1872 | 1937 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 408332f90..4f135fe03 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -184,6 +184,9 @@ GLint GetSwizzleSource(SwizzleSource source) { | |||
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { | 186 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { |
| 187 | if (params.IsBuffer()) { | ||
| 188 | return; | ||
| 189 | } | ||
| 187 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | 190 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
| 188 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | 191 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); |
| 189 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | 192 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); |
| @@ -208,6 +211,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte | |||
| 208 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), | 211 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), |
| 209 | nullptr, GL_DYNAMIC_STORAGE_BIT); | 212 | nullptr, GL_DYNAMIC_STORAGE_BIT); |
| 210 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); | 213 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); |
| 214 | break; | ||
| 211 | case SurfaceTarget::Texture2D: | 215 | case SurfaceTarget::Texture2D: |
| 212 | case SurfaceTarget::TextureCubemap: | 216 | case SurfaceTarget::TextureCubemap: |
| 213 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, | 217 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index ff6ab6988..21324488a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -51,7 +51,7 @@ public: | |||
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | protected: | 53 | protected: |
| 54 | void DecorateSurfaceName(); | 54 | void DecorateSurfaceName() override; |
| 55 | 55 | ||
| 56 | View CreateView(const ViewParams& view_key) override; | 56 | View CreateView(const ViewParams& view_key) override; |
| 57 | View CreateViewInner(const ViewParams& view_key, bool is_proxy); | 57 | View CreateViewInner(const ViewParams& view_key, bool is_proxy); |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index d267712c9..a35b45c9c 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -735,6 +735,16 @@ private: | |||
| 735 | return {}; | 735 | return {}; |
| 736 | } | 736 | } |
| 737 | 737 | ||
| 738 | Id FCastHalf0(Operation operation) { | ||
| 739 | UNIMPLEMENTED(); | ||
| 740 | return {}; | ||
| 741 | } | ||
| 742 | |||
| 743 | Id FCastHalf1(Operation operation) { | ||
| 744 | UNIMPLEMENTED(); | ||
| 745 | return {}; | ||
| 746 | } | ||
| 747 | |||
| 738 | Id HNegate(Operation operation) { | 748 | Id HNegate(Operation operation) { |
| 739 | UNIMPLEMENTED(); | 749 | UNIMPLEMENTED(); |
| 740 | return {}; | 750 | return {}; |
| @@ -745,6 +755,11 @@ private: | |||
| 745 | return {}; | 755 | return {}; |
| 746 | } | 756 | } |
| 747 | 757 | ||
| 758 | Id HCastFloat(Operation operation) { | ||
| 759 | UNIMPLEMENTED(); | ||
| 760 | return {}; | ||
| 761 | } | ||
| 762 | |||
| 748 | Id HUnpack(Operation operation) { | 763 | Id HUnpack(Operation operation) { |
| 749 | UNIMPLEMENTED(); | 764 | UNIMPLEMENTED(); |
| 750 | return {}; | 765 | return {}; |
| @@ -1057,6 +1072,26 @@ private: | |||
| 1057 | return {}; | 1072 | return {}; |
| 1058 | } | 1073 | } |
| 1059 | 1074 | ||
| 1075 | Id BallotThread(Operation) { | ||
| 1076 | UNIMPLEMENTED(); | ||
| 1077 | return {}; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | Id VoteAll(Operation) { | ||
| 1081 | UNIMPLEMENTED(); | ||
| 1082 | return {}; | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | Id VoteAny(Operation) { | ||
| 1086 | UNIMPLEMENTED(); | ||
| 1087 | return {}; | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | Id VoteEqual(Operation) { | ||
| 1091 | UNIMPLEMENTED(); | ||
| 1092 | return {}; | ||
| 1093 | } | ||
| 1094 | |||
| 1060 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, | 1095 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, |
| 1061 | const std::string& name) { | 1096 | const std::string& name) { |
| 1062 | const Id id = OpVariable(type, storage); | 1097 | const Id id = OpVariable(type, storage); |
| @@ -1210,6 +1245,8 @@ private: | |||
| 1210 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | 1245 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, |
| 1211 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | 1246 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, |
| 1212 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | 1247 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, |
| 1248 | &SPIRVDecompiler::FCastHalf0, | ||
| 1249 | &SPIRVDecompiler::FCastHalf1, | ||
| 1213 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | 1250 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, |
| 1214 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | 1251 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, |
| 1215 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | 1252 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, |
| @@ -1270,6 +1307,7 @@ private: | |||
| 1270 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | 1307 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, |
| 1271 | &SPIRVDecompiler::HNegate, | 1308 | &SPIRVDecompiler::HNegate, |
| 1272 | &SPIRVDecompiler::HClamp, | 1309 | &SPIRVDecompiler::HClamp, |
| 1310 | &SPIRVDecompiler::HCastFloat, | ||
| 1273 | &SPIRVDecompiler::HUnpack, | 1311 | &SPIRVDecompiler::HUnpack, |
| 1274 | &SPIRVDecompiler::HMergeF32, | 1312 | &SPIRVDecompiler::HMergeF32, |
| 1275 | &SPIRVDecompiler::HMergeH0, | 1313 | &SPIRVDecompiler::HMergeH0, |
| @@ -1346,6 +1384,11 @@ private: | |||
| 1346 | &SPIRVDecompiler::WorkGroupId<0>, | 1384 | &SPIRVDecompiler::WorkGroupId<0>, |
| 1347 | &SPIRVDecompiler::WorkGroupId<1>, | 1385 | &SPIRVDecompiler::WorkGroupId<1>, |
| 1348 | &SPIRVDecompiler::WorkGroupId<2>, | 1386 | &SPIRVDecompiler::WorkGroupId<2>, |
| 1387 | |||
| 1388 | &SPIRVDecompiler::BallotThread, | ||
| 1389 | &SPIRVDecompiler::VoteAll, | ||
| 1390 | &SPIRVDecompiler::VoteAny, | ||
| 1391 | &SPIRVDecompiler::VoteEqual, | ||
| 1349 | }; | 1392 | }; |
| 1350 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 1393 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 1351 | 1394 | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index b547d8323..47a9fd961 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -176,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 176 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | 176 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, |
| 177 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | 177 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, |
| 178 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | 178 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, |
| 179 | {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, | ||
| 179 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | 180 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, |
| 180 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | 181 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, |
| 181 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, | 182 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..32facd6ba 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -14,6 +14,12 @@ using Tegra::Shader::Instruction; | |||
| 14 | using Tegra::Shader::OpCode; | 14 | using Tegra::Shader::OpCode; |
| 15 | using Tegra::Shader::Register; | 15 | using Tegra::Shader::Register; |
| 16 | 16 | ||
| 17 | namespace { | ||
| 18 | constexpr OperationCode GetFloatSelector(u64 selector) { | ||
| 19 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | ||
| 20 | } | ||
| 21 | } // Anonymous namespace | ||
| 22 | |||
| 17 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | 23 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 24 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 25 | const auto opcode = OpCode::Decode(instr); |
| @@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 22 | case OpCode::Id::I2I_R: | 28 | case OpCode::Id::I2I_R: |
| 23 | case OpCode::Id::I2I_C: | 29 | case OpCode::Id::I2I_C: |
| 24 | case OpCode::Id::I2I_IMM: { | 30 | case OpCode::Id::I2I_IMM: { |
| 25 | UNIMPLEMENTED_IF(instr.conversion.selector); | 31 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); |
| 26 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 32 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); |
| 27 | UNIMPLEMENTED_IF(instr.alu.saturate_d); | 33 | UNIMPLEMENTED_IF(instr.alu.saturate_d); |
| 28 | 34 | ||
| @@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 57 | case OpCode::Id::I2F_R: | 63 | case OpCode::Id::I2F_R: |
| 58 | case OpCode::Id::I2F_C: | 64 | case OpCode::Id::I2F_C: |
| 59 | case OpCode::Id::I2F_IMM: { | 65 | case OpCode::Id::I2F_IMM: { |
| 60 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 66 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); |
| 61 | UNIMPLEMENTED_IF(instr.conversion.selector); | 67 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 62 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 63 | "Condition codes generation in I2F is not implemented"); | 69 | "Condition codes generation in I2F is not implemented"); |
| 64 | 70 | ||
| @@ -82,14 +88,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 82 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | 88 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); |
| 83 | 89 | ||
| 84 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 90 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 91 | |||
| 92 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 93 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 94 | } | ||
| 95 | |||
| 85 | SetRegister(bb, instr.gpr0, value); | 96 | SetRegister(bb, instr.gpr0, value); |
| 86 | break; | 97 | break; |
| 87 | } | 98 | } |
| 88 | case OpCode::Id::F2F_R: | 99 | case OpCode::Id::F2F_R: |
| 89 | case OpCode::Id::F2F_C: | 100 | case OpCode::Id::F2F_C: |
| 90 | case OpCode::Id::F2F_IMM: { | 101 | case OpCode::Id::F2F_IMM: { |
| 91 | UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); | 102 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 92 | UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); | 103 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 93 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 104 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 94 | "Condition codes generation in F2F is not implemented"); | 105 | "Condition codes generation in F2F is not implemented"); |
| 95 | 106 | ||
| @@ -107,6 +118,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 107 | } | 118 | } |
| 108 | }(); | 119 | }(); |
| 109 | 120 | ||
| 121 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 122 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 123 | std::move(value)); | ||
| 124 | } else { | ||
| 125 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 126 | } | ||
| 127 | |||
| 110 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 128 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 111 | 129 | ||
| 112 | value = [&]() { | 130 | value = [&]() { |
| @@ -124,19 +142,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 124 | default: | 142 | default: |
| 125 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | 143 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", |
| 126 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | 144 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); |
| 127 | return Immediate(0); | 145 | return value; |
| 128 | } | 146 | } |
| 129 | }(); | 147 | }(); |
| 130 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | 148 | value = GetSaturatedFloat(value, instr.alu.saturate_d); |
| 131 | 149 | ||
| 132 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 150 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 151 | |||
| 152 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 153 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 154 | } | ||
| 155 | |||
| 133 | SetRegister(bb, instr.gpr0, value); | 156 | SetRegister(bb, instr.gpr0, value); |
| 134 | break; | 157 | break; |
| 135 | } | 158 | } |
| 136 | case OpCode::Id::F2I_R: | 159 | case OpCode::Id::F2I_R: |
| 137 | case OpCode::Id::F2I_C: | 160 | case OpCode::Id::F2I_C: |
| 138 | case OpCode::Id::F2I_IMM: { | 161 | case OpCode::Id::F2I_IMM: { |
| 139 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 162 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 140 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 163 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 141 | "Condition codes generation in F2I is not implemented"); | 164 | "Condition codes generation in F2I is not implemented"); |
| 142 | Node value = [&]() { | 165 | Node value = [&]() { |
| @@ -153,6 +176,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 153 | } | 176 | } |
| 154 | }(); | 177 | }(); |
| 155 | 178 | ||
| 179 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 180 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 181 | std::move(value)); | ||
| 182 | } else { | ||
| 183 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 184 | } | ||
| 185 | |||
| 156 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 186 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 157 | 187 | ||
| 158 | value = [&]() { | 188 | value = [&]() { |
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index f5013e44a..5614e8a0d 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp | |||
| @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; | |||
| 15 | 15 | ||
| 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { | 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { |
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | 18 | ||
| 20 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, |
| 21 | instr.fset.neg_a != 0); | 20 | instr.fset.neg_a != 0); |
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 2323052b0..200c2c983 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp | |||
| @@ -16,10 +16,9 @@ using Tegra::Shader::Pred; | |||
| 16 | 16 | ||
| 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | 19 | ||
| 21 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | 20 | Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, |
| 22 | instr.fsetp.neg_a != 0); | 21 | instr.fsetp.neg_a != 0); |
| 23 | Node op_b = [&]() { | 22 | Node op_b = [&]() { |
| 24 | if (instr.is_b_imm) { | 23 | if (instr.is_b_imm) { |
| 25 | return GetImmediate19(instr); | 24 | return GetImmediate19(instr); |
| @@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 29 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 30 | } | 29 | } |
| 31 | }(); | 30 | }(); |
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); | 31 | op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); |
| 33 | 32 | ||
| 34 | // We can't use the constant predicate as destination. | 33 | // We can't use the constant predicate as destination. |
| 35 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |
| 36 | 35 | ||
| 37 | const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); | 36 | const Node predicate = |
| 37 | GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); | ||
| 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); |
| 39 | 39 | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); |
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index a6c082cc9..afea33e5f 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -30,7 +30,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 30 | case OpCode::Id::HSETP2_C: | 30 | case OpCode::Id::HSETP2_C: |
| 31 | cond = instr.hsetp2.cbuf_and_imm.cond; | 31 | cond = instr.hsetp2.cbuf_and_imm.cond; |
| 32 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | 32 | h_and = instr.hsetp2.cbuf_and_imm.h_and; |
| 33 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), | 33 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |
| 34 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); | 34 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); |
| 35 | break; | 35 | break; |
| 36 | case OpCode::Id::HSETP2_IMM: | 36 | case OpCode::Id::HSETP2_IMM: |
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 46e3d5905..59809bcd8 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp | |||
| @@ -14,7 +14,6 @@ using Tegra::Shader::OpCode; | |||
| 14 | 14 | ||
| 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { | 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { |
| 16 | const Instruction instr = {program_code[pc]}; | 16 | const Instruction instr = {program_code[pc]}; |
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | 17 | ||
| 19 | const Node op_a = GetRegister(instr.gpr8); | 18 | const Node op_a = GetRegister(instr.gpr8); |
| 20 | const Node op_b = [&]() { | 19 | const Node op_b = [&]() { |
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index dd20775d7..25e48fef8 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp | |||
| @@ -16,7 +16,6 @@ using Tegra::Shader::Pred; | |||
| 16 | 16 | ||
| 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { | 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { |
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | 19 | ||
| 21 | const Node op_a = GetRegister(instr.gpr8); | 20 | const Node op_a = GetRegister(instr.gpr8); |
| 22 | 21 | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index ac0e764d6..d46e0f823 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -74,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 74 | case SystemVariable::InvocationInfo: | 74 | case SystemVariable::InvocationInfo: |
| 75 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); | 75 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); |
| 76 | return Immediate(0u); | 76 | return Immediate(0u); |
| 77 | case SystemVariable::Tid: { | ||
| 78 | Node value = Immediate(0); | ||
| 79 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); | ||
| 80 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9); | ||
| 81 | value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5); | ||
| 82 | return value; | ||
| 83 | } | ||
| 77 | case SystemVariable::TidX: | 84 | case SystemVariable::TidX: |
| 78 | return Operation(OperationCode::LocalInvocationIdX); | 85 | return Operation(OperationCode::LocalInvocationIdX); |
| 79 | case SystemVariable::TidY: | 86 | case SystemVariable::TidY: |
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index febbfeb50..84dbc50fe 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp | |||
| @@ -15,7 +15,6 @@ using Tegra::Shader::OpCode; | |||
| 15 | 15 | ||
| 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { | 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { |
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | 18 | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 21 | "Condition codes generation in PSET is not implemented"); | 20 | "Condition codes generation in PSET is not implemented"); |
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp new file mode 100644 index 000000000..04ca74f46 --- /dev/null +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::VoteOperation; | ||
| 17 | |||
| 18 | namespace { | ||
| 19 | OperationCode GetOperationCode(VoteOperation vote_op) { | ||
| 20 | switch (vote_op) { | ||
| 21 | case VoteOperation::All: | ||
| 22 | return OperationCode::VoteAll; | ||
| 23 | case VoteOperation::Any: | ||
| 24 | return OperationCode::VoteAny; | ||
| 25 | case VoteOperation::Eq: | ||
| 26 | return OperationCode::VoteEqual; | ||
| 27 | default: | ||
| 28 | UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op)); | ||
| 29 | return OperationCode::VoteAll; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } // Anonymous namespace | ||
| 33 | |||
| 34 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | ||
| 35 | const Instruction instr = {program_code[pc]}; | ||
| 36 | const auto opcode = OpCode::Decode(instr); | ||
| 37 | |||
| 38 | switch (opcode->get().GetId()) { | ||
| 39 | case OpCode::Id::VOTE: { | ||
| 40 | const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); | ||
| 41 | const Node active = Operation(OperationCode::BallotThread, value); | ||
| 42 | const Node vote = Operation(GetOperationCode(instr.vote.operation), value); | ||
| 43 | SetRegister(bb, instr.gpr0, active); | ||
| 44 | SetPredicate(bb, instr.vote.dest_pred, vote); | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | default: | ||
| 48 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | ||
| 49 | break; | ||
| 50 | } | ||
| 51 | |||
| 52 | return pc; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 715184d67..5db9313c4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -30,6 +30,8 @@ enum class OperationCode { | |||
| 30 | FNegate, /// (MetaArithmetic, float a) -> float | 30 | FNegate, /// (MetaArithmetic, float a) -> float |
| 31 | FAbsolute, /// (MetaArithmetic, float a) -> float | 31 | FAbsolute, /// (MetaArithmetic, float a) -> float |
| 32 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float | 32 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float |
| 33 | FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 34 | FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 33 | FMin, /// (MetaArithmetic, float a, float b) -> float | 35 | FMin, /// (MetaArithmetic, float a, float b) -> float |
| 34 | FMax, /// (MetaArithmetic, float a, float b) -> float | 36 | FMax, /// (MetaArithmetic, float a, float b) -> float |
| 35 | FCos, /// (MetaArithmetic, float a) -> float | 37 | FCos, /// (MetaArithmetic, float a) -> float |
| @@ -83,17 +85,18 @@ enum class OperationCode { | |||
| 83 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint | 85 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint |
| 84 | UBitCount, /// (MetaArithmetic, uint) -> uint | 86 | UBitCount, /// (MetaArithmetic, uint) -> uint |
| 85 | 87 | ||
| 86 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 88 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 87 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 89 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 88 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | 90 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 |
| 89 | HAbsolute, /// (f16vec2 a) -> f16vec2 | 91 | HAbsolute, /// (f16vec2 a) -> f16vec2 |
| 90 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | 92 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 |
| 91 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | 93 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 |
| 92 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | 94 | HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 |
| 93 | HMergeF32, /// (f16vec2 src) -> float | 95 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 |
| 94 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 96 | HMergeF32, /// (f16vec2 src) -> float |
| 95 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 97 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 96 | HPack2, /// (float a, float b) -> f16vec2 | 98 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 99 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 97 | 100 | ||
| 98 | LogicalAssign, /// (bool& dst, bool src) -> void | 101 | LogicalAssign, /// (bool& dst, bool src) -> void |
| 99 | LogicalAnd, /// (bool a, bool b) -> bool | 102 | LogicalAnd, /// (bool a, bool b) -> bool |
| @@ -165,6 +168,11 @@ enum class OperationCode { | |||
| 165 | WorkGroupIdY, /// () -> uint | 168 | WorkGroupIdY, /// () -> uint |
| 166 | WorkGroupIdZ, /// () -> uint | 169 | WorkGroupIdZ, /// () -> uint |
| 167 | 170 | ||
| 171 | BallotThread, /// (bool) -> uint | ||
| 172 | VoteAll, /// (bool) -> bool | ||
| 173 | VoteAny, /// (bool) -> bool | ||
| 174 | VoteEqual, /// (bool) -> bool | ||
| 175 | |||
| 168 | Amount, | 176 | Amount, |
| 169 | }; | 177 | }; |
| 170 | 178 | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 5e91fe129..1e5c7f660 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -405,4 +405,9 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | |||
| 405 | Immediate(offset), Immediate(bits)); | 405 | Immediate(offset), Immediate(bits)); |
| 406 | } | 406 | } |
| 407 | 407 | ||
| 408 | Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | ||
| 409 | return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), | ||
| 410 | Immediate(bits)); | ||
| 411 | } | ||
| 412 | |||
| 408 | } // namespace VideoCommon::Shader | 413 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 59a083d90..bcc9b79b6 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -167,6 +167,7 @@ private: | |||
| 167 | u32 DecodeFfma(NodeBlock& bb, u32 pc); | 167 | u32 DecodeFfma(NodeBlock& bb, u32 pc); |
| 168 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | 168 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); |
| 169 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | 169 | u32 DecodeConversion(NodeBlock& bb, u32 pc); |
| 170 | u32 DecodeWarp(NodeBlock& bb, u32 pc); | ||
| 170 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | 171 | u32 DecodeMemory(NodeBlock& bb, u32 pc); |
| 171 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | 172 | u32 DecodeTexture(NodeBlock& bb, u32 pc); |
| 172 | u32 DecodeImage(NodeBlock& bb, u32 pc); | 173 | u32 DecodeImage(NodeBlock& bb, u32 pc); |
| @@ -279,6 +280,9 @@ private: | |||
| 279 | /// Extracts a sequence of bits from a node | 280 | /// Extracts a sequence of bits from a node |
| 280 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 281 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| 281 | 282 | ||
| 283 | /// Inserts a sequence of bits from a node | ||
| 284 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | ||
| 285 | |||
| 282 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | 286 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |
| 283 | const Node4& components); | 287 | const Node4& components); |
| 284 | 288 | ||
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 358d6757c..e7ef66ee2 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -58,7 +58,6 @@ public: | |||
| 58 | std::size_t GetHostSizeInBytes() const { | 58 | std::size_t GetHostSizeInBytes() const { |
| 59 | std::size_t host_size_in_bytes; | 59 | std::size_t host_size_in_bytes; |
| 60 | if (GetCompressionType() == SurfaceCompression::Converted) { | 60 | if (GetCompressionType() == SurfaceCompression::Converted) { |
| 61 | constexpr std::size_t rgb8_bpp = 4ULL; | ||
| 62 | // ASTC is uncompressed in software, in emulated as RGBA8 | 61 | // ASTC is uncompressed in software, in emulated as RGBA8 |
| 63 | host_size_in_bytes = 0; | 62 | host_size_in_bytes = 0; |
| 64 | for (u32 level = 0; level < num_levels; ++level) { | 63 | for (u32 level = 0; level < num_levels; ++level) { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a3a3770a7..2ec0203d1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -308,8 +308,6 @@ protected: | |||
| 308 | if (!guard_render_targets && surface->IsRenderTarget()) { | 308 | if (!guard_render_targets && surface->IsRenderTarget()) { |
| 309 | ManageRenderTargetUnregister(surface); | 309 | ManageRenderTargetUnregister(surface); |
| 310 | } | 310 | } |
| 311 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | ||
| 312 | const CacheAddr cache_ptr = surface->GetCacheAddr(); | ||
| 313 | const std::size_t size = surface->GetSizeInBytes(); | 311 | const std::size_t size = surface->GetSizeInBytes(); |
| 314 | const VAddr cpu_addr = surface->GetCpuAddr(); | 312 | const VAddr cpu_addr = surface->GetCpuAddr(); |
| 315 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 313 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 7e8295944..7df5f1452 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -257,19 +257,21 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 257 | 257 | ||
| 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 260 | u32 block_height_bit) { | 260 | u32 block_height_bit, u32 offset_x, u32 offset_y) { |
| 261 | const u32 block_height = 1U << block_height_bit; | 261 | const u32 block_height = 1U << block_height_bit; |
| 262 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 262 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 263 | gob_size_x}; | 263 | gob_size_x}; |
| 264 | for (u32 line = 0; line < subrect_height; ++line) { | 264 | for (u32 line = 0; line < subrect_height; ++line) { |
| 265 | const u32 dst_y = line + offset_y; | ||
| 265 | const u32 gob_address_y = | 266 | const u32 gob_address_y = |
| 266 | (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | 267 | (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + |
| 267 | ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; | 268 | ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size; |
| 268 | const auto& table = legacy_swizzle_table[line % gob_size_y]; | 269 | const auto& table = legacy_swizzle_table[dst_y % gob_size_y]; |
| 269 | for (u32 x = 0; x < subrect_width; ++x) { | 270 | for (u32 x = 0; x < subrect_width; ++x) { |
| 271 | const u32 dst_x = x + offset_x; | ||
| 270 | const u32 gob_address = | 272 | const u32 gob_address = |
| 271 | gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; | 273 | gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height; |
| 272 | const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; | 274 | const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x]; |
| 273 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; | 275 | u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; |
| 274 | u8* dest_addr = swizzled_data + swizzled_offset; | 276 | u8* dest_addr = swizzled_data + swizzled_offset; |
| 275 | 277 | ||
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index eaec9b5a5..f1e3952bc 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 44 | 44 | ||
| 45 | /// Copies an untiled subrectangle into a tiled surface. | 45 | /// Copies an untiled subrectangle into a tiled surface. |
| 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 47 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height); | 47 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 48 | u32 offset_x, u32 offset_y); | ||
| 48 | 49 | ||
| 49 | /// Copies a tiled subrectangle into a linear surface. | 50 | /// Copies a tiled subrectangle into a linear surface. |
| 50 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 51 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index e3be018b9..e36bc2c04 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -213,7 +213,7 @@ struct TICEntry { | |||
| 213 | if (header_version != TICHeaderVersion::OneDBuffer) { | 213 | if (header_version != TICHeaderVersion::OneDBuffer) { |
| 214 | return width_minus_1 + 1; | 214 | return width_minus_1 + 1; |
| 215 | } | 215 | } |
| 216 | return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one; | 216 | return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | u32 Height() const { | 219 | u32 Height() const { |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 5d0fb3f9f..0456248ac 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -516,6 +516,7 @@ void Config::ReadPathValues() { | |||
| 516 | 516 | ||
| 517 | UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString(); | 517 | UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString(); |
| 518 | UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString(); | 518 | UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString(); |
| 519 | UISettings::values.screenshot_path = ReadSetting(QStringLiteral("screenshotPath")).toString(); | ||
| 519 | UISettings::values.game_directory_path = | 520 | UISettings::values.game_directory_path = |
| 520 | ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString(); | 521 | ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString(); |
| 521 | UISettings::values.game_directory_deepscan = | 522 | UISettings::values.game_directory_deepscan = |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index a7c656fdb..ac57229d5 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -119,6 +119,7 @@ Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin); | |||
| 119 | #endif | 119 | #endif |
| 120 | 120 | ||
| 121 | #ifdef _WIN32 | 121 | #ifdef _WIN32 |
| 122 | #include <windows.h> | ||
| 122 | extern "C" { | 123 | extern "C" { |
| 123 | // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable | 124 | // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable |
| 124 | // graphics | 125 | // graphics |
| @@ -747,6 +748,18 @@ void GMainWindow::OnDisplayTitleBars(bool show) { | |||
| 747 | } | 748 | } |
| 748 | } | 749 | } |
| 749 | 750 | ||
| 751 | void GMainWindow::PreventOSSleep() { | ||
| 752 | #ifdef _WIN32 | ||
| 753 | SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED); | ||
| 754 | #endif | ||
| 755 | } | ||
| 756 | |||
| 757 | void GMainWindow::AllowOSSleep() { | ||
| 758 | #ifdef _WIN32 | ||
| 759 | SetThreadExecutionState(ES_CONTINUOUS); | ||
| 760 | #endif | ||
| 761 | } | ||
| 762 | |||
| 750 | QStringList GMainWindow::GetUnsupportedGLExtensions() { | 763 | QStringList GMainWindow::GetUnsupportedGLExtensions() { |
| 751 | QStringList unsupported_ext; | 764 | QStringList unsupported_ext; |
| 752 | 765 | ||
| @@ -966,6 +979,8 @@ void GMainWindow::BootGame(const QString& filename) { | |||
| 966 | } | 979 | } |
| 967 | 980 | ||
| 968 | void GMainWindow::ShutdownGame() { | 981 | void GMainWindow::ShutdownGame() { |
| 982 | AllowOSSleep(); | ||
| 983 | |||
| 969 | discord_rpc->Pause(); | 984 | discord_rpc->Pause(); |
| 970 | emu_thread->RequestStop(); | 985 | emu_thread->RequestStop(); |
| 971 | 986 | ||
| @@ -1567,6 +1582,8 @@ void GMainWindow::OnMenuRecentFile() { | |||
| 1567 | } | 1582 | } |
| 1568 | 1583 | ||
| 1569 | void GMainWindow::OnStartGame() { | 1584 | void GMainWindow::OnStartGame() { |
| 1585 | PreventOSSleep(); | ||
| 1586 | |||
| 1570 | emu_thread->SetRunning(true); | 1587 | emu_thread->SetRunning(true); |
| 1571 | 1588 | ||
| 1572 | qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>( | 1589 | qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>( |
| @@ -1598,6 +1615,8 @@ void GMainWindow::OnPauseGame() { | |||
| 1598 | ui.action_Pause->setEnabled(false); | 1615 | ui.action_Pause->setEnabled(false); |
| 1599 | ui.action_Stop->setEnabled(true); | 1616 | ui.action_Stop->setEnabled(true); |
| 1600 | ui.action_Capture_Screenshot->setEnabled(false); | 1617 | ui.action_Capture_Screenshot->setEnabled(false); |
| 1618 | |||
| 1619 | AllowOSSleep(); | ||
| 1601 | } | 1620 | } |
| 1602 | 1621 | ||
| 1603 | void GMainWindow::OnStopGame() { | 1622 | void GMainWindow::OnStopGame() { |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 1137bbc7a..501608ddc 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -130,6 +130,9 @@ private: | |||
| 130 | void ConnectWidgetEvents(); | 130 | void ConnectWidgetEvents(); |
| 131 | void ConnectMenuEvents(); | 131 | void ConnectMenuEvents(); |
| 132 | 132 | ||
| 133 | void PreventOSSleep(); | ||
| 134 | void AllowOSSleep(); | ||
| 135 | |||
| 133 | QStringList GetUnsupportedGLExtensions(); | 136 | QStringList GetUnsupportedGLExtensions(); |
| 134 | bool LoadROM(const QString& filename); | 137 | bool LoadROM(const QString& filename); |
| 135 | void BootGame(const QString& filename); | 138 | void BootGame(const QString& filename); |
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index b589c3de3..0ee97aa54 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp | |||
| @@ -92,7 +92,6 @@ int main(int argc, char** argv) { | |||
| 92 | 92 | ||
| 93 | int option_index = 0; | 93 | int option_index = 0; |
| 94 | 94 | ||
| 95 | char* endarg; | ||
| 96 | #ifdef _WIN32 | 95 | #ifdef _WIN32 |
| 97 | int argc_w; | 96 | int argc_w; |
| 98 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); | 97 | auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); |
| @@ -226,7 +225,7 @@ int main(int argc, char** argv) { | |||
| 226 | 225 | ||
| 227 | switch (load_result) { | 226 | switch (load_result) { |
| 228 | case Core::System::ResultStatus::ErrorGetLoader: | 227 | case Core::System::ResultStatus::ErrorGetLoader: |
| 229 | LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!", filepath.c_str()); | 228 | LOG_CRITICAL(Frontend, "Failed to obtain loader for {}!", filepath); |
| 230 | return -1; | 229 | return -1; |
| 231 | case Core::System::ResultStatus::ErrorLoader: | 230 | case Core::System::ResultStatus::ErrorLoader: |
| 232 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); | 231 | LOG_CRITICAL(Frontend, "Failed to load ROM!"); |