Diffstat (limited to 'src')
81 files changed, 5208 insertions, 646 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9b0c3db68..9afc6105d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
| @@ -15,6 +15,10 @@ endif () | |||
| 15 | if (DEFINED ENV{DISPLAYVERSION}) | 15 | if (DEFINED ENV{DISPLAYVERSION}) |
| 16 | set(DISPLAY_VERSION $ENV{DISPLAYVERSION}) | 16 | set(DISPLAY_VERSION $ENV{DISPLAYVERSION}) |
| 17 | endif () | 17 | endif () |
| 18 | |||
| 19 | # Pass the path to git to the GenerateSCMRev.cmake as well | ||
| 20 | find_package(Git QUIET) | ||
| 21 | |||
| 18 | add_custom_command(OUTPUT scm_rev.cpp | 22 | add_custom_command(OUTPUT scm_rev.cpp |
| 19 | COMMAND ${CMAKE_COMMAND} | 23 | COMMAND ${CMAKE_COMMAND} |
| 20 | -DSRC_DIR="${CMAKE_SOURCE_DIR}" | 24 | -DSRC_DIR="${CMAKE_SOURCE_DIR}" |
| @@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 23 | -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}" | 27 | -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}" |
| 24 | -DBUILD_TAG="${BUILD_TAG}" | 28 | -DBUILD_TAG="${BUILD_TAG}" |
| 25 | -DBUILD_ID="${DISPLAY_VERSION}" | 29 | -DBUILD_ID="${DISPLAY_VERSION}" |
| 30 | -DGIT_EXECUTABLE="${GIT_EXECUTABLE}" | ||
| 26 | -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake" | 31 | -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake" |
| 27 | DEPENDS | 32 | DEPENDS |
| 28 | # WARNING! It was too much work to try and make a common location for this list, | 33 | # WARNING! It was too much work to try and make a common location for this list, |
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index f53a8d193..200c6489a 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
| @@ -44,20 +44,6 @@ template class Field<std::string>; | |||
| 44 | template class Field<const char*>; | 44 | template class Field<const char*>; |
| 45 | template class Field<std::chrono::microseconds>; | 45 | template class Field<std::chrono::microseconds>; |
| 46 | 46 | ||
| 47 | #ifdef ARCHITECTURE_x86_64 | ||
| 48 | static const char* CpuVendorToStr(Common::CPUVendor vendor) { | ||
| 49 | switch (vendor) { | ||
| 50 | case Common::CPUVendor::INTEL: | ||
| 51 | return "Intel"; | ||
| 52 | case Common::CPUVendor::AMD: | ||
| 53 | return "Amd"; | ||
| 54 | case Common::CPUVendor::OTHER: | ||
| 55 | return "Other"; | ||
| 56 | } | ||
| 57 | UNREACHABLE(); | ||
| 58 | } | ||
| 59 | #endif | ||
| 60 | |||
| 61 | void AppendBuildInfo(FieldCollection& fc) { | 47 | void AppendBuildInfo(FieldCollection& fc) { |
| 62 | const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr}; | 48 | const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr}; |
| 63 | fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty); | 49 | fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty); |
| @@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) { | |||
| 71 | #ifdef ARCHITECTURE_x86_64 | 57 | #ifdef ARCHITECTURE_x86_64 |
| 72 | fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string); | 58 | fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string); |
| 73 | fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string); | 59 | fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string); |
| 74 | fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor)); | ||
| 75 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); | 60 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); |
| 76 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); | 61 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); |
| 77 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); | 62 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); |
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 2dfcd39c8..c9349a6b4 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
| @@ -3,8 +3,6 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <string> | ||
| 7 | #include <thread> | ||
| 8 | #include "common/common_types.h" | 6 | #include "common/common_types.h" |
| 9 | #include "common/x64/cpu_detect.h" | 7 | #include "common/x64/cpu_detect.h" |
| 10 | 8 | ||
| @@ -51,8 +49,6 @@ namespace Common { | |||
| 51 | static CPUCaps Detect() { | 49 | static CPUCaps Detect() { |
| 52 | CPUCaps caps = {}; | 50 | CPUCaps caps = {}; |
| 53 | 51 | ||
| 54 | caps.num_cores = std::thread::hardware_concurrency(); | ||
| 55 | |||
| 56 | // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support | 52 | // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support |
| 57 | // yuzu at all anyway | 53 | // yuzu at all anyway |
| 58 | 54 | ||
| @@ -70,12 +66,6 @@ static CPUCaps Detect() { | |||
| 70 | __cpuid(cpu_id, 0x80000000); | 66 | __cpuid(cpu_id, 0x80000000); |
| 71 | 67 | ||
| 72 | u32 max_ex_fn = cpu_id[0]; | 68 | u32 max_ex_fn = cpu_id[0]; |
| 73 | if (!strcmp(caps.brand_string, "GenuineIntel")) | ||
| 74 | caps.vendor = CPUVendor::INTEL; | ||
| 75 | else if (!strcmp(caps.brand_string, "AuthenticAMD")) | ||
| 76 | caps.vendor = CPUVendor::AMD; | ||
| 77 | else | ||
| 78 | caps.vendor = CPUVendor::OTHER; | ||
| 79 | 69 | ||
| 80 | // Set reasonable default brand string even if brand string not available | 70 | // Set reasonable default brand string even if brand string not available |
| 81 | strcpy(caps.cpu_string, caps.brand_string); | 71 | strcpy(caps.cpu_string, caps.brand_string); |
| @@ -96,15 +86,9 @@ static CPUCaps Detect() { | |||
| 96 | caps.sse4_1 = true; | 86 | caps.sse4_1 = true; |
| 97 | if ((cpu_id[2] >> 20) & 1) | 87 | if ((cpu_id[2] >> 20) & 1) |
| 98 | caps.sse4_2 = true; | 88 | caps.sse4_2 = true; |
| 99 | if ((cpu_id[2] >> 22) & 1) | ||
| 100 | caps.movbe = true; | ||
| 101 | if ((cpu_id[2] >> 25) & 1) | 89 | if ((cpu_id[2] >> 25) & 1) |
| 102 | caps.aes = true; | 90 | caps.aes = true; |
| 103 | 91 | ||
| 104 | if ((cpu_id[3] >> 24) & 1) { | ||
| 105 | caps.fxsave_fxrstor = true; | ||
| 106 | } | ||
| 107 | |||
| 108 | // AVX support requires 3 separate checks: | 92 | // AVX support requires 3 separate checks: |
| 109 | // - Is the AVX bit set in CPUID? | 93 | // - Is the AVX bit set in CPUID? |
| 110 | // - Is the XSAVE bit set in CPUID? | 94 | // - Is the XSAVE bit set in CPUID? |
| @@ -129,8 +113,6 @@ static CPUCaps Detect() { | |||
| 129 | } | 113 | } |
| 130 | } | 114 | } |
| 131 | 115 | ||
| 132 | caps.flush_to_zero = caps.sse; | ||
| 133 | |||
| 134 | if (max_ex_fn >= 0x80000004) { | 116 | if (max_ex_fn >= 0x80000004) { |
| 135 | // Extract CPU model string | 117 | // Extract CPU model string |
| 136 | __cpuid(cpu_id, 0x80000002); | 118 | __cpuid(cpu_id, 0x80000002); |
| @@ -144,14 +126,8 @@ static CPUCaps Detect() { | |||
| 144 | if (max_ex_fn >= 0x80000001) { | 126 | if (max_ex_fn >= 0x80000001) { |
| 145 | // Check for more features | 127 | // Check for more features |
| 146 | __cpuid(cpu_id, 0x80000001); | 128 | __cpuid(cpu_id, 0x80000001); |
| 147 | if (cpu_id[2] & 1) | ||
| 148 | caps.lahf_sahf_64 = true; | ||
| 149 | if ((cpu_id[2] >> 5) & 1) | ||
| 150 | caps.lzcnt = true; | ||
| 151 | if ((cpu_id[2] >> 16) & 1) | 129 | if ((cpu_id[2] >> 16) & 1) |
| 152 | caps.fma4 = true; | 130 | caps.fma4 = true; |
| 153 | if ((cpu_id[3] >> 29) & 1) | ||
| 154 | caps.long_mode = true; | ||
| 155 | } | 131 | } |
| 156 | 132 | ||
| 157 | return caps; | 133 | return caps; |
| @@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() { | |||
| 162 | return caps; | 138 | return caps; |
| 163 | } | 139 | } |
| 164 | 140 | ||
| 165 | std::string GetCPUCapsString() { | ||
| 166 | auto caps = GetCPUCaps(); | ||
| 167 | |||
| 168 | std::string sum(caps.cpu_string); | ||
| 169 | sum += " ("; | ||
| 170 | sum += caps.brand_string; | ||
| 171 | sum += ")"; | ||
| 172 | |||
| 173 | if (caps.sse) | ||
| 174 | sum += ", SSE"; | ||
| 175 | if (caps.sse2) { | ||
| 176 | sum += ", SSE2"; | ||
| 177 | if (!caps.flush_to_zero) | ||
| 178 | sum += " (without DAZ)"; | ||
| 179 | } | ||
| 180 | |||
| 181 | if (caps.sse3) | ||
| 182 | sum += ", SSE3"; | ||
| 183 | if (caps.ssse3) | ||
| 184 | sum += ", SSSE3"; | ||
| 185 | if (caps.sse4_1) | ||
| 186 | sum += ", SSE4.1"; | ||
| 187 | if (caps.sse4_2) | ||
| 188 | sum += ", SSE4.2"; | ||
| 189 | if (caps.avx) | ||
| 190 | sum += ", AVX"; | ||
| 191 | if (caps.avx2) | ||
| 192 | sum += ", AVX2"; | ||
| 193 | if (caps.bmi1) | ||
| 194 | sum += ", BMI1"; | ||
| 195 | if (caps.bmi2) | ||
| 196 | sum += ", BMI2"; | ||
| 197 | if (caps.fma) | ||
| 198 | sum += ", FMA"; | ||
| 199 | if (caps.aes) | ||
| 200 | sum += ", AES"; | ||
| 201 | if (caps.movbe) | ||
| 202 | sum += ", MOVBE"; | ||
| 203 | if (caps.long_mode) | ||
| 204 | sum += ", 64-bit support"; | ||
| 205 | |||
| 206 | return sum; | ||
| 207 | } | ||
| 208 | |||
| 209 | } // namespace Common | 141 | } // namespace Common |
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 0af3a8adb..20f2ba234 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
| @@ -4,23 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <string> | ||
| 8 | |||
| 9 | namespace Common { | 7 | namespace Common { |
| 10 | 8 | ||
| 11 | /// x86/x64 CPU vendors that may be detected by this module | ||
| 12 | enum class CPUVendor { | ||
| 13 | INTEL, | ||
| 14 | AMD, | ||
| 15 | OTHER, | ||
| 16 | }; | ||
| 17 | |||
| 18 | /// x86/x64 CPU capabilities that may be detected by this module | 9 | /// x86/x64 CPU capabilities that may be detected by this module |
| 19 | struct CPUCaps { | 10 | struct CPUCaps { |
| 20 | CPUVendor vendor; | ||
| 21 | char cpu_string[0x21]; | 11 | char cpu_string[0x21]; |
| 22 | char brand_string[0x41]; | 12 | char brand_string[0x41]; |
| 23 | int num_cores; | ||
| 24 | bool sse; | 13 | bool sse; |
| 25 | bool sse2; | 14 | bool sse2; |
| 26 | bool sse3; | 15 | bool sse3; |
| @@ -35,20 +24,6 @@ struct CPUCaps { | |||
| 35 | bool fma; | 24 | bool fma; |
| 36 | bool fma4; | 25 | bool fma4; |
| 37 | bool aes; | 26 | bool aes; |
| 38 | |||
| 39 | // Support for the FXSAVE and FXRSTOR instructions | ||
| 40 | bool fxsave_fxrstor; | ||
| 41 | |||
| 42 | bool movbe; | ||
| 43 | |||
| 44 | // This flag indicates that the hardware supports some mode in which denormal inputs and outputs | ||
| 45 | // are automatically set to (signed) zero. | ||
| 46 | bool flush_to_zero; | ||
| 47 | |||
| 48 | // Support for LAHF and SAHF instructions in 64-bit mode | ||
| 49 | bool lahf_sahf_64; | ||
| 50 | |||
| 51 | bool long_mode; | ||
| 52 | }; | 27 | }; |
| 53 | 28 | ||
| 54 | /** | 29 | /** |
| @@ -57,10 +32,4 @@ struct CPUCaps { | |||
| 57 | */ | 32 | */ |
| 58 | const CPUCaps& GetCPUCaps(); | 33 | const CPUCaps& GetCPUCaps(); |
| 59 | 34 | ||
| 60 | /** | ||
| 61 | * Gets a string summary of the name and supported capabilities of the host CPU | ||
| 62 | * @return String summary | ||
| 63 | */ | ||
| 64 | std::string GetCPUCapsString(); | ||
| 65 | |||
| 66 | } // namespace Common | 35 | } // namespace Common |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index f8c7f0efd..e825c0526 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
| @@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag | |||
| 141 | config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); | 141 | config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); |
| 142 | config.page_table_address_space_bits = address_space_bits; | 142 | config.page_table_address_space_bits = address_space_bits; |
| 143 | config.silently_mirror_page_table = false; | 143 | config.silently_mirror_page_table = false; |
| 144 | config.absolute_offset_page_table = true; | ||
| 144 | 145 | ||
| 145 | // Multi-process state | 146 | // Multi-process state |
| 146 | config.processor_id = core_index; | 147 | config.processor_id = core_index; |
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
index 090565310..b689e8e8b 100644
--- a/src/core/hle/kernel/physical_memory.h
+++ b/src/core/hle/kernel/physical_memory.h
| @@ -14,6 +14,9 @@ namespace Kernel { | |||
| 14 | // - Second to ensure all host backing memory used is aligned to 256 bytes due | 14 | // - Second to ensure all host backing memory used is aligned to 256 bytes due |
| 15 | // to strict alignment restrictions on GPU memory. | 15 | // to strict alignment restrictions on GPU memory. |
| 16 | 16 | ||
| 17 | using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; | 17 | using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; |
| 18 | class PhysicalMemory final : public PhysicalMemoryVector { | ||
| 19 | using PhysicalMemoryVector::PhysicalMemoryVector; | ||
| 20 | }; | ||
| 18 | 21 | ||
| 19 | } // namespace Kernel | 22 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 12ea4ebe3..b9035a0be 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
| @@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) { | |||
| 317 | } | 317 | } |
| 318 | 318 | ||
| 319 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { | 319 | void Process::LoadModule(CodeSet module_, VAddr base_addr) { |
| 320 | code_memory_size += module_.memory.size(); | ||
| 321 | |||
| 320 | const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); | 322 | const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory)); |
| 321 | 323 | ||
| 322 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, | 324 | const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, |
| @@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { | |||
| 332 | MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code); | 334 | MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code); |
| 333 | MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData); | 335 | MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData); |
| 334 | MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); | 336 | MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); |
| 335 | |||
| 336 | code_memory_size += module_.memory.size(); | ||
| 337 | } | 337 | } |
| 338 | 338 | ||
| 339 | Process::Process(Core::System& system) | 339 | Process::Process(Core::System& system) |
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index a9a20ef76..0b3500fce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstring> | ||
| 6 | #include <iterator> | 7 | #include <iterator> |
| 7 | #include <utility> | 8 | #include <utility> |
| 8 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| @@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { | |||
| 269 | // If necessary, expand backing vector to cover new heap extents in | 270 | // If necessary, expand backing vector to cover new heap extents in |
| 270 | // the case of allocating. Otherwise, shrink the backing memory, | 271 | // the case of allocating. Otherwise, shrink the backing memory, |
| 271 | // if a smaller heap has been requested. | 272 | // if a smaller heap has been requested. |
| 272 | const u64 old_heap_size = GetCurrentHeapSize(); | 273 | heap_memory->resize(size); |
| 273 | if (size > old_heap_size) { | 274 | heap_memory->shrink_to_fit(); |
| 274 | const u64 alloc_size = size - old_heap_size; | 275 | RefreshMemoryBlockMappings(heap_memory.get()); |
| 275 | |||
| 276 | heap_memory->insert(heap_memory->end(), alloc_size, 0); | ||
| 277 | RefreshMemoryBlockMappings(heap_memory.get()); | ||
| 278 | } else if (size < old_heap_size) { | ||
| 279 | heap_memory->resize(size); | ||
| 280 | heap_memory->shrink_to_fit(); | ||
| 281 | |||
| 282 | RefreshMemoryBlockMappings(heap_memory.get()); | ||
| 283 | } | ||
| 284 | 276 | ||
| 285 | heap_end = heap_region_base + size; | 277 | heap_end = heap_region_base + size; |
| 286 | ASSERT(GetCurrentHeapSize() == heap_memory->size()); | 278 | ASSERT(GetCurrentHeapSize() == heap_memory->size()); |
| @@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre | |||
| 752 | // Always merge allocated memory blocks, even when they don't share the same backing block. | 744 | // Always merge allocated memory blocks, even when they don't share the same backing block. |
| 753 | if (left.type == VMAType::AllocatedMemoryBlock && | 745 | if (left.type == VMAType::AllocatedMemoryBlock && |
| 754 | (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { | 746 | (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { |
| 755 | const auto right_begin = right.backing_block->begin() + right.offset; | ||
| 756 | const auto right_end = right_begin + right.size; | ||
| 757 | 747 | ||
| 758 | // Check if we can save work. | 748 | // Check if we can save work. |
| 759 | if (left.offset == 0 && left.size == left.backing_block->size()) { | 749 | if (left.offset == 0 && left.size == left.backing_block->size()) { |
| 760 | // Fast case: left is an entire backing block. | 750 | // Fast case: left is an entire backing block. |
| 761 | left.backing_block->insert(left.backing_block->end(), right_begin, right_end); | 751 | left.backing_block->resize(left.size + right.size); |
| 752 | std::memcpy(left.backing_block->data() + left.size, | ||
| 753 | right.backing_block->data() + right.offset, right.size); | ||
| 762 | } else { | 754 | } else { |
| 763 | // Slow case: make a new memory block for left and right. | 755 | // Slow case: make a new memory block for left and right. |
| 764 | const auto left_begin = left.backing_block->begin() + left.offset; | ||
| 765 | const auto left_end = left_begin + left.size; | ||
| 766 | const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end)); | ||
| 767 | const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end)); | ||
| 768 | |||
| 769 | auto new_memory = std::make_shared<PhysicalMemory>(); | 756 | auto new_memory = std::make_shared<PhysicalMemory>(); |
| 770 | new_memory->reserve(left_size + right_size); | 757 | new_memory->resize(left.size + right.size); |
| 771 | new_memory->insert(new_memory->end(), left_begin, left_end); | 758 | std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size); |
| 772 | new_memory->insert(new_memory->end(), right_begin, right_end); | 759 | std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset, |
| 760 | right.size); | ||
| 773 | 761 | ||
| 774 | left.backing_block = std::move(new_memory); | 762 | left.backing_block = std::move(new_memory); |
| 775 | left.offset = 0; | 763 | left.offset = 0; |
| @@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | |||
| 792 | memory.UnmapRegion(page_table, vma.base, vma.size); | 780 | memory.UnmapRegion(page_table, vma.base, vma.size); |
| 793 | break; | 781 | break; |
| 794 | case VMAType::AllocatedMemoryBlock: | 782 | case VMAType::AllocatedMemoryBlock: |
| 795 | memory.MapMemoryRegion(page_table, vma.base, vma.size, | 783 | memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset); |
| 796 | vma.backing_block->data() + vma.offset); | ||
| 797 | break; | 784 | break; |
| 798 | case VMAType::BackingMemory: | 785 | case VMAType::BackingMemory: |
| 799 | memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory); | 786 | memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory); |
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 2e53b3221..767158444 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "core/hle/kernel/writable_event.h" | 9 | #include "core/hle/kernel/writable_event.h" |
| 10 | #include "core/hle/service/nifm/nifm.h" | 10 | #include "core/hle/service/nifm/nifm.h" |
| 11 | #include "core/hle/service/service.h" | 11 | #include "core/hle/service/service.h" |
| 12 | #include "core/settings.h" | ||
| 12 | 13 | ||
| 13 | namespace Service::NIFM { | 14 | namespace Service::NIFM { |
| 14 | 15 | ||
| @@ -86,7 +87,12 @@ private: | |||
| 86 | 87 | ||
| 87 | IPC::ResponseBuilder rb{ctx, 3}; | 88 | IPC::ResponseBuilder rb{ctx, 3}; |
| 88 | rb.Push(RESULT_SUCCESS); | 89 | rb.Push(RESULT_SUCCESS); |
| 89 | rb.PushEnum(RequestState::Connected); | 90 | |
| 91 | if (Settings::values.bcat_backend == "none") { | ||
| 92 | rb.PushEnum(RequestState::NotSubmitted); | ||
| 93 | } else { | ||
| 94 | rb.PushEnum(RequestState::Connected); | ||
| 95 | } | ||
| 90 | } | 96 | } |
| 91 | 97 | ||
| 92 | void GetResult(Kernel::HLERequestContext& ctx) { | 98 | void GetResult(Kernel::HLERequestContext& ctx) { |
| @@ -194,14 +200,22 @@ private: | |||
| 194 | 200 | ||
| 195 | IPC::ResponseBuilder rb{ctx, 3}; | 201 | IPC::ResponseBuilder rb{ctx, 3}; |
| 196 | rb.Push(RESULT_SUCCESS); | 202 | rb.Push(RESULT_SUCCESS); |
| 197 | rb.Push<u8>(1); | 203 | if (Settings::values.bcat_backend == "none") { |
| 204 | rb.Push<u8>(0); | ||
| 205 | } else { | ||
| 206 | rb.Push<u8>(1); | ||
| 207 | } | ||
| 198 | } | 208 | } |
| 199 | void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) { | 209 | void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) { |
| 200 | LOG_WARNING(Service_NIFM, "(STUBBED) called"); | 210 | LOG_WARNING(Service_NIFM, "(STUBBED) called"); |
| 201 | 211 | ||
| 202 | IPC::ResponseBuilder rb{ctx, 3}; | 212 | IPC::ResponseBuilder rb{ctx, 3}; |
| 203 | rb.Push(RESULT_SUCCESS); | 213 | rb.Push(RESULT_SUCCESS); |
| 204 | rb.Push<u8>(1); | 214 | if (Settings::values.bcat_backend == "none") { |
| 215 | rb.Push<u8>(0); | ||
| 216 | } else { | ||
| 217 | rb.Push<u8>(1); | ||
| 218 | } | ||
| 205 | } | 219 | } |
| 206 | Core::System& system; | 220 | Core::System& system; |
| 207 | }; | 221 | }; |
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 52623cf89..62752e419 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
| @@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { | |||
| 88 | return layer_id; | 88 | return layer_id; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | void NVFlinger::CloseLayer(u64 layer_id) { | ||
| 92 | for (auto& display : displays) { | ||
| 93 | display.CloseLayer(layer_id); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 91 | std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { | 97 | std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { |
| 92 | const auto* const layer = FindLayer(display_id, layer_id); | 98 | const auto* const layer = FindLayer(display_id, layer_id); |
| 93 | 99 | ||
| @@ -192,7 +198,7 @@ void NVFlinger::Compose() { | |||
| 192 | 198 | ||
| 193 | const auto& igbp_buffer = buffer->get().igbp_buffer; | 199 | const auto& igbp_buffer = buffer->get().igbp_buffer; |
| 194 | 200 | ||
| 195 | const auto& gpu = system.GPU(); | 201 | auto& gpu = system.GPU(); |
| 196 | const auto& multi_fence = buffer->get().multi_fence; | 202 | const auto& multi_fence = buffer->get().multi_fence; |
| 197 | for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { | 203 | for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { |
| 198 | const auto& fence = multi_fence.fences[fence_id]; | 204 | const auto& fence = multi_fence.fences[fence_id]; |
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index e3cc14bdc..57a21f33b 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
| @@ -54,6 +54,9 @@ public: | |||
| 54 | /// If an invalid display ID is specified, then an empty optional is returned. | 54 | /// If an invalid display ID is specified, then an empty optional is returned. |
| 55 | std::optional<u64> CreateLayer(u64 display_id); | 55 | std::optional<u64> CreateLayer(u64 display_id); |
| 56 | 56 | ||
| 57 | /// Closes a layer on all displays for the given layer ID. | ||
| 58 | void CloseLayer(u64 layer_id); | ||
| 59 | |||
| 57 | /// Finds the buffer queue ID of the specified layer in the specified display. | 60 | /// Finds the buffer queue ID of the specified layer in the specified display. |
| 58 | /// | 61 | /// |
| 59 | /// If an invalid display ID or layer ID is provided, then an empty optional is returned. | 62 | /// If an invalid display ID or layer ID is provided, then an empty optional is returned. |
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index cd18c1610..5a202ac81 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
| @@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{ | |||
| 24 | Display::~Display() = default; | 24 | Display::~Display() = default; |
| 25 | 25 | ||
| 26 | Layer& Display::GetLayer(std::size_t index) { | 26 | Layer& Display::GetLayer(std::size_t index) { |
| 27 | return layers.at(index); | 27 | return *layers.at(index); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | const Layer& Display::GetLayer(std::size_t index) const { | 30 | const Layer& Display::GetLayer(std::size_t index) const { |
| 31 | return layers.at(index); | 31 | return *layers.at(index); |
| 32 | } | 32 | } |
| 33 | 33 | ||
| 34 | std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const { | 34 | std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const { |
| @@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) { | |||
| 43 | // TODO(Subv): Support more than 1 layer. | 43 | // TODO(Subv): Support more than 1 layer. |
| 44 | ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment"); | 44 | ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment"); |
| 45 | 45 | ||
| 46 | layers.emplace_back(id, buffer_queue); | 46 | layers.emplace_back(std::make_shared<Layer>(id, buffer_queue)); |
| 47 | } | ||
| 48 | |||
| 49 | void Display::CloseLayer(u64 id) { | ||
| 50 | layers.erase( | ||
| 51 | std::remove_if(layers.begin(), layers.end(), | ||
| 52 | [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }), | ||
| 53 | layers.end()); | ||
| 47 | } | 54 | } |
| 48 | 55 | ||
| 49 | Layer* Display::FindLayer(u64 id) { | 56 | Layer* Display::FindLayer(u64 id) { |
| 50 | const auto itr = std::find_if(layers.begin(), layers.end(), | 57 | const auto itr = |
| 51 | [id](const VI::Layer& layer) { return layer.GetID() == id; }); | 58 | std::find_if(layers.begin(), layers.end(), |
| 59 | [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }); | ||
| 52 | 60 | ||
| 53 | if (itr == layers.end()) { | 61 | if (itr == layers.end()) { |
| 54 | return nullptr; | 62 | return nullptr; |
| 55 | } | 63 | } |
| 56 | 64 | ||
| 57 | return &*itr; | 65 | return itr->get(); |
| 58 | } | 66 | } |
| 59 | 67 | ||
| 60 | const Layer* Display::FindLayer(u64 id) const { | 68 | const Layer* Display::FindLayer(u64 id) const { |
| 61 | const auto itr = std::find_if(layers.begin(), layers.end(), | 69 | const auto itr = |
| 62 | [id](const VI::Layer& layer) { return layer.GetID() == id; }); | 70 | std::find_if(layers.begin(), layers.end(), |
| 71 | [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }); | ||
| 63 | 72 | ||
| 64 | if (itr == layers.end()) { | 73 | if (itr == layers.end()) { |
| 65 | return nullptr; | 74 | return nullptr; |
| 66 | } | 75 | } |
| 67 | 76 | ||
| 68 | return &*itr; | 77 | return itr->get(); |
| 69 | } | 78 | } |
| 70 | 79 | ||
| 71 | } // namespace Service::VI | 80 | } // namespace Service::VI |
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
index 8bb966a85..a3855d8cd 100644
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | ||
| 7 | #include <string> | 8 | #include <string> |
| 8 | #include <vector> | 9 | #include <vector> |
| 9 | 10 | ||
| @@ -69,6 +70,12 @@ public: | |||
| 69 | /// | 70 | /// |
| 70 | void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue); | 71 | void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue); |
| 71 | 72 | ||
| 73 | /// Closes and removes a layer from this display with the given ID. | ||
| 74 | /// | ||
| 75 | /// @param id The ID assigned to the layer to close. | ||
| 76 | /// | ||
| 77 | void CloseLayer(u64 id); | ||
| 78 | |||
| 72 | /// Attempts to find a layer with the given ID. | 79 | /// Attempts to find a layer with the given ID. |
| 73 | /// | 80 | /// |
| 74 | /// @param id The layer ID. | 81 | /// @param id The layer ID. |
| @@ -91,7 +98,7 @@ private: | |||
| 91 | u64 id; | 98 | u64 id; |
| 92 | std::string name; | 99 | std::string name; |
| 93 | 100 | ||
| 94 | std::vector<Layer> layers; | 101 | std::vector<std::shared_ptr<Layer>> layers; |
| 95 | Kernel::EventPair vsync_event; | 102 | Kernel::EventPair vsync_event; |
| 96 | }; | 103 | }; |
| 97 | 104 | ||
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 651c89dc0..519da74e0 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
| @@ -1066,6 +1066,18 @@ private: | |||
| 1066 | rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); | 1066 | rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); |
| 1067 | } | 1067 | } |
| 1068 | 1068 | ||
| 1069 | void CloseLayer(Kernel::HLERequestContext& ctx) { | ||
| 1070 | IPC::RequestParser rp{ctx}; | ||
| 1071 | const auto layer_id{rp.Pop<u64>()}; | ||
| 1072 | |||
| 1073 | LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id); | ||
| 1074 | |||
| 1075 | nv_flinger->CloseLayer(layer_id); | ||
| 1076 | |||
| 1077 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 1078 | rb.Push(RESULT_SUCCESS); | ||
| 1079 | } | ||
| 1080 | |||
| 1069 | void CreateStrayLayer(Kernel::HLERequestContext& ctx) { | 1081 | void CreateStrayLayer(Kernel::HLERequestContext& ctx) { |
| 1070 | IPC::RequestParser rp{ctx}; | 1082 | IPC::RequestParser rp{ctx}; |
| 1071 | const u32 flags = rp.Pop<u32>(); | 1083 | const u32 flags = rp.Pop<u32>(); |
| @@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService( | |||
| 1178 | {1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"}, | 1190 | {1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"}, |
| 1179 | {1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"}, | 1191 | {1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"}, |
| 1180 | {2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"}, | 1192 | {2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"}, |
| 1181 | {2021, nullptr, "CloseLayer"}, | 1193 | {2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"}, |
| 1182 | {2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"}, | 1194 | {2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"}, |
| 1183 | {2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"}, | 1195 | {2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"}, |
| 1184 | {2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"}, | 1196 | {2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"}, |
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index f1795fdd6..8908e5328 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
| @@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) { | |||
| 335 | codeset_segment->addr = segment_addr; | 335 | codeset_segment->addr = segment_addr; |
| 336 | codeset_segment->size = aligned_size; | 336 | codeset_segment->size = aligned_size; |
| 337 | 337 | ||
| 338 | memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz); | 338 | std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i), |
| 339 | p->p_filesz); | ||
| 339 | current_image_position += aligned_size; | 340 | current_image_position += aligned_size; |
| 340 | } | 341 | } |
| 341 | } | 342 | } |
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 474b55cb1..092103abe 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 5 | #include "core/file_sys/kernel_executable.h" | 6 | #include "core/file_sys/kernel_executable.h" |
| 6 | #include "core/file_sys/program_metadata.h" | 7 | #include "core/file_sys/program_metadata.h" |
| 7 | #include "core/gdbstub/gdbstub.h" | 8 | #include "core/gdbstub/gdbstub.h" |
| @@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) { | |||
| 76 | segment.addr = offset; | 77 | segment.addr = offset; |
| 77 | segment.offset = offset; | 78 | segment.offset = offset; |
| 78 | segment.size = PageAlignSize(static_cast<u32>(data.size())); | 79 | segment.size = PageAlignSize(static_cast<u32>(data.size())); |
| 79 | program_image.resize(offset); | 80 | program_image.resize(offset + data.size()); |
| 80 | program_image.insert(program_image.end(), data.begin(), data.end()); | 81 | std::memcpy(program_image.data() + offset, data.data(), data.size()); |
| 81 | }; | 82 | }; |
| 82 | 83 | ||
| 83 | load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset()); | 84 | load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset()); |
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index f629892ae..515c5accb 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cinttypes> | 5 | #include <cinttypes> |
| 6 | #include <cstring> | ||
| 6 | #include <vector> | 7 | #include <vector> |
| 7 | 8 | ||
| 8 | #include "common/common_funcs.h" | 9 | #include "common/common_funcs.h" |
| @@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, | |||
| 96 | if (nso_header.IsSegmentCompressed(i)) { | 97 | if (nso_header.IsSegmentCompressed(i)) { |
| 97 | data = DecompressSegment(data, nso_header.segments[i]); | 98 | data = DecompressSegment(data, nso_header.segments[i]); |
| 98 | } | 99 | } |
| 99 | program_image.resize(nso_header.segments[i].location); | 100 | program_image.resize(nso_header.segments[i].location + data.size()); |
| 100 | program_image.insert(program_image.end(), data.begin(), data.end()); | 101 | std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), |
| 102 | data.size()); | ||
| 101 | codeset.segments[i].addr = nso_header.segments[i].location; | 103 | codeset.segments[i].addr = nso_header.segments[i].location; |
| 102 | codeset.segments[i].offset = nso_header.segments[i].location; | 104 | codeset.segments[i].offset = nso_header.segments[i].location; |
| 103 | codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size())); | 105 | codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size())); |
| @@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, | |||
| 139 | std::vector<u8> pi_header; | 141 | std::vector<u8> pi_header; |
| 140 | pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header), | 142 | pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header), |
| 141 | reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader)); | 143 | reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader)); |
| 142 | pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(), | 144 | pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(), |
| 143 | program_image.end()); | 145 | program_image.data() + program_image.size()); |
| 144 | 146 | ||
| 145 | pi_header = pm->PatchNSO(pi_header, file.GetName()); | 147 | pi_header = pm->PatchNSO(pi_header, file.GetName()); |
| 146 | 148 | ||
| 147 | std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin()); | 149 | std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); |
| 148 | } | 150 | } |
| 149 | 151 | ||
| 150 | // Apply cheats if they exist and the program has a valid title ID | 152 | // Apply cheats if they exist and the program has a valid title ID |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 91bf07a92..f0888327f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/swap.h" | 14 | #include "common/swap.h" |
| 15 | #include "core/arm/arm_interface.h" | 15 | #include "core/arm/arm_interface.h" |
| 16 | #include "core/core.h" | 16 | #include "core/core.h" |
| 17 | #include "core/hle/kernel/physical_memory.h" | ||
| 17 | #include "core/hle/kernel/process.h" | 18 | #include "core/hle/kernel/process.h" |
| 18 | #include "core/hle/kernel/vm_manager.h" | 19 | #include "core/hle/kernel/vm_manager.h" |
| 19 | #include "core/memory.h" | 20 | #include "core/memory.h" |
| @@ -38,6 +39,11 @@ struct Memory::Impl { | |||
| 38 | system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); | 39 | system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 42 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 43 | Kernel::PhysicalMemory& memory, VAddr offset) { | ||
| 44 | MapMemoryRegion(page_table, base, size, memory.data() + offset); | ||
| 45 | } | ||
| 46 | |||
| 41 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { | 47 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { |
| 42 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); | 48 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); |
| 43 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); | 49 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); |
| @@ -146,7 +152,7 @@ struct Memory::Impl { | |||
| 146 | u8* GetPointer(const VAddr vaddr) { | 152 | u8* GetPointer(const VAddr vaddr) { |
| 147 | u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; | 153 | u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; |
| 148 | if (page_pointer != nullptr) { | 154 | if (page_pointer != nullptr) { |
| 149 | return page_pointer + (vaddr & PAGE_MASK); | 155 | return page_pointer + vaddr; |
| 150 | } | 156 | } |
| 151 | 157 | ||
| 152 | if (current_page_table->attributes[vaddr >> PAGE_BITS] == | 158 | if (current_page_table->attributes[vaddr >> PAGE_BITS] == |
| @@ -229,7 +235,8 @@ struct Memory::Impl { | |||
| 229 | case Common::PageType::Memory: { | 235 | case Common::PageType::Memory: { |
| 230 | DEBUG_ASSERT(page_table.pointers[page_index]); | 236 | DEBUG_ASSERT(page_table.pointers[page_index]); |
| 231 | 237 | ||
| 232 | const u8* const src_ptr = page_table.pointers[page_index] + page_offset; | 238 | const u8* const src_ptr = |
| 239 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 233 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 240 | std::memcpy(dest_buffer, src_ptr, copy_amount); |
| 234 | break; | 241 | break; |
| 235 | } | 242 | } |
| @@ -276,7 +283,8 @@ struct Memory::Impl { | |||
| 276 | case Common::PageType::Memory: { | 283 | case Common::PageType::Memory: { |
| 277 | DEBUG_ASSERT(page_table.pointers[page_index]); | 284 | DEBUG_ASSERT(page_table.pointers[page_index]); |
| 278 | 285 | ||
| 279 | u8* const dest_ptr = page_table.pointers[page_index] + page_offset; | 286 | u8* const dest_ptr = |
| 287 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 280 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 288 | std::memcpy(dest_ptr, src_buffer, copy_amount); |
| 281 | break; | 289 | break; |
| 282 | } | 290 | } |
| @@ -322,7 +330,8 @@ struct Memory::Impl { | |||
| 322 | case Common::PageType::Memory: { | 330 | case Common::PageType::Memory: { |
| 323 | DEBUG_ASSERT(page_table.pointers[page_index]); | 331 | DEBUG_ASSERT(page_table.pointers[page_index]); |
| 324 | 332 | ||
| 325 | u8* dest_ptr = page_table.pointers[page_index] + page_offset; | 333 | u8* dest_ptr = |
| 334 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 326 | std::memset(dest_ptr, 0, copy_amount); | 335 | std::memset(dest_ptr, 0, copy_amount); |
| 327 | break; | 336 | break; |
| 328 | } | 337 | } |
| @@ -368,7 +377,8 @@ struct Memory::Impl { | |||
| 368 | } | 377 | } |
| 369 | case Common::PageType::Memory: { | 378 | case Common::PageType::Memory: { |
| 370 | DEBUG_ASSERT(page_table.pointers[page_index]); | 379 | DEBUG_ASSERT(page_table.pointers[page_index]); |
| 371 | const u8* src_ptr = page_table.pointers[page_index] + page_offset; | 380 | const u8* src_ptr = |
| 381 | page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS); | ||
| 372 | WriteBlock(process, dest_addr, src_ptr, copy_amount); | 382 | WriteBlock(process, dest_addr, src_ptr, copy_amount); |
| 373 | break; | 383 | break; |
| 374 | } | 384 | } |
| @@ -446,7 +456,8 @@ struct Memory::Impl { | |||
| 446 | page_type = Common::PageType::Unmapped; | 456 | page_type = Common::PageType::Unmapped; |
| 447 | } else { | 457 | } else { |
| 448 | page_type = Common::PageType::Memory; | 458 | page_type = Common::PageType::Memory; |
| 449 | current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; | 459 | current_page_table->pointers[vaddr >> PAGE_BITS] = |
| 460 | pointer - (vaddr & ~PAGE_MASK); | ||
| 450 | } | 461 | } |
| 451 | break; | 462 | break; |
| 452 | } | 463 | } |
| @@ -493,7 +504,9 @@ struct Memory::Impl { | |||
| 493 | memory); | 504 | memory); |
| 494 | } else { | 505 | } else { |
| 495 | while (base != end) { | 506 | while (base != end) { |
| 496 | page_table.pointers[base] = memory; | 507 | page_table.pointers[base] = memory - (base << PAGE_BITS); |
| 508 | ASSERT_MSG(page_table.pointers[base], | ||
| 509 | "memory mapping base yield a nullptr within the table"); | ||
| 497 | 510 | ||
| 498 | base += 1; | 511 | base += 1; |
| 499 | memory += PAGE_SIZE; | 512 | memory += PAGE_SIZE; |
| @@ -518,7 +531,7 @@ struct Memory::Impl { | |||
| 518 | if (page_pointer != nullptr) { | 531 | if (page_pointer != nullptr) { |
| 519 | // NOTE: Avoid adding any extra logic to this fast-path block | 532 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 520 | T value; | 533 | T value; |
| 521 | std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); | 534 | std::memcpy(&value, &page_pointer[vaddr], sizeof(T)); |
| 522 | return value; | 535 | return value; |
| 523 | } | 536 | } |
| 524 | 537 | ||
| @@ -559,7 +572,7 @@ struct Memory::Impl { | |||
| 559 | u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; | 572 | u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; |
| 560 | if (page_pointer != nullptr) { | 573 | if (page_pointer != nullptr) { |
| 561 | // NOTE: Avoid adding any extra logic to this fast-path block | 574 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 562 | std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); | 575 | std::memcpy(&page_pointer[vaddr], &data, sizeof(T)); |
| 563 | return; | 576 | return; |
| 564 | } | 577 | } |
| 565 | 578 | ||
| @@ -594,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) { | |||
| 594 | impl->SetCurrentPageTable(process); | 607 | impl->SetCurrentPageTable(process); |
| 595 | } | 608 | } |
| 596 | 609 | ||
| 610 | void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 611 | Kernel::PhysicalMemory& memory, VAddr offset) { | ||
| 612 | impl->MapMemoryRegion(page_table, base, size, memory, offset); | ||
| 613 | } | ||
| 614 | |||
| 597 | void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { | 615 | void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { |
| 598 | impl->MapMemoryRegion(page_table, base, size, target); | 616 | impl->MapMemoryRegion(page_table, base, size, target); |
| 599 | } | 617 | } |
diff --git a/src/core/memory.h b/src/core/memory.h
index 1428a6d60..8913a9da4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
| @@ -19,8 +19,9 @@ class System; | |||
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | namespace Kernel { | 21 | namespace Kernel { |
| 22 | class PhysicalMemory; | ||
| 22 | class Process; | 23 | class Process; |
| 23 | } | 24 | } // namespace Kernel |
| 24 | 25 | ||
| 25 | namespace Memory { | 26 | namespace Memory { |
| 26 | 27 | ||
| @@ -66,6 +67,19 @@ public: | |||
| 66 | void SetCurrentPageTable(Kernel::Process& process); | 67 | void SetCurrentPageTable(Kernel::Process& process); |
| 67 | 68 | ||
| 68 | /** | 69 | /** |
| 70 | * Maps a physical buffer onto a region of the emulated process address space. | ||
| 71 | * | ||
| 72 | * @param page_table The page table of the emulated process. | ||
| 73 | * @param base The address to start mapping at. Must be page-aligned. | ||
| 74 | * @param size The amount of bytes to map. Must be page-aligned. | ||
| 75 | * @param memory Physical buffer with the memory backing the mapping. Must be of length | ||
| 76 | * at least `size + offset`. | ||
| 77 | * @param offset The offset within the physical memory. Must be page-aligned. | ||
| 78 | */ | ||
| 79 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, | ||
| 80 | Kernel::PhysicalMemory& memory, VAddr offset); | ||
| 81 | |||
| 82 | /** | ||
| 69 | * Maps an allocated buffer onto a region of the emulated process address space. | 83 | * Maps an allocated buffer onto a region of the emulated process address space. |
| 70 | * | 84 | * |
| 71 | * @param page_table The page table of the emulated process. | 85 | * @param page_table The page table of the emulated process. |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 65d7b9f93..12c46e86f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
| @@ -153,14 +153,29 @@ if (ENABLE_VULKAN) | |||
| 153 | renderer_vulkan/fixed_pipeline_state.h | 153 | renderer_vulkan/fixed_pipeline_state.h |
| 154 | renderer_vulkan/maxwell_to_vk.cpp | 154 | renderer_vulkan/maxwell_to_vk.cpp |
| 155 | renderer_vulkan/maxwell_to_vk.h | 155 | renderer_vulkan/maxwell_to_vk.h |
| 156 | renderer_vulkan/renderer_vulkan.h | ||
| 156 | renderer_vulkan/vk_buffer_cache.cpp | 157 | renderer_vulkan/vk_buffer_cache.cpp |
| 157 | renderer_vulkan/vk_buffer_cache.h | 158 | renderer_vulkan/vk_buffer_cache.h |
| 159 | renderer_vulkan/vk_compute_pass.cpp | ||
| 160 | renderer_vulkan/vk_compute_pass.h | ||
| 161 | renderer_vulkan/vk_compute_pipeline.cpp | ||
| 162 | renderer_vulkan/vk_compute_pipeline.h | ||
| 163 | renderer_vulkan/vk_descriptor_pool.cpp | ||
| 164 | renderer_vulkan/vk_descriptor_pool.h | ||
| 158 | renderer_vulkan/vk_device.cpp | 165 | renderer_vulkan/vk_device.cpp |
| 159 | renderer_vulkan/vk_device.h | 166 | renderer_vulkan/vk_device.h |
| 167 | renderer_vulkan/vk_graphics_pipeline.cpp | ||
| 168 | renderer_vulkan/vk_graphics_pipeline.h | ||
| 160 | renderer_vulkan/vk_image.cpp | 169 | renderer_vulkan/vk_image.cpp |
| 161 | renderer_vulkan/vk_image.h | 170 | renderer_vulkan/vk_image.h |
| 162 | renderer_vulkan/vk_memory_manager.cpp | 171 | renderer_vulkan/vk_memory_manager.cpp |
| 163 | renderer_vulkan/vk_memory_manager.h | 172 | renderer_vulkan/vk_memory_manager.h |
| 173 | renderer_vulkan/vk_pipeline_cache.cpp | ||
| 174 | renderer_vulkan/vk_pipeline_cache.h | ||
| 175 | renderer_vulkan/vk_rasterizer.cpp | ||
| 176 | renderer_vulkan/vk_rasterizer.h | ||
| 177 | renderer_vulkan/vk_renderpass_cache.cpp | ||
| 178 | renderer_vulkan/vk_renderpass_cache.h | ||
| 164 | renderer_vulkan/vk_resource_manager.cpp | 179 | renderer_vulkan/vk_resource_manager.cpp |
| 165 | renderer_vulkan/vk_resource_manager.h | 180 | renderer_vulkan/vk_resource_manager.h |
| 166 | renderer_vulkan/vk_sampler_cache.cpp | 181 | renderer_vulkan/vk_sampler_cache.cpp |
| @@ -169,12 +184,19 @@ if (ENABLE_VULKAN) | |||
| 169 | renderer_vulkan/vk_scheduler.h | 184 | renderer_vulkan/vk_scheduler.h |
| 170 | renderer_vulkan/vk_shader_decompiler.cpp | 185 | renderer_vulkan/vk_shader_decompiler.cpp |
| 171 | renderer_vulkan/vk_shader_decompiler.h | 186 | renderer_vulkan/vk_shader_decompiler.h |
| 187 | renderer_vulkan/vk_shader_util.cpp | ||
| 188 | renderer_vulkan/vk_shader_util.h | ||
| 172 | renderer_vulkan/vk_staging_buffer_pool.cpp | 189 | renderer_vulkan/vk_staging_buffer_pool.cpp |
| 173 | renderer_vulkan/vk_staging_buffer_pool.h | 190 | renderer_vulkan/vk_staging_buffer_pool.h |
| 174 | renderer_vulkan/vk_stream_buffer.cpp | 191 | renderer_vulkan/vk_stream_buffer.cpp |
| 175 | renderer_vulkan/vk_stream_buffer.h | 192 | renderer_vulkan/vk_stream_buffer.h |
| 176 | renderer_vulkan/vk_swapchain.cpp | 193 | renderer_vulkan/vk_swapchain.cpp |
| 177 | renderer_vulkan/vk_swapchain.h) | 194 | renderer_vulkan/vk_swapchain.h |
| 195 | renderer_vulkan/vk_texture_cache.cpp | ||
| 196 | renderer_vulkan/vk_texture_cache.h | ||
| 197 | renderer_vulkan/vk_update_descriptor.cpp | ||
| 198 | renderer_vulkan/vk_update_descriptor.h | ||
| 199 | ) | ||
| 178 | 200 | ||
| 179 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) | 201 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) |
| 180 | target_compile_definitions(video_core PRIVATE HAS_VULKAN) | 202 | target_compile_definitions(video_core PRIVATE HAS_VULKAN) |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index a35e7a195..ee79260fc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
| @@ -1018,7 +1018,14 @@ public: | |||
| 1018 | } | 1018 | } |
| 1019 | } instanced_arrays; | 1019 | } instanced_arrays; |
| 1020 | 1020 | ||
| 1021 | INSERT_UNION_PADDING_WORDS(0x6); | 1021 | INSERT_UNION_PADDING_WORDS(0x4); |
| 1022 | |||
| 1023 | union { | ||
| 1024 | BitField<0, 1, u32> enable; | ||
| 1025 | BitField<4, 8, u32> unk4; | ||
| 1026 | } vp_point_size; | ||
| 1027 | |||
| 1028 | INSERT_UNION_PADDING_WORDS(1); | ||
| 1022 | 1029 | ||
| 1023 | Cull cull; | 1030 | Cull cull; |
| 1024 | 1031 | ||
| @@ -1271,8 +1278,6 @@ public: | |||
| 1271 | 1278 | ||
| 1272 | } dirty{}; | 1279 | } dirty{}; |
| 1273 | 1280 | ||
| 1274 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1275 | |||
| 1276 | /// Reads a register value located at the input method address | 1281 | /// Reads a register value located at the input method address |
| 1277 | u32 GetRegisterValue(u32 method) const; | 1282 | u32 GetRegisterValue(u32 method) const; |
| 1278 | 1283 | ||
| @@ -1367,6 +1372,8 @@ private: | |||
| 1367 | 1372 | ||
| 1368 | bool execute_on{true}; | 1373 | bool execute_on{true}; |
| 1369 | 1374 | ||
| 1375 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||
| 1376 | |||
| 1370 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1377 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1371 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1378 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1372 | 1379 | ||
| @@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591); | |||
| 1503 | ASSERT_REG_POSITION(index_array, 0x5F2); | 1510 | ASSERT_REG_POSITION(index_array, 0x5F2); |
| 1504 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); | 1511 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); |
| 1505 | ASSERT_REG_POSITION(instanced_arrays, 0x620); | 1512 | ASSERT_REG_POSITION(instanced_arrays, 0x620); |
| 1513 | ASSERT_REG_POSITION(vp_point_size, 0x644); | ||
| 1506 | ASSERT_REG_POSITION(cull, 0x646); | 1514 | ASSERT_REG_POSITION(cull, 0x646); |
| 1507 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); | 1515 | ASSERT_REG_POSITION(pixel_center_integer, 0x649); |
| 1508 | ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); | 1516 | ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 57b57c647..6f98bd827 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
| @@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 { | |||
| 215 | Trunc = 11, | 215 | Trunc = 11, |
| 216 | }; | 216 | }; |
| 217 | 217 | ||
| 218 | enum class AtomicOp : u64 { | ||
| 219 | Add = 0, | ||
| 220 | Min = 1, | ||
| 221 | Max = 2, | ||
| 222 | Inc = 3, | ||
| 223 | Dec = 4, | ||
| 224 | And = 5, | ||
| 225 | Or = 6, | ||
| 226 | Xor = 7, | ||
| 227 | Exch = 8, | ||
| 228 | }; | ||
| 229 | |||
| 218 | enum class UniformType : u64 { | 230 | enum class UniformType : u64 { |
| 219 | UnsignedByte = 0, | 231 | UnsignedByte = 0, |
| 220 | SignedByte = 1, | 232 | SignedByte = 1, |
| @@ -236,6 +248,13 @@ enum class StoreType : u64 { | |||
| 236 | Bits128 = 6, | 248 | Bits128 = 6, |
| 237 | }; | 249 | }; |
| 238 | 250 | ||
| 251 | enum class AtomicType : u64 { | ||
| 252 | U32 = 0, | ||
| 253 | S32 = 1, | ||
| 254 | U64 = 2, | ||
| 255 | S64 = 3, | ||
| 256 | }; | ||
| 257 | |||
| 239 | enum class IMinMaxExchange : u64 { | 258 | enum class IMinMaxExchange : u64 { |
| 240 | None = 0, | 259 | None = 0, |
| 241 | XLo = 1, | 260 | XLo = 1, |
| @@ -939,6 +958,16 @@ union Instruction { | |||
| 939 | } stg; | 958 | } stg; |
| 940 | 959 | ||
| 941 | union { | 960 | union { |
| 961 | BitField<52, 4, AtomicOp> operation; | ||
| 962 | BitField<28, 2, AtomicType> type; | ||
| 963 | BitField<30, 22, s64> offset; | ||
| 964 | |||
| 965 | s32 GetImmediateOffset() const { | ||
| 966 | return static_cast<s32>(offset << 2); | ||
| 967 | } | ||
| 968 | } atoms; | ||
| 969 | |||
| 970 | union { | ||
| 942 | BitField<32, 1, PhysicalAttributeDirection> direction; | 971 | BitField<32, 1, PhysicalAttributeDirection> direction; |
| 943 | BitField<47, 3, AttributeSize> size; | 972 | BitField<47, 3, AttributeSize> size; |
| 944 | BitField<20, 11, u64> address; | 973 | BitField<20, 11, u64> address; |
| @@ -1659,9 +1688,10 @@ public: | |||
| 1659 | ST_A, | 1688 | ST_A, |
| 1660 | ST_L, | 1689 | ST_L, |
| 1661 | ST_S, | 1690 | ST_S, |
| 1662 | ST, // Store in generic memory | 1691 | ST, // Store in generic memory |
| 1663 | STG, // Store in global memory | 1692 | STG, // Store in global memory |
| 1664 | AL2P, // Transforms attribute memory into physical memory | 1693 | ATOMS, // Atomic operation on shared memory |
| 1694 | AL2P, // Transforms attribute memory into physical memory | ||
| 1665 | TEX, | 1695 | TEX, |
| 1666 | TEX_B, // Texture Load Bindless | 1696 | TEX_B, // Texture Load Bindless |
| 1667 | TXQ, // Texture Query | 1697 | TXQ, // Texture Query |
| @@ -1964,6 +1994,7 @@ private: | |||
| 1964 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | 1994 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), |
| 1965 | INST("101-------------", Id::ST, Type::Memory, "ST"), | 1995 | INST("101-------------", Id::ST, Type::Memory, "ST"), |
| 1966 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 1996 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 1997 | INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), | ||
| 1967 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | 1998 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), |
| 1968 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | 1999 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |
| 1969 | INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), | 2000 | INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 095660115..b9c5c41a2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 66 | return *dma_pusher; | 66 | return *dma_pusher; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | void GPU::WaitFence(u32 syncpoint_id, u32 value) const { | 69 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { |
| 70 | // A synced GPU is always in sync | 70 | // A synced GPU is always in sync |
| 71 | if (!is_async) { | 71 | if (!is_async) { |
| 72 | return; | 72 | return; |
| 73 | } | 73 | } |
| 74 | MICROPROFILE_SCOPE(GPU_wait); | 74 | MICROPROFILE_SCOPE(GPU_wait); |
| 75 | while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { | 75 | std::unique_lock lock{sync_mutex}; |
| 76 | } | 76 | sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; }); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | 79 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { |
| 80 | syncpoints[syncpoint_id]++; | 80 | syncpoints[syncpoint_id]++; |
| 81 | std::lock_guard lock{sync_mutex}; | 81 | std::lock_guard lock{sync_mutex}; |
| 82 | sync_cv.notify_all(); | ||
| 82 | if (!syncpt_interrupts[syncpoint_id].empty()) { | 83 | if (!syncpt_interrupts[syncpoint_id].empty()) { |
| 83 | u32 value = syncpoints[syncpoint_id].load(); | 84 | u32 value = syncpoints[syncpoint_id].load(); |
| 84 | auto it = syncpt_interrupts[syncpoint_id].begin(); | 85 | auto it = syncpt_interrupts[syncpoint_id].begin(); |
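The WaitFence change above replaces the busy-wait spin on the syncpoint counter with a condition variable that IncrementSyncPoint signals. A minimal standalone sketch of the same pattern, simplified to a single syncpoint outside the GPU class:

    #include <atomic>
    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    class SyncPoint {
    public:
        // Blocks until the syncpoint reaches at least `value`.
        void Wait(std::uint32_t value) {
            std::unique_lock lock{mutex};
            cv.wait(lock, [&] { return counter.load(std::memory_order_relaxed) >= value; });
        }

        // Called by the producer (e.g. the GPU thread) when work completes.
        void Increment() {
            counter.fetch_add(1, std::memory_order_relaxed);
            std::lock_guard lock{mutex};
            cv.notify_all();
        }

    private:
        std::atomic<std::uint32_t> counter{0};
        std::mutex mutex;
        std::condition_variable cv;
    };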
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ecc338ae9..b648317bb 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <condition_variable> | ||
| 9 | #include <list> | 10 | #include <list> |
| 10 | #include <memory> | 11 | #include <memory> |
| 11 | #include <mutex> | 12 | #include <mutex> |
| @@ -181,7 +182,7 @@ public: | |||
| 181 | virtual void WaitIdle() const = 0; | 182 | virtual void WaitIdle() const = 0; |
| 182 | 183 | ||
| 183 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | 184 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |
| 184 | void WaitFence(u32 syncpoint_id, u32 value) const; | 185 | void WaitFence(u32 syncpoint_id, u32 value); |
| 185 | 186 | ||
| 186 | void IncrementSyncPoint(u32 syncpoint_id); | 187 | void IncrementSyncPoint(u32 syncpoint_id); |
| 187 | 188 | ||
| @@ -312,6 +313,8 @@ private: | |||
| 312 | 313 | ||
| 313 | std::mutex sync_mutex; | 314 | std::mutex sync_mutex; |
| 314 | 315 | ||
| 316 | std::condition_variable sync_cv; | ||
| 317 | |||
| 315 | const bool is_async; | 318 | const bool is_async; |
| 316 | }; | 319 | }; |
| 317 | 320 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 672051102..c428f06e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1272 | const auto& regs = system.GPU().Maxwell3D().regs; | 1272 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 1273 | // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid | 1273 | // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid |
| 1274 | // in OpenGL). | 1274 | // in OpenGL). |
| 1275 | state.point.program_control = regs.vp_point_size.enable != 0; | ||
| 1275 | state.point.size = std::max(1.0f, regs.point_size); | 1276 | state.point.size = std::max(1.0f, regs.point_size); |
| 1276 | } | 1277 | } |
| 1277 | 1278 | ||
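SyncPointState now mirrors the guest's vp_point_size.enable flag into the tracked GL state alongside the clamped point size. Stripped of the state-caching layer, the resulting GL calls amount to roughly this (the parameter names are placeholders for the register values shown above):

    #include <algorithm>
    #include <glad/glad.h>

    // Applies guest point-size state to OpenGL.
    void ApplyPointState(bool shader_writes_point_size, float guest_point_size) {
        if (shader_writes_point_size) {
            glEnable(GL_PROGRAM_POINT_SIZE);   // let gl_PointSize from the vertex stage win
        } else {
            glDisable(GL_PROGRAM_POINT_SIZE);
        }
        // nouveau sometimes programs a size of 0, which is invalid in GL, so clamp to 1.
        glPointSize(std::max(1.0f, guest_point_size));
    }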
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index de742d11c..a4acb3796 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR; | |||
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| 36 | 36 | ||
| 37 | // One UBO is always reserved for emulation values on staged shaders | ||
| 38 | constexpr u32 STAGE_RESERVED_UBOS = 1; | ||
| 39 | |||
| 40 | constexpr u32 STAGE_MAIN_OFFSET = 10; | 37 | constexpr u32 STAGE_MAIN_OFFSET = 10; |
| 41 | constexpr u32 KERNEL_MAIN_OFFSET = 0; | 38 | constexpr u32 KERNEL_MAIN_OFFSET = 0; |
| 42 | 39 | ||
| @@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 243 | if (!code_b.empty()) { | 240 | if (!code_b.empty()) { |
| 244 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); | 241 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); |
| 245 | } | 242 | } |
| 246 | const auto entries = GLShader::GetEntries(ir); | ||
| 247 | 243 | ||
| 248 | std::string source = fmt::format(R"(// {} | 244 | std::string source = fmt::format(R"(// {} |
| 249 | #version 430 core | 245 | #version 430 core |
| @@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 314 | 310 | ||
| 315 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, | 311 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, |
| 316 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) | 312 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) |
| 317 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache}, | 313 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, |
| 318 | device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, | 314 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, |
| 319 | shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} { | 315 | unique_identifier{params.unique_identifier}, shader_type{shader_type}, |
| 316 | entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { | ||
| 320 | if (!params.precompiled_variants) { | 317 | if (!params.precompiled_variants) { |
| 321 | return; | 318 | return; |
| 322 | } | 319 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a311dbcfe..2996aaf08 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -751,6 +751,9 @@ private: | |||
| 751 | 751 | ||
| 752 | Expression Visit(const Node& node) { | 752 | Expression Visit(const Node& node) { |
| 753 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 753 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 754 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 755 | Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); | ||
| 756 | } | ||
| 754 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); | 757 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); |
| 755 | if (operation_index >= operation_decompilers.size()) { | 758 | if (operation_index >= operation_decompilers.size()) { |
| 756 | UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); | 759 | UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); |
| @@ -872,6 +875,9 @@ private: | |||
| 872 | } | 875 | } |
| 873 | 876 | ||
| 874 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 877 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 878 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 879 | Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); | ||
| 880 | } | ||
| 875 | // It's invalid to call conditional on nested nodes, use an operation instead | 881 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 876 | code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); | 882 | code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); |
| 877 | ++code.scope; | 883 | ++code.scope; |
| @@ -1850,6 +1856,16 @@ private: | |||
| 1850 | Type::Uint}; | 1856 | Type::Uint}; |
| 1851 | } | 1857 | } |
| 1852 | 1858 | ||
| 1859 | template <const std::string_view& opname, Type type> | ||
| 1860 | Expression Atomic(Operation operation) { | ||
| 1861 | ASSERT(stage == ShaderType::Compute); | ||
| 1862 | auto& smem = std::get<SmemNode>(*operation[0]); | ||
| 1863 | |||
| 1864 | return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), | ||
| 1865 | Visit(operation[1]).As(type)), | ||
| 1866 | type}; | ||
| 1867 | } | ||
| 1868 | |||
| 1853 | Expression Branch(Operation operation) { | 1869 | Expression Branch(Operation operation) { |
| 1854 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1870 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 1855 | UNIMPLEMENTED_IF(!target); | 1871 | UNIMPLEMENTED_IF(!target); |
| @@ -2188,6 +2204,8 @@ private: | |||
| 2188 | &GLSLDecompiler::AtomicImage<Func::Xor>, | 2204 | &GLSLDecompiler::AtomicImage<Func::Xor>, |
| 2189 | &GLSLDecompiler::AtomicImage<Func::Exchange>, | 2205 | &GLSLDecompiler::AtomicImage<Func::Exchange>, |
| 2190 | 2206 | ||
| 2207 | &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, | ||
| 2208 | |||
| 2191 | &GLSLDecompiler::Branch, | 2209 | &GLSLDecompiler::Branch, |
| 2192 | &GLSLDecompiler::BranchIndirect, | 2210 | &GLSLDecompiler::BranchIndirect, |
| 2193 | &GLSLDecompiler::PushFlowStack, | 2211 | &GLSLDecompiler::PushFlowStack, |
| @@ -2307,7 +2325,7 @@ public: | |||
| 2307 | explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} | 2325 | explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} |
| 2308 | 2326 | ||
| 2309 | void operator()(const ExprAnd& expr) { | 2327 | void operator()(const ExprAnd& expr) { |
| 2310 | inner += "( "; | 2328 | inner += '('; |
| 2311 | std::visit(*this, *expr.operand1); | 2329 | std::visit(*this, *expr.operand1); |
| 2312 | inner += " && "; | 2330 | inner += " && "; |
| 2313 | std::visit(*this, *expr.operand2); | 2331 | std::visit(*this, *expr.operand2); |
| @@ -2315,7 +2333,7 @@ public: | |||
| 2315 | } | 2333 | } |
| 2316 | 2334 | ||
| 2317 | void operator()(const ExprOr& expr) { | 2335 | void operator()(const ExprOr& expr) { |
| 2318 | inner += "( "; | 2336 | inner += '('; |
| 2319 | std::visit(*this, *expr.operand1); | 2337 | std::visit(*this, *expr.operand1); |
| 2320 | inner += " || "; | 2338 | inner += " || "; |
| 2321 | std::visit(*this, *expr.operand2); | 2339 | std::visit(*this, *expr.operand2); |
| @@ -2333,28 +2351,7 @@ public: | |||
| 2333 | } | 2351 | } |
| 2334 | 2352 | ||
| 2335 | void operator()(const ExprCondCode& expr) { | 2353 | void operator()(const ExprCondCode& expr) { |
| 2336 | const Node cc = decomp.ir.GetConditionCode(expr.cc); | 2354 | inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); |
| 2337 | std::string target; | ||
| 2338 | |||
| 2339 | if (const auto pred = std::get_if<PredicateNode>(&*cc)) { | ||
| 2340 | const auto index = pred->GetIndex(); | ||
| 2341 | switch (index) { | ||
| 2342 | case Tegra::Shader::Pred::NeverExecute: | ||
| 2343 | target = "false"; | ||
| 2344 | break; | ||
| 2345 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 2346 | target = "true"; | ||
| 2347 | break; | ||
| 2348 | default: | ||
| 2349 | target = decomp.GetPredicate(index); | ||
| 2350 | break; | ||
| 2351 | } | ||
| 2352 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { | ||
| 2353 | target = decomp.GetInternalFlag(flag->GetFlag()); | ||
| 2354 | } else { | ||
| 2355 | UNREACHABLE(); | ||
| 2356 | } | ||
| 2357 | inner += target; | ||
| 2358 | } | 2355 | } |
| 2359 | 2356 | ||
| 2360 | void operator()(const ExprVar& expr) { | 2357 | void operator()(const ExprVar& expr) { |
| @@ -2366,8 +2363,7 @@ public: | |||
| 2366 | } | 2363 | } |
| 2367 | 2364 | ||
| 2368 | void operator()(VideoCommon::Shader::ExprGprEqual& expr) { | 2365 | void operator()(VideoCommon::Shader::ExprGprEqual& expr) { |
| 2369 | inner += | 2366 | inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); |
| 2370 | "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')'; | ||
| 2371 | } | 2367 | } |
| 2372 | 2368 | ||
| 2373 | const std::string& GetResult() const { | 2369 | const std::string& GetResult() const { |
| @@ -2375,8 +2371,8 @@ public: | |||
| 2375 | } | 2371 | } |
| 2376 | 2372 | ||
| 2377 | private: | 2373 | private: |
| 2378 | std::string inner; | ||
| 2379 | GLSLDecompiler& decomp; | 2374 | GLSLDecompiler& decomp; |
| 2375 | std::string inner; | ||
| 2380 | }; | 2376 | }; |
| 2381 | 2377 | ||
| 2382 | class ASTDecompiler { | 2378 | class ASTDecompiler { |
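The new Atomic helper in the decompiler emits a GLSL atomic* builtin over the shared-memory array, indexing it by the byte address divided by four. A condensed sketch of that string generation with fmt, with the surrounding Expression/Visit machinery omitted and the operands reduced to plain strings:

    #include <string>
    #include <string_view>
    #include <fmt/format.h>

    // Builds e.g. "atomicAdd(smem[addr >> 2], value)" for a shared-memory atomic.
    std::string MakeSharedAtomic(std::string_view opname, std::string_view address,
                                 std::string_view value) {
        // smem is declared as a uint array, so byte addresses become word indices.
        return fmt::format("atomic{}(smem[{} >> 2], {})", opname, address, value);
    }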
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index df2e2395a..cc185e9e1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() { | |||
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | void OpenGLState::ApplyPointSize() { | 129 | void OpenGLState::ApplyPointSize() { |
| 130 | Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); | ||
| 130 | if (UpdateValue(cur_state.point.size, point.size)) { | 131 | if (UpdateValue(cur_state.point.size, point.size)) { |
| 131 | glPointSize(point.size); | 132 | glPointSize(point.size); |
| 132 | } | 133 | } |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fb180f302..678e5cd89 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -131,7 +131,8 @@ public: | |||
| 131 | std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; | 131 | std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; |
| 132 | 132 | ||
| 133 | struct { | 133 | struct { |
| 134 | float size = 1.0f; // GL_POINT_SIZE | 134 | bool program_control = false; // GL_PROGRAM_POINT_SIZE |
| 135 | GLfloat size = 1.0f; // GL_POINT_SIZE | ||
| 135 | } point; | 136 | } point; |
| 136 | 137 | ||
| 137 | struct { | 138 | struct { |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b790b0ef4..e95eb069e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -44,7 +44,7 @@ struct FormatTuple { | |||
| 44 | 44 | ||
| 45 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | 45 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ |
| 46 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U | 46 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U |
| 47 | {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S | 47 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S |
| 48 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI | 48 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI |
| 49 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U | 49 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U |
| 50 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U | 50 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U |
| @@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 83 | {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F | 83 | {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F |
| 84 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB | 84 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB |
| 85 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U | 85 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U |
| 86 | {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S | 86 | {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S |
| 87 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI | 87 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI |
| 88 | {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F | 88 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F |
| 89 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI | 89 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI |
| 90 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 | 90 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 |
| 91 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 | 91 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 |
| @@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | |||
| 253 | glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); | 253 | glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); |
| 254 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | 254 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); |
| 255 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); | 255 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); |
| 256 | u8* const mip_data = staging_buffer.data() + mip_offset; | ||
| 257 | const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); | ||
| 256 | if (is_compressed) { | 258 | if (is_compressed) { |
| 257 | glGetCompressedTextureImage(texture.handle, level, | 259 | glGetCompressedTextureImage(texture.handle, level, size, mip_data); |
| 258 | static_cast<GLsizei>(params.GetHostMipmapSize(level)), | ||
| 259 | staging_buffer.data() + mip_offset); | ||
| 260 | } else { | 260 | } else { |
| 261 | glGetTextureImage(texture.handle, level, format, type, | 261 | glGetTextureImage(texture.handle, level, format, type, size, mip_data); |
| 262 | static_cast<GLsizei>(params.GetHostMipmapSize(level)), | ||
| 263 | staging_buffer.data() + mip_offset); | ||
| 264 | } | 262 | } |
| 265 | } | 263 | } |
| 266 | } | 264 | } |
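DownloadTexture now computes the destination pointer and the host mip size once and hands them to the DSA download calls. A trimmed sketch of that per-level download, with the cache's params helper replaced by explicit arguments:

    #include <cstddef>
    #include <cstdint>
    #include <vector>
    #include <glad/glad.h>

    // Downloads one mip level of a DSA texture into a staging buffer at mip_offset.
    void DownloadMipLevel(GLuint texture, GLint level, bool is_compressed, GLenum format,
                          GLenum type, std::vector<std::uint8_t>& staging,
                          std::size_t mip_offset, GLsizei mip_size) {
        std::uint8_t* const mip_data = staging.data() + mip_offset;
        if (is_compressed) {
            glGetCompressedTextureImage(texture, level, mip_size, mip_data);
        } else {
            glGetTextureImage(texture, level, format, type, mip_size, mip_data);
        }
    }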
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 9770dda1c..ac99e6385 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -6,16 +6,20 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | 7 | ||
| 8 | #include <fmt/format.h> | 8 | #include <fmt/format.h> |
| 9 | |||
| 10 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 11 | 10 | ||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 14 | #include "common/scope_exit.h" | ||
| 15 | #include "video_core/renderer_opengl/utils.h" | 12 | #include "video_core/renderer_opengl/utils.h" |
| 16 | 13 | ||
| 17 | namespace OpenGL { | 14 | namespace OpenGL { |
| 18 | 15 | ||
| 16 | struct VertexArrayPushBuffer::Entry { | ||
| 17 | GLuint binding_index{}; | ||
| 18 | const GLuint* buffer{}; | ||
| 19 | GLintptr offset{}; | ||
| 20 | GLsizei stride{}; | ||
| 21 | }; | ||
| 22 | |||
| 19 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | 23 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; |
| 20 | 24 | ||
| 21 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | 25 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; |
| @@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() { | |||
| 47 | } | 51 | } |
| 48 | } | 52 | } |
| 49 | 53 | ||
| 54 | struct BindBuffersRangePushBuffer::Entry { | ||
| 55 | GLuint binding; | ||
| 56 | const GLuint* buffer; | ||
| 57 | GLintptr offset; | ||
| 58 | GLsizeiptr size; | ||
| 59 | }; | ||
| 60 | |||
| 50 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} | 61 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} |
| 51 | 62 | ||
| 52 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 63 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index d56153fe7..3ad7c02d4 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -26,12 +26,7 @@ public: | |||
| 26 | void Bind(); | 26 | void Bind(); |
| 27 | 27 | ||
| 28 | private: | 28 | private: |
| 29 | struct Entry { | 29 | struct Entry; |
| 30 | GLuint binding_index{}; | ||
| 31 | const GLuint* buffer{}; | ||
| 32 | GLintptr offset{}; | ||
| 33 | GLsizei stride{}; | ||
| 34 | }; | ||
| 35 | 30 | ||
| 36 | GLuint vao{}; | 31 | GLuint vao{}; |
| 37 | const GLuint* index_buffer{}; | 32 | const GLuint* index_buffer{}; |
| @@ -50,12 +45,7 @@ public: | |||
| 50 | void Bind(); | 45 | void Bind(); |
| 51 | 46 | ||
| 52 | private: | 47 | private: |
| 53 | struct Entry { | 48 | struct Entry; |
| 54 | GLuint binding; | ||
| 55 | const GLuint* buffer; | ||
| 56 | GLintptr offset; | ||
| 57 | GLsizeiptr size; | ||
| 58 | }; | ||
| 59 | 49 | ||
| 60 | GLenum target; | 50 | GLenum target; |
| 61 | std::vector<Entry> entries; | 51 | std::vector<Entry> entries; |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5a490f6ef..4e3ff231e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) | |||
| 109 | const auto topology = static_cast<std::size_t>(regs.draw.topology.Value()); | 109 | const auto topology = static_cast<std::size_t>(regs.draw.topology.Value()); |
| 110 | const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; | 110 | const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; |
| 111 | 111 | ||
| 112 | const auto& clip = regs.view_volume_clip_control; | ||
| 113 | const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; | ||
| 114 | |||
| 112 | Maxwell::Cull::FrontFace front_face = regs.cull.front_face; | 115 | Maxwell::Cull::FrontFace front_face = regs.cull.front_face; |
| 113 | if (regs.screen_y_control.triangle_rast_flip != 0 && | 116 | if (regs.screen_y_control.triangle_rast_flip != 0 && |
| 114 | regs.viewport_transform[0].scale_y > 0.0f) { | 117 | regs.viewport_transform[0].scale_y > 0.0f) { |
| @@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) | |||
| 119 | } | 122 | } |
| 120 | 123 | ||
| 121 | const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | 124 | const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; |
| 122 | return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc, | 125 | return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, |
| 123 | regs.cull.cull_face, front_face); | 126 | depth_clamp_enabled, gl_ndc, regs.cull.cull_face, |
| 127 | front_face); | ||
| 124 | } | 128 | } |
| 125 | 129 | ||
| 126 | } // Anonymous namespace | 130 | } // Anonymous namespace |
| @@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const | |||
| 222 | std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { | 226 | std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { |
| 223 | return static_cast<std::size_t>(cull_enable) ^ | 227 | return static_cast<std::size_t>(cull_enable) ^ |
| 224 | (static_cast<std::size_t>(depth_bias_enable) << 1) ^ | 228 | (static_cast<std::size_t>(depth_bias_enable) << 1) ^ |
| 225 | (static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^ | 229 | (static_cast<std::size_t>(depth_clamp_enable) << 2) ^ |
| 230 | (static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^ | ||
| 226 | (static_cast<std::size_t>(cull_face) << 24) ^ | 231 | (static_cast<std::size_t>(cull_face) << 24) ^ |
| 227 | (static_cast<std::size_t>(front_face) << 48); | 232 | (static_cast<std::size_t>(front_face) << 48); |
| 228 | } | 233 | } |
| 229 | 234 | ||
| 230 | bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { | 235 | bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { |
| 231 | return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) == | 236 | return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one, |
| 232 | std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face, | 237 | cull_face, front_face) == |
| 233 | rhs.front_face); | 238 | std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable, |
| 239 | rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face); | ||
| 234 | } | 240 | } |
| 235 | 241 | ||
| 236 | std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { | 242 | std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04152c0d4..87056ef37 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h | |||
| @@ -170,15 +170,17 @@ struct FixedPipelineState { | |||
| 170 | }; | 170 | }; |
| 171 | 171 | ||
| 172 | struct Rasterizer { | 172 | struct Rasterizer { |
| 173 | constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one, | 173 | constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, |
| 174 | Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face) | 174 | bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face, |
| 175 | Maxwell::Cull::FrontFace front_face) | ||
| 175 | : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, | 176 | : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, |
| 176 | ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{ | 177 | depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, |
| 177 | front_face} {} | 178 | cull_face{cull_face}, front_face{front_face} {} |
| 178 | Rasterizer() = default; | 179 | Rasterizer() = default; |
| 179 | 180 | ||
| 180 | bool cull_enable; | 181 | bool cull_enable; |
| 181 | bool depth_bias_enable; | 182 | bool depth_bias_enable; |
| 183 | bool depth_clamp_enable; | ||
| 182 | bool ndc_minus_one_to_one; | 184 | bool ndc_minus_one_to_one; |
| 183 | Maxwell::Cull::CullFace cull_face; | 185 | Maxwell::Cull::CullFace cull_face; |
| 184 | Maxwell::Cull::FrontFace front_face; | 186 | Maxwell::Cull::FrontFace front_face; |
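The fixed pipeline state now folds the new depth_clamp_enable flag into Rasterizer's hash and equality. In isolation, the pattern shifts each boolean into its own bit for the hash and compares the fields through std::tie; the sketch below uses a reduced field set rather than the full struct:

    #include <cstddef>
    #include <tuple>

    struct Rasterizer {
        bool cull_enable;
        bool depth_bias_enable;
        bool depth_clamp_enable;
        bool ndc_minus_one_to_one;

        std::size_t Hash() const noexcept {
            return static_cast<std::size_t>(cull_enable) ^
                   (static_cast<std::size_t>(depth_bias_enable) << 1) ^
                   (static_cast<std::size_t>(depth_clamp_enable) << 2) ^
                   (static_cast<std::size_t>(ndc_minus_one_to_one) << 3);
        }

        bool operator==(const Rasterizer& rhs) const noexcept {
            return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable,
                            ndc_minus_one_to_one) ==
                   std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
                            rhs.ndc_minus_one_to_one);
        }
    };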
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 000e3616d..331808113 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt | |||
| 44 | return {}; | 44 | return {}; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, | 47 | vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, |
| 48 | Tegra::Texture::TextureFilter filter) { | 48 | Tegra::Texture::TextureFilter filter) { |
| 49 | switch (wrap_mode) { | 49 | switch (wrap_mode) { |
| 50 | case Tegra::Texture::WrapMode::Wrap: | 50 | case Tegra::Texture::WrapMode::Wrap: |
| @@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, | |||
| 56 | case Tegra::Texture::WrapMode::Border: | 56 | case Tegra::Texture::WrapMode::Border: |
| 57 | return vk::SamplerAddressMode::eClampToBorder; | 57 | return vk::SamplerAddressMode::eClampToBorder; |
| 58 | case Tegra::Texture::WrapMode::Clamp: | 58 | case Tegra::Texture::WrapMode::Clamp: |
| 59 | // TODO(Rodrigo): Emulate GL_CLAMP properly | 59 | if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { |
| 60 | // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, so we can hack this | ||
| 61 | // by sending an invalid enumeration. | ||
| 62 | return static_cast<vk::SamplerAddressMode>(0xcafe); | ||
| 63 | } | ||
| 64 | // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors | ||
| 60 | switch (filter) { | 65 | switch (filter) { |
| 61 | case Tegra::Texture::TextureFilter::Nearest: | 66 | case Tegra::Texture::TextureFilter::Nearest: |
| 62 | return vk::SamplerAddressMode::eClampToEdge; | 67 | return vk::SamplerAddressMode::eClampToEdge; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 1534b738b..7e9678b7b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter); | |||
| 22 | 22 | ||
| 23 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); | 23 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); |
| 24 | 24 | ||
| 25 | vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, | 25 | vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, |
| 26 | Tegra::Texture::TextureFilter filter); | 26 | Tegra::Texture::TextureFilter filter); |
| 27 | 27 | ||
| 28 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); | 28 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h new file mode 100644 index 000000000..a472c5dc9 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <vector> | ||
| 9 | #include "video_core/renderer_base.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | |||
| 12 | namespace Core { | ||
| 13 | class System; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace Vulkan { | ||
| 17 | |||
| 18 | class VKBlitScreen; | ||
| 19 | class VKDevice; | ||
| 20 | class VKFence; | ||
| 21 | class VKMemoryManager; | ||
| 22 | class VKResourceManager; | ||
| 23 | class VKSwapchain; | ||
| 24 | class VKScheduler; | ||
| 25 | class VKImage; | ||
| 26 | |||
| 27 | struct VKScreenInfo { | ||
| 28 | VKImage* image{}; | ||
| 29 | u32 width{}; | ||
| 30 | u32 height{}; | ||
| 31 | bool is_srgb{}; | ||
| 32 | }; | ||
| 33 | |||
| 34 | class RendererVulkan final : public VideoCore::RendererBase { | ||
| 35 | public: | ||
| 36 | explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); | ||
| 37 | ~RendererVulkan() override; | ||
| 38 | |||
| 39 | /// Swap buffers (render frame) | ||
| 40 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 41 | |||
| 42 | /// Initialize the renderer | ||
| 43 | bool Init() override; | ||
| 44 | |||
| 45 | /// Shutdown the renderer | ||
| 46 | void ShutDown() override; | ||
| 47 | |||
| 48 | private: | ||
| 49 | std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( | ||
| 50 | const vk::DispatchLoaderDynamic& dldi); | ||
| 51 | |||
| 52 | bool PickDevices(const vk::DispatchLoaderDynamic& dldi); | ||
| 53 | |||
| 54 | void Report() const; | ||
| 55 | |||
| 56 | Core::System& system; | ||
| 57 | |||
| 58 | vk::Instance instance; | ||
| 59 | vk::SurfaceKHR surface; | ||
| 60 | |||
| 61 | VKScreenInfo screen_info; | ||
| 62 | |||
| 63 | UniqueDebugUtilsMessengerEXT debug_callback; | ||
| 64 | std::unique_ptr<VKDevice> device; | ||
| 65 | std::unique_ptr<VKSwapchain> swapchain; | ||
| 66 | std::unique_ptr<VKMemoryManager> memory_manager; | ||
| 67 | std::unique_ptr<VKResourceManager> resource_manager; | ||
| 68 | std::unique_ptr<VKScheduler> scheduler; | ||
| 69 | std::unique_ptr<VKBlitScreen> blit_screen; | ||
| 70 | }; | ||
| 71 | |||
| 72 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 46da81aaa..1ba544943 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -2,124 +2,145 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <cstring> | 6 | #include <cstring> |
| 6 | #include <memory> | 7 | #include <memory> |
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <tuple> | 9 | #include <tuple> |
| 9 | 10 | ||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "core/memory.h" | 12 | #include "common/bit_util.h" |
| 13 | #include "video_core/memory_manager.h" | 13 | #include "core/core.h" |
| 14 | #include "video_core/renderer_vulkan/declarations.h" | 14 | #include "video_core/renderer_vulkan/declarations.h" |
| 15 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 15 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 16 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 17 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 18 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| 20 | 21 | ||
| 21 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, | 22 | namespace { |
| 22 | std::size_t alignment, u8* host_ptr) | ||
| 23 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, | ||
| 24 | alignment{alignment} {} | ||
| 25 | |||
| 26 | VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, | ||
| 27 | Memory::Memory& cpu_memory_, | ||
| 28 | VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, | ||
| 29 | VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) | ||
| 30 | : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{ | ||
| 31 | cpu_memory_} { | ||
| 32 | const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | | ||
| 33 | vk::BufferUsageFlagBits::eIndexBuffer | | ||
| 34 | vk::BufferUsageFlagBits::eUniformBuffer; | ||
| 35 | const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | | ||
| 36 | vk::AccessFlagBits::eUniformRead; | ||
| 37 | stream_buffer = | ||
| 38 | std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access, | ||
| 39 | vk::PipelineStageFlagBits::eAllCommands); | ||
| 40 | buffer_handle = stream_buffer->GetBuffer(); | ||
| 41 | } | ||
| 42 | 23 | ||
| 43 | VKBufferCache::~VKBufferCache() = default; | 24 | const auto BufferUsage = |
| 25 | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | | ||
| 26 | vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; | ||
| 27 | |||
| 28 | const auto UploadPipelineStage = | ||
| 29 | vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | | ||
| 30 | vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | | ||
| 31 | vk::PipelineStageFlagBits::eComputeShader; | ||
| 44 | 32 | ||
| 45 | u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) { | 33 | const auto UploadAccessBarriers = |
| 46 | const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; | 34 | vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | |
| 47 | ASSERT_MSG(cpu_addr, "Invalid GPU address"); | 35 | vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | |
| 48 | 36 | vk::AccessFlagBits::eIndexRead; | |
| 49 | // Cache management is a big overhead, so only cache entries with a given size. | 37 | |
| 50 | // TODO: Figure out which size is the best for given games. | 38 | auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { |
| 51 | cache &= size >= 2048; | 39 | return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); |
| 52 | |||
| 53 | u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)}; | ||
| 54 | if (cache) { | ||
| 55 | const auto entry = TryGet(host_ptr); | ||
| 56 | if (entry) { | ||
| 57 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||
| 58 | return entry->GetOffset(); | ||
| 59 | } | ||
| 60 | Unregister(entry); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | AlignBuffer(alignment); | ||
| 65 | const u64 uploaded_offset = buffer_offset; | ||
| 66 | |||
| 67 | if (host_ptr == nullptr) { | ||
| 68 | return uploaded_offset; | ||
| 69 | } | ||
| 70 | |||
| 71 | std::memcpy(buffer_ptr, host_ptr, size); | ||
| 72 | buffer_ptr += size; | ||
| 73 | buffer_offset += size; | ||
| 74 | |||
| 75 | if (cache) { | ||
| 76 | auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, | ||
| 77 | alignment, host_ptr); | ||
| 78 | Register(entry); | ||
| 79 | } | ||
| 80 | |||
| 81 | return uploaded_offset; | ||
| 82 | } | 40 | } |
| 83 | 41 | ||
| 84 | u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { | 42 | } // Anonymous namespace |
| 85 | AlignBuffer(alignment); | 43 | |
| 86 | std::memcpy(buffer_ptr, raw_pointer, size); | 44 | CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, |
| 87 | const u64 uploaded_offset = buffer_offset; | 45 | CacheAddr cache_addr, std::size_t size) |
| 46 | : VideoCommon::BufferBlock{cache_addr, size} { | ||
| 47 | const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), | ||
| 48 | BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | | ||
| 49 | vk::BufferUsageFlagBits::eTransferDst, | ||
| 50 | vk::SharingMode::eExclusive, 0, nullptr); | ||
| 88 | 51 | ||
| 89 | buffer_ptr += size; | 52 | const auto& dld{device.GetDispatchLoader()}; |
| 90 | buffer_offset += size; | 53 | const auto dev{device.GetLogical()}; |
| 91 | return uploaded_offset; | 54 | buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); |
| 55 | buffer.commit = memory_manager.Commit(*buffer.handle, false); | ||
| 92 | } | 56 | } |
| 93 | 57 | ||
| 94 | std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { | 58 | CachedBufferBlock::~CachedBufferBlock() = default; |
| 95 | AlignBuffer(alignment); | 59 | |
| 96 | u8* const uploaded_ptr = buffer_ptr; | 60 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 97 | const u64 uploaded_offset = buffer_offset; | 61 | const VKDevice& device, VKMemoryManager& memory_manager, |
| 62 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool) | ||
| 63 | : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system, | ||
| 64 | CreateStreamBuffer(device, | ||
| 65 | scheduler)}, | ||
| 66 | device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ | ||
| 67 | staging_pool} {} | ||
| 98 | 68 | ||
| 99 | buffer_ptr += size; | 69 | VKBufferCache::~VKBufferCache() = default; |
| 100 | buffer_offset += size; | 70 | |
| 101 | return {uploaded_ptr, uploaded_offset}; | 71 | Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { |
| 72 | return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); | ||
| 102 | } | 73 | } |
| 103 | 74 | ||
| 104 | void VKBufferCache::Reserve(std::size_t max_size) { | 75 | const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { |
| 105 | bool invalidate; | 76 | return buffer->GetHandle(); |
| 106 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); | 77 | } |
| 107 | buffer_offset = buffer_offset_base; | 78 | |
| 79 | const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { | ||
| 80 | size = std::max(size, std::size_t(4)); | ||
| 81 | const auto& empty = staging_pool.GetUnusedBuffer(size, false); | ||
| 82 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 83 | scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { | ||
| 84 | cmdbuf.fillBuffer(buffer, 0, size, 0, dld); | ||
| 85 | }); | ||
| 86 | return &*empty.handle; | ||
| 87 | } | ||
| 108 | 88 | ||
| 109 | if (invalidate) { | 89 | void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 110 | InvalidateAll(); | 90 | const u8* data) { |
| 111 | } | 91 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 92 | std::memcpy(staging.commit->Map(size), data, size); | ||
| 93 | |||
| 94 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 95 | scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, | ||
| 96 | size](auto cmdbuf, auto& dld) { | ||
| 97 | cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); | ||
| 98 | cmdbuf.pipelineBarrier( | ||
| 99 | vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, | ||
| 100 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, | ||
| 101 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, | ||
| 102 | offset, size)}, | ||
| 103 | {}, dld); | ||
| 104 | }); | ||
| 112 | } | 105 | } |
| 113 | 106 | ||
| 114 | void VKBufferCache::Send() { | 107 | void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 115 | stream_buffer->Send(buffer_offset - buffer_offset_base); | 108 | u8* data) { |
| 109 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | ||
| 110 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 111 | scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, | ||
| 112 | size](auto cmdbuf, auto& dld) { | ||
| 113 | cmdbuf.pipelineBarrier( | ||
| 114 | vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | | ||
| 115 | vk::PipelineStageFlagBits::eComputeShader, | ||
| 116 | vk::PipelineStageFlagBits::eTransfer, {}, {}, | ||
| 117 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, | ||
| 118 | vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, | ||
| 119 | VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, | ||
| 120 | {}, dld); | ||
| 121 | cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); | ||
| 122 | }); | ||
| 123 | scheduler.Finish(); | ||
| 124 | |||
| 125 | std::memcpy(data, staging.commit->Map(size), size); | ||
| 116 | } | 126 | } |
| 117 | 127 | ||
| 118 | void VKBufferCache::AlignBuffer(std::size_t alignment) { | 128 | void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 119 | // Align the offset, not the mapped pointer | 129 | std::size_t dst_offset, std::size_t size) { |
| 120 | const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); | 130 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 121 | buffer_ptr += offset_aligned - buffer_offset; | 131 | scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, |
| 122 | buffer_offset = offset_aligned; | 132 | dst_offset, size](auto cmdbuf, auto& dld) { |
| 133 | cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); | ||
| 134 | cmdbuf.pipelineBarrier( | ||
| 135 | vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, | ||
| 136 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, | ||
| 137 | vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, | ||
| 138 | VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), | ||
| 139 | vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, | ||
| 140 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, | ||
| 141 | dst_offset, size)}, | ||
| 142 | {}, dld); | ||
| 143 | }); | ||
| 123 | } | 144 | } |
| 124 | 145 | ||
| 125 | } // namespace Vulkan | 146 | } // namespace Vulkan |
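UploadBlockData stages guest data in a host-visible buffer, records a copy into the device-local block, and then issues a buffer memory barrier so later vertex, index, uniform, and shader reads observe the transfer. Expressed with the plain Vulkan C API instead of the project's scheduler and dispatch-loader wrappers, the recorded commands amount to roughly the following (the destination stage and access masks mirror the UploadPipelineStage and UploadAccessBarriers constants above):

    #include <cstddef>
    #include <cstring>
    #include <vulkan/vulkan.h>

    // Records a staged upload of `size` bytes into `dst` at `offset`.
    void RecordStagedUpload(VkCommandBuffer cmdbuf, VkBuffer staging, void* staging_map,
                            VkBuffer dst, VkDeviceSize offset, VkDeviceSize size,
                            const void* data) {
        std::memcpy(staging_map, data, static_cast<std::size_t>(size));

        const VkBufferCopy region{0, offset, size};
        vkCmdCopyBuffer(cmdbuf, staging, dst, 1, &region);

        VkBufferMemoryBarrier barrier{};
        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT |
                                VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
                                VK_ACCESS_INDEX_READ_BIT;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.buffer = dst;
        barrier.offset = offset;
        barrier.size = size;

        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                                 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                                 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                             0, 0, nullptr, 1, &barrier, 0, nullptr);
    }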
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index daa8ccf66..3f38eed0c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -5,105 +5,74 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <tuple> | 8 | #include <unordered_map> |
| 9 | #include <vector> | ||
| 9 | 10 | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "video_core/gpu.h" | 12 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/rasterizer_cache.h" | 13 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_vulkan/declarations.h" | 14 | #include "video_core/renderer_vulkan/declarations.h" |
| 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 15 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 15 | 19 | ||
| 16 | namespace Memory { | 20 | namespace Core { |
| 17 | class Memory; | 21 | class System; |
| 18 | } | ||
| 19 | |||
| 20 | namespace Tegra { | ||
| 21 | class MemoryManager; | ||
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | namespace Vulkan { | 24 | namespace Vulkan { |
| 25 | 25 | ||
| 26 | class VKDevice; | 26 | class VKDevice; |
| 27 | class VKFence; | ||
| 28 | class VKMemoryManager; | 27 | class VKMemoryManager; |
| 29 | class VKStreamBuffer; | 28 | class VKScheduler; |
| 30 | 29 | ||
| 31 | class CachedBufferEntry final : public RasterizerCacheObject { | 30 | class CachedBufferBlock final : public VideoCommon::BufferBlock { |
| 32 | public: | 31 | public: |
| 33 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, | 32 | explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, |
| 34 | u8* host_ptr); | 33 | CacheAddr cache_addr, std::size_t size); |
| 34 | ~CachedBufferBlock(); | ||
| 35 | 35 | ||
| 36 | VAddr GetCpuAddr() const override { | 36 | const vk::Buffer* GetHandle() const { |
| 37 | return cpu_addr; | 37 | return &*buffer.handle; |
| 38 | } | ||
| 39 | |||
| 40 | std::size_t GetSizeInBytes() const override { | ||
| 41 | return size; | ||
| 42 | } | ||
| 43 | |||
| 44 | std::size_t GetSize() const { | ||
| 45 | return size; | ||
| 46 | } | ||
| 47 | |||
| 48 | u64 GetOffset() const { | ||
| 49 | return offset; | ||
| 50 | } | ||
| 51 | |||
| 52 | std::size_t GetAlignment() const { | ||
| 53 | return alignment; | ||
| 54 | } | 38 | } |
| 55 | 39 | ||
| 56 | private: | 40 | private: |
| 57 | VAddr cpu_addr{}; | 41 | VKBuffer buffer; |
| 58 | std::size_t size{}; | ||
| 59 | u64 offset{}; | ||
| 60 | std::size_t alignment{}; | ||
| 61 | }; | 42 | }; |
| 62 | 43 | ||
| 63 | class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | 44 | using Buffer = std::shared_ptr<CachedBufferBlock>; |
| 45 | |||
| 46 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> { | ||
| 64 | public: | 47 | public: |
| 65 | explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_, | 48 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 66 | VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, | 49 | const VKDevice& device, VKMemoryManager& memory_manager, |
| 67 | VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); | 50 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool); |
| 68 | ~VKBufferCache(); | 51 | ~VKBufferCache(); |
| 69 | 52 | ||
| 70 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | 53 | const vk::Buffer* GetEmptyBuffer(std::size_t size) override; |
| 71 | /// allocated. | ||
| 72 | u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true); | ||
| 73 | 54 | ||
| 74 | /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | 55 | protected: |
| 75 | u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); | 56 | void WriteBarrier() override {} |
| 76 | 57 | ||
| 77 | /// Reserves memory to be used by host's CPU. Returns mapped address and offset. | 58 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; |
| 78 | std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4); | ||
| 79 | 59 | ||
| 80 | /// Reserves a region of memory to be used in subsequent upload/reserve operations. | 60 | const vk::Buffer* ToHandle(const Buffer& buffer) override; |
| 81 | void Reserve(std::size_t max_size); | ||
| 82 | 61 | ||
| 83 | /// Ensures that the set data is sent to the device. | 62 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 84 | void Send(); | 63 | const u8* data) override; |
| 85 | 64 | ||
| 86 | /// Returns the buffer cache handle. | 65 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 87 | vk::Buffer GetBuffer() const { | 66 | u8* data) override; |
| 88 | return buffer_handle; | ||
| 89 | } | ||
| 90 | 67 | ||
| 91 | protected: | 68 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 92 | // We do not have to flush this cache as things in it are never modified by us. | 69 | std::size_t dst_offset, std::size_t size) override; |
| 93 | void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | ||
| 94 | 70 | ||
| 95 | private: | 71 | private: |
| 96 | void AlignBuffer(std::size_t alignment); | 72 | const VKDevice& device; |
| 97 | 73 | VKMemoryManager& memory_manager; | |
| 98 | Tegra::MemoryManager& tegra_memory_manager; | 74 | VKScheduler& scheduler; |
| 99 | Memory::Memory& cpu_memory; | 75 | VKStagingBufferPool& staging_pool; |
| 100 | |||
| 101 | std::unique_ptr<VKStreamBuffer> stream_buffer; | ||
| 102 | vk::Buffer buffer_handle; | ||
| 103 | |||
| 104 | u8* buffer_ptr = nullptr; | ||
| 105 | u64 buffer_offset = 0; | ||
| 106 | u64 buffer_offset_base = 0; | ||
| 107 | }; | 76 | }; |
| 108 | 77 | ||
| 109 | } // namespace Vulkan | 78 | } // namespace Vulkan |
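The reworked header turns VKBufferCache into a thin backend of the generic VideoCommon::BufferCache: the Vulkan side only has to say how a block is created and how its bytes are uploaded, downloaded, and copied. A greatly reduced, self-contained sketch of that override surface (the real base class is templated and richer; the names below are only schematic):

    #include <cstddef>
    #include <cstdint>
    #include <memory>

    struct BufferBlock {
        virtual ~BufferBlock() = default;
    };
    using Buffer = std::shared_ptr<BufferBlock>;

    class BufferCacheBackend {
    public:
        virtual ~BufferCacheBackend() = default;

    protected:
        // Allocates backing storage for a cached range of guest memory.
        virtual Buffer CreateBlock(std::uintptr_t cache_addr, std::size_t size) = 0;
        // Moves bytes between guest memory and the block's device storage.
        virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                     const std::uint8_t* data) = 0;
        virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                       std::uint8_t* data) = 0;
        virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                               std::size_t dst_offset, std::size_t size) = 0;
    };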
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp new file mode 100644 index 000000000..7bdda3d79 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -0,0 +1,339 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | ||
| 7 | #include <optional> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 20 | |||
| 21 | namespace Vulkan { | ||
| 22 | |||
| 23 | namespace { | ||
| 24 | |||
| 25 | // Quad array SPIR-V module. Generated from the "shaders/" directory; read the instructions there. | ||
| 26 | constexpr u8 quad_array[] = { | ||
| 27 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00, | ||
| 28 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||
| 29 | 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||
| 30 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 31 | 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||
| 32 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 33 | 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 34 | 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 35 | 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 36 | 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 37 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 38 | 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 39 | 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 40 | 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 41 | 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 42 | 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 43 | 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 44 | 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 45 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 46 | 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 47 | 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 48 | 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 49 | 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 50 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 51 | 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 52 | 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 53 | 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 54 | 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 55 | 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 56 | 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, | ||
| 57 | 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 58 | 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, | ||
| 59 | 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||
| 60 | 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
| 61 | 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 62 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||
| 63 | 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 64 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
| 65 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 66 | 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 67 | 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, | ||
| 68 | 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, | ||
| 69 | 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 70 | 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 71 | 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, | ||
| 72 | 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, | ||
| 73 | 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, | ||
| 74 | 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 75 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 76 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, | ||
| 77 | 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, | ||
| 78 | 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00, | ||
| 79 | 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, | ||
| 80 | 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 81 | 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 82 | 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, | ||
| 83 | 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, | ||
| 84 | 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, | ||
| 85 | 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 86 | 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, | ||
| 87 | 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||
| 88 | 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 89 | 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 90 | 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 91 | 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||
| 92 | 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00, | ||
| 93 | 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||
| 94 | 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, | ||
| 95 | 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, | ||
| 96 | 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, | ||
| 97 | 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 98 | 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 99 | 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 100 | 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 101 | 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 102 | 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, | ||
| 103 | 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, | ||
| 104 | 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, | ||
| 105 | 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, | ||
| 106 | 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, | ||
| 107 | 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 108 | 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00, | ||
| 109 | 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||
| 110 | 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||
| 111 | 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, | ||
| 112 | 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 113 | 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, | ||
| 114 | 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, | ||
| 115 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | ||
| 116 | |||
| 117 | // Uint8 SPIR-V module. Generated from the "shaders/" directory. | ||
| 118 | constexpr u8 uint8_pass[] = { | ||
| 119 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, | ||
| 120 | 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, | ||
| 121 | 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, | ||
| 122 | 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, | ||
| 123 | 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f, | ||
| 124 | 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, | ||
| 125 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, | ||
| 126 | 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, | ||
| 127 | 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, | ||
| 128 | 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 129 | 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, | ||
| 130 | 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||
| 131 | 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, | ||
| 132 | 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 133 | 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 134 | 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, | ||
| 135 | 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 136 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 137 | 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00, | ||
| 138 | 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 139 | 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 140 | 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, | ||
| 141 | 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 142 | 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 143 | 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 144 | 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 145 | 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 146 | 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 147 | 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, | ||
| 148 | 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 149 | 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, | ||
| 150 | 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, | ||
| 151 | 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||
| 152 | 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 153 | 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 154 | 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, | ||
| 155 | 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 156 | 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||
| 157 | 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 158 | 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, | ||
| 159 | 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||
| 160 | 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, | ||
| 161 | 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 162 | 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||
| 163 | 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 164 | 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 165 | 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||
| 166 | 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, | ||
| 167 | 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, | ||
| 168 | 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||
| 169 | 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||
| 170 | 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||
| 171 | 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||
| 172 | 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||
| 173 | 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 174 | 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, | ||
| 175 | 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 176 | 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||
| 177 | 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 178 | 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||
| 179 | 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, | ||
| 180 | 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||
| 181 | 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, | ||
| 182 | 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, | ||
| 183 | 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||
| 184 | 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||
| 185 | 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, | ||
| 186 | 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||
| 187 | 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00, | ||
| 188 | 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, | ||
| 189 | 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||
| 190 | 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, | ||
| 191 | 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||
| 192 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | ||
| 193 | |||
| 194 | } // Anonymous namespace | ||
| 195 | |||
| 196 | VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | ||
| 197 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | ||
| 198 | const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, | ||
| 199 | const std::vector<vk::PushConstantRange> push_constants, | ||
| 200 | std::size_t code_size, const u8* code) { | ||
| 201 | const auto dev = device.GetLogical(); | ||
| 202 | const auto& dld = device.GetDispatchLoader(); | ||
| 203 | |||
| 204 | const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( | ||
| 205 | {}, static_cast<u32>(bindings.size()), bindings.data()); | ||
| 206 | descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); | ||
| 207 | |||
| 208 | const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, | ||
| 209 | static_cast<u32>(push_constants.size()), | ||
| 210 | push_constants.data()); | ||
| 211 | layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); | ||
| 212 | |||
| 213 | if (!templates.empty()) { | ||
| 214 | const vk::DescriptorUpdateTemplateCreateInfo template_ci( | ||
| 215 | {}, static_cast<u32>(templates.size()), templates.data(), | ||
| 216 | vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, | ||
| 217 | vk::PipelineBindPoint::eGraphics, *layout, 0); | ||
| 218 | descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); | ||
| 219 | |||
| 220 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); | ||
| 221 | } | ||
| 222 | |||
| 223 | auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); | ||
| 224 | std::memcpy(code_copy.get(), code, code_size); | ||
| 225 | const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get()); | ||
| 226 | module = dev.createShaderModuleUnique(module_ci, nullptr, dld); | ||
| 227 | |||
| 228 | const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module, | ||
| 229 | "main", nullptr); | ||
| 230 | |||
| 231 | const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0); | ||
| 232 | pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld); | ||
| 233 | } | ||
| 234 | |||
| 235 | VKComputePass::~VKComputePass() = default; | ||
| 236 | |||
| 237 | vk::DescriptorSet VKComputePass::CommitDescriptorSet( | ||
| 238 | VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) { | ||
| 239 | if (!descriptor_template) { | ||
| 240 | return {}; | ||
| 241 | } | ||
| 242 | const auto set = descriptor_allocator->Commit(fence); | ||
| 243 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 244 | return set; | ||
| 245 | } | ||
| 246 | |||
| 247 | QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, | ||
| 248 | VKDescriptorPool& descriptor_pool, | ||
| 249 | VKStagingBufferPool& staging_buffer_pool, | ||
| 250 | VKUpdateDescriptorQueue& update_descriptor_queue) | ||
| 251 | : VKComputePass(device, descriptor_pool, | ||
| 252 | {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, | ||
| 253 | vk::ShaderStageFlagBits::eCompute, nullptr)}, | ||
| 254 | {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer, | ||
| 255 | 0, sizeof(DescriptorUpdateEntry))}, | ||
| 256 | {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))}, | ||
| 257 | std::size(quad_array), quad_array), | ||
| 258 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | ||
| 259 | update_descriptor_queue{update_descriptor_queue} {} | ||
| 260 | |||
| 261 | QuadArrayPass::~QuadArrayPass() = default; | ||
| 262 | |||
| 263 | std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | ||
| 264 | const u32 num_triangle_vertices = num_vertices * 6 / 4; | ||
| 265 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | ||
| 266 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | ||
| 267 | |||
| 268 | update_descriptor_queue.Acquire(); | ||
| 269 | update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); | ||
| 270 | const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); | ||
| 271 | |||
| 272 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 273 | |||
| 274 | ASSERT(num_vertices % 4 == 0); | ||
| 275 | const u32 num_quads = num_vertices / 4; | ||
| 276 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads, | ||
| 277 | first, set](auto cmdbuf, auto& dld) { | ||
| 278 | constexpr u32 dispatch_size = 1024; | ||
| 279 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); | ||
| 280 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); | ||
| 281 | cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first, | ||
| 282 | dld); | ||
| 283 | cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld); | ||
| 284 | |||
| 285 | const vk::BufferMemoryBarrier barrier( | ||
| 286 | vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, | ||
| 287 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, | ||
| 288 | static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32)); | ||
| 289 | cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, | ||
| 290 | vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); | ||
| 291 | }); | ||
| 292 | return {*buffer.handle, 0}; | ||
| 293 | } | ||
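For reference, QuadArrayPass turns a quad-list draw into a triangle-list index buffer on the GPU: every 4-vertex quad becomes the 6 indices of two triangles, which is where `num_vertices * 6 / 4` and the `num_quads * 6 * sizeof(u32)` barrier size come from, and `Common::AlignUp(num_quads, 1024) / 1024` is a ceiling division giving the number of 1024-invocation workgroups to dispatch. A rough CPU-side sketch of the expansion the quad_array shader performs (the {0, 1, 2, 0, 2, 3} remap is read off the SPIR-V constants above; the function name is illustrative, not a yuzu symbol):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative CPU equivalent of the quad_array compute shader.
    // "first" corresponds to the push constant recorded in Assemble().
    std::vector<std::uint32_t> ExpandQuadsToTriangles(std::uint32_t num_vertices,
                                                      std::uint32_t first) {
        static constexpr std::uint32_t quad_map[6] = {0, 1, 2, 0, 2, 3};
        const std::uint32_t num_quads = num_vertices / 4; // Assemble() asserts num_vertices % 4 == 0
        std::vector<std::uint32_t> indices(static_cast<std::size_t>(num_quads) * 6);
        for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
            for (std::uint32_t i = 0; i < 6; ++i) {
                indices[static_cast<std::size_t>(quad) * 6 + i] = first + quad * 4 + quad_map[i];
            }
        }
        return indices;
    }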
| 294 | |||
| 295 | Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, | ||
| 296 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, | ||
| 297 | VKUpdateDescriptorQueue& update_descriptor_queue) | ||
| 298 | : VKComputePass(device, descriptor_pool, | ||
| 299 | {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, | ||
| 300 | vk::ShaderStageFlagBits::eCompute, nullptr), | ||
| 301 | vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1, | ||
| 302 | vk::ShaderStageFlagBits::eCompute, nullptr)}, | ||
| 303 | {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer, | ||
| 304 | 0, sizeof(DescriptorUpdateEntry))}, | ||
| 305 | {}, std::size(uint8_pass), uint8_pass), | ||
| 306 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | ||
| 307 | update_descriptor_queue{update_descriptor_queue} {} | ||
| 308 | |||
| 309 | Uint8Pass::~Uint8Pass() = default; | ||
| 310 | |||
| 311 | std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer, | ||
| 312 | u64 src_offset) { | ||
| 313 | const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); | ||
| 314 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | ||
| 315 | |||
| 316 | update_descriptor_queue.Acquire(); | ||
| 317 | update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices); | ||
| 318 | update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); | ||
| 319 | const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); | ||
| 320 | |||
| 321 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 322 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, | ||
| 323 | num_vertices](auto cmdbuf, auto& dld) { | ||
| 324 | constexpr u32 dispatch_size = 1024; | ||
| 325 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); | ||
| 326 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); | ||
| 327 | cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld); | ||
| 328 | |||
| 329 | const vk::BufferMemoryBarrier barrier( | ||
| 330 | vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, | ||
| 331 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, | ||
| 332 | static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16)); | ||
| 333 | cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, | ||
| 334 | vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); | ||
| 335 | }); | ||
| 336 | return {&*buffer.handle, 0}; | ||
| 337 | } | ||
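Uint8Pass is analogous but simpler: it widens an 8-bit index buffer into 16-bit indices (hence `num_vertices * sizeof(u16)` for the staging size), with the SPIR-V relying on the SPV_KHR_8bit_storage and SPV_KHR_16bit_storage extensions and zero-extending each element. A CPU-side sketch of the same transformation, for illustration only (the function name is not a yuzu symbol):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative CPU equivalent of the uint8_pass compute shader:
    // each 8-bit index is zero-extended to 16 bits.
    std::vector<std::uint16_t> WidenIndicesU8ToU16(const std::vector<std::uint8_t>& src) {
        std::vector<std::uint16_t> dst(src.size());
        for (std::size_t i = 0; i < src.size(); ++i) {
            dst[i] = static_cast<std::uint16_t>(src[i]);
        }
        return dst;
    }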
| 338 | |||
| 339 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h new file mode 100644 index 000000000..7057eb837 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | class VKDevice; | ||
| 17 | class VKFence; | ||
| 18 | class VKScheduler; | ||
| 19 | class VKStagingBufferPool; | ||
| 20 | class VKUpdateDescriptorQueue; | ||
| 21 | |||
| 22 | class VKComputePass { | ||
| 23 | public: | ||
| 24 | explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | ||
| 25 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | ||
| 26 | const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, | ||
| 27 | const std::vector<vk::PushConstantRange> push_constants, | ||
| 28 | std::size_t code_size, const u8* code); | ||
| 29 | ~VKComputePass(); | ||
| 30 | |||
| 31 | protected: | ||
| 32 | vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 33 | VKFence& fence); | ||
| 34 | |||
| 35 | UniqueDescriptorUpdateTemplate descriptor_template; | ||
| 36 | UniquePipelineLayout layout; | ||
| 37 | UniquePipeline pipeline; | ||
| 38 | |||
| 39 | private: | ||
| 40 | UniqueDescriptorSetLayout descriptor_set_layout; | ||
| 41 | std::optional<DescriptorAllocator> descriptor_allocator; | ||
| 42 | UniqueShaderModule module; | ||
| 43 | }; | ||
| 44 | |||
| 45 | class QuadArrayPass final : public VKComputePass { | ||
| 46 | public: | ||
| 47 | explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, | ||
| 48 | VKDescriptorPool& descriptor_pool, | ||
| 49 | VKStagingBufferPool& staging_buffer_pool, | ||
| 50 | VKUpdateDescriptorQueue& update_descriptor_queue); | ||
| 51 | ~QuadArrayPass(); | ||
| 52 | |||
| 53 | std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first); | ||
| 54 | |||
| 55 | private: | ||
| 56 | VKScheduler& scheduler; | ||
| 57 | VKStagingBufferPool& staging_buffer_pool; | ||
| 58 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 59 | }; | ||
| 60 | |||
| 61 | class Uint8Pass final : public VKComputePass { | ||
| 62 | public: | ||
| 63 | explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler, | ||
| 64 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, | ||
| 65 | VKUpdateDescriptorQueue& update_descriptor_queue); | ||
| 66 | ~Uint8Pass(); | ||
| 67 | |||
| 68 | std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer, | ||
| 69 | u64 src_offset); | ||
| 70 | |||
| 71 | private: | ||
| 72 | VKScheduler& scheduler; | ||
| 73 | VKStagingBufferPool& staging_buffer_pool; | ||
| 74 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 75 | }; | ||
| 76 | |||
| 77 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp new file mode 100644 index 000000000..9d5b8de7a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 17 | |||
| 18 | namespace Vulkan { | ||
| 19 | |||
| 20 | VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, | ||
| 21 | VKDescriptorPool& descriptor_pool, | ||
| 22 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 23 | const SPIRVShader& shader) | ||
| 24 | : device{device}, scheduler{scheduler}, entries{shader.entries}, | ||
| 25 | descriptor_set_layout{CreateDescriptorSetLayout()}, | ||
| 26 | descriptor_allocator{descriptor_pool, *descriptor_set_layout}, | ||
| 27 | update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, | ||
| 28 | descriptor_template{CreateDescriptorUpdateTemplate()}, | ||
| 29 | shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {} | ||
| 30 | |||
| 31 | VKComputePipeline::~VKComputePipeline() = default; | ||
| 32 | |||
| 33 | vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { | ||
| 34 | if (!descriptor_template) { | ||
| 35 | return {}; | ||
| 36 | } | ||
| 37 | const auto set = descriptor_allocator.Commit(scheduler.GetFence()); | ||
| 38 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 39 | return set; | ||
| 40 | } | ||
| 41 | |||
| 42 | UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | ||
| 43 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | ||
| 44 | u32 binding = 0; | ||
| 45 | const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { | ||
| 46 | // TODO(Rodrigo): Maybe make individual bindings here? | ||
| 47 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { | ||
| 48 | bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute, | ||
| 49 | nullptr); | ||
| 50 | } | ||
| 51 | }; | ||
| 52 | AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); | ||
| 53 | AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); | ||
| 54 | AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); | ||
| 55 | AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); | ||
| 56 | AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); | ||
| 57 | |||
| 58 | const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( | ||
| 59 | {}, static_cast<u32>(bindings.size()), bindings.data()); | ||
| 60 | |||
| 61 | const auto dev = device.GetLogical(); | ||
| 62 | const auto& dld = device.GetDispatchLoader(); | ||
| 63 | return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); | ||
| 64 | } | ||
| 65 | |||
| 66 | UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const { | ||
| 67 | const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr); | ||
| 68 | const auto dev = device.GetLogical(); | ||
| 69 | return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader()); | ||
| 70 | } | ||
| 71 | |||
| 72 | UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const { | ||
| 73 | std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; | ||
| 74 | u32 binding = 0; | ||
| 75 | u32 offset = 0; | ||
| 76 | FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); | ||
| 77 | if (template_entries.empty()) { | ||
| 78 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 79 | return UniqueDescriptorUpdateTemplate{}; | ||
| 80 | } | ||
| 81 | |||
| 82 | const vk::DescriptorUpdateTemplateCreateInfo template_ci( | ||
| 83 | {}, static_cast<u32>(template_entries.size()), template_entries.data(), | ||
| 84 | vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, | ||
| 85 | vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); | ||
| 86 | |||
| 87 | const auto dev = device.GetLogical(); | ||
| 88 | const auto& dld = device.GetDispatchLoader(); | ||
| 89 | return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); | ||
| 90 | } | ||
| 91 | |||
| 92 | UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { | ||
| 93 | const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data()); | ||
| 94 | const auto dev = device.GetLogical(); | ||
| 95 | return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader()); | ||
| 96 | } | ||
| 97 | |||
| 98 | UniquePipeline VKComputePipeline::CreatePipeline() const { | ||
| 99 | vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute, | ||
| 100 | *shader_module, "main", nullptr); | ||
| 101 | vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; | ||
| 102 | subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; | ||
| 103 | if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) { | ||
| 104 | shader_stage_ci.pNext = &subgroup_size_ci; | ||
| 105 | } | ||
| 106 | |||
| 107 | const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0); | ||
| 108 | const auto dev = device.GetLogical(); | ||
| 109 | return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader()); | ||
| 110 | } | ||
| 111 | |||
| 112 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h new file mode 100644 index 000000000..22235c6c9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | class VKDevice; | ||
| 17 | class VKScheduler; | ||
| 18 | class VKUpdateDescriptorQueue; | ||
| 19 | |||
| 20 | class VKComputePipeline final { | ||
| 21 | public: | ||
| 22 | explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler, | ||
| 23 | VKDescriptorPool& descriptor_pool, | ||
| 24 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 25 | const SPIRVShader& shader); | ||
| 26 | ~VKComputePipeline(); | ||
| 27 | |||
| 28 | vk::DescriptorSet CommitDescriptorSet(); | ||
| 29 | |||
| 30 | vk::Pipeline GetHandle() const { | ||
| 31 | return *pipeline; | ||
| 32 | } | ||
| 33 | |||
| 34 | vk::PipelineLayout GetLayout() const { | ||
| 35 | return *layout; | ||
| 36 | } | ||
| 37 | |||
| 38 | const ShaderEntries& GetEntries() { | ||
| 39 | return entries; | ||
| 40 | } | ||
| 41 | |||
| 42 | private: | ||
| 43 | UniqueDescriptorSetLayout CreateDescriptorSetLayout() const; | ||
| 44 | |||
| 45 | UniquePipelineLayout CreatePipelineLayout() const; | ||
| 46 | |||
| 47 | UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const; | ||
| 48 | |||
| 49 | UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const; | ||
| 50 | |||
| 51 | UniquePipeline CreatePipeline() const; | ||
| 52 | |||
| 53 | const VKDevice& device; | ||
| 54 | VKScheduler& scheduler; | ||
| 55 | ShaderEntries entries; | ||
| 56 | |||
| 57 | UniqueDescriptorSetLayout descriptor_set_layout; | ||
| 58 | DescriptorAllocator descriptor_allocator; | ||
| 59 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 60 | UniquePipelineLayout layout; | ||
| 61 | UniqueDescriptorUpdateTemplate descriptor_template; | ||
| 62 | UniqueShaderModule shader_module; | ||
| 63 | UniquePipeline pipeline; | ||
| 64 | }; | ||
| 65 | |||
| 66 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp new file mode 100644 index 000000000..cc7c281a0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. | ||
| 17 | constexpr std::size_t SETS_GROW_RATE = 0x20; | ||
| 18 | |||
| 19 | DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, | ||
| 20 | vk::DescriptorSetLayout layout) | ||
| 21 | : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} | ||
| 22 | |||
| 23 | DescriptorAllocator::~DescriptorAllocator() = default; | ||
| 24 | |||
| 25 | vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) { | ||
| 26 | return *descriptors[CommitResource(fence)]; | ||
| 27 | } | ||
| 28 | |||
| 29 | void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { | ||
| 30 | auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin); | ||
| 31 | descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()), | ||
| 32 | std::make_move_iterator(new_sets.end())); | ||
| 33 | } | ||
| 34 | |||
| 35 | VKDescriptorPool::VKDescriptorPool(const VKDevice& device) | ||
| 36 | : device{device}, active_pool{AllocateNewPool()} {} | ||
| 37 | |||
| 38 | VKDescriptorPool::~VKDescriptorPool() = default; | ||
| 39 | |||
| 40 | vk::DescriptorPool VKDescriptorPool::AllocateNewPool() { | ||
| 41 | static constexpr u32 num_sets = 0x20000; | ||
| 42 | static constexpr vk::DescriptorPoolSize pool_sizes[] = { | ||
| 43 | {vk::DescriptorType::eUniformBuffer, num_sets * 90}, | ||
| 44 | {vk::DescriptorType::eStorageBuffer, num_sets * 60}, | ||
| 45 | {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64}, | ||
| 46 | {vk::DescriptorType::eCombinedImageSampler, num_sets * 64}, | ||
| 47 | {vk::DescriptorType::eStorageImage, num_sets * 40}}; | ||
| 48 | |||
| 49 | const vk::DescriptorPoolCreateInfo create_info( | ||
| 50 | vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets, | ||
| 51 | static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes)); | ||
| 52 | const auto dev = device.GetLogical(); | ||
| 53 | return *pools.emplace_back( | ||
| 54 | dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader())); | ||
| 55 | } | ||
| 56 | |||
| 57 | std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors( | ||
| 58 | vk::DescriptorSetLayout layout, std::size_t count) { | ||
| 59 | std::vector layout_copies(count, layout); | ||
| 60 | vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count), | ||
| 61 | layout_copies.data()); | ||
| 62 | |||
| 63 | std::vector<vk::DescriptorSet> sets(count); | ||
| 64 | const auto dev = device.GetLogical(); | ||
| 65 | const auto& dld = device.GetDispatchLoader(); | ||
| 66 | switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) { | ||
| 67 | case vk::Result::eSuccess: | ||
| 68 | break; | ||
| 69 | case vk::Result::eErrorOutOfPoolMemory: | ||
| 70 | active_pool = AllocateNewPool(); | ||
| 71 | allocate_info.descriptorPool = active_pool; | ||
| 72 | if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) { | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | [[fallthrough]]; | ||
| 76 | default: | ||
| 77 | vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique"); | ||
| 78 | } | ||
| 79 | |||
| 80 | vk::PoolFree deleter(dev, active_pool, dld); | ||
| 81 | std::vector<UniqueDescriptorSet> unique_sets; | ||
| 82 | unique_sets.reserve(count); | ||
| 83 | for (const auto set : sets) { | ||
| 84 | unique_sets.push_back(UniqueDescriptorSet{set, deleter}); | ||
| 85 | } | ||
| 86 | return unique_sets; | ||
| 87 | } | ||
| 88 | |||
| 89 | } // namespace Vulkan | ||
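A note on the allocation fallback above: `AllocateDescriptors` retries exactly once after `eErrorOutOfPoolMemory` by making a freshly created pool the active one, and any remaining failure is thrown. For readers more used to the plain Vulkan C API than to vulkan.hpp, a minimal sketch of that pattern (CreateNewPool and the single-set scope are assumptions for illustration, not yuzu symbols):

    #include <vulkan/vulkan.h>

    // Assumed helper that creates and keeps ownership of a new descriptor pool.
    VkDescriptorPool CreateNewPool(VkDevice device);

    // Allocate one descriptor set; on pool exhaustion, switch pools and retry once.
    VkDescriptorSet AllocateOneSet(VkDevice device, VkDescriptorPool& active_pool,
                                   VkDescriptorSetLayout layout) {
        VkDescriptorSetAllocateInfo info{};
        info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
        info.descriptorPool = active_pool;
        info.descriptorSetCount = 1;
        info.pSetLayouts = &layout;

        VkDescriptorSet set = VK_NULL_HANDLE;
        VkResult result = vkAllocateDescriptorSets(device, &info, &set);
        if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
            active_pool = CreateNewPool(device);
            info.descriptorPool = active_pool;
            result = vkAllocateDescriptorSets(device, &info, &set);
        }
        return result == VK_SUCCESS ? set : VK_NULL_HANDLE; // the real code throws instead
    }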
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h new file mode 100644 index 000000000..a441dbc0f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | class VKDescriptorPool; | ||
| 17 | |||
| 18 | class DescriptorAllocator final : public VKFencedPool { | ||
| 19 | public: | ||
| 20 | explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout); | ||
| 21 | ~DescriptorAllocator() override; | ||
| 22 | |||
| 23 | DescriptorAllocator(const DescriptorAllocator&) = delete; | ||
| 24 | |||
| 25 | vk::DescriptorSet Commit(VKFence& fence); | ||
| 26 | |||
| 27 | protected: | ||
| 28 | void Allocate(std::size_t begin, std::size_t end) override; | ||
| 29 | |||
| 30 | private: | ||
| 31 | VKDescriptorPool& descriptor_pool; | ||
| 32 | const vk::DescriptorSetLayout layout; | ||
| 33 | |||
| 34 | std::vector<UniqueDescriptorSet> descriptors; | ||
| 35 | }; | ||
| 36 | |||
| 37 | class VKDescriptorPool final { | ||
| 38 | friend DescriptorAllocator; | ||
| 39 | |||
| 40 | public: | ||
| 41 | explicit VKDescriptorPool(const VKDevice& device); | ||
| 42 | ~VKDescriptorPool(); | ||
| 43 | |||
| 44 | private: | ||
| 45 | vk::DescriptorPool AllocateNewPool(); | ||
| 46 | |||
| 47 | std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout, | ||
| 48 | std::size_t count); | ||
| 49 | |||
| 50 | const VKDevice& device; | ||
| 51 | |||
| 52 | std::vector<UniqueDescriptorPool> pools; | ||
| 53 | vk::DescriptorPool active_pool; | ||
| 54 | }; | ||
| 55 | |||
| 56 | } // namespace Vulkan \ No newline at end of file | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..2e0536bf6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -0,0 +1,271 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <vector> | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 19 | |||
| 20 | namespace Vulkan { | ||
| 21 | |||
| 22 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | ||
| 23 | |||
| 24 | namespace { | ||
| 25 | |||
| 26 | vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { | ||
| 27 | return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail), | ||
| 28 | MaxwellToVK::StencilOp(face.action_depth_pass), | ||
| 29 | MaxwellToVK::StencilOp(face.action_depth_fail), | ||
| 30 | MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) { | ||
| 34 | static constexpr std::array unsupported_topologies = { | ||
| 35 | vk::PrimitiveTopology::ePointList, | ||
| 36 | vk::PrimitiveTopology::eLineList, | ||
| 37 | vk::PrimitiveTopology::eTriangleList, | ||
| 38 | vk::PrimitiveTopology::eLineListWithAdjacency, | ||
| 39 | vk::PrimitiveTopology::eTriangleListWithAdjacency, | ||
| 40 | vk::PrimitiveTopology::ePatchList}; | ||
| 41 | return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), | ||
| 42 | topology) == std::end(unsupported_topologies); | ||
| 43 | } | ||
| 44 | |||
| 45 | } // Anonymous namespace | ||
| 46 | |||
| 47 | VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, | ||
| 48 | VKDescriptorPool& descriptor_pool, | ||
| 49 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 50 | VKRenderPassCache& renderpass_cache, | ||
| 51 | const GraphicsPipelineCacheKey& key, | ||
| 52 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | ||
| 53 | const SPIRVProgram& program) | ||
| 54 | : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, | ||
| 55 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, | ||
| 56 | descriptor_allocator{descriptor_pool, *descriptor_set_layout}, | ||
| 57 | update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, | ||
| 58 | descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( | ||
| 59 | program)}, | ||
| 60 | renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( | ||
| 61 | key.renderpass_params, | ||
| 62 | program)} {} | ||
| 63 | |||
| 64 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | ||
| 65 | |||
| 66 | vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { | ||
| 67 | if (!descriptor_template) { | ||
| 68 | return {}; | ||
| 69 | } | ||
| 70 | const auto set = descriptor_allocator.Commit(scheduler.GetFence()); | ||
| 71 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 72 | return set; | ||
| 73 | } | ||
| 74 | |||
| 75 | UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( | ||
| 76 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const { | ||
| 77 | const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( | ||
| 78 | {}, static_cast<u32>(bindings.size()), bindings.data()); | ||
| 79 | |||
| 80 | const auto dev = device.GetLogical(); | ||
| 81 | const auto& dld = device.GetDispatchLoader(); | ||
| 82 | return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); | ||
| 83 | } | ||
| 84 | |||
| 85 | UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { | ||
| 86 | const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0, | ||
| 87 | nullptr); | ||
| 88 | const auto dev = device.GetLogical(); | ||
| 89 | const auto& dld = device.GetDispatchLoader(); | ||
| 90 | return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); | ||
| 91 | } | ||
| 92 | |||
| 93 | UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate( | ||
| 94 | const SPIRVProgram& program) const { | ||
| 95 | std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; | ||
| 96 | u32 binding = 0; | ||
| 97 | u32 offset = 0; | ||
| 98 | for (const auto& stage : program) { | ||
| 99 | if (stage) { | ||
| 100 | FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, | ||
| 101 | template_entries); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | if (template_entries.empty()) { | ||
| 105 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 106 | return UniqueDescriptorUpdateTemplate{}; | ||
| 107 | } | ||
| 108 | |||
| 109 | const vk::DescriptorUpdateTemplateCreateInfo template_ci( | ||
| 110 | {}, static_cast<u32>(template_entries.size()), template_entries.data(), | ||
| 111 | vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, | ||
| 112 | vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); | ||
| 113 | |||
| 114 | const auto dev = device.GetLogical(); | ||
| 115 | const auto& dld = device.GetDispatchLoader(); | ||
| 116 | return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); | ||
| 117 | } | ||
| 118 | |||
| 119 | std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules( | ||
| 120 | const SPIRVProgram& program) const { | ||
| 121 | std::vector<UniqueShaderModule> modules; | ||
| 122 | const auto dev = device.GetLogical(); | ||
| 123 | const auto& dld = device.GetDispatchLoader(); | ||
| 124 | for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { | ||
| 125 | const auto& stage = program[i]; | ||
| 126 | if (!stage) { | ||
| 127 | continue; | ||
| 128 | } | ||
| 129 | const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32), | ||
| 130 | stage->code.data()); | ||
| 131 | modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld)); | ||
| 132 | } | ||
| 133 | return modules; | ||
| 134 | } | ||
| 135 | |||
| 136 | UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, | ||
| 137 | const SPIRVProgram& program) const { | ||
| 138 | const auto& vi = fixed_state.vertex_input; | ||
| 139 | const auto& ia = fixed_state.input_assembly; | ||
| 140 | const auto& ds = fixed_state.depth_stencil; | ||
| 141 | const auto& cd = fixed_state.color_blending; | ||
| 142 | const auto& ts = fixed_state.tessellation; | ||
| 143 | const auto& rs = fixed_state.rasterizer; | ||
| 144 | |||
| 145 | std::vector<vk::VertexInputBindingDescription> vertex_bindings; | ||
| 146 | std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | ||
| 147 | for (std::size_t i = 0; i < vi.num_bindings; ++i) { | ||
| 148 | const auto& binding = vi.bindings[i]; | ||
| 149 | const bool instanced = binding.divisor != 0; | ||
| 150 | const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex; | ||
| 151 | vertex_bindings.emplace_back(binding.index, binding.stride, rate); | ||
| 152 | if (instanced) { | ||
| 153 | vertex_binding_divisors.emplace_back(binding.index, binding.divisor); | ||
| 154 | } | ||
| 155 | } | ||
| 156 | |||
| 157 | std::vector<vk::VertexInputAttributeDescription> vertex_attributes; | ||
| 158 | const auto& input_attributes = program[0]->entries.attributes; | ||
| 159 | for (std::size_t i = 0; i < vi.num_attributes; ++i) { | ||
| 160 | const auto& attribute = vi.attributes[i]; | ||
| 161 | if (input_attributes.find(attribute.index) == input_attributes.end()) { | ||
| 162 | // Skip attributes not used by the vertex shader. | ||
| 163 | continue; | ||
| 164 | } | ||
| 165 | vertex_attributes.emplace_back(attribute.index, attribute.buffer, | ||
| 166 | MaxwellToVK::VertexFormat(attribute.type, attribute.size), | ||
| 167 | attribute.offset); | ||
| 168 | } | ||
| 169 | |||
| 170 | vk::PipelineVertexInputStateCreateInfo vertex_input_ci( | ||
| 171 | {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(), | ||
| 172 | static_cast<u32>(vertex_attributes.size()), vertex_attributes.data()); | ||
| 173 | |||
| 174 | const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci( | ||
| 175 | static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data()); | ||
| 176 | if (!vertex_binding_divisors.empty()) { | ||
| 177 | vertex_input_ci.pNext = &vertex_input_divisor_ci; | ||
| 178 | } | ||
| 179 | |||
| 180 | const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); | ||
| 181 | const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci( | ||
| 182 | {}, primitive_topology, | ||
| 183 | ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology)); | ||
| 184 | |||
| 185 | const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points); | ||
| 186 | |||
| 187 | const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr, | ||
| 188 | Maxwell::NumViewports, nullptr); | ||
| 189 | |||
| 190 | // TODO(Rodrigo): Find out what the default register value for front face is | ||
| 191 | const vk::PipelineRasterizationStateCreateInfo rasterizer_ci( | ||
| 192 | {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill, | ||
| 193 | rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone, | ||
| 194 | rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise, | ||
| 195 | rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f); | ||
| 196 | |||
| 197 | const vk::PipelineMultisampleStateCreateInfo multisampling_ci( | ||
| 198 | {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false); | ||
| 199 | |||
| 200 | const vk::CompareOp depth_test_compare = ds.depth_test_enable | ||
| 201 | ? MaxwellToVK::ComparisonOp(ds.depth_test_function) | ||
| 202 | : vk::CompareOp::eAlways; | ||
| 203 | |||
| 204 | const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci( | ||
| 205 | {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable, | ||
| 206 | ds.stencil_enable, GetStencilFaceState(ds.front_stencil), | ||
| 207 | GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f); | ||
| 208 | |||
| 209 | std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | ||
| 210 | const std::size_t num_attachments = | ||
| 211 | std::min(cd.attachments_count, renderpass_params.color_attachments.size()); | ||
| 212 | for (std::size_t i = 0; i < num_attachments; ++i) { | ||
| 213 | constexpr std::array component_table{ | ||
| 214 | vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG, | ||
| 215 | vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA}; | ||
| 216 | const auto& blend = cd.attachments[i]; | ||
| 217 | |||
| 218 | vk::ColorComponentFlags color_components{}; | ||
| 219 | for (std::size_t j = 0; j < component_table.size(); ++j) { | ||
| 220 | if (blend.components[j]) | ||
| 221 | color_components |= component_table[j]; | ||
| 222 | } | ||
| 223 | |||
| 224 | cb_attachments[i] = vk::PipelineColorBlendAttachmentState( | ||
| 225 | blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func), | ||
| 226 | MaxwellToVK::BlendFactor(blend.dst_rgb_func), | ||
| 227 | MaxwellToVK::BlendEquation(blend.rgb_equation), | ||
| 228 | MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func), | ||
| 229 | MaxwellToVK::BlendEquation(blend.a_equation), color_components); | ||
| 230 | } | ||
| 231 | const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy, | ||
| 232 | static_cast<u32>(num_attachments), | ||
| 233 | cb_attachments.data(), {}); | ||
| 234 | |||
| 235 | constexpr std::array dynamic_states = { | ||
| 236 | vk::DynamicState::eViewport, vk::DynamicState::eScissor, | ||
| 237 | vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants, | ||
| 238 | vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask, | ||
| 239 | vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference}; | ||
| 240 | const vk::PipelineDynamicStateCreateInfo dynamic_state_ci( | ||
| 241 | {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); | ||
| 242 | |||
| 243 | vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; | ||
| 244 | subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; | ||
| 245 | |||
| 246 | std::vector<vk::PipelineShaderStageCreateInfo> shader_stages; | ||
| 247 | std::size_t module_index = 0; | ||
| 248 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 249 | if (!program[stage]) { | ||
| 250 | continue; | ||
| 251 | } | ||
| 252 | const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage); | ||
| 253 | const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum); | ||
| 254 | auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage, | ||
| 255 | *modules[module_index++], "main", nullptr); | ||
| 256 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) { | ||
| 257 | stage_ci.pNext = &subgroup_size_ci; | ||
| 258 | } | ||
| 259 | } | ||
| 260 | |||
| 261 | const vk::GraphicsPipelineCreateInfo create_info( | ||
| 262 | {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci, | ||
| 263 | &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci, | ||
| 264 | &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0); | ||
| 265 | |||
| 266 | const auto dev = device.GetLogical(); | ||
| 267 | const auto& dld = device.GetDispatchLoader(); | ||
| 268 | return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld); | ||
| 269 | } | ||
| 270 | |||
| 271 | } // namespace Vulkan | ||
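Two details of the pipeline construction above are easy to miss. First, most state is baked into the pipeline object, but viewport, scissor, depth bias, blend constants, depth bounds and the stencil masks are declared dynamic, so they can change per draw without rebuilding the pipeline. Second, the blend-attachment loop folds Maxwell's per-component write booleans into a single vk::ColorComponentFlags value. Below is a minimal standalone sketch of that folding, using plain integer flags instead of the Vulkan-Hpp types; the enum values and names are illustrative, not taken from the diff.

#include <array>
#include <cstdint>
#include <cstdio>

// Stand-ins for vk::ColorComponentFlagBits; values are illustrative.
enum ColorComponent : std::uint32_t {
    ComponentR = 1u << 0,
    ComponentG = 1u << 1,
    ComponentB = 1u << 2,
    ComponentA = 1u << 3,
};

// Fold the per-component write booleans into one bitmask, as the
// blend-attachment loop does for each render target.
std::uint32_t FoldWriteMask(const std::array<bool, 4>& components) {
    constexpr std::array<std::uint32_t, 4> table{ComponentR, ComponentG, ComponentB, ComponentA};
    std::uint32_t mask = 0;
    for (std::size_t i = 0; i < table.size(); ++i) {
        if (components[i]) {
            mask |= table[i];
        }
    }
    return mask;
}

int main() {
    // Guest state that writes red, green and alpha but masks out blue.
    const std::uint32_t mask = FoldWriteMask({true, true, false, true});
    std::printf("write mask = 0x%x\n", static_cast<unsigned>(mask)); // prints 0xb
    return 0;
}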
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..4f5e4ea2d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -0,0 +1,90 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <optional> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "video_core/engines/maxwell_3d.h" | ||
| 14 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 15 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 20 | |||
| 21 | namespace Vulkan { | ||
| 22 | |||
| 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 24 | |||
| 25 | struct GraphicsPipelineCacheKey; | ||
| 26 | |||
| 27 | class VKDescriptorPool; | ||
| 28 | class VKDevice; | ||
| 29 | class VKRenderPassCache; | ||
| 30 | class VKScheduler; | ||
| 31 | class VKUpdateDescriptorQueue; | ||
| 32 | |||
| 33 | using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; | ||
| 34 | |||
| 35 | class VKGraphicsPipeline final { | ||
| 36 | public: | ||
| 37 | explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler, | ||
| 38 | VKDescriptorPool& descriptor_pool, | ||
| 39 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 40 | VKRenderPassCache& renderpass_cache, | ||
| 41 | const GraphicsPipelineCacheKey& key, | ||
| 42 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | ||
| 43 | const SPIRVProgram& program); | ||
| 44 | ~VKGraphicsPipeline(); | ||
| 45 | |||
| 46 | vk::DescriptorSet CommitDescriptorSet(); | ||
| 47 | |||
| 48 | vk::Pipeline GetHandle() const { | ||
| 49 | return *pipeline; | ||
| 50 | } | ||
| 51 | |||
| 52 | vk::PipelineLayout GetLayout() const { | ||
| 53 | return *layout; | ||
| 54 | } | ||
| 55 | |||
| 56 | vk::RenderPass GetRenderPass() const { | ||
| 57 | return renderpass; | ||
| 58 | } | ||
| 59 | |||
| 60 | private: | ||
| 61 | UniqueDescriptorSetLayout CreateDescriptorSetLayout( | ||
| 62 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const; | ||
| 63 | |||
| 64 | UniquePipelineLayout CreatePipelineLayout() const; | ||
| 65 | |||
| 66 | UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate( | ||
| 67 | const SPIRVProgram& program) const; | ||
| 68 | |||
| 69 | std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | ||
| 70 | |||
| 71 | UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params, | ||
| 72 | const SPIRVProgram& program) const; | ||
| 73 | |||
| 74 | const VKDevice& device; | ||
| 75 | VKScheduler& scheduler; | ||
| 76 | const FixedPipelineState fixed_state; | ||
| 77 | const u64 hash; | ||
| 78 | |||
| 79 | UniqueDescriptorSetLayout descriptor_set_layout; | ||
| 80 | DescriptorAllocator descriptor_allocator; | ||
| 81 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 82 | UniquePipelineLayout layout; | ||
| 83 | UniqueDescriptorUpdateTemplate descriptor_template; | ||
| 84 | std::vector<UniqueShaderModule> modules; | ||
| 85 | |||
| 86 | vk::RenderPass renderpass; | ||
| 87 | UniquePipeline pipeline; | ||
| 88 | }; | ||
| 89 | |||
| 90 | } // namespace Vulkan | ||
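The key type in this header is SPIRVProgram, an array holding one optional SPIRVShader per Maxwell shader stage: stages the guest leaves disabled stay as empty optionals, and both CreateShaderModules and CreatePipeline simply skip them. The following small sketch shows that pattern with placeholder types; StageCode, the stage count and the entry-point strings are illustrative, not part of the header.

#include <array>
#include <cstdio>
#include <optional>
#include <string>
#include <vector>

// Placeholder for SPIRVShader.
struct StageCode {
    std::string entry_point;
};

constexpr std::size_t MaxStages = 5; // vertex, tess control, tess eval, geometry, fragment

using Program = std::array<std::optional<StageCode>, MaxStages>;

// Walk the per-stage array and collect data only for enabled stages,
// mirroring how shader-stage create infos are built from SPIRVProgram.
std::vector<std::string> CollectEntryPoints(const Program& program) {
    std::vector<std::string> entry_points;
    for (std::size_t stage = 0; stage < MaxStages; ++stage) {
        if (!program[stage]) {
            continue; // Stage is disabled for this pipeline.
        }
        entry_points.push_back(program[stage]->entry_point);
    }
    return entry_points;
}

int main() {
    Program program{};
    program[0] = StageCode{"vertex_main"};   // Only vertex...
    program[4] = StageCode{"fragment_main"}; // ...and fragment are enabled.
    for (const auto& name : CollectEntryPoints(program)) {
        std::printf("%s\n", name.c_str());
    }
    return 0;
}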
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 0451babbf..9cc9979d0 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include <tuple> | 7 | #include <tuple> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | |||
| 9 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 10 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -16,34 +17,32 @@ | |||
| 16 | 17 | ||
| 17 | namespace Vulkan { | 18 | namespace Vulkan { |
| 18 | 19 | ||
| 19 | // TODO(Rodrigo): Fine tune this number | 20 | namespace { |
| 20 | constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024; | 21 | |
| 22 | u64 GetAllocationChunkSize(u64 required_size) { | ||
| 23 | static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20}; | ||
| 24 | auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size); | ||
| 25 | return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20); | ||
| 26 | } | ||
| 27 | |||
| 28 | } // Anonymous namespace | ||
| 21 | 29 | ||
| 22 | class VKMemoryAllocation final { | 30 | class VKMemoryAllocation final { |
| 23 | public: | 31 | public: |
| 24 | explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, | 32 | explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, |
| 25 | vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type) | 33 | vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) |
| 26 | : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size}, | 34 | : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, |
| 27 | shifted_type{ShiftType(type)}, is_mappable{properties & | 35 | shifted_type{ShiftType(type)} {} |
| 28 | vk::MemoryPropertyFlagBits::eHostVisible} { | ||
| 29 | if (is_mappable) { | ||
| 30 | const auto dev = device.GetLogical(); | ||
| 31 | const auto& dld = device.GetDispatchLoader(); | ||
| 32 | base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld)); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | 36 | ||
| 36 | ~VKMemoryAllocation() { | 37 | ~VKMemoryAllocation() { |
| 37 | const auto dev = device.GetLogical(); | 38 | const auto dev = device.GetLogical(); |
| 38 | const auto& dld = device.GetDispatchLoader(); | 39 | const auto& dld = device.GetDispatchLoader(); |
| 39 | if (is_mappable) | ||
| 40 | dev.unmapMemory(memory, dld); | ||
| 41 | dev.free(memory, nullptr, dld); | 40 | dev.free(memory, nullptr, dld); |
| 42 | } | 41 | } |
| 43 | 42 | ||
| 44 | VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { | 43 | VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { |
| 45 | auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size), | 44 | auto found = TryFindFreeSection(free_iterator, allocation_size, |
| 46 | static_cast<u64>(alignment)); | 45 | static_cast<u64>(commit_size), static_cast<u64>(alignment)); |
| 47 | if (!found) { | 46 | if (!found) { |
| 48 | found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), | 47 | found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), |
| 49 | static_cast<u64>(alignment)); | 48 | static_cast<u64>(alignment)); |
| @@ -52,8 +51,7 @@ public: | |||
| 52 | return nullptr; | 51 | return nullptr; |
| 53 | } | 52 | } |
| 54 | } | 53 | } |
| 55 | u8* address = is_mappable ? base_address + *found : nullptr; | 54 | auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found, |
| 56 | auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found, | ||
| 57 | *found + commit_size); | 55 | *found + commit_size); |
| 58 | commits.push_back(commit.get()); | 56 | commits.push_back(commit.get()); |
| 59 | 57 | ||
| @@ -65,12 +63,10 @@ public: | |||
| 65 | 63 | ||
| 66 | void Free(const VKMemoryCommitImpl* commit) { | 64 | void Free(const VKMemoryCommitImpl* commit) { |
| 67 | ASSERT(commit); | 65 | ASSERT(commit); |
| 68 | const auto it = | 66 | |
| 69 | std::find_if(commits.begin(), commits.end(), | 67 | const auto it = std::find(std::begin(commits), std::end(commits), commit); |
| 70 | [&](const auto& stored_commit) { return stored_commit == commit; }); | ||
| 71 | if (it == commits.end()) { | 68 | if (it == commits.end()) { |
| 72 | LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!"); | 69 | UNREACHABLE_MSG("Freeing unallocated commit!"); |
| 73 | UNREACHABLE(); | ||
| 74 | return; | 70 | return; |
| 75 | } | 71 | } |
| 76 | commits.erase(it); | 72 | commits.erase(it); |
| @@ -88,11 +84,11 @@ private: | |||
| 88 | } | 84 | } |
| 89 | 85 | ||
| 90 | /// A memory allocator, it may return a free region between "start" and "end" with the solicited | 86 | /// A memory allocator, it may return a free region between "start" and "end" with the solicited |
| 91 | /// requeriments. | 87 | /// requirements. |
| 92 | std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { | 88 | std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { |
| 93 | u64 iterator = start; | 89 | u64 iterator = Common::AlignUp(start, alignment); |
| 94 | while (iterator + size < end) { | 90 | while (iterator + size <= end) { |
| 95 | const u64 try_left = Common::AlignUp(iterator, alignment); | 91 | const u64 try_left = iterator; |
| 96 | const u64 try_right = try_left + size; | 92 | const u64 try_right = try_left + size; |
| 97 | 93 | ||
| 98 | bool overlap = false; | 94 | bool overlap = false; |
| @@ -100,7 +96,7 @@ private: | |||
| 100 | const auto [commit_left, commit_right] = commit->interval; | 96 | const auto [commit_left, commit_right] = commit->interval; |
| 101 | if (try_left < commit_right && commit_left < try_right) { | 97 | if (try_left < commit_right && commit_left < try_right) { |
| 102 | // There's an overlap, continue the search where the overlapping commit ends. | 98 | // There's an overlap, continue the search where the overlapping commit ends. |
| 103 | iterator = commit_right; | 99 | iterator = Common::AlignUp(commit_right, alignment); |
| 104 | overlap = true; | 100 | overlap = true; |
| 105 | break; | 101 | break; |
| 106 | } | 102 | } |
| @@ -110,6 +106,7 @@ private: | |||
| 110 | return try_left; | 106 | return try_left; |
| 111 | } | 107 | } |
| 112 | } | 108 | } |
| 109 | |||
| 113 | // No free regions were found, return an empty optional. | 110 | // No free regions were found, return an empty optional. |
| 114 | return std::nullopt; | 111 | return std::nullopt; |
| 115 | } | 112 | } |
| @@ -117,12 +114,8 @@ private: | |||
| 117 | const VKDevice& device; ///< Vulkan device. | 114 | const VKDevice& device; ///< Vulkan device. |
| 118 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. | 115 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. |
| 119 | const vk::MemoryPropertyFlags properties; ///< Vulkan properties. | 116 | const vk::MemoryPropertyFlags properties; ///< Vulkan properties. |
| 120 | const u64 alloc_size; ///< Size of this allocation. | 117 | const u64 allocation_size; ///< Size of this allocation. |
| 121 | const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. | 118 | const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. |
| 122 | const bool is_mappable; ///< Whether the allocation is mappable. | ||
| 123 | |||
| 124 | /// Base address of the mapped pointer. | ||
| 125 | u8* base_address{}; | ||
| 126 | 119 | ||
| 127 | /// Hints where the next free region is likely going to be. | 120 | /// Hints where the next free region is likely going to be. |
| 128 | u64 free_iterator{}; | 121 | u64 free_iterator{}; |
| @@ -132,13 +125,15 @@ private: | |||
| 132 | }; | 125 | }; |
| 133 | 126 | ||
| 134 | VKMemoryManager::VKMemoryManager(const VKDevice& device) | 127 | VKMemoryManager::VKMemoryManager(const VKDevice& device) |
| 135 | : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())}, | 128 | : device{device}, properties{device.GetPhysical().getMemoryProperties( |
| 136 | is_memory_unified{GetMemoryUnified(props)} {} | 129 | device.GetDispatchLoader())}, |
| 130 | is_memory_unified{GetMemoryUnified(properties)} {} | ||
| 137 | 131 | ||
| 138 | VKMemoryManager::~VKMemoryManager() = default; | 132 | VKMemoryManager::~VKMemoryManager() = default; |
| 139 | 133 | ||
| 140 | VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) { | 134 | VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, |
| 141 | ASSERT(reqs.size < ALLOC_CHUNK_SIZE); | 135 | bool host_visible) { |
| 136 | const u64 chunk_size = GetAllocationChunkSize(requirements.size); | ||
| 142 | 137 | ||
| 143 | // When a host visible commit is asked, search for host visible and coherent, otherwise search | 138 | // When a host visible commit is asked, search for host visible and coherent, otherwise search |
| 144 | // for a fast device local type. | 139 | // for a fast device local type. |
| @@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool | |||
| 147 | ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | 142 | ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent |
| 148 | : vk::MemoryPropertyFlagBits::eDeviceLocal; | 143 | : vk::MemoryPropertyFlagBits::eDeviceLocal; |
| 149 | 144 | ||
| 150 | const auto TryCommit = [&]() -> VKMemoryCommit { | 145 | if (auto commit = TryAllocCommit(requirements, wanted_properties)) { |
| 151 | for (auto& alloc : allocs) { | ||
| 152 | if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits)) | ||
| 153 | continue; | ||
| 154 | |||
| 155 | if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) { | ||
| 156 | return commit; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | return {}; | ||
| 160 | }; | ||
| 161 | |||
| 162 | if (auto commit = TryCommit(); commit) { | ||
| 163 | return commit; | 146 | return commit; |
| 164 | } | 147 | } |
| 165 | 148 | ||
| 166 | // Commit has failed, allocate more memory. | 149 | // Commit has failed, allocate more memory. |
| 167 | if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) { | 150 | if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) { |
| 168 | // TODO(Rodrigo): Try to use host memory. | 151 | // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory. |
| 169 | LOG_CRITICAL(Render_Vulkan, "Ran out of memory!"); | 152 | // Allocation has failed, panic. |
| 170 | UNREACHABLE(); | 153 | UNREACHABLE_MSG("Ran out of VRAM!"); |
| 154 | return {}; | ||
| 171 | } | 155 | } |
| 172 | 156 | ||
| 173 | // Commit again, this time it won't fail since there's a fresh allocation above. If it does, | 157 | // Commit again, this time it won't fail since there's a fresh allocation above. If it does, |
| 174 | // there's a bug. | 158 | // there's a bug. |
| 175 | auto commit = TryCommit(); | 159 | auto commit = TryAllocCommit(requirements, wanted_properties); |
| 176 | ASSERT(commit); | 160 | ASSERT(commit); |
| 177 | return commit; | 161 | return commit; |
| 178 | } | 162 | } |
| @@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool | |||
| 180 | VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { | 164 | VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { |
| 181 | const auto dev = device.GetLogical(); | 165 | const auto dev = device.GetLogical(); |
| 182 | const auto& dld = device.GetDispatchLoader(); | 166 | const auto& dld = device.GetDispatchLoader(); |
| 183 | const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld); | 167 | auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); |
| 184 | auto commit = Commit(requeriments, host_visible); | ||
| 185 | dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); | 168 | dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); |
| 186 | return commit; | 169 | return commit; |
| 187 | } | 170 | } |
| @@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { | |||
| 189 | VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { | 172 | VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { |
| 190 | const auto dev = device.GetLogical(); | 173 | const auto dev = device.GetLogical(); |
| 191 | const auto& dld = device.GetDispatchLoader(); | 174 | const auto& dld = device.GetDispatchLoader(); |
| 192 | const auto requeriments = dev.getImageMemoryRequirements(image, dld); | 175 | auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); |
| 193 | auto commit = Commit(requeriments, host_visible); | ||
| 194 | dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); | 176 | dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); |
| 195 | return commit; | 177 | return commit; |
| 196 | } | 178 | } |
| 197 | 179 | ||
| 198 | bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, | 180 | bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, |
| 199 | u64 size) { | 181 | u64 size) { |
| 200 | const u32 type = [&]() { | 182 | const u32 type = [&] { |
| 201 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | 183 | for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { |
| 202 | const auto flags = props.memoryTypes[type_index].propertyFlags; | 184 | const auto flags = properties.memoryTypes[type_index].propertyFlags; |
| 203 | if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { | 185 | if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { |
| 204 | // The type matches in type and in the wanted properties. | 186 | // The type matches in type and in the wanted properties. |
| 205 | return type_index; | 187 | return type_index; |
| 206 | } | 188 | } |
| 207 | } | 189 | } |
| 208 | LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!"); | 190 | UNREACHABLE_MSG("Couldn't find a compatible memory type!"); |
| 209 | UNREACHABLE(); | 191 | return 0U; |
| 210 | return 0u; | ||
| 211 | }(); | 192 | }(); |
| 212 | 193 | ||
| 213 | const auto dev = device.GetLogical(); | 194 | const auto dev = device.GetLogical(); |
| @@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 | |||
| 216 | // Try to allocate found type. | 197 | // Try to allocate found type. |
| 217 | const vk::MemoryAllocateInfo memory_ai(size, type); | 198 | const vk::MemoryAllocateInfo memory_ai(size, type); |
| 218 | vk::DeviceMemory memory; | 199 | vk::DeviceMemory memory; |
| 219 | if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); | 200 | if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); |
| 220 | res != vk::Result::eSuccess) { | 201 | res != vk::Result::eSuccess) { |
| 221 | LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); | 202 | LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); |
| 222 | return false; | 203 | return false; |
| 223 | } | 204 | } |
| 224 | allocs.push_back( | 205 | allocations.push_back( |
| 225 | std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); | 206 | std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); |
| 226 | return true; | 207 | return true; |
| 227 | } | 208 | } |
| 228 | 209 | ||
| 229 | /*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) { | 210 | VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, |
| 230 | for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) { | 211 | vk::MemoryPropertyFlags wanted_properties) { |
| 231 | if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { | 212 | for (auto& allocation : allocations) { |
| 213 | if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { | ||
| 214 | continue; | ||
| 215 | } | ||
| 216 | if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) { | ||
| 217 | return commit; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | return {}; | ||
| 221 | } | ||
| 222 | |||
| 223 | /*static*/ bool VKMemoryManager::GetMemoryUnified( | ||
| 224 | const vk::PhysicalDeviceMemoryProperties& properties) { | ||
| 225 | for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { | ||
| 226 | if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { | ||
| 232 | // Memory is considered unified when heaps are device local only. | 227 | // Memory is considered unified when heaps are device local only. |
| 233 | return false; | 228 | return false; |
| 234 | } | 229 | } |
| @@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 | |||
| 236 | return true; | 231 | return true; |
| 237 | } | 232 | } |
| 238 | 233 | ||
| 239 | VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, | 234 | VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, |
| 240 | u8* data, u64 begin, u64 end) | 235 | vk::DeviceMemory memory, u64 begin, u64 end) |
| 241 | : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} | 236 | : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} |
| 242 | 237 | ||
| 243 | VKMemoryCommitImpl::~VKMemoryCommitImpl() { | 238 | VKMemoryCommitImpl::~VKMemoryCommitImpl() { |
| 244 | allocation->Free(this); | 239 | allocation->Free(this); |
| 245 | } | 240 | } |
| 246 | 241 | ||
| 247 | u8* VKMemoryCommitImpl::GetData() const { | 242 | MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { |
| 248 | ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit."); | 243 | const auto dev = device.GetLogical(); |
| 249 | return data; | 244 | const auto address = reinterpret_cast<u8*>( |
| 245 | dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); | ||
| 246 | return MemoryMap{this, address}; | ||
| 247 | } | ||
| 248 | |||
| 249 | void VKMemoryCommitImpl::Unmap() const { | ||
| 250 | const auto dev = device.GetLogical(); | ||
| 251 | dev.unmapMemory(memory, device.GetDispatchLoader()); | ||
| 252 | } | ||
| 253 | |||
| 254 | MemoryMap VKMemoryCommitImpl::Map() const { | ||
| 255 | return Map(interval.second - interval.first); | ||
| 250 | } | 256 | } |
| 251 | 257 | ||
| 252 | } // namespace Vulkan | 258 | } // namespace Vulkan |
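Two pieces of this rework are worth calling out: allocations now pick their size from a small set of buckets, falling back to a 256 MiB-aligned size for oversized requests, and the free-region search aligns its cursor up front and after every collision instead of re-aligning inside the candidate window, which also tightens the old "iterator + size < end" bound to "<=". The sketch below mirrors both helpers over simplified types; the commit list is reduced to a plain vector of [begin, end) intervals, and everything outside the two functions is illustrative.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <optional>
#include <utility>
#include <vector>

using u64 = std::uint64_t;

constexpr u64 AlignUp(u64 value, u64 alignment) {
    return (value + alignment - 1) / alignment * alignment;
}

// Pick a chunk size: the smallest predefined bucket that fits, or the
// requirement rounded up to 256 MiB for oversized requests.
u64 GetAllocationChunkSize(u64 required_size) {
    static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
    const auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
    return it != std::end(sizes) ? *it : AlignUp(required_size, 256ULL << 20);
}

// First-fit search for a free, aligned region of "size" bytes in [start, end),
// skipping over already committed intervals.
std::optional<u64> TryFindFreeSection(const std::vector<std::pair<u64, u64>>& commits, u64 start,
                                      u64 end, u64 size, u64 alignment) {
    u64 iterator = AlignUp(start, alignment);
    while (iterator + size <= end) {
        const u64 try_left = iterator;
        const u64 try_right = try_left + size;

        bool overlap = false;
        for (const auto& [commit_left, commit_right] : commits) {
            if (try_left < commit_right && commit_left < try_right) {
                // Overlaps an existing commit; continue after it, realigned.
                iterator = AlignUp(commit_right, alignment);
                overlap = true;
                break;
            }
        }
        if (!overlap) {
            return try_left;
        }
    }
    return std::nullopt;
}

int main() {
    std::printf("chunk for 20 MiB request: %llu MiB\n",
                static_cast<unsigned long long>(GetAllocationChunkSize(20ULL << 20) >> 20));

    const std::vector<std::pair<u64, u64>> commits{{0, 0x180}, {0x200, 0x300}};
    if (const auto offset = TryFindFreeSection(commits, 0, 0x1000, 0x100, 0x100)) {
        std::printf("free section at offset 0x%llx\n", static_cast<unsigned long long>(*offset));
    }
    return 0;
}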
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 073597b35..cd00bb91b 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| 15 | class MemoryMap; | ||
| 15 | class VKDevice; | 16 | class VKDevice; |
| 16 | class VKMemoryAllocation; | 17 | class VKMemoryAllocation; |
| 17 | class VKMemoryCommitImpl; | 18 | class VKMemoryCommitImpl; |
| @@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>; | |||
| 21 | class VKMemoryManager final { | 22 | class VKMemoryManager final { |
| 22 | public: | 23 | public: |
| 23 | explicit VKMemoryManager(const VKDevice& device); | 24 | explicit VKMemoryManager(const VKDevice& device); |
| 25 | VKMemoryManager(const VKMemoryManager&) = delete; | ||
| 24 | ~VKMemoryManager(); | 26 | ~VKMemoryManager(); |
| 25 | 27 | ||
| 26 | /** | 28 | /** |
| 27 | * Commits a memory with the specified requeriments. | 29 | * Commits a memory with the specified requeriments. |
| 28 | * @param reqs Requeriments returned from a Vulkan call. | 30 | * @param requirements Requirements returned from a Vulkan call. |
| 29 | * @param host_visible Signals the allocator that it *must* use host visible and coherent | 31 | * @param host_visible Signals the allocator that it *must* use host visible and coherent |
| 30 | * memory. When passing false, it will try to allocate device local memory. | 32 | * memory. When passing false, it will try to allocate device local memory. |
| 31 | * @returns A memory commit. | 33 | * @returns A memory commit. |
| 32 | */ | 34 | */ |
| 33 | VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); | 35 | VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); |
| @@ -47,25 +49,35 @@ private: | |||
| 47 | /// Allocates a chunk of memory. | 49 | /// Allocates a chunk of memory. |
| 48 | bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); | 50 | bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); |
| 49 | 51 | ||
| 52 | /// Tries to allocate a memory commit. | ||
| 53 | VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, | ||
| 54 | vk::MemoryPropertyFlags wanted_properties); | ||
| 55 | |||
| 50 | /// Returns true if the device uses a unified memory model. | 56 | /// Returns true if the device uses a unified memory model. |
| 51 | static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props); | 57 | static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); |
| 52 | 58 | ||
| 53 | const VKDevice& device; ///< Device handler. | 59 | const VKDevice& device; ///< Device handler. |
| 54 | const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties. | 60 | const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. |
| 55 | const bool is_memory_unified; ///< True if memory model is unified. | 61 | const bool is_memory_unified; ///< True if memory model is unified. |
| 56 | std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations. | 62 | std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. |
| 57 | }; | 63 | }; |
| 58 | 64 | ||
| 59 | class VKMemoryCommitImpl final { | 65 | class VKMemoryCommitImpl final { |
| 60 | friend VKMemoryAllocation; | 66 | friend VKMemoryAllocation; |
| 67 | friend MemoryMap; | ||
| 61 | 68 | ||
| 62 | public: | 69 | public: |
| 63 | explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, | 70 | explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, |
| 64 | u64 begin, u64 end); | 71 | vk::DeviceMemory memory, u64 begin, u64 end); |
| 65 | ~VKMemoryCommitImpl(); | 72 | ~VKMemoryCommitImpl(); |
| 66 | 73 | ||
| 67 | /// Returns the writeable memory map. The commit has to be mappable. | 74 | /// Maps a memory region and returns a pointer to it. |
| 68 | u8* GetData() const; | 75 | /// It's illegal to have more than one memory map at the same time. |
| 76 | MemoryMap Map(u64 size, u64 offset = 0) const; | ||
| 77 | |||
| 78 | /// Maps the whole commit and returns a pointer to it. | ||
| 79 | /// It's illegal to have more than one memory map at the same time. | ||
| 80 | MemoryMap Map() const; | ||
| 69 | 81 | ||
| 70 | /// Returns the Vulkan memory handler. | 82 | /// Returns the Vulkan memory handler. |
| 71 | vk::DeviceMemory GetMemory() const { | 83 | vk::DeviceMemory GetMemory() const { |
| @@ -78,10 +90,46 @@ public: | |||
| 78 | } | 90 | } |
| 79 | 91 | ||
| 80 | private: | 92 | private: |
| 93 | /// Unmaps memory. | ||
| 94 | void Unmap() const; | ||
| 95 | |||
| 96 | const VKDevice& device; ///< Vulkan device. | ||
| 81 | std::pair<u64, u64> interval{}; ///< Interval where the commit exists. | 97 | std::pair<u64, u64> interval{}; ///< Interval where the commit exists. |
| 82 | vk::DeviceMemory memory; ///< Vulkan device memory handler. | 98 | vk::DeviceMemory memory; ///< Vulkan device memory handler. |
| 83 | VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. | 99 | VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. |
| 84 | u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included. | 100 | }; |
| 101 | |||
| 102 | /// Holds ownership of a memory map. | ||
| 103 | class MemoryMap final { | ||
| 104 | public: | ||
| 105 | explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) | ||
| 106 | : commit{commit}, address{address} {} | ||
| 107 | |||
| 108 | ~MemoryMap() { | ||
| 109 | if (commit) { | ||
| 110 | commit->Unmap(); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | /// Prematurely releases the memory map. | ||
| 115 | void Release() { | ||
| 116 | commit->Unmap(); | ||
| 117 | commit = nullptr; | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Returns the address of the memory map. | ||
| 121 | u8* GetAddress() const { | ||
| 122 | return address; | ||
| 123 | } | ||
| 124 | |||
| 125 | /// Returns the address of the memory map; | ||
| 126 | operator u8*() const { | ||
| 127 | return address; | ||
| 128 | } | ||
| 129 | |||
| 130 | private: | ||
| 131 | const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. | ||
| 132 | u8* address{}; ///< Address to the mapped memory. | ||
| 85 | }; | 133 | }; |
| 86 | 134 | ||
| 87 | } // namespace Vulkan | 135 | } // namespace Vulkan |
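The header replaces the old GetData() accessor with a mapping object: Map() hands back a MemoryMap whose destructor unmaps the commit, and Release() lets a caller unmap early. Below is a small stand-alone sketch of that scope-guard shape; the fake commit, its byte buffer and the console output are placeholders for the Vulkan mapping calls, and only the ownership pattern mirrors the header.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

class FakeCommit;

// RAII mapping handle: the destructor unmaps, like MemoryMap.
class ScopedMap {
public:
    ScopedMap(const FakeCommit* commit, std::uint8_t* address) : commit{commit}, address{address} {}
    ~ScopedMap();

    // Prematurely releases the mapping, like MemoryMap::Release().
    void Release();

    std::uint8_t* GetAddress() const {
        return address;
    }

private:
    const FakeCommit* commit{};
    std::uint8_t* address{};
};

// Placeholder for VKMemoryCommitImpl.
class FakeCommit {
public:
    explicit FakeCommit(std::size_t size) : storage(size) {}

    ScopedMap Map() {
        std::printf("map\n");
        return ScopedMap{this, storage.data()};
    }

    void Unmap() const {
        std::printf("unmap\n");
    }

private:
    std::vector<std::uint8_t> storage;
};

ScopedMap::~ScopedMap() {
    if (commit) {
        commit->Unmap();
    }
}

void ScopedMap::Release() {
    commit->Unmap();
    commit = nullptr;
}

int main() {
    FakeCommit commit(64);
    {
        auto map = commit.Map();               // "map"
        std::memset(map.GetAddress(), 0, 64);  // Host writes through the mapping.
    }                                          // "unmap" runs here automatically.
    return 0;
}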
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp new file mode 100644 index 000000000..48e23d4cd --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -0,0 +1,395 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <memory> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "core/memory.h" | ||
| 13 | #include "video_core/engines/kepler_compute.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/memory_manager.h" | ||
| 16 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 17 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 18 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 21 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 29 | #include "video_core/shader/compiler_settings.h" | ||
| 30 | |||
| 31 | namespace Vulkan { | ||
| 32 | |||
| 33 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | ||
| 34 | |||
| 35 | using Tegra::Engines::ShaderType; | ||
| 36 | |||
| 37 | namespace { | ||
| 38 | |||
| 39 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | ||
| 40 | VideoCommon::Shader::CompileDepth::FullDecompile}; | ||
| 41 | |||
| 42 | /// Gets the address for the specified shader stage program | ||
| 43 | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { | ||
| 44 | const auto& gpu{system.GPU().Maxwell3D()}; | ||
| 45 | const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; | ||
| 46 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; | ||
| 47 | } | ||
| 48 | |||
| 49 | /// Gets if the current instruction offset is a scheduler instruction | ||
| 50 | constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 51 | // Sched instructions appear once every 4 instructions. | ||
| 52 | constexpr std::size_t SchedPeriod = 4; | ||
| 53 | const std::size_t absolute_offset = offset - main_offset; | ||
| 54 | return (absolute_offset % SchedPeriod) == 0; | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Calculates the size of a program stream | ||
| 58 | std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { | ||
| 59 | const std::size_t start_offset = is_compute ? 0 : 10; | ||
| 60 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 61 | // shaders end with one. | ||
| 62 | constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||
| 63 | constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||
| 64 | std::size_t offset = start_offset; | ||
| 65 | while (offset < program.size()) { | ||
| 66 | const u64 instruction = program[offset]; | ||
| 67 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 68 | if ((instruction & mask) == self_jumping_branch) { | ||
| 69 | // End on Maxwell's "nop" instruction | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | if (instruction == 0) { | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | ++offset; | ||
| 77 | } | ||
| 78 | // The last instruction is included in the program size | ||
| 79 | return std::min(offset + 1, program.size()); | ||
| 80 | } | ||
| 81 | |||
| 82 | /// Gets the shader program code from memory for the specified address | ||
| 83 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, | ||
| 84 | const u8* host_ptr, bool is_compute) { | ||
| 85 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | ||
| 86 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { | ||
| 87 | std::fill(program_code.begin(), program_code.end(), 0); | ||
| 88 | return program_code; | ||
| 89 | }); | ||
| 90 | memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), | ||
| 91 | program_code.size() * sizeof(u64)); | ||
| 92 | program_code.resize(CalculateProgramSize(program_code, is_compute)); | ||
| 93 | return program_code; | ||
| 94 | } | ||
| 95 | |||
| 96 | constexpr std::size_t GetStageFromProgram(std::size_t program) { | ||
| 97 | return program == 0 ? 0 : program - 1; | ||
| 98 | } | ||
| 99 | |||
| 100 | constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { | ||
| 101 | return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); | ||
| 102 | } | ||
| 103 | |||
| 104 | ShaderType GetShaderType(Maxwell::ShaderProgram program) { | ||
| 105 | switch (program) { | ||
| 106 | case Maxwell::ShaderProgram::VertexB: | ||
| 107 | return ShaderType::Vertex; | ||
| 108 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 109 | return ShaderType::TesselationControl; | ||
| 110 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 111 | return ShaderType::TesselationEval; | ||
| 112 | case Maxwell::ShaderProgram::Geometry: | ||
| 113 | return ShaderType::Geometry; | ||
| 114 | case Maxwell::ShaderProgram::Fragment: | ||
| 115 | return ShaderType::Fragment; | ||
| 116 | default: | ||
| 117 | UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program)); | ||
| 118 | return ShaderType::Vertex; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | u32 FillDescriptorLayout(const ShaderEntries& entries, | ||
| 123 | std::vector<vk::DescriptorSetLayoutBinding>& bindings, | ||
| 124 | Maxwell::ShaderProgram program_type, u32 base_binding) { | ||
| 125 | const ShaderType stage = GetStageFromProgram(program_type); | ||
| 126 | const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); | ||
| 127 | |||
| 128 | u32 binding = base_binding; | ||
| 129 | const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { | ||
| 130 | for (std::size_t i = 0; i < num_entries; ++i) { | ||
| 131 | bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); | ||
| 132 | } | ||
| 133 | }; | ||
| 134 | AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); | ||
| 135 | AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); | ||
| 136 | AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); | ||
| 137 | AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); | ||
| 138 | AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); | ||
| 139 | return binding; | ||
| 140 | } | ||
| 141 | |||
| 142 | } // Anonymous namespace | ||
| 143 | |||
| 144 | CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, | ||
| 145 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, | ||
| 146 | ProgramCode program_code, u32 main_offset) | ||
| 147 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, | ||
| 148 | program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, | ||
| 149 | shader_ir{this->program_code, main_offset, compiler_settings, locker}, | ||
| 150 | entries{GenerateShaderEntries(shader_ir)} {} | ||
| 151 | |||
| 152 | CachedShader::~CachedShader() = default; | ||
| 153 | |||
| 154 | Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( | ||
| 155 | Core::System& system, Tegra::Engines::ShaderType stage) { | ||
| 156 | if (stage == Tegra::Engines::ShaderType::Compute) { | ||
| 157 | return system.GPU().KeplerCompute(); | ||
| 158 | } else { | ||
| 159 | return system.GPU().Maxwell3D(); | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | ||
| 164 | const VKDevice& device, VKScheduler& scheduler, | ||
| 165 | VKDescriptorPool& descriptor_pool, | ||
| 166 | VKUpdateDescriptorQueue& update_descriptor_queue) | ||
| 167 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, | ||
| 168 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, | ||
| 169 | renderpass_cache(device) {} | ||
| 170 | |||
| 171 | VKPipelineCache::~VKPipelineCache() = default; | ||
| 172 | |||
| 173 | std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | ||
| 174 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 175 | auto& dirty = system.GPU().Maxwell3D().dirty.shaders; | ||
| 176 | if (!dirty) { | ||
| 177 | return last_shaders; | ||
| 178 | } | ||
| 179 | dirty = false; | ||
| 180 | |||
| 181 | std::array<Shader, Maxwell::MaxShaderProgram> shaders; | ||
| 182 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 183 | const auto& shader_config = gpu.regs.shader_config[index]; | ||
| 184 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 185 | |||
| 186 | // Skip stages that are not enabled | ||
| 187 | if (!gpu.regs.IsShaderConfigEnabled(index)) { | ||
| 188 | continue; | ||
| 189 | } | ||
| 190 | |||
| 191 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 192 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | ||
| 193 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | ||
| 194 | auto shader = TryGet(host_ptr); | ||
| 195 | if (!shader) { | ||
| 196 | // No shader found - create a new one | ||
| 197 | constexpr u32 stage_offset = 10; | ||
| 198 | const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); | ||
| 199 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); | ||
| 200 | |||
| 201 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||
| 202 | ASSERT(cpu_addr); | ||
| 203 | |||
| 204 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | ||
| 205 | host_ptr, std::move(code), stage_offset); | ||
| 206 | Register(shader); | ||
| 207 | } | ||
| 208 | shaders[index] = std::move(shader); | ||
| 209 | } | ||
| 210 | return last_shaders = shaders; | ||
| 211 | } | ||
| 212 | |||
| 213 | VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { | ||
| 214 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | ||
| 215 | |||
| 216 | if (last_graphics_pipeline && last_graphics_key == key) { | ||
| 217 | return *last_graphics_pipeline; | ||
| 218 | } | ||
| 219 | last_graphics_key = key; | ||
| 220 | |||
| 221 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||
| 222 | auto& entry = pair->second; | ||
| 223 | if (is_cache_miss) { | ||
| 224 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 225 | const auto [program, bindings] = DecompileShaders(key); | ||
| 226 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | ||
| 227 | update_descriptor_queue, renderpass_cache, key, | ||
| 228 | bindings, program); | ||
| 229 | } | ||
| 230 | return *(last_graphics_pipeline = entry.get()); | ||
| 231 | } | ||
| 232 | |||
| 233 | VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | ||
| 234 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | ||
| 235 | |||
| 236 | const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); | ||
| 237 | auto& entry = pair->second; | ||
| 238 | if (!is_cache_miss) { | ||
| 239 | return *entry; | ||
| 240 | } | ||
| 241 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 242 | |||
| 243 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 244 | const auto program_addr = key.shader; | ||
| 245 | const auto host_ptr = memory_manager.GetPointer(program_addr); | ||
| 246 | |||
| 247 | auto shader = TryGet(host_ptr); | ||
| 248 | if (!shader) { | ||
| 249 | // No shader found - create a new one | ||
| 250 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||
| 251 | ASSERT(cpu_addr); | ||
| 252 | |||
| 253 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); | ||
| 254 | constexpr u32 kernel_main_offset = 0; | ||
| 255 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | ||
| 256 | program_addr, *cpu_addr, host_ptr, std::move(code), | ||
| 257 | kernel_main_offset); | ||
| 258 | Register(shader); | ||
| 259 | } | ||
| 260 | |||
| 261 | Specialization specialization; | ||
| 262 | specialization.workgroup_size = key.workgroup_size; | ||
| 263 | specialization.shared_memory_size = key.shared_memory_size; | ||
| 264 | |||
| 265 | const SPIRVShader spirv_shader{ | ||
| 266 | Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), | ||
| 267 | shader->GetEntries()}; | ||
| 268 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | ||
| 269 | update_descriptor_queue, spirv_shader); | ||
| 270 | return *entry; | ||
| 271 | } | ||
| 272 | |||
| 273 | void VKPipelineCache::Unregister(const Shader& shader) { | ||
| 274 | bool finished = false; | ||
| 275 | const auto Finish = [&] { | ||
| 276 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and | ||
| 277 | // flush. | ||
| 278 | if (finished) { | ||
| 279 | return; | ||
| 280 | } | ||
| 281 | finished = true; | ||
| 282 | scheduler.Finish(); | ||
| 283 | }; | ||
| 284 | |||
| 285 | const GPUVAddr invalidated_addr = shader->GetGpuAddr(); | ||
| 286 | for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { | ||
| 287 | auto& entry = it->first; | ||
| 288 | if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == | ||
| 289 | entry.shaders.end()) { | ||
| 290 | ++it; | ||
| 291 | continue; | ||
| 292 | } | ||
| 293 | Finish(); | ||
| 294 | it = graphics_cache.erase(it); | ||
| 295 | } | ||
| 296 | for (auto it = compute_cache.begin(); it != compute_cache.end();) { | ||
| 297 | auto& entry = it->first; | ||
| 298 | if (entry.shader != invalidated_addr) { | ||
| 299 | ++it; | ||
| 300 | continue; | ||
| 301 | } | ||
| 302 | Finish(); | ||
| 303 | it = compute_cache.erase(it); | ||
| 304 | } | ||
| 305 | |||
| 306 | RasterizerCache::Unregister(shader); | ||
| 307 | } | ||
| 308 | |||
| 309 | std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> | ||
| 310 | VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | ||
| 311 | const auto& fixed_state = key.fixed_state; | ||
| 312 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 313 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 314 | |||
| 315 | Specialization specialization; | ||
| 316 | specialization.primitive_topology = fixed_state.input_assembly.topology; | ||
| 317 | if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { | ||
| 318 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); | ||
| 319 | specialization.point_size = fixed_state.input_assembly.point_size; | ||
| 320 | } | ||
| 321 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | ||
| 322 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; | ||
| 323 | } | ||
| 324 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | ||
| 325 | specialization.tessellation.primitive = fixed_state.tessellation.primitive; | ||
| 326 | specialization.tessellation.spacing = fixed_state.tessellation.spacing; | ||
| 327 | specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; | ||
| 328 | for (const auto& rt : key.renderpass_params.color_attachments) { | ||
| 329 | specialization.enabled_rendertargets.set(rt.index); | ||
| 330 | } | ||
| 331 | |||
| 332 | SPIRVProgram program; | ||
| 333 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | ||
| 334 | |||
| 335 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 336 | const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); | ||
| 337 | |||
| 338 | // Skip stages that are not enabled | ||
| 339 | if (!gpu.regs.IsShaderConfigEnabled(index)) { | ||
| 340 | continue; | ||
| 341 | } | ||
| 342 | |||
| 343 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); | ||
| 344 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 345 | const auto shader = TryGet(host_ptr); | ||
| 346 | ASSERT(shader); | ||
| 347 | |||
| 348 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | ||
| 349 | const auto program_type = GetShaderType(program_enum); | ||
| 350 | const auto& entries = shader->GetEntries(); | ||
| 351 | program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), | ||
| 352 | entries}; | ||
| 353 | |||
| 354 | if (program_enum == Maxwell::ShaderProgram::VertexA) { | ||
| 355 | // VertexB was combined with VertexA, so we skip the VertexB iteration | ||
| 356 | ++index; | ||
| 357 | } | ||
| 358 | |||
| 359 | const u32 old_binding = specialization.base_binding; | ||
| 360 | specialization.base_binding = | ||
| 361 | FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); | ||
| 362 | ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); | ||
| 363 | } | ||
| 364 | return {std::move(program), std::move(bindings)}; | ||
| 365 | } | ||
| 366 | |||
| 367 | void FillDescriptorUpdateTemplateEntries( | ||
| 368 | const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, | ||
| 369 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { | ||
| 370 | static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); | ||
| 371 | const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { | ||
| 372 | const u32 count = static_cast<u32>(count_); | ||
| 373 | if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && | ||
| 374 | device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { | ||
| 375 | // Nvidia has a bug where updating multiple uniform texels at once causes the driver to | ||
| 376 | // crash. | ||
| 377 | for (u32 i = 0; i < count; ++i) { | ||
| 378 | template_entries.emplace_back(binding + i, 0, 1, descriptor_type, | ||
| 379 | offset + i * entry_size, entry_size); | ||
| 380 | } | ||
| 381 | } else if (count != 0) { | ||
| 382 | template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); | ||
| 383 | } | ||
| 384 | offset += count * entry_size; | ||
| 385 | binding += count; | ||
| 386 | }; | ||
| 387 | |||
| 388 | AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); | ||
| 389 | AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); | ||
| 390 | AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); | ||
| 391 | AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); | ||
| 392 | AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); | ||
| 393 | } | ||
| 394 | |||
| 395 | } // namespace Vulkan | ||
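CalculateProgramSize deserves a short standalone illustration: guest shaders carry no explicit length, so the cache scans the instruction stream, ignores the scheduling word Maxwell inserts once every four instructions, and stops at the self-jumping BRA (or a zero word) that terminates Nvidia shaders. The sketch below mirrors that scan over a hand-made stream; the stream contents in main() are made up for the example.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

using u64 = std::uint64_t;

constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
    // Sched words appear once every four instructions.
    constexpr std::size_t SchedPeriod = 4;
    return ((offset - main_offset) % SchedPeriod) == 0;
}

std::size_t CalculateProgramSize(const std::vector<u64>& program, bool is_compute) {
    // Graphics shaders carry a header of ten 64-bit words; compute kernels do not.
    const std::size_t start_offset = is_compute ? 0 : 10;
    // Encoded BRA that jumps to itself; Nvidia shaders end with one.
    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;

    std::size_t offset = start_offset;
    while (offset < program.size()) {
        const u64 instruction = program[offset];
        if (!IsSchedInstruction(offset, start_offset)) {
            if ((instruction & mask) == self_jumping_branch || instruction == 0) {
                break; // Found the terminating "nop" (or zero padding).
            }
        }
        ++offset;
    }
    // The terminating instruction is counted as part of the program.
    return std::min<std::size_t>(offset + 1, program.size());
}

int main() {
    // Fake compute stream: sched word, three instructions, sched word,
    // then the self-jumping BRA terminator followed by stale data.
    const std::vector<u64> stream{
        0x0011223344556677ULL, 0x1, 0x2, 0x3,
        0x0011223344556677ULL, 0xE2400FFFFF07000FULL, 0xDEADBEEFULL,
    };
    std::printf("program size = %zu words\n", CalculateProgramSize(stream, true));
    return 0;
}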
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h new file mode 100644 index 000000000..8678fc9c3 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -0,0 +1,200 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <tuple> | ||
| 11 | #include <type_traits> | ||
| 12 | #include <unordered_map> | ||
| 13 | #include <utility> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include <boost/functional/hash.hpp> | ||
| 17 | |||
| 18 | #include "common/common_types.h" | ||
| 19 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 20 | #include "video_core/engines/maxwell_3d.h" | ||
| 21 | #include "video_core/rasterizer_cache.h" | ||
| 22 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 28 | #include "video_core/shader/const_buffer_locker.h" | ||
| 29 | #include "video_core/shader/shader_ir.h" | ||
| 30 | #include "video_core/surface.h" | ||
| 31 | |||
| 32 | namespace Core { | ||
| 33 | class System; | ||
| 34 | } | ||
| 35 | |||
| 36 | namespace Vulkan { | ||
| 37 | |||
| 38 | class RasterizerVulkan; | ||
| 39 | class VKComputePipeline; | ||
| 40 | class VKDescriptorPool; | ||
| 41 | class VKDevice; | ||
| 42 | class VKFence; | ||
| 43 | class VKScheduler; | ||
| 44 | class VKUpdateDescriptorQueue; | ||
| 45 | |||
| 46 | class CachedShader; | ||
| 47 | using Shader = std::shared_ptr<CachedShader>; | ||
| 48 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 49 | |||
| 50 | using ProgramCode = std::vector<u64>; | ||
| 51 | |||
| 52 | struct GraphicsPipelineCacheKey { | ||
| 53 | FixedPipelineState fixed_state; | ||
| 54 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | ||
| 55 | RenderPassParams renderpass_params; | ||
| 56 | |||
| 57 | std::size_t Hash() const noexcept { | ||
| 58 | std::size_t hash = fixed_state.Hash(); | ||
| 59 | for (const auto& shader : shaders) { | ||
| 60 | boost::hash_combine(hash, shader); | ||
| 61 | } | ||
| 62 | boost::hash_combine(hash, renderpass_params.Hash()); | ||
| 63 | return hash; | ||
| 64 | } | ||
| 65 | |||
| 66 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||
| 67 | return std::tie(fixed_state, shaders, renderpass_params) == | ||
| 68 | std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params); | ||
| 69 | } | ||
| 70 | }; | ||
| 71 | |||
| 72 | struct ComputePipelineCacheKey { | ||
| 73 | GPUVAddr shader{}; | ||
| 74 | u32 shared_memory_size{}; | ||
| 75 | std::array<u32, 3> workgroup_size{}; | ||
| 76 | |||
| 77 | std::size_t Hash() const noexcept { | ||
| 78 | return static_cast<std::size_t>(shader) ^ | ||
| 79 | ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^ | ||
| 80 | static_cast<std::size_t>(workgroup_size[0]) ^ | ||
| 81 | (static_cast<std::size_t>(workgroup_size[1]) << 16) ^ | ||
| 82 | (static_cast<std::size_t>(workgroup_size[2]) << 24); | ||
| 83 | } | ||
| 84 | |||
| 85 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept { | ||
| 86 | return std::tie(shader, shared_memory_size, workgroup_size) == | ||
| 87 | std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size); | ||
| 88 | } | ||
| 89 | }; | ||
| 90 | |||
| 91 | } // namespace Vulkan | ||
| 92 | |||
| 93 | namespace std { | ||
| 94 | |||
| 95 | template <> | ||
| 96 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 97 | std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 98 | return k.Hash(); | ||
| 99 | } | ||
| 100 | }; | ||
| 101 | |||
| 102 | template <> | ||
| 103 | struct hash<Vulkan::ComputePipelineCacheKey> { | ||
| 104 | std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { | ||
| 105 | return k.Hash(); | ||
| 106 | } | ||
| 107 | }; | ||
| 108 | |||
| 109 | } // namespace std | ||
| 110 | |||
| 111 | namespace Vulkan { | ||
| 112 | |||
| 113 | class CachedShader final : public RasterizerCacheObject { | ||
| 114 | public: | ||
| 115 | explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, | ||
| 116 | VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); | ||
| 117 | ~CachedShader(); | ||
| 118 | |||
| 119 | GPUVAddr GetGpuAddr() const { | ||
| 120 | return gpu_addr; | ||
| 121 | } | ||
| 122 | |||
| 123 | VAddr GetCpuAddr() const override { | ||
| 124 | return cpu_addr; | ||
| 125 | } | ||
| 126 | |||
| 127 | std::size_t GetSizeInBytes() const override { | ||
| 128 | return program_code.size() * sizeof(u64); | ||
| 129 | } | ||
| 130 | |||
| 131 | VideoCommon::Shader::ShaderIR& GetIR() { | ||
| 132 | return shader_ir; | ||
| 133 | } | ||
| 134 | |||
| 135 | const VideoCommon::Shader::ShaderIR& GetIR() const { | ||
| 136 | return shader_ir; | ||
| 137 | } | ||
| 138 | |||
| 139 | const ShaderEntries& GetEntries() const { | ||
| 140 | return entries; | ||
| 141 | } | ||
| 142 | |||
| 143 | private: | ||
| 144 | static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system, | ||
| 145 | Tegra::Engines::ShaderType stage); | ||
| 146 | |||
| 147 | GPUVAddr gpu_addr{}; | ||
| 148 | VAddr cpu_addr{}; | ||
| 149 | ProgramCode program_code; | ||
| 150 | VideoCommon::Shader::ConstBufferLocker locker; | ||
| 151 | VideoCommon::Shader::ShaderIR shader_ir; | ||
| 152 | ShaderEntries entries; | ||
| 153 | }; | ||
| 154 | |||
| 155 | class VKPipelineCache final : public RasterizerCache<Shader> { | ||
| 156 | public: | ||
| 157 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | ||
| 158 | const VKDevice& device, VKScheduler& scheduler, | ||
| 159 | VKDescriptorPool& descriptor_pool, | ||
| 160 | VKUpdateDescriptorQueue& update_descriptor_queue); | ||
| 161 | ~VKPipelineCache(); | ||
| 162 | |||
| 163 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); | ||
| 164 | |||
| 165 | VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); | ||
| 166 | |||
| 167 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | ||
| 168 | |||
| 169 | protected: | ||
| 170 | void Unregister(const Shader& shader) override; | ||
| 171 | |||
| 172 | void FlushObjectInner(const Shader& object) override {} | ||
| 173 | |||
| 174 | private: | ||
| 175 | std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders( | ||
| 176 | const GraphicsPipelineCacheKey& key); | ||
| 177 | |||
| 178 | Core::System& system; | ||
| 179 | const VKDevice& device; | ||
| 180 | VKScheduler& scheduler; | ||
| 181 | VKDescriptorPool& descriptor_pool; | ||
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 183 | |||
| 184 | VKRenderPassCache renderpass_cache; | ||
| 185 | |||
| 186 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | ||
| 187 | |||
| 188 | GraphicsPipelineCacheKey last_graphics_key; | ||
| 189 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; | ||
| 190 | |||
| 191 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> | ||
| 192 | graphics_cache; | ||
| 193 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; | ||
| 194 | }; | ||
| 195 | |||
| 196 | void FillDescriptorUpdateTemplateEntries( | ||
| 197 | const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, | ||
| 198 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); | ||
| 199 | |||
| 200 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 000000000..d2c6b1189 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -0,0 +1,1141 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <memory> | ||
| 8 | #include <mutex> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include <boost/container/static_vector.hpp> | ||
| 12 | #include <boost/functional/hash.hpp> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/assert.h" | ||
| 16 | #include "common/logging/log.h" | ||
| 17 | #include "common/microprofile.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "core/memory.h" | ||
| 20 | #include "video_core/engines/kepler_compute.h" | ||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 33 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 34 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 35 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 36 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 37 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 38 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 39 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 40 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 41 | |||
| 42 | namespace Vulkan { | ||
| 43 | |||
| 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 45 | |||
| 46 | MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); | ||
| 47 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | ||
| 48 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); | ||
| 49 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); | ||
| 50 | MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); | ||
| 51 | MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); | ||
| 52 | MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); | ||
| 53 | MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); | ||
| 54 | MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); | ||
| 55 | MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); | ||
| 56 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); | ||
| 57 | |||
| 58 | namespace { | ||
| 59 | |||
| 60 | constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); | ||
| 61 | |||
| 62 | vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { | ||
| 63 | const auto& viewport = regs.viewport_transform[index]; | ||
| 64 | const float x = viewport.translate_x - viewport.scale_x; | ||
| 65 | const float y = viewport.translate_y - viewport.scale_y; | ||
| 66 | const float width = viewport.scale_x * 2.0f; | ||
| 67 | const float height = viewport.scale_y * 2.0f; | ||
| 68 | |||
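| | // reduce_z is 1.0f for the [-1, 1] depth mode, remapping near to translate_z - scale_z. | ||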
| 69 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | ||
| 70 | float near = viewport.translate_z - viewport.scale_z * reduce_z; | ||
| 71 | float far = viewport.translate_z + viewport.scale_z; | ||
| 72 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { | ||
| 73 | near = std::clamp(near, 0.0f, 1.0f); | ||
| 74 | far = std::clamp(far, 0.0f, 1.0f); | ||
| 75 | } | ||
| 76 | |||
| 77 | return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far); | ||
| 78 | } | ||
| 79 | |||
| 80 | constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { | ||
| 81 | const auto& scissor = regs.scissor_test[index]; | ||
| 82 | if (!scissor.enable) { | ||
| 83 | return {{0, 0}, {INT32_MAX, INT32_MAX}}; | ||
| 84 | } | ||
| 85 | const u32 width = scissor.max_x - scissor.min_x; | ||
| 86 | const u32 height = scissor.max_y - scissor.min_y; | ||
| 87 | return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}}; | ||
| 88 | } | ||
| 89 | |||
| 90 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | ||
| 91 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | ||
| 92 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | ||
| 93 | for (std::size_t i = 0; i < std::size(addresses); ++i) { | ||
| 94 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | ||
| 95 | } | ||
| 96 | return addresses; | ||
| 97 | } | ||
| 98 | |||
| 99 | void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, | ||
| 100 | vk::AccessFlags access) { | ||
| 101 | for (auto& [view, layout] : views) { | ||
| 102 | view->Transition(*layout, pipeline_stage, access); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | template <typename Engine, typename Entry> | ||
| 107 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | ||
| 108 | std::size_t stage) { | ||
| 109 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||
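| | // Bindless entries fetch their handle from a const buffer; bound ones query the engine directly. | ||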
| 110 | if (entry.IsBindless()) { | ||
| 111 | const Tegra::Texture::TextureHandle tex_handle = | ||
| 112 | engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); | ||
| 113 | return engine.GetTextureInfo(tex_handle); | ||
| 114 | } | ||
| 115 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||
| 116 | return engine.GetStageTexture(stage_type, entry.GetOffset()); | ||
| 117 | } else { | ||
| 118 | return engine.GetTexture(entry.GetOffset()); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | } // Anonymous namespace | ||
| 123 | |||
| 124 | class BufferBindings final { | ||
| 125 | public: | ||
| 126 | void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { | ||
| 127 | vertex.buffer_ptrs[vertex.num_buffers] = buffer; | ||
| 128 | vertex.offsets[vertex.num_buffers] = offset; | ||
| 129 | ++vertex.num_buffers; | ||
| 130 | } | ||
| 131 | |||
| 132 | void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { | ||
| 133 | index.buffer = buffer; | ||
| 134 | index.offset = offset; | ||
| 135 | index.type = type; | ||
| 136 | } | ||
| 137 | |||
| 138 | void Bind(VKScheduler& scheduler) const { | ||
| 139 | // Use this large switch case to avoid capturing more memory in the record lambda than | ||
| 140 | // we actually need. It looks horrible, but it's the best we can do with standard C++. | ||
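| | // Each case instantiates BindStatic<N>, so the record lambda captures fixed-size arrays. | ||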
| 141 | switch (vertex.num_buffers) { | ||
| 142 | case 0: | ||
| 143 | return BindStatic<0>(scheduler); | ||
| 144 | case 1: | ||
| 145 | return BindStatic<1>(scheduler); | ||
| 146 | case 2: | ||
| 147 | return BindStatic<2>(scheduler); | ||
| 148 | case 3: | ||
| 149 | return BindStatic<3>(scheduler); | ||
| 150 | case 4: | ||
| 151 | return BindStatic<4>(scheduler); | ||
| 152 | case 5: | ||
| 153 | return BindStatic<5>(scheduler); | ||
| 154 | case 6: | ||
| 155 | return BindStatic<6>(scheduler); | ||
| 156 | case 7: | ||
| 157 | return BindStatic<7>(scheduler); | ||
| 158 | case 8: | ||
| 159 | return BindStatic<8>(scheduler); | ||
| 160 | case 9: | ||
| 161 | return BindStatic<9>(scheduler); | ||
| 162 | case 10: | ||
| 163 | return BindStatic<10>(scheduler); | ||
| 164 | case 11: | ||
| 165 | return BindStatic<11>(scheduler); | ||
| 166 | case 12: | ||
| 167 | return BindStatic<12>(scheduler); | ||
| 168 | case 13: | ||
| 169 | return BindStatic<13>(scheduler); | ||
| 170 | case 14: | ||
| 171 | return BindStatic<14>(scheduler); | ||
| 172 | case 15: | ||
| 173 | return BindStatic<15>(scheduler); | ||
| 174 | case 16: | ||
| 175 | return BindStatic<16>(scheduler); | ||
| 176 | case 17: | ||
| 177 | return BindStatic<17>(scheduler); | ||
| 178 | case 18: | ||
| 179 | return BindStatic<18>(scheduler); | ||
| 180 | case 19: | ||
| 181 | return BindStatic<19>(scheduler); | ||
| 182 | case 20: | ||
| 183 | return BindStatic<20>(scheduler); | ||
| 184 | case 21: | ||
| 185 | return BindStatic<21>(scheduler); | ||
| 186 | case 22: | ||
| 187 | return BindStatic<22>(scheduler); | ||
| 188 | case 23: | ||
| 189 | return BindStatic<23>(scheduler); | ||
| 190 | case 24: | ||
| 191 | return BindStatic<24>(scheduler); | ||
| 192 | case 25: | ||
| 193 | return BindStatic<25>(scheduler); | ||
| 194 | case 26: | ||
| 195 | return BindStatic<26>(scheduler); | ||
| 196 | case 27: | ||
| 197 | return BindStatic<27>(scheduler); | ||
| 198 | case 28: | ||
| 199 | return BindStatic<28>(scheduler); | ||
| 200 | case 29: | ||
| 201 | return BindStatic<29>(scheduler); | ||
| 202 | case 30: | ||
| 203 | return BindStatic<30>(scheduler); | ||
| 204 | case 31: | ||
| 205 | return BindStatic<31>(scheduler); | ||
| 206 | case 32: | ||
| 207 | return BindStatic<32>(scheduler); | ||
| 208 | } | ||
| 209 | UNREACHABLE(); | ||
| 210 | } | ||
| 211 | |||
| 212 | private: | ||
| 213 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | ||
| 214 | struct { | ||
| 215 | std::size_t num_buffers = 0; | ||
| 216 | std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; | ||
| 217 | std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; | ||
| 218 | } vertex; | ||
| 219 | |||
| 220 | struct { | ||
| 221 | const vk::Buffer* buffer = nullptr; | ||
| 222 | vk::DeviceSize offset; | ||
| 223 | vk::IndexType type; | ||
| 224 | } index; | ||
| 225 | |||
| 226 | template <std::size_t N> | ||
| 227 | void BindStatic(VKScheduler& scheduler) const { | ||
| 228 | if (index.buffer != nullptr) { | ||
| 229 | BindStatic<N, true>(scheduler); | ||
| 230 | } else { | ||
| 231 | BindStatic<N, false>(scheduler); | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | template <std::size_t N, bool is_indexed> | ||
| 236 | void BindStatic(VKScheduler& scheduler) const { | ||
| 237 | static_assert(N <= Maxwell::NumVertexArrays); | ||
| 238 | if constexpr (N == 0) { | ||
| 239 | return; | ||
| 240 | } | ||
| 241 | |||
| 242 | std::array<vk::Buffer, N> buffers; | ||
| 243 | std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), | ||
| 244 | [](const auto ptr) { return *ptr; }); | ||
| 245 | |||
| 246 | std::array<vk::DeviceSize, N> offsets; | ||
| 247 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | ||
| 248 | |||
| 249 | if constexpr (is_indexed) { | ||
| 250 | // Indexed draw | ||
| 251 | scheduler.Record([buffers, offsets, index_buffer = *index.buffer, | ||
| 252 | index_offset = index.offset, | ||
| 253 | index_type = index.type](auto cmdbuf, auto& dld) { | ||
| 254 | cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); | ||
| 255 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | ||
| 256 | dld); | ||
| 257 | }); | ||
| 258 | } else { | ||
| 259 | // Array draw | ||
| 260 | scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { | ||
| 261 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | ||
| 262 | dld); | ||
| 263 | }); | ||
| 264 | } | ||
| 265 | } | ||
| 266 | }; | ||
| 267 | |||
| 268 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, | ||
| 269 | const vk::DispatchLoaderDynamic& dld) const { | ||
| 270 | if (is_indexed) { | ||
| 271 | cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); | ||
| 272 | } else { | ||
| 273 | cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, | ||
| 278 | VKScreenInfo& screen_info, const VKDevice& device, | ||
| 279 | VKResourceManager& resource_manager, | ||
| 280 | VKMemoryManager& memory_manager, VKScheduler& scheduler) | ||
| 281 | : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, | ||
| 282 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, | ||
| 283 | memory_manager{memory_manager}, scheduler{scheduler}, | ||
| 284 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), | ||
| 285 | update_descriptor_queue(device, scheduler), | ||
| 286 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 287 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 288 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | ||
| 289 | staging_pool), | ||
| 290 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | ||
| 291 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | ||
| 292 | sampler_cache(device) {} | ||
| 293 | |||
| 294 | RasterizerVulkan::~RasterizerVulkan() = default; | ||
| 295 | |||
| 296 | bool RasterizerVulkan::DrawBatch(bool is_indexed) { | ||
| 297 | Draw(is_indexed, false); | ||
| 298 | return true; | ||
| 299 | } | ||
| 300 | |||
| 301 | bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) { | ||
| 302 | Draw(is_indexed, true); | ||
| 303 | return true; | ||
| 304 | } | ||
| 305 | |||
| 306 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | ||
| 307 | MICROPROFILE_SCOPE(Vulkan_Drawing); | ||
| 308 | |||
| 309 | FlushWork(); | ||
| 310 | |||
| 311 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 312 | GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; | ||
| 313 | |||
| 314 | buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); | ||
| 315 | |||
| 316 | BufferBindings buffer_bindings; | ||
| 317 | const DrawParameters draw_params = | ||
| 318 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | ||
| 319 | |||
| 320 | update_descriptor_queue.Acquire(); | ||
| 321 | sampled_views.clear(); | ||
| 322 | image_views.clear(); | ||
| 323 | |||
| 324 | const auto shaders = pipeline_cache.GetShaders(); | ||
| 325 | key.shaders = GetShaderAddresses(shaders); | ||
| 326 | SetupShaderDescriptors(shaders); | ||
| 327 | |||
| 328 | buffer_cache.Unmap(); | ||
| 329 | |||
| 330 | const auto texceptions = UpdateAttachments(); | ||
| 331 | SetupImageTransitions(texceptions, color_attachments, zeta_attachment); | ||
| 332 | |||
| 333 | key.renderpass_params = GetRenderPassParams(texceptions); | ||
| 334 | |||
| 335 | auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); | ||
| 336 | scheduler.BindGraphicsPipeline(pipeline.GetHandle()); | ||
| 337 | |||
| 338 | const auto renderpass = pipeline.GetRenderPass(); | ||
| 339 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | ||
| 340 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); | ||
| 341 | |||
| 342 | UpdateDynamicStates(); | ||
| 343 | |||
| 344 | buffer_bindings.Bind(scheduler); | ||
| 345 | |||
| 346 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 347 | scheduler.Record( | ||
| 348 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | ||
| 349 | } | ||
| 350 | |||
| 351 | const auto pipeline_layout = pipeline.GetLayout(); | ||
| 352 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 353 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | ||
| 354 | if (descriptor_set) { | ||
| 355 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, | ||
| 356 | DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); | ||
| 357 | } | ||
| 358 | draw_params.Draw(cmdbuf, dld); | ||
| 359 | }); | ||
| 360 | } | ||
| 361 | |||
| 362 | void RasterizerVulkan::Clear() { | ||
| 363 | MICROPROFILE_SCOPE(Vulkan_Clearing); | ||
| 364 | |||
| 365 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 366 | if (!gpu.ShouldExecute()) { | ||
| 367 | return; | ||
| 368 | } | ||
| 369 | |||
| 370 | const auto& regs = gpu.regs; | ||
| 371 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | ||
| 372 | regs.clear_buffers.A; | ||
| 373 | const bool use_depth = regs.clear_buffers.Z; | ||
| 374 | const bool use_stencil = regs.clear_buffers.S; | ||
| 375 | if (!use_color && !use_depth && !use_stencil) { | ||
| 376 | return; | ||
| 377 | } | ||
| 378 | // Clearing images requires being outside of a renderpass | ||
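| | // The scheduler ends any renderpass it has open before recording these commands. | ||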
| 379 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 380 | |||
| 381 | // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass. | ||
| 382 | |||
| 383 | if (use_color) { | ||
| 384 | View color_view; | ||
| 385 | { | ||
| 386 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 387 | color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); | ||
| 388 | } | ||
| 389 | |||
| 390 | color_view->Transition(vk::ImageLayout::eTransferDstOptimal, | ||
| 391 | vk::PipelineStageFlagBits::eTransfer, | ||
| 392 | vk::AccessFlagBits::eTransferWrite); | ||
| 393 | |||
| 394 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], | ||
| 395 | regs.clear_color[2], regs.clear_color[3]}; | ||
| 396 | const vk::ClearColorValue clear(clear_color); | ||
| 397 | scheduler.Record([image = color_view->GetImage(), | ||
| 398 | subresource = color_view->GetImageSubresourceRange(), | ||
| 399 | clear](auto cmdbuf, auto& dld) { | ||
| 400 | cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, | ||
| 401 | dld); | ||
| 402 | }); | ||
| 403 | } | ||
| 404 | if (use_depth || use_stencil) { | ||
| 405 | View zeta_surface; | ||
| 406 | { | ||
| 407 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 408 | zeta_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 409 | } | ||
| 410 | |||
| 411 | zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, | ||
| 412 | vk::PipelineStageFlagBits::eTransfer, | ||
| 413 | vk::AccessFlagBits::eTransferWrite); | ||
| 414 | |||
| 415 | const vk::ClearDepthStencilValue clear(regs.clear_depth, | ||
| 416 | static_cast<u32>(regs.clear_stencil)); | ||
| 417 | scheduler.Record([image = zeta_surface->GetImage(), | ||
| 418 | subresource = zeta_surface->GetImageSubresourceRange(), | ||
| 419 | clear](auto cmdbuf, auto& dld) { | ||
| 420 | cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, | ||
| 421 | subresource, dld); | ||
| 422 | }); | ||
| 423 | } | ||
| 424 | } | ||
| 425 | |||
| 426 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | ||
| 427 | MICROPROFILE_SCOPE(Vulkan_Compute); | ||
| 428 | update_descriptor_queue.Acquire(); | ||
| 429 | sampled_views.clear(); | ||
| 430 | image_views.clear(); | ||
| 431 | |||
| 432 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 433 | const ComputePipelineCacheKey key{ | ||
| 434 | code_addr, | ||
| 435 | launch_desc.shared_alloc, | ||
| 436 | {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}}; | ||
| 437 | auto& pipeline = pipeline_cache.GetComputePipeline(key); | ||
| 438 | |||
| 439 | // Compute dispatches can't be executed inside a renderpass | ||
| 440 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 441 | |||
| 442 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | ||
| 443 | |||
| 444 | const auto& entries = pipeline.GetEntries(); | ||
| 445 | SetupComputeConstBuffers(entries); | ||
| 446 | SetupComputeGlobalBuffers(entries); | ||
| 447 | SetupComputeTexelBuffers(entries); | ||
| 448 | SetupComputeTextures(entries); | ||
| 449 | SetupComputeImages(entries); | ||
| 450 | |||
| 451 | buffer_cache.Unmap(); | ||
| 452 | |||
| 453 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, | ||
| 454 | vk::AccessFlagBits::eShaderRead); | ||
| 455 | TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, | ||
| 456 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | ||
| 457 | |||
| 458 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 459 | scheduler.Record( | ||
| 460 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | ||
| 461 | } | ||
| 462 | |||
| 463 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | ||
| 464 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), | ||
| 465 | layout = pipeline.GetLayout(), | ||
| 466 | descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { | ||
| 467 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); | ||
| 468 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, | ||
| 469 | &descriptor_set, 0, nullptr, dld); | ||
| 470 | cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); | ||
| 471 | }); | ||
| 472 | } | ||
| 473 | |||
| 474 | void RasterizerVulkan::FlushAll() {} | ||
| 475 | |||
| 476 | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | ||
| 477 | texture_cache.FlushRegion(addr, size); | ||
| 478 | buffer_cache.FlushRegion(addr, size); | ||
| 479 | } | ||
| 480 | |||
| 481 | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | ||
| 482 | texture_cache.InvalidateRegion(addr, size); | ||
| 483 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 484 | buffer_cache.InvalidateRegion(addr, size); | ||
| 485 | } | ||
| 486 | |||
| 487 | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||
| 488 | FlushRegion(addr, size); | ||
| 489 | InvalidateRegion(addr, size); | ||
| 490 | } | ||
| 491 | |||
| 492 | void RasterizerVulkan::FlushCommands() { | ||
| 493 | if (draw_counter > 0) { | ||
| 494 | draw_counter = 0; | ||
| 495 | scheduler.Flush(); | ||
| 496 | } | ||
| 497 | } | ||
| 498 | |||
| 499 | void RasterizerVulkan::TickFrame() { | ||
| 500 | draw_counter = 0; | ||
| 501 | update_descriptor_queue.TickFrame(); | ||
| 502 | buffer_cache.TickFrame(); | ||
| 503 | staging_pool.TickFrame(); | ||
| 504 | } | ||
| 505 | |||
| 506 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||
| 507 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 508 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 509 | texture_cache.DoFermiCopy(src, dst, copy_config); | ||
| 510 | return true; | ||
| 511 | } | ||
| 512 | |||
| 513 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||
| 514 | VAddr framebuffer_addr, u32 pixel_stride) { | ||
| 515 | if (!framebuffer_addr) { | ||
| 516 | return false; | ||
| 517 | } | ||
| 518 | |||
| 519 | const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; | ||
| 520 | const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; | ||
| 521 | if (!surface) { | ||
| 522 | return false; | ||
| 523 | } | ||
| 524 | |||
| 525 | // Verify that the cached surface is the same size and format as the requested framebuffer | ||
| 526 | const auto& params{surface->GetSurfaceParams()}; | ||
| 527 | const auto& pixel_format{ | ||
| 528 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | ||
| 529 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | ||
| 530 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | ||
| 531 | |||
| 532 | screen_info.image = &surface->GetImage(); | ||
| 533 | screen_info.width = params.width; | ||
| 534 | screen_info.height = params.height; | ||
| 535 | screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; | ||
| 536 | return true; | ||
| 537 | } | ||
| 538 | |||
| 539 | void RasterizerVulkan::FlushWork() { | ||
| 540 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; | ||
| 541 | |||
| 542 | // Only check multiples of 8 draws | ||
| 543 | static_assert(DRAWS_TO_DISPATCH % 8 == 0); | ||
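| | // (draw_counter & 7) != 7 skips all but every eighth draw, keeping this check cheap. | ||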
| 544 | if ((++draw_counter & 7) != 7) { | ||
| 545 | return; | ||
| 546 | } | ||
| 547 | |||
| 548 | if (draw_counter < DRAWS_TO_DISPATCH) { | ||
| 549 | // Send recorded tasks to the worker thread | ||
| 550 | scheduler.DispatchWork(); | ||
| 551 | return; | ||
| 552 | } | ||
| 553 | |||
| 554 | // Otherwise (once draw_counter reaches DRAWS_TO_DISPATCH) flush execution. | ||
| 555 | // This submits commands to the Vulkan driver. | ||
| 556 | scheduler.Flush(); | ||
| 557 | draw_counter = 0; | ||
| 558 | } | ||
| 559 | |||
| 560 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | ||
| 561 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 562 | auto& dirty = system.GPU().Maxwell3D().dirty; | ||
| 563 | const bool update_rendertargets = dirty.render_settings; | ||
| 564 | dirty.render_settings = false; | ||
| 565 | |||
| 566 | texture_cache.GuardRenderTargets(true); | ||
| 567 | |||
| 568 | Texceptions texceptions; | ||
| 569 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 570 | if (update_rendertargets) { | ||
| 571 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); | ||
| 572 | } | ||
| 573 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { | ||
| 574 | texceptions.set(rt); | ||
| 575 | } | ||
| 576 | } | ||
| 577 | |||
| 578 | if (update_rendertargets) { | ||
| 579 | zeta_attachment = texture_cache.GetDepthBufferSurface(true); | ||
| 580 | } | ||
| 581 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { | ||
| 582 | texceptions.set(ZETA_TEXCEPTION_INDEX); | ||
| 583 | } | ||
| 584 | |||
| 585 | texture_cache.GuardRenderTargets(false); | ||
| 586 | |||
| 587 | return texceptions; | ||
| 588 | } | ||
| 589 | |||
| 590 | bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { | ||
| 591 | bool overlap = false; | ||
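| | // Render targets also sampled in this draw ("texceptions") must use the general image layout. | ||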
| 592 | for (auto& [view, layout] : sampled_views) { | ||
| 593 | if (!attachment.IsSameSurface(*view)) { | ||
| 594 | continue; | ||
| 595 | } | ||
| 596 | overlap = true; | ||
| 597 | *layout = vk::ImageLayout::eGeneral; | ||
| 598 | } | ||
| 599 | return overlap; | ||
| 600 | } | ||
| 601 | |||
| 602 | std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( | ||
| 603 | vk::RenderPass renderpass) { | ||
| 604 | FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), | ||
| 605 | std::numeric_limits<u32>::max()}; | ||
| 606 | |||
| 607 | const auto MarkAsModifiedAndPush = [&](const View& view) { | ||
| 608 | if (view == nullptr) { | ||
| 609 | return false; | ||
| 610 | } | ||
| 611 | key.views.push_back(view->GetHandle()); | ||
| 612 | key.width = std::min(key.width, view->GetWidth()); | ||
| 613 | key.height = std::min(key.height, view->GetHeight()); | ||
| 614 | return true; | ||
| 615 | }; | ||
| 616 | |||
| 617 | for (std::size_t index = 0; index < std::size(color_attachments); ++index) { | ||
| 618 | if (MarkAsModifiedAndPush(color_attachments[index])) { | ||
| 619 | texture_cache.MarkColorBufferInUse(index); | ||
| 620 | } | ||
| 621 | } | ||
| 622 | if (MarkAsModifiedAndPush(zeta_attachment)) { | ||
| 623 | texture_cache.MarkDepthBufferInUse(); | ||
| 624 | } | ||
| 625 | |||
| 626 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); | ||
| 627 | auto& framebuffer = fbentry->second; | ||
| 628 | if (is_cache_miss) { | ||
| 629 | const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, | ||
| 630 | static_cast<u32>(key.views.size()), | ||
| 631 | key.views.data(), key.width, key.height, 1); | ||
| 632 | const auto dev = device.GetLogical(); | ||
| 633 | const auto& dld = device.GetDispatchLoader(); | ||
| 634 | framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); | ||
| 635 | } | ||
| 636 | |||
| 637 | return {*framebuffer, vk::Extent2D{key.width, key.height}}; | ||
| 638 | } | ||
| 639 | |||
| 640 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | ||
| 641 | BufferBindings& buffer_bindings, | ||
| 642 | bool is_indexed, | ||
| 643 | bool is_instanced) { | ||
| 644 | MICROPROFILE_SCOPE(Vulkan_Geometry); | ||
| 645 | |||
| 646 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 647 | const auto& regs = gpu.regs; | ||
| 648 | |||
| 649 | SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); | ||
| 650 | |||
| 651 | const u32 base_instance = regs.vb_base_instance; | ||
| 652 | const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; | ||
| 653 | const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; | ||
| 654 | const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; | ||
| 655 | |||
| 656 | DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; | ||
| 657 | SetupIndexBuffer(buffer_bindings, params, is_indexed); | ||
| 658 | |||
| 659 | return params; | ||
| 660 | } | ||
| 661 | |||
| 662 | void RasterizerVulkan::SetupShaderDescriptors( | ||
| 663 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | ||
| 664 | texture_cache.GuardSamplers(true); | ||
| 665 | |||
| 666 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 667 | // Skip VertexA stage | ||
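| | // shaders[0] is the VertexA program, so graphics stages are offset by one (VertexB is index 1). | ||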
| 668 | const auto& shader = shaders[stage + 1]; | ||
| 669 | if (!shader) { | ||
| 670 | continue; | ||
| 671 | } | ||
| 672 | const auto& entries = shader->GetEntries(); | ||
| 673 | SetupGraphicsConstBuffers(entries, stage); | ||
| 674 | SetupGraphicsGlobalBuffers(entries, stage); | ||
| 675 | SetupGraphicsTexelBuffers(entries, stage); | ||
| 676 | SetupGraphicsTextures(entries, stage); | ||
| 677 | SetupGraphicsImages(entries, stage); | ||
| 678 | } | ||
| 679 | texture_cache.GuardSamplers(false); | ||
| 680 | } | ||
| 681 | |||
| 682 | void RasterizerVulkan::SetupImageTransitions( | ||
| 683 | Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | ||
| 684 | const View& zeta_attachment) { | ||
| 685 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, | ||
| 686 | vk::AccessFlagBits::eShaderRead); | ||
| 687 | TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, | ||
| 688 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | ||
| 689 | |||
| 690 | for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { | ||
| 691 | const auto color_attachment = color_attachments[rt]; | ||
| 692 | if (color_attachment == nullptr) { | ||
| 693 | continue; | ||
| 694 | } | ||
| 695 | const auto image_layout = | ||
| 696 | texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; | ||
| 697 | color_attachment->Transition( | ||
| 698 | image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, | ||
| 699 | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); | ||
| 700 | } | ||
| 701 | |||
| 702 | if (zeta_attachment != nullptr) { | ||
| 703 | const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] | ||
| 704 | ? vk::ImageLayout::eGeneral | ||
| 705 | : vk::ImageLayout::eDepthStencilAttachmentOptimal; | ||
| 706 | zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, | ||
| 707 | vk::AccessFlagBits::eDepthStencilAttachmentRead | | ||
| 708 | vk::AccessFlagBits::eDepthStencilAttachmentWrite); | ||
| 709 | } | ||
| 710 | } | ||
| 711 | |||
| 712 | void RasterizerVulkan::UpdateDynamicStates() { | ||
| 713 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 714 | UpdateViewportsState(gpu); | ||
| 715 | UpdateScissorsState(gpu); | ||
| 716 | UpdateDepthBias(gpu); | ||
| 717 | UpdateBlendConstants(gpu); | ||
| 718 | UpdateDepthBounds(gpu); | ||
| 719 | UpdateStencilFaces(gpu); | ||
| 720 | } | ||
| 721 | |||
| 722 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | ||
| 723 | BufferBindings& buffer_bindings) { | ||
| 724 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 725 | |||
| 726 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) { | ||
| 727 | const auto& attrib = regs.vertex_attrib_format[index]; | ||
| 728 | if (!attrib.IsValid()) { | ||
| 729 | continue; | ||
| 730 | } | ||
| 731 | |||
| 732 | const auto& buffer = regs.vertex_array[attrib.buffer]; | ||
| 733 | ASSERT(buffer.IsEnabled()); | ||
| 734 | |||
| 735 | vertex_input.attributes[vertex_input.num_attributes++] = | ||
| 736 | FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, | ||
| 737 | attrib.offset); | ||
| 738 | } | ||
| 739 | |||
| 740 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) { | ||
| 741 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 742 | if (!vertex_array.IsEnabled()) { | ||
| 743 | continue; | ||
| 744 | } | ||
| 745 | |||
| 746 | const GPUVAddr start{vertex_array.StartAddress()}; | ||
| 747 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 748 | |||
| 749 | ASSERT(end > start); | ||
| 750 | const std::size_t size{end - start + 1}; | ||
| 751 | const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); | ||
| 752 | |||
| 753 | vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( | ||
| 754 | index, vertex_array.stride, | ||
| 755 | regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); | ||
| 756 | buffer_bindings.AddVertexBinding(buffer, offset); | ||
| 757 | } | ||
| 758 | } | ||
| 759 | |||
| 760 | void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, | ||
| 761 | bool is_indexed) { | ||
| 762 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 763 | switch (regs.draw.topology) { | ||
| 764 | case Maxwell::PrimitiveTopology::Quads: | ||
| 765 | if (params.is_indexed) { | ||
| 766 | UNIMPLEMENTED(); | ||
| 767 | } else { | ||
| 768 | const auto [buffer, offset] = | ||
| 769 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||
| 770 | buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); | ||
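| | // The compute pass expands each quad (4 vertices) into two triangles (6 indices). | ||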
| 771 | params.base_vertex = 0; | ||
| 772 | params.num_vertices = params.num_vertices * 6 / 4; | ||
| 773 | params.is_indexed = true; | ||
| 774 | } | ||
| 775 | break; | ||
| 776 | default: { | ||
| 777 | if (!is_indexed) { | ||
| 778 | break; | ||
| 779 | } | ||
| 780 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 781 | auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 782 | |||
| 783 | auto format = regs.index_array.format; | ||
| 784 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | ||
| 785 | if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { | ||
| 786 | std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); | ||
| 787 | format = Maxwell::IndexFormat::UnsignedShort; | ||
| 788 | } | ||
| 789 | |||
| 790 | buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); | ||
| 791 | break; | ||
| 792 | } | ||
| 793 | } | ||
| 794 | } | ||
| 795 | |||
| 796 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 797 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 798 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 799 | const auto& shader_stage = gpu.state.shader_stages[stage]; | ||
| 800 | for (const auto& entry : entries.const_buffers) { | ||
| 801 | SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); | ||
| 802 | } | ||
| 803 | } | ||
| 804 | |||
| 805 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 806 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 807 | auto& gpu{system.GPU()}; | ||
| 808 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]}; | ||
| 809 | |||
| 810 | for (const auto& entry : entries.global_buffers) { | ||
| 811 | const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); | ||
| 812 | SetupGlobalBuffer(entry, addr); | ||
| 813 | } | ||
| 814 | } | ||
| 815 | |||
| 816 | void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 817 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 818 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 819 | for (const auto& entry : entries.texel_buffers) { | ||
| 820 | const auto image = GetTextureInfo(gpu, entry, stage).tic; | ||
| 821 | SetupTexelBuffer(image, entry); | ||
| 822 | } | ||
| 823 | } | ||
| 824 | |||
| 825 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { | ||
| 826 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 827 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 828 | for (const auto& entry : entries.samplers) { | ||
| 829 | const auto texture = GetTextureInfo(gpu, entry, stage); | ||
| 830 | SetupTexture(texture, entry); | ||
| 831 | } | ||
| 832 | } | ||
| 833 | |||
| 834 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | ||
| 835 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 836 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 837 | for (const auto& entry : entries.images) { | ||
| 838 | const auto tic = GetTextureInfo(gpu, entry, stage).tic; | ||
| 839 | SetupImage(tic, entry); | ||
| 840 | } | ||
| 841 | } | ||
| 842 | |||
| 843 | void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | ||
| 844 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 845 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 846 | for (const auto& entry : entries.const_buffers) { | ||
| 847 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 848 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 849 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 850 | buffer.address = config.Address(); | ||
| 851 | buffer.size = config.size; | ||
| 852 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 853 | SetupConstBuffer(entry, buffer); | ||
| 854 | } | ||
| 855 | } | ||
| 856 | |||
| 857 | void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | ||
| 858 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 859 | const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; | ||
| 860 | for (const auto& entry : entries.global_buffers) { | ||
| 861 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 862 | SetupGlobalBuffer(entry, addr); | ||
| 863 | } | ||
| 864 | } | ||
| 865 | |||
| 866 | void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { | ||
| 867 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 868 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 869 | for (const auto& entry : entries.texel_buffers) { | ||
| 870 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 871 | SetupTexelBuffer(image, entry); | ||
| 872 | } | ||
| 873 | } | ||
| 874 | |||
| 875 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||
| 876 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 877 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 878 | for (const auto& entry : entries.samplers) { | ||
| 879 | const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); | ||
| 880 | SetupTexture(texture, entry); | ||
| 881 | } | ||
| 882 | } | ||
| 883 | |||
| 884 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||
| 885 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 886 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 887 | for (const auto& entry : entries.images) { | ||
| 888 | const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 889 | SetupImage(tic, entry); | ||
| 890 | } | ||
| 891 | } | ||
| 892 | |||
| 893 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 894 | const Tegra::Engines::ConstBufferInfo& buffer) { | ||
| 895 | // Align the size to avoid bad std140 interactions | ||
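| | // 4 * sizeof(float) == 16 bytes (one vec4), matching std140's vec4 alignment. | ||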
| 896 | const std::size_t size = | ||
| 897 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 898 | ASSERT(size <= MaxConstbufferSize); | ||
| 899 | |||
| 900 | const auto [buffer_handle, offset] = | ||
| 901 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); | ||
| 902 | |||
| 903 | update_descriptor_queue.AddBuffer(buffer_handle, offset, size); | ||
| 904 | } | ||
| 905 | |||
| 906 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | ||
| 907 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
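| | // The global buffer entry holds a 64-bit GPU address followed by a 32-bit size. | ||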
| 908 | const auto actual_addr = memory_manager.Read<u64>(address); | ||
| 909 | const auto size = memory_manager.Read<u32>(address + 8); | ||
| 910 | |||
| 911 | if (size == 0) { | ||
| 912 | // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because | ||
| 913 | // Vulkan doesn't like empty buffers. | ||
| 914 | constexpr std::size_t dummy_size = 4; | ||
| 915 | const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); | ||
| 916 | update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); | ||
| 917 | return; | ||
| 918 | } | ||
| 919 | |||
| 920 | const auto [buffer, offset] = buffer_cache.UploadMemory( | ||
| 921 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | ||
| 922 | update_descriptor_queue.AddBuffer(buffer, offset, size); | ||
| 923 | } | ||
| 924 | |||
| 925 | void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, | ||
| 926 | const TexelBufferEntry& entry) { | ||
| 927 | const auto view = texture_cache.GetTextureSurface(tic, entry); | ||
| 928 | ASSERT(view->IsBufferView()); | ||
| 929 | |||
| 930 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 931 | } | ||
| 932 | |||
| 933 | void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, | ||
| 934 | const SamplerEntry& entry) { | ||
| 935 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); | ||
| 936 | ASSERT(!view->IsBufferView()); | ||
| 937 | |||
| 938 | const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, | ||
| 939 | texture.tic.z_source, texture.tic.w_source); | ||
| 940 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | ||
| 941 | update_descriptor_queue.AddSampledImage(sampler, image_view); | ||
| 942 | |||
| 943 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | ||
| 944 | *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; | ||
| 945 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 946 | } | ||
| 947 | |||
| 948 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { | ||
| 949 | auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 950 | |||
| 951 | if (entry.IsWritten()) { | ||
| 952 | view->MarkAsModified(texture_cache.Tick()); | ||
| 953 | } | ||
| 954 | |||
| 955 | UNIMPLEMENTED_IF(tic.IsBuffer()); | ||
| 956 | |||
| 957 | const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 958 | update_descriptor_queue.AddImage(image_view); | ||
| 959 | |||
| 960 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | ||
| 961 | *image_layout = vk::ImageLayout::eGeneral; | ||
| 962 | image_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 963 | } | ||
| 964 | |||
| 965 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { | ||
| 966 | if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { | ||
| 967 | return; | ||
| 968 | } | ||
| 969 | gpu.dirty.viewport_transform = false; | ||
| 970 | const auto& regs = gpu.regs; | ||
| 971 | const std::array viewports{ | ||
| 972 | GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), | ||
| 973 | GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), | ||
| 974 | GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), | ||
| 975 | GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), | ||
| 976 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), | ||
| 977 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | ||
| 978 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | ||
| 979 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | ||
| 980 | scheduler.Record([viewports](auto cmdbuf, auto& dld) { | ||
| 981 | cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld); | ||
| 982 | }); | ||
| 983 | } | ||
| 984 | |||
| 985 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { | ||
| 986 | if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { | ||
| 987 | return; | ||
| 988 | } | ||
| 989 | gpu.dirty.scissor_test = false; | ||
| 990 | const auto& regs = gpu.regs; | ||
| 991 | const std::array scissors = { | ||
| 992 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | ||
| 993 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | ||
| 994 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), | ||
| 995 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | ||
| 996 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | ||
| 997 | GetScissorState(regs, 15)}; | ||
| 998 | scheduler.Record([scissors](auto cmdbuf, auto& dld) { | ||
| 999 | cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld); | ||
| 1000 | }); | ||
| 1001 | } | ||
| 1002 | |||
| 1003 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1004 | if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { | ||
| 1005 | return; | ||
| 1006 | } | ||
| 1007 | gpu.dirty.polygon_offset = false; | ||
| 1008 | const auto& regs = gpu.regs; | ||
| 1009 | scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, | ||
| 1010 | factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { | ||
| 1011 | cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); | ||
| 1012 | }); | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1016 | if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { | ||
| 1017 | return; | ||
| 1018 | } | ||
| 1019 | gpu.dirty.blend_state = false; | ||
| 1020 | const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, | ||
| 1021 | gpu.regs.blend_color.b, gpu.regs.blend_color.a}; | ||
| 1022 | scheduler.Record([blend_color](auto cmdbuf, auto& dld) { | ||
| 1023 | cmdbuf.setBlendConstants(blend_color.data(), dld); | ||
| 1024 | }); | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1028 | if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { | ||
| 1029 | return; | ||
| 1030 | } | ||
| 1031 | gpu.dirty.depth_bounds_values = false; | ||
| 1032 | const auto& regs = gpu.regs; | ||
| 1033 | scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( | ||
| 1034 | auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1038 | if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { | ||
| 1039 | return; | ||
| 1040 | } | ||
| 1041 | gpu.dirty.stencil_test = false; | ||
| 1042 | const auto& regs = gpu.regs; | ||
| 1043 | if (regs.stencil_two_side_enable) { | ||
| 1044 | // Separate values per face | ||
| 1045 | scheduler.Record( | ||
| 1046 | [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, | ||
| 1047 | front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, | ||
| 1048 | back_write_mask = regs.stencil_back_mask, | ||
| 1049 | back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | ||
| 1050 | // Front face | ||
| 1051 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); | ||
| 1052 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); | ||
| 1053 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); | ||
| 1054 | |||
| 1055 | // Back face | ||
| 1056 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); | ||
| 1057 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); | ||
| 1058 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); | ||
| 1059 | }); | ||
| 1060 | } else { | ||
| 1061 | // Front face defines both faces | ||
| 1062 | scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, | ||
| 1063 | test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | ||
| 1064 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); | ||
| 1065 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); | ||
| 1066 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); | ||
| 1067 | }); | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | ||
| 1072 | std::size_t size = CalculateVertexArraysSize(); | ||
| 1073 | if (is_indexed) { | ||
| 1074 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | ||
| 1075 | } | ||
| 1076 | size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||
| 1077 | return size; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | ||
| 1081 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 1082 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | ||
| 1086 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1087 | |||
| 1088 | std::size_t size = 0; | ||
| 1089 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 1090 | // This implementation assumes that all attributes are used in the shader. | ||
| 1091 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | ||
| 1092 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 1093 | DEBUG_ASSERT(end > start); | ||
| 1094 | |||
| 1095 | size += (end - start + 1) * regs.vertex_array[index].enable; | ||
| 1096 | } | ||
| 1097 | return size; | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { | ||
| 1101 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1102 | return static_cast<std::size_t>(regs.index_array.count) * | ||
| 1103 | static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | std::size_t RasterizerVulkan::CalculateConstBufferSize( | ||
| 1107 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | ||
| 1108 | if (entry.IsIndirect()) { | ||
| 1109 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 1110 | return buffer.size; | ||
| 1111 | } else { | ||
| 1112 | // Buffer is accessed directly, upload just what we use | ||
| 1113 | return entry.GetSize(); | ||
| 1114 | } | ||
| 1115 | } | ||
| 1116 | |||
| 1117 | RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { | ||
| 1118 | using namespace VideoCore::Surface; | ||
| 1119 | |||
| 1120 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1121 | RenderPassParams renderpass_params; | ||
| 1122 | |||
| 1123 | for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) { | ||
| 1124 | const auto& rendertarget = regs.rt[rt]; | ||
| 1125 | if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) | ||
| 1126 | continue; | ||
| 1127 | renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ | ||
| 1128 | static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), | ||
| 1129 | texceptions.test(rt)}); | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | renderpass_params.has_zeta = regs.zeta_enable; | ||
| 1133 | if (renderpass_params.has_zeta) { | ||
| 1134 | renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 1135 | renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; | ||
| 1136 | } | ||
| 1137 | |||
| 1138 | return renderpass_params; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h new file mode 100644 index 000000000..7be71e734 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -0,0 +1,263 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <memory> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include <boost/container/static_vector.hpp> | ||
| 14 | #include <boost/functional/hash.hpp> | ||
| 15 | |||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/memory_manager.h" | ||
| 18 | #include "video_core/rasterizer_accelerated.h" | ||
| 19 | #include "video_core/rasterizer_interface.h" | ||
| 20 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 21 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 32 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 33 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 34 | |||
| 35 | namespace Core { | ||
| 36 | class System; | ||
| 37 | } | ||
| 38 | |||
| 39 | namespace Core::Frontend { | ||
| 40 | class EmuWindow; | ||
| 41 | } | ||
| 42 | |||
| 43 | namespace Tegra::Engines { | ||
| 44 | class Maxwell3D; | ||
| 45 | } | ||
| 46 | |||
| 47 | namespace Vulkan { | ||
| 48 | |||
| 49 | struct VKScreenInfo; | ||
| 50 | |||
| 51 | using ImageViewsPack = | ||
| 52 | boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>; | ||
| 53 | |||
| 54 | struct FramebufferCacheKey { | ||
| 55 | vk::RenderPass renderpass{}; | ||
| 56 | u32 width = 0; | ||
| 57 | u32 height = 0; | ||
| 58 | ImageViewsPack views; | ||
| 59 | |||
| 60 | std::size_t Hash() const noexcept { | ||
| 61 | std::size_t hash = 0; | ||
| 62 | boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); | ||
| 63 | for (const auto& view : views) { | ||
| 64 | boost::hash_combine(hash, static_cast<VkImageView>(view)); | ||
| 65 | } | ||
| 66 | boost::hash_combine(hash, width); | ||
| 67 | boost::hash_combine(hash, height); | ||
| 68 | return hash; | ||
| 69 | } | ||
| 70 | |||
| 71 | bool operator==(const FramebufferCacheKey& rhs) const noexcept { | ||
| 72 | return std::tie(renderpass, views, width, height) == | ||
| 73 | std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); | ||
| 74 | } | ||
| 75 | }; | ||
| 76 | |||
| 77 | } // namespace Vulkan | ||
| 78 | |||
| 79 | namespace std { | ||
| 80 | |||
| 81 | template <> | ||
| 82 | struct hash<Vulkan::FramebufferCacheKey> { | ||
| 83 | std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { | ||
| 84 | return k.Hash(); | ||
| 85 | } | ||
| 86 | }; | ||
| 87 | |||
| 88 | } // namespace std | ||
| 89 | |||
| 90 | namespace Vulkan { | ||
| 91 | |||
| 92 | class BufferBindings; | ||
| 93 | |||
| 94 | struct ImageView { | ||
| 95 | View view; | ||
| 96 | vk::ImageLayout* layout = nullptr; | ||
| 97 | }; | ||
| 98 | |||
| 99 | class RasterizerVulkan : public VideoCore::RasterizerAccelerated { | ||
| 100 | public: | ||
| 101 | explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, | ||
| 102 | VKScreenInfo& screen_info, const VKDevice& device, | ||
| 103 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, | ||
| 104 | VKScheduler& scheduler); | ||
| 105 | ~RasterizerVulkan() override; | ||
| 106 | |||
| 107 | bool DrawBatch(bool is_indexed) override; | ||
| 108 | bool DrawMultiBatch(bool is_indexed) override; | ||
| 109 | void Clear() override; | ||
| 110 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 111 | void FlushAll() override; | ||
| 112 | void FlushRegion(CacheAddr addr, u64 size) override; | ||
| 113 | void InvalidateRegion(CacheAddr addr, u64 size) override; | ||
| 114 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||
| 115 | void FlushCommands() override; | ||
| 116 | void TickFrame() override; | ||
| 117 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||
| 118 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 119 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||
| 120 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||
| 121 | u32 pixel_stride) override; | ||
| 122 | |||
| 123 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 124 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 125 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||
| 126 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 127 | |||
| 128 | private: | ||
| 129 | struct DrawParameters { | ||
| 130 | void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; | ||
| 131 | |||
| 132 | u32 base_instance = 0; | ||
| 133 | u32 num_instances = 0; | ||
| 134 | u32 base_vertex = 0; | ||
| 135 | u32 num_vertices = 0; | ||
| 136 | bool is_indexed = false; | ||
| 137 | }; | ||
| 138 | |||
| 139 | using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; | ||
| 140 | |||
| 141 | static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; | ||
| 142 | |||
| 143 | void Draw(bool is_indexed, bool is_instanced); | ||
| 144 | |||
| 145 | void FlushWork(); | ||
| 146 | |||
| 147 | Texceptions UpdateAttachments(); | ||
| 148 | |||
| 149 | std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass); | ||
| 150 | |||
| 151 | /// Sets up geometry buffers and state. | ||
| 152 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | ||
| 153 | bool is_indexed, bool is_instanced); | ||
| 154 | |||
| 155 | /// Setup descriptors in the graphics pipeline. | ||
| 156 | void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); | ||
| 157 | |||
| 158 | void SetupImageTransitions(Texceptions texceptions, | ||
| 159 | const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | ||
| 160 | const View& zeta_attachment); | ||
| 161 | |||
| 162 | void UpdateDynamicStates(); | ||
| 163 | |||
| 164 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | ||
| 165 | |||
| 166 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | ||
| 167 | BufferBindings& buffer_bindings); | ||
| 168 | |||
| 169 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | ||
| 170 | |||
| 171 | /// Setup constant buffers in the graphics pipeline. | ||
| 172 | void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 173 | |||
| 174 | /// Setup global buffers in the graphics pipeline. | ||
| 175 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 176 | |||
| 177 | /// Setup texel buffers in the graphics pipeline. | ||
| 178 | void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 179 | |||
| 180 | /// Setup textures in the graphics pipeline. | ||
| 181 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | ||
| 182 | |||
| 183 | /// Setup images in the graphics pipeline. | ||
| 184 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||
| 185 | |||
| 186 | /// Setup constant buffers in the compute pipeline. | ||
| 187 | void SetupComputeConstBuffers(const ShaderEntries& entries); | ||
| 188 | |||
| 189 | /// Setup global buffers in the compute pipeline. | ||
| 190 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | ||
| 191 | |||
| 192 | /// Setup texel buffers in the compute pipeline. | ||
| 193 | void SetupComputeTexelBuffers(const ShaderEntries& entries); | ||
| 194 | |||
| 195 | /// Setup textures in the compute pipeline. | ||
| 196 | void SetupComputeTextures(const ShaderEntries& entries); | ||
| 197 | |||
| 198 | /// Setup images in the compute pipeline. | ||
| 199 | void SetupComputeImages(const ShaderEntries& entries); | ||
| 200 | |||
| 201 | void SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 202 | const Tegra::Engines::ConstBufferInfo& buffer); | ||
| 203 | |||
| 204 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | ||
| 205 | |||
| 206 | void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); | ||
| 207 | |||
| 208 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); | ||
| 209 | |||
| 210 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | ||
| 211 | |||
| 212 | void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); | ||
| 213 | void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); | ||
| 214 | void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); | ||
| 215 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); | ||
| 216 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); | ||
| 217 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); | ||
| 218 | |||
| 219 | std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | ||
| 220 | |||
| 221 | std::size_t CalculateComputeStreamBufferSize() const; | ||
| 222 | |||
| 223 | std::size_t CalculateVertexArraysSize() const; | ||
| 224 | |||
| 225 | std::size_t CalculateIndexBufferSize() const; | ||
| 226 | |||
| 227 | std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | ||
| 228 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 229 | |||
| 230 | RenderPassParams GetRenderPassParams(Texceptions texceptions) const; | ||
| 231 | |||
| 232 | Core::System& system; | ||
| 233 | Core::Frontend::EmuWindow& render_window; | ||
| 234 | VKScreenInfo& screen_info; | ||
| 235 | const VKDevice& device; | ||
| 236 | VKResourceManager& resource_manager; | ||
| 237 | VKMemoryManager& memory_manager; | ||
| 238 | VKScheduler& scheduler; | ||
| 239 | |||
| 240 | VKStagingBufferPool staging_pool; | ||
| 241 | VKDescriptorPool descriptor_pool; | ||
| 242 | VKUpdateDescriptorQueue update_descriptor_queue; | ||
| 243 | QuadArrayPass quad_array_pass; | ||
| 244 | Uint8Pass uint8_pass; | ||
| 245 | |||
| 246 | VKTextureCache texture_cache; | ||
| 247 | VKPipelineCache pipeline_cache; | ||
| 248 | VKBufferCache buffer_cache; | ||
| 249 | VKSamplerCache sampler_cache; | ||
| 250 | |||
| 251 | std::array<View, Maxwell::NumRenderTargets> color_attachments; | ||
| 252 | View zeta_attachment; | ||
| 253 | |||
| 254 | std::vector<ImageView> sampled_views; | ||
| 255 | std::vector<ImageView> image_views; | ||
| 256 | |||
| 257 | u32 draw_counter = 0; | ||
| 258 | |||
| 259 | // TODO(Rodrigo): Invalidate on image destruction | ||
| 260 | std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; | ||
| 261 | }; | ||
| 262 | |||
| 263 | } // namespace Vulkan | ||
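Editor's note: FramebufferCacheKey can key the framebuffer_cache member above because it supplies both Hash() and operator==, and the std::hash specialization forwards to Hash(). Below is a simplified, self-contained analogue of that pattern, with placeholder members instead of the real Vulkan handles and a plain hash-combine constant standing in for boost::hash_combine.

    #include <cstddef>
    #include <functional>
    #include <unordered_map>

    struct Key {
        std::size_t renderpass = 0; // placeholder for vk::RenderPass
        unsigned width = 0;
        unsigned height = 0;

        std::size_t Hash() const noexcept {
            std::size_t hash = renderpass;
            // Same idea as boost::hash_combine: mix each field into the running hash.
            hash ^= std::hash<unsigned>{}(width) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
            hash ^= std::hash<unsigned>{}(height) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
            return hash;
        }

        bool operator==(const Key& rhs) const noexcept {
            return renderpass == rhs.renderpass && width == rhs.width && height == rhs.height;
        }
    };

    namespace std {
    template <>
    struct hash<Key> {
        std::size_t operator()(const Key& k) const noexcept {
            return k.Hash();
        }
    };
    } // namespace std

    // With the pieces above, an unordered_map keyed on Key works out of the box,
    // which is exactly how framebuffer_cache is declared in the header.
    std::unordered_map<Key, int> example_cache;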
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp new file mode 100644 index 000000000..93f5d7ba0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} | ||
| 17 | |||
| 18 | VKRenderPassCache::~VKRenderPassCache() = default; | ||
| 19 | |||
| 20 | vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { | ||
| 21 | const auto [pair, is_cache_miss] = cache.try_emplace(params); | ||
| 22 | auto& entry = pair->second; | ||
| 23 | if (is_cache_miss) { | ||
| 24 | entry = CreateRenderPass(params); | ||
| 25 | } | ||
| 26 | return *entry; | ||
| 27 | } | ||
| 28 | |||
| 29 | UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { | ||
| 30 | std::vector<vk::AttachmentDescription> descriptors; | ||
| 31 | std::vector<vk::AttachmentReference> color_references; | ||
| 32 | |||
| 33 | for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { | ||
| 34 | const auto attachment = params.color_attachments[rt]; | ||
| 35 | const auto format = | ||
| 36 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format); | ||
| 37 | ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", | ||
| 38 | static_cast<u32>(attachment.pixel_format)); | ||
| 39 | |||
| 40 | // TODO(Rodrigo): Add eMayAlias when it's needed. | ||
| 41 | const auto color_layout = attachment.is_texception | ||
| 42 | ? vk::ImageLayout::eGeneral | ||
| 43 | : vk::ImageLayout::eColorAttachmentOptimal; | ||
| 44 | descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format, | ||
| 45 | vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, | ||
| 46 | vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, | ||
| 47 | vk::AttachmentStoreOp::eDontCare, color_layout, color_layout); | ||
| 48 | color_references.emplace_back(static_cast<u32>(rt), color_layout); | ||
| 49 | } | ||
| 50 | |||
| 51 | vk::AttachmentReference zeta_attachment_ref; | ||
| 52 | if (params.has_zeta) { | ||
| 53 | const auto format = | ||
| 54 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); | ||
| 55 | ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", | ||
| 56 | static_cast<u32>(params.zeta_pixel_format)); | ||
| 57 | |||
| 58 | const auto zeta_layout = params.zeta_texception | ||
| 59 | ? vk::ImageLayout::eGeneral | ||
| 60 | : vk::ImageLayout::eDepthStencilAttachmentOptimal; | ||
| 61 | descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format, | ||
| 62 | vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, | ||
| 63 | vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad, | ||
| 64 | vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout); | ||
| 65 | zeta_attachment_ref = | ||
| 66 | vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout); | ||
| 67 | } | ||
| 68 | |||
| 69 | const vk::SubpassDescription subpass_description( | ||
| 70 | {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()), | ||
| 71 | color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0, | ||
| 72 | nullptr); | ||
| 73 | |||
| 74 | vk::AccessFlags access; | ||
| 75 | vk::PipelineStageFlags stage; | ||
| 76 | if (!color_references.empty()) { | ||
| 77 | access |= | ||
| 78 | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; | ||
| 79 | stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput; | ||
| 80 | } | ||
| 81 | |||
| 82 | if (params.has_zeta) { | ||
| 83 | access |= vk::AccessFlagBits::eDepthStencilAttachmentRead | | ||
| 84 | vk::AccessFlagBits::eDepthStencilAttachmentWrite; | ||
| 85 | stage |= vk::PipelineStageFlagBits::eLateFragmentTests; | ||
| 86 | } | ||
| 87 | |||
| 88 | const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access, | ||
| 89 | {}); | ||
| 90 | |||
| 91 | const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()), | ||
| 92 | descriptors.data(), 1, &subpass_description, 1, | ||
| 93 | &subpass_dependency); | ||
| 94 | |||
| 95 | const auto dev = device.GetLogical(); | ||
| 96 | const auto& dld = device.GetDispatchLoader(); | ||
| 97 | return dev.createRenderPassUnique(create_info, nullptr, dld); | ||
| 98 | } | ||
| 99 | |||
| 100 | } // namespace Vulkan | ||
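Editor's note: GetRenderPass above is a small memoization. try_emplace either finds the existing entry or default-constructs an empty one and reports a miss, and the render pass is only built on a miss. The same pattern in a self-contained toy form, with a string value standing in for UniqueRenderPass:

    #include <string>
    #include <unordered_map>

    // Toy analogue of VKRenderPassCache::GetRenderPass: build the value only when
    // try_emplace signals a cache miss, then hand back the cached entry.
    const std::string& GetOrBuild(std::unordered_map<int, std::string>& cache, int key) {
        const auto [it, is_cache_miss] = cache.try_emplace(key);
        if (is_cache_miss) {
            it->second = "expensive value for key " + std::to_string(key);
        }
        return it->second;
    }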
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h new file mode 100644 index 000000000..b49b2db48 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <tuple> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include <boost/container/static_vector.hpp> | ||
| 12 | #include <boost/functional/hash.hpp> | ||
| 13 | |||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 16 | #include "video_core/surface.h" | ||
| 17 | |||
| 18 | namespace Vulkan { | ||
| 19 | |||
| 20 | class VKDevice; | ||
| 21 | |||
| 22 | // TODO(Rodrigo): Optimize this structure for faster hashing | ||
| 23 | |||
| 24 | struct RenderPassParams { | ||
| 25 | struct ColorAttachment { | ||
| 26 | u32 index = 0; | ||
| 27 | VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid; | ||
| 28 | bool is_texception = false; | ||
| 29 | |||
| 30 | std::size_t Hash() const noexcept { | ||
| 31 | return static_cast<std::size_t>(pixel_format) | | ||
| 32 | static_cast<std::size_t>(is_texception) << 6 | | ||
| 33 | static_cast<std::size_t>(index) << 7; | ||
| 34 | } | ||
| 35 | |||
| 36 | bool operator==(const ColorAttachment& rhs) const noexcept { | ||
| 37 | return std::tie(index, pixel_format, is_texception) == | ||
| 38 | std::tie(rhs.index, rhs.pixel_format, rhs.is_texception); | ||
| 39 | } | ||
| 40 | }; | ||
| 41 | |||
| 42 | boost::container::static_vector<ColorAttachment, | ||
| 43 | Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> | ||
| 44 | color_attachments{}; | ||
| 45 | // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type. | ||
| 46 | VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid; | ||
| 47 | bool has_zeta = false; | ||
| 48 | bool zeta_texception = false; | ||
| 49 | |||
| 50 | std::size_t Hash() const noexcept { | ||
| 51 | std::size_t hash = 0; | ||
| 52 | for (const auto& rt : color_attachments) { | ||
| 53 | boost::hash_combine(hash, rt.Hash()); | ||
| 54 | } | ||
| 55 | boost::hash_combine(hash, zeta_pixel_format); | ||
| 56 | boost::hash_combine(hash, has_zeta); | ||
| 57 | boost::hash_combine(hash, zeta_texception); | ||
| 58 | return hash; | ||
| 59 | } | ||
| 60 | |||
| 61 | bool operator==(const RenderPassParams& rhs) const { | ||
| 62 | return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) == | ||
| 63 | std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta, | ||
| 64 | rhs.zeta_texception); | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace Vulkan | ||
| 69 | |||
| 70 | namespace std { | ||
| 71 | |||
| 72 | template <> | ||
| 73 | struct hash<Vulkan::RenderPassParams> { | ||
| 74 | std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept { | ||
| 75 | return k.Hash(); | ||
| 76 | } | ||
| 77 | }; | ||
| 78 | |||
| 79 | } // namespace std | ||
| 80 | |||
| 81 | namespace Vulkan { | ||
| 82 | |||
| 83 | class VKRenderPassCache final { | ||
| 84 | public: | ||
| 85 | explicit VKRenderPassCache(const VKDevice& device); | ||
| 86 | ~VKRenderPassCache(); | ||
| 87 | |||
| 88 | vk::RenderPass GetRenderPass(const RenderPassParams& params); | ||
| 89 | |||
| 90 | private: | ||
| 91 | UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const; | ||
| 92 | |||
| 93 | const VKDevice& device; | ||
| 94 | std::unordered_map<RenderPassParams, UniqueRenderPass> cache; | ||
| 95 | }; | ||
| 96 | |||
| 97 | } // namespace Vulkan | ||
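Editor's note: ColorAttachment::Hash packs its three fields into one value: the pixel format occupies the low bits, is_texception is shifted to bit 6, and the render target index starts at bit 7. A worked example with illustrative values follows; pixel formats whose numeric value exceeds 63 spill into the higher bits, which only costs hash quality, not correctness, since equality is still decided by operator==.

    // pixel_format = 5, is_texception = true, index = 3:
    //   5 | (1 << 6) | (3 << 7) = 5 + 64 + 384 = 453
    static_assert((5u | (1u << 6) | (3u << 7)) == 453u, "Worked example of the packing");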
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 1ce583f75..0a8ec8398 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp | |||
| @@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) | |||
| 46 | {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), | 46 | {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), |
| 47 | MaxwellToVK::Sampler::Filter(tsc.min_filter), | 47 | MaxwellToVK::Sampler::Filter(tsc.min_filter), |
| 48 | MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), | 48 | MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), |
| 49 | MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter), | 49 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), |
| 50 | MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter), | 50 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), |
| 51 | MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), | 51 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), |
| 52 | has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, | 52 | has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, |
| 53 | MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), | 53 | MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), |
| 54 | tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), | 54 | tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a8baf91de..0cf97cafa 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -954,6 +954,10 @@ private: | |||
| 954 | 954 | ||
| 955 | Expression Visit(const Node& node) { | 955 | Expression Visit(const Node& node) { |
| 956 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 956 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 957 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 958 | [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; | ||
| 959 | ASSERT(type == Type::Void); | ||
| 960 | } | ||
| 957 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); | 961 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); |
| 958 | const auto decompiler = operation_decompilers[operation_index]; | 962 | const auto decompiler = operation_decompilers[operation_index]; |
| 959 | if (decompiler == nullptr) { | 963 | if (decompiler == nullptr) { |
| @@ -1142,6 +1146,10 @@ private: | |||
| 1142 | } | 1146 | } |
| 1143 | 1147 | ||
| 1144 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 1148 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |
| 1149 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1150 | [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; | ||
| 1151 | ASSERT(type == Type::Void); | ||
| 1152 | } | ||
| 1145 | // It's invalid to call conditional on nested nodes, use an operation instead | 1153 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 1146 | const Id true_label = OpLabel(); | 1154 | const Id true_label = OpLabel(); |
| 1147 | const Id skip_label = OpLabel(); | 1155 | const Id skip_label = OpLabel(); |
| @@ -1788,6 +1796,11 @@ private: | |||
| 1788 | return {}; | 1796 | return {}; |
| 1789 | } | 1797 | } |
| 1790 | 1798 | ||
| 1799 | Expression UAtomicAdd(Operation) { | ||
| 1800 | UNIMPLEMENTED(); | ||
| 1801 | return {}; | ||
| 1802 | } | ||
| 1803 | |||
| 1791 | Expression Branch(Operation operation) { | 1804 | Expression Branch(Operation operation) { |
| 1792 | const auto& target = std::get<ImmediateNode>(*operation[0]); | 1805 | const auto& target = std::get<ImmediateNode>(*operation[0]); |
| 1793 | OpStore(jmp_to, Constant(t_uint, target.GetValue())); | 1806 | OpStore(jmp_to, Constant(t_uint, target.GetValue())); |
| @@ -2365,6 +2378,8 @@ private: | |||
| 2365 | &SPIRVDecompiler::AtomicImageXor, | 2378 | &SPIRVDecompiler::AtomicImageXor, |
| 2366 | &SPIRVDecompiler::AtomicImageExchange, | 2379 | &SPIRVDecompiler::AtomicImageExchange, |
| 2367 | 2380 | ||
| 2381 | &SPIRVDecompiler::UAtomicAdd, | ||
| 2382 | |||
| 2368 | &SPIRVDecompiler::Branch, | 2383 | &SPIRVDecompiler::Branch, |
| 2369 | &SPIRVDecompiler::BranchIndirect, | 2384 | &SPIRVDecompiler::BranchIndirect, |
| 2370 | &SPIRVDecompiler::PushFlowStack, | 2385 | &SPIRVDecompiler::PushFlowStack, |
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp new file mode 100644 index 000000000..b97c4cb3d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | #include "common/alignment.h" | ||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||
| 14 | |||
| 15 | namespace Vulkan { | ||
| 16 | |||
| 17 | UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { | ||
| 18 | // Avoid undefined behavior by copying to a staging allocation | ||
| 19 | ASSERT(code_size % sizeof(u32) == 0); | ||
| 20 | const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); | ||
| 21 | std::memcpy(data.get(), code_data, code_size); | ||
| 22 | |||
| 23 | const auto dev = device.GetLogical(); | ||
| 24 | const auto& dld = device.GetDispatchLoader(); | ||
| 25 | const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get()); | ||
| 26 | vk::ShaderModule shader_module; | ||
| 27 | if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) { | ||
| 28 | UNREACHABLE_MSG("Shader module failed to build!"); | ||
| 29 | } | ||
| 30 | |||
| 31 | return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld)); | ||
| 32 | } | ||
| 33 | |||
| 34 | } // namespace Vulkan | ||
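Editor's note: the copy into a freshly allocated u32 array exists because vk::ShaderModuleCreateInfo takes a const u32* to the SPIR-V words, while the incoming data arrives as raw bytes that may not be suitably aligned; reinterpreting the byte pointer directly would be undefined behavior. Note that codeSize is still expressed in bytes. A hypothetical call site, where GetDecompiledSpirv is an assumed helper standing in for the decompiler output:

    // Hypothetical usage; `device` is a constructed VKDevice and the byte vector
    // holds SPIR-V emitted by the shader decompiler.
    const std::vector<u8> spirv = GetDecompiledSpirv();
    const UniqueShaderModule module = BuildShader(device, spirv.size(), spirv.data());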
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h new file mode 100644 index 000000000..c06d65970 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_util.h | |||
| @@ -0,0 +1,17 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | |||
| 13 | class VKDevice; | ||
| 14 | |||
| 15 | UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); | ||
| 16 | |||
| 17 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 02310375f..4d9488f49 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include "video_core/renderer_vulkan/declarations.h" | 14 | #include "video_core/renderer_vulkan/declarations.h" |
| 15 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 16 | 17 | ||
| 17 | namespace Vulkan { | 18 | namespace Vulkan { |
| 18 | 19 | ||
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 62f1427f5..d48d3b44c 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -3,86 +3,144 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <memory> | ||
| 7 | #include <optional> | 6 | #include <optional> |
| 7 | #include <tuple> | ||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/alignment.h" | ||
| 10 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 11 | #include "video_core/renderer_vulkan/declarations.h" | 12 | #include "video_core/renderer_vulkan/declarations.h" |
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | 13 | #include "video_core/renderer_vulkan/vk_device.h" |
| 13 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 14 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 16 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 16 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| 20 | namespace { | ||
| 21 | |||
| 20 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; | 22 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; |
| 21 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; | 23 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; |
| 22 | 24 | ||
| 23 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, | 25 | constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; |
| 24 | VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, | 26 | |
| 25 | vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) | 27 | std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, |
| 26 | : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ | 28 | vk::MemoryPropertyFlags wanted) { |
| 27 | pipeline_stage} { | 29 | const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); |
| 28 | CreateBuffers(memory_manager, usage); | 30 | for (u32 i = 0; i < properties.memoryTypeCount; i++) { |
| 29 | ReserveWatches(WATCHES_INITIAL_RESERVE); | 31 | if (!(filter & (1 << i))) { |
| 32 | continue; | ||
| 33 | } | ||
| 34 | if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { | ||
| 35 | return i; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | return {}; | ||
| 39 | } | ||
| 40 | |||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | ||
| 44 | vk::BufferUsageFlags usage) | ||
| 45 | : device{device}, scheduler{scheduler} { | ||
| 46 | CreateBuffers(usage); | ||
| 47 | ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); | ||
| 48 | ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); | ||
| 30 | } | 49 | } |
| 31 | 50 | ||
| 32 | VKStreamBuffer::~VKStreamBuffer() = default; | 51 | VKStreamBuffer::~VKStreamBuffer() = default; |
| 33 | 52 | ||
| 34 | std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { | 53 | std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { |
| 35 | ASSERT(size <= buffer_size); | 54 | ASSERT(size <= STREAM_BUFFER_SIZE); |
| 36 | mapped_size = size; | 55 | mapped_size = size; |
| 37 | 56 | ||
| 38 | if (offset + size > buffer_size) { | 57 | if (alignment > 0) { |
| 39 | // The buffer would overflow, save the amount of used buffers, signal an invalidation and | 58 | offset = Common::AlignUp(offset, alignment); |
| 40 | // reset the state. | 59 | } |
| 41 | invalidation_mark = used_watches; | 60 | |
| 42 | used_watches = 0; | 61 | WaitPendingOperations(offset); |
| 62 | |||
| 63 | bool invalidated = false; | ||
| 64 | if (offset + size > STREAM_BUFFER_SIZE) { | ||
| 65 | // The buffer would overflow, save the amount of used watches and reset the state. | ||
| 66 | invalidation_mark = current_watch_cursor; | ||
| 67 | current_watch_cursor = 0; | ||
| 43 | offset = 0; | 68 | offset = 0; |
| 69 | |||
| 70 | // Swap watches and reset waiting cursors. | ||
| 71 | std::swap(previous_watches, current_watches); | ||
| 72 | wait_cursor = 0; | ||
| 73 | wait_bound = 0; | ||
| 74 | |||
| 75 | // Ensure that we don't wait for uncommitted fences. | ||
| 76 | scheduler.Flush(); | ||
| 77 | |||
| 78 | invalidated = true; | ||
| 44 | } | 79 | } |
| 45 | 80 | ||
| 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; | 81 | const auto dev = device.GetLogical(); |
| 82 | const auto& dld = device.GetDispatchLoader(); | ||
| 83 | const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld)); | ||
| 84 | return {pointer, offset, invalidated}; | ||
| 47 | } | 85 | } |
| 48 | 86 | ||
| 49 | void VKStreamBuffer::Send(u64 size) { | 87 | void VKStreamBuffer::Unmap(u64 size) { |
| 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | 88 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); |
| 51 | 89 | ||
| 52 | if (invalidation_mark) { | 90 | const auto dev = device.GetLogical(); |
| 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. | 91 | dev.unmapMemory(*memory, device.GetDispatchLoader()); |
| 54 | scheduler.Flush(); | 92 | |
| 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, | 93 | offset += size; |
| 56 | [&](auto& resource) { resource->Wait(); }); | ||
| 57 | invalidation_mark = std::nullopt; | ||
| 58 | } | ||
| 59 | 94 | ||
| 60 | if (used_watches + 1 >= watches.size()) { | 95 | if (current_watch_cursor + 1 >= current_watches.size()) { |
| 61 | // Ensure that there are enough watches. | 96 | // Ensure that there are enough watches. |
| 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); | 97 | ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); |
| 63 | } | 98 | } |
| 64 | // Add a watch for this allocation. | 99 | auto& watch = current_watches[current_watch_cursor++]; |
| 65 | watches[used_watches++]->Watch(scheduler.GetFence()); | 100 | watch.upper_bound = offset; |
| 66 | 101 | watch.fence.Watch(scheduler.GetFence()); | |
| 67 | offset += size; | ||
| 68 | } | 102 | } |
| 69 | 103 | ||
| 70 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { | 104 | void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { |
| 71 | const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, | 105 | const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, |
| 72 | nullptr); | 106 | 0, nullptr); |
| 73 | |||
| 74 | const auto dev = device.GetLogical(); | 107 | const auto dev = device.GetLogical(); |
| 75 | const auto& dld = device.GetDispatchLoader(); | 108 | const auto& dld = device.GetDispatchLoader(); |
| 76 | buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); | 109 | buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); |
| 77 | commit = memory_manager.Commit(*buffer, true); | 110 | |
| 78 | mapped_pointer = commit->GetData(); | 111 | const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); |
| 112 | // Prefer device local host visible allocations (this should hit AMD's pinned memory). | ||
| 113 | auto type = FindMemoryType(device, requirements.memoryTypeBits, | ||
| 114 | vk::MemoryPropertyFlagBits::eHostVisible | | ||
| 115 | vk::MemoryPropertyFlagBits::eHostCoherent | | ||
| 116 | vk::MemoryPropertyFlagBits::eDeviceLocal); | ||
| 117 | if (!type) { | ||
| 118 | // Otherwise search for a host visible allocation. | ||
| 119 | type = FindMemoryType(device, requirements.memoryTypeBits, | ||
| 120 | vk::MemoryPropertyFlagBits::eHostVisible | | ||
| 121 | vk::MemoryPropertyFlagBits::eHostCoherent); | ||
| 122 | ASSERT_MSG(type, "No host visible and coherent memory type found"); | ||
| 123 | } | ||
| 124 | const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); | ||
| 125 | memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); | ||
| 126 | |||
| 127 | dev.bindBufferMemory(*buffer, *memory, 0, dld); | ||
| 79 | } | 128 | } |
| 80 | 129 | ||
| 81 | void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { | 130 | void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { |
| 82 | const std::size_t previous_size = watches.size(); | 131 | watches.resize(watches.size() + grow_size); |
| 83 | watches.resize(previous_size + grow_size); | 132 | } |
| 84 | std::generate(watches.begin() + previous_size, watches.end(), | 133 | |
| 85 | []() { return std::make_unique<VKFenceWatch>(); }); | 134 | void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { |
| 135 | if (!invalidation_mark) { | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { | ||
| 139 | auto& watch = previous_watches[wait_cursor]; | ||
| 140 | wait_bound = watch.upper_bound; | ||
| 141 | watch.fence.Wait(); | ||
| 142 | ++wait_cursor; | ||
| 143 | } | ||
| 86 | } | 144 | } |
| 87 | 145 | ||
| 88 | } // namespace Vulkan | 146 | } // namespace Vulkan |
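Editor's note: with the new interface a caller maps a range, writes into it, and unmaps with the number of bytes actually used; the fence watch is recorded at Unmap time. A hypothetical upload, where stream_buffer and src are assumptions for illustration:

    // Map returns the write pointer, the offset inside the single 256 MiB buffer,
    // and whether the buffer wrapped (invalidating previously returned offsets).
    const auto [pointer, buffer_offset, invalidated] = stream_buffer.Map(src.size(), 4);
    std::memcpy(pointer, src.data(), src.size());
    stream_buffer.Unmap(src.size());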
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 842e54162..187c0c612 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -4,28 +4,24 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | ||
| 8 | #include <optional> | 7 | #include <optional> |
| 9 | #include <tuple> | 8 | #include <tuple> |
| 10 | #include <vector> | 9 | #include <vector> |
| 11 | 10 | ||
| 12 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 13 | #include "video_core/renderer_vulkan/declarations.h" | 12 | #include "video_core/renderer_vulkan/declarations.h" |
| 14 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 15 | 13 | ||
| 16 | namespace Vulkan { | 14 | namespace Vulkan { |
| 17 | 15 | ||
| 18 | class VKDevice; | 16 | class VKDevice; |
| 19 | class VKFence; | 17 | class VKFence; |
| 20 | class VKFenceWatch; | 18 | class VKFenceWatch; |
| 21 | class VKResourceManager; | ||
| 22 | class VKScheduler; | 19 | class VKScheduler; |
| 23 | 20 | ||
| 24 | class VKStreamBuffer { | 21 | class VKStreamBuffer final { |
| 25 | public: | 22 | public: |
| 26 | explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, | 23 | explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, |
| 27 | VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, | 24 | vk::BufferUsageFlags usage); |
| 28 | vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); | ||
| 29 | ~VKStreamBuffer(); | 25 | ~VKStreamBuffer(); |
| 30 | 26 | ||
| 31 | /** | 27 | /** |
| @@ -34,39 +30,47 @@ public: | |||
| 34 | * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer | 30 | * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer |
| 35 | * offset and a boolean that's true when buffer has been invalidated. | 31 | * offset and a boolean that's true when buffer has been invalidated. |
| 36 | */ | 32 | */ |
| 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); | 33 | std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); |
| 38 | 34 | ||
| 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 40 | void Send(u64 size); | 36 | void Unmap(u64 size); |
| 41 | 37 | ||
| 42 | vk::Buffer GetBuffer() const { | 38 | vk::Buffer GetHandle() const { |
| 43 | return *buffer; | 39 | return *buffer; |
| 44 | } | 40 | } |
| 45 | 41 | ||
| 46 | private: | 42 | private: |
| 43 | struct Watch final { | ||
| 44 | VKFenceWatch fence; | ||
| 45 | u64 upper_bound{}; | ||
| 46 | }; | ||
| 47 | |||
| 47 | /// Creates Vulkan buffer handles committing the required memory. | 48 | /// Creates Vulkan buffer handles committing the required memory. |
| 48 | void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); | 49 | void CreateBuffers(vk::BufferUsageFlags usage); |
| 49 | 50 | ||
| 50 | /// Increases the number of watches available. | 51 | /// Increases the number of watches available. |
| 51 | void ReserveWatches(std::size_t grow_size); | 52 | void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); |
| 53 | |||
| 54 | void WaitPendingOperations(u64 requested_upper_bound); | ||
| 52 | 55 | ||
| 53 | const VKDevice& device; ///< Vulkan device manager. | 56 | const VKDevice& device; ///< Vulkan device manager. |
| 54 | VKScheduler& scheduler; ///< Command scheduler. | 57 | VKScheduler& scheduler; ///< Command scheduler. |
| 55 | const u64 buffer_size; ///< Total size of the stream buffer. | ||
| 56 | const vk::AccessFlags access; ///< Access usage of this stream buffer. | 58 | const vk::AccessFlags access; ///< Access usage of this stream buffer. |
| 57 | const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. | 59 | const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. |
| 58 | 60 | ||
| 59 | UniqueBuffer buffer; ///< Mapped buffer. | 61 | UniqueBuffer buffer; ///< Mapped buffer. |
| 60 | VKMemoryCommit commit; ///< Memory commit. | 62 | UniqueDeviceMemory memory; ///< Memory allocation. |
| 61 | u8* mapped_pointer{}; ///< Pointer to the host visible commit | ||
| 62 | 63 | ||
| 63 | u64 offset{}; ///< Buffer iterator. | 64 | u64 offset{}; ///< Buffer iterator. |
| 64 | u64 mapped_size{}; ///< Size reserved for the current copy. | 65 | u64 mapped_size{}; ///< Size reserved for the current copy. |
| 65 | 66 | ||
| 66 | std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches | 67 | std::vector<Watch> current_watches; ///< Watches recorded in the current iteration. |
| 67 | std::size_t used_watches{}; ///< Count of watches, reset on invalidation. | 68 | std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. |
| 68 | std::optional<std::size_t> | 69 | std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle. |
| 69 | invalidation_mark{}; ///< Number of watches used in the current invalidation. | 70 | |
| 71 | std::vector<Watch> previous_watches; ///< Watches used in the previous iteration. | ||
| 72 | std::size_t wait_cursor{}; ///< Last watch being waited for completion. | ||
| 73 | u64 wait_bound{}; ///< Highest offset being watched for completion. | ||
| 70 | }; | 74 | }; |
| 71 | 75 | ||
| 72 | } // namespace Vulkan | 76 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp new file mode 100644 index 000000000..51b0d38a6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -0,0 +1,475 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstring> | ||
| 9 | #include <memory> | ||
| 10 | #include <variant> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | ||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "core/core.h" | ||
| 17 | #include "core/memory.h" | ||
| 18 | #include "video_core/engines/maxwell_3d.h" | ||
| 19 | #include "video_core/morton.h" | ||
| 20 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 21 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 27 | #include "video_core/surface.h" | ||
| 28 | #include "video_core/textures/convert.h" | ||
| 29 | |||
| 30 | namespace Vulkan { | ||
| 31 | |||
| 32 | using VideoCore::MortonSwizzle; | ||
| 33 | using VideoCore::MortonSwizzleMode; | ||
| 34 | |||
| 35 | using Tegra::Texture::SwizzleSource; | ||
| 36 | using VideoCore::Surface::PixelFormat; | ||
| 37 | using VideoCore::Surface::SurfaceCompression; | ||
| 38 | using VideoCore::Surface::SurfaceTarget; | ||
| 39 | |||
| 40 | namespace { | ||
| 41 | |||
| 42 | vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { | ||
| 43 | switch (target) { | ||
| 44 | case SurfaceTarget::Texture1D: | ||
| 45 | case SurfaceTarget::Texture1DArray: | ||
| 46 | return vk::ImageType::e1D; | ||
| 47 | case SurfaceTarget::Texture2D: | ||
| 48 | case SurfaceTarget::Texture2DArray: | ||
| 49 | case SurfaceTarget::TextureCubemap: | ||
| 50 | case SurfaceTarget::TextureCubeArray: | ||
| 51 | return vk::ImageType::e2D; | ||
| 52 | case SurfaceTarget::Texture3D: | ||
| 53 | return vk::ImageType::e3D; | ||
| 54 | } | ||
| 55 | UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); | ||
| 56 | return {}; | ||
| 57 | } | ||
| 58 | |||
| 59 | vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { | ||
| 60 | if (pixel_format < PixelFormat::MaxColorFormat) { | ||
| 61 | return vk::ImageAspectFlagBits::eColor; | ||
| 62 | } else if (pixel_format < PixelFormat::MaxDepthFormat) { | ||
| 63 | return vk::ImageAspectFlagBits::eDepth; | ||
| 64 | } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { | ||
| 65 | return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; | ||
| 66 | } else { | ||
| 67 | UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format)); | ||
| 68 | return vk::ImageAspectFlagBits::eColor; | ||
| 69 | } | ||
| 70 | } | ||
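Editor's note: PixelFormatToImageAspect classifies formats by comparing against the Max* sentinels rather than with a per-format table, so it relies on the PixelFormat enum listing colour formats first, then depth, then depth-stencil. A minimal sketch of that assumption as a compile-time check; the sentinel ordering is inferred from the comparisons above, not taken from this diff.

    static_assert(PixelFormat::MaxColorFormat < PixelFormat::MaxDepthFormat &&
                      PixelFormat::MaxDepthFormat < PixelFormat::MaxDepthStencilFormat,
                  "Aspect classification assumes this ordering of the sentinel values");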
| 71 | |||
| 72 | vk::ImageViewType GetImageViewType(SurfaceTarget target) { | ||
| 73 | switch (target) { | ||
| 74 | case SurfaceTarget::Texture1D: | ||
| 75 | return vk::ImageViewType::e1D; | ||
| 76 | case SurfaceTarget::Texture2D: | ||
| 77 | return vk::ImageViewType::e2D; | ||
| 78 | case SurfaceTarget::Texture3D: | ||
| 79 | return vk::ImageViewType::e3D; | ||
| 80 | case SurfaceTarget::Texture1DArray: | ||
| 81 | return vk::ImageViewType::e1DArray; | ||
| 82 | case SurfaceTarget::Texture2DArray: | ||
| 83 | return vk::ImageViewType::e2DArray; | ||
| 84 | case SurfaceTarget::TextureCubemap: | ||
| 85 | return vk::ImageViewType::eCube; | ||
| 86 | case SurfaceTarget::TextureCubeArray: | ||
| 87 | return vk::ImageViewType::eCubeArray; | ||
| 88 | case SurfaceTarget::TextureBuffer: | ||
| 89 | break; | ||
| 90 | } | ||
| 91 | UNREACHABLE(); | ||
| 92 | return {}; | ||
| 93 | } | ||
| 94 | |||
| 95 | UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { | ||
| 96 | // TODO(Rodrigo): Move texture buffer creation to the buffer cache | ||
| 97 | const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(), | ||
| 98 | vk::BufferUsageFlagBits::eUniformTexelBuffer | | ||
| 99 | vk::BufferUsageFlagBits::eTransferSrc | | ||
| 100 | vk::BufferUsageFlagBits::eTransferDst, | ||
| 101 | vk::SharingMode::eExclusive, 0, nullptr); | ||
| 102 | const auto dev = device.GetLogical(); | ||
| 103 | const auto& dld = device.GetDispatchLoader(); | ||
| 104 | return dev.createBufferUnique(buffer_ci, nullptr, dld); | ||
| 105 | } | ||
| 106 | |||
| 107 | vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, | ||
| 108 | const SurfaceParams& params, | ||
| 109 | vk::Buffer buffer) { | ||
| 110 | ASSERT(params.IsBuffer()); | ||
| 111 | |||
| 112 | const auto format = | ||
| 113 | MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; | ||
| 114 | return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes()); | ||
| 115 | } | ||
| 116 | |||
| 117 | vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { | ||
| 118 | constexpr auto sample_count = vk::SampleCountFlagBits::e1; | ||
| 119 | constexpr auto tiling = vk::ImageTiling::eOptimal; | ||
| 120 | |||
| 121 | ASSERT(!params.IsBuffer()); | ||
| 122 | |||
| 123 | const auto [format, attachable, storage] = | ||
| 124 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); | ||
| 125 | |||
| 126 | auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | | ||
| 127 | vk::ImageUsageFlagBits::eTransferSrc; | ||
| 128 | if (attachable) { | ||
| 129 | image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment | ||
| 130 | : vk::ImageUsageFlagBits::eColorAttachment; | ||
| 131 | } | ||
| 132 | if (storage) { | ||
| 133 | image_usage |= vk::ImageUsageFlagBits::eStorage; | ||
| 134 | } | ||
| 135 | |||
| 136 | vk::ImageCreateFlags flags; | ||
| 137 | vk::Extent3D extent; | ||
| 138 | switch (params.target) { | ||
| 139 | case SurfaceTarget::TextureCubemap: | ||
| 140 | case SurfaceTarget::TextureCubeArray: | ||
| 141 | flags |= vk::ImageCreateFlagBits::eCubeCompatible; | ||
| 142 | [[fallthrough]]; | ||
| 143 | case SurfaceTarget::Texture1D: | ||
| 144 | case SurfaceTarget::Texture1DArray: | ||
| 145 | case SurfaceTarget::Texture2D: | ||
| 146 | case SurfaceTarget::Texture2DArray: | ||
| 147 | extent = vk::Extent3D(params.width, params.height, 1); | ||
| 148 | break; | ||
| 149 | case SurfaceTarget::Texture3D: | ||
| 150 | extent = vk::Extent3D(params.width, params.height, params.depth); | ||
| 151 | break; | ||
| 152 | case SurfaceTarget::TextureBuffer: | ||
| 153 | UNREACHABLE(); | ||
| 154 | } | ||
| 155 | |||
| 156 | return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent, | ||
| 157 | params.num_levels, static_cast<u32>(params.GetNumLayers()), | ||
| 158 | sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0, | ||
| 159 | nullptr, vk::ImageLayout::eUndefined); | ||
| 160 | } | ||
| 161 | |||
| 162 | } // Anonymous namespace | ||
| 163 | |||
| 164 | CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, | ||
| 165 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, | ||
| 166 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool, | ||
| 167 | GPUVAddr gpu_addr, const SurfaceParams& params) | ||
| 168 | : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device}, | ||
| 169 | resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, | ||
| 170 | staging_pool{staging_pool} { | ||
| 171 | if (params.IsBuffer()) { | ||
| 172 | buffer = CreateBuffer(device, params); | ||
| 173 | commit = memory_manager.Commit(*buffer, false); | ||
| 174 | |||
| 175 | const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer); | ||
| 176 | format = buffer_view_ci.format; | ||
| 177 | |||
| 178 | const auto dev = device.GetLogical(); | ||
| 179 | const auto& dld = device.GetDispatchLoader(); | ||
| 180 | buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld); | ||
| 181 | } else { | ||
| 182 | const auto image_ci = GenerateImageCreateInfo(device, params); | ||
| 183 | format = image_ci.format; | ||
| 184 | |||
| 185 | image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); | ||
| 186 | commit = memory_manager.Commit(image->GetHandle(), false); | ||
| 187 | } | ||
| 188 | |||
| 189 | // TODO(Rodrigo): Move this to a virtual function. | ||
| 190 | main_view = CreateViewInner( | ||
| 191 | ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels), | ||
| 192 | true); | ||
| 193 | } | ||
| 194 | |||
| 195 | CachedSurface::~CachedSurface() = default; | ||
| 196 | |||
| 197 | void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | ||
| 198 | // To upload data we have to be outside of a renderpass | ||
| 199 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 200 | |||
| 201 | if (params.IsBuffer()) { | ||
| 202 | UploadBuffer(staging_buffer); | ||
| 203 | } else { | ||
| 204 | UploadImage(staging_buffer); | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | ||
| 209 | UNIMPLEMENTED_IF(params.IsBuffer()); | ||
| 210 | |||
| 211 | if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { | ||
| 212 | LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); | ||
| 213 | } | ||
| 214 | |||
| 215 | // We can't copy images to buffers inside a renderpass | ||
| 216 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 217 | |||
| 218 | FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, | ||
| 219 | vk::ImageLayout::eTransferSrcOptimal); | ||
| 220 | |||
| 221 | const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | ||
| 222 | // TODO(Rodrigo): Do this in a single copy | ||
| 223 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 224 | scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle, | ||
| 225 | copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) { | ||
| 226 | cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy}, | ||
| 227 | dld); | ||
| 228 | }); | ||
| 229 | } | ||
| 230 | scheduler.Finish(); | ||
| 231 | |||
| 232 | // TODO(Rodrigo): Use an internal buffer for staging buffers and avoid this unnecessary memcpy. | ||
| 233 | std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size); | ||
| 234 | } | ||
| 235 | |||
| 236 | void CachedSurface::DecorateSurfaceName() { | ||
| 237 | // TODO(Rodrigo): Add name decorations | ||
| 238 | } | ||
| 239 | |||
| 240 | View CachedSurface::CreateView(const ViewParams& params) { | ||
| 241 | return CreateViewInner(params, false); | ||
| 242 | } | ||
| 243 | |||
| 244 | View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) { | ||
| 245 | // TODO(Rodrigo): Add name decorations | ||
| 246 | return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy); | ||
| 247 | } | ||
| 248 | |||
| 249 | void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { | ||
| 250 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | ||
| 251 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | ||
| 252 | |||
| 253 | scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, | ||
| 254 | size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) { | ||
| 255 | const vk::BufferCopy copy(0, 0, size); | ||
| 256 | cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); | ||
| 257 | |||
| 258 | cmdbuf.pipelineBarrier( | ||
| 259 | vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {}, | ||
| 260 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, | ||
| 261 | vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)}, | ||
| 262 | {}, dld); | ||
| 263 | }); | ||
| 264 | } | ||
| 265 | |||
| 266 | void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { | ||
| 267 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | ||
| 268 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | ||
| 269 | |||
| 270 | FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite, | ||
| 271 | vk::ImageLayout::eTransferDstOptimal); | ||
| 272 | |||
| 273 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 274 | vk::BufferImageCopy copy = GetBufferImageCopy(level); | ||
| 275 | const auto& dld = device.GetDispatchLoader(); | ||
| 276 | if (image->GetAspectMask() == | ||
| 277 | (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { | ||
| 278 | vk::BufferImageCopy depth = copy; | ||
| 279 | vk::BufferImageCopy stencil = copy; | ||
| 280 | depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; | ||
| 281 | stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; | ||
| 282 | scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth, | ||
| 283 | stencil](auto cmdbuf, auto& dld) { | ||
| 284 | cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, | ||
| 285 | {depth, stencil}, dld); | ||
| 286 | }); | ||
| 287 | } else { | ||
| 288 | scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), | ||
| 289 | copy](auto cmdbuf, auto& dld) { | ||
| 290 | cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, | ||
| 291 | {copy}, dld); | ||
| 292 | }); | ||
| 293 | } | ||
| 294 | } | ||
| 295 | } | ||
| 296 | |||
| 297 | vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { | ||
| 298 | const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; | ||
| 299 | const auto compression_type = params.GetCompressionType(); | ||
| 300 | const std::size_t mip_offset = compression_type == SurfaceCompression::Converted | ||
| 301 | ? params.GetConvertedMipmapOffset(level) | ||
| 302 | : params.GetHostMipmapLevelOffset(level); | ||
| 303 | |||
| 304 | return vk::BufferImageCopy( | ||
| 305 | mip_offset, 0, 0, | ||
| 306 | {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0}, | ||
| 307 | {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth}); | ||
| 308 | } | ||
| 309 | |||
| 310 | vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { | ||
| 311 | return {image->GetAspectMask(), 0, params.num_levels, 0, | ||
| 312 | static_cast<u32>(params.GetNumLayers())}; | ||
| 313 | } | ||
| 314 | |||
| 315 | CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, | ||
| 316 | const ViewParams& params, bool is_proxy) | ||
| 317 | : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, | ||
| 318 | image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, | ||
| 319 | aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, | ||
| 320 | base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, | ||
| 321 | num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) | ||
| 322 | : vk::ImageViewType{}} {} | ||
| 323 | |||
| 324 | CachedSurfaceView::~CachedSurfaceView() = default; | ||
| 325 | |||
| 326 | vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, | ||
| 327 | SwizzleSource z_source, SwizzleSource w_source) { | ||
| 328 | const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | ||
| 329 | if (last_image_view && last_swizzle == swizzle) { | ||
| 330 | return last_image_view; | ||
| 331 | } | ||
| 332 | last_swizzle = swizzle; | ||
| 333 | |||
| 334 | const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle); | ||
| 335 | auto& image_view = entry->second; | ||
| 336 | if (!is_cache_miss) { | ||
| 337 | return last_image_view = *image_view; | ||
| 338 | } | ||
| 339 | |||
| 340 | auto swizzle_x = MaxwellToVK::SwizzleSource(x_source); | ||
| 341 | auto swizzle_y = MaxwellToVK::SwizzleSource(y_source); | ||
| 342 | auto swizzle_z = MaxwellToVK::SwizzleSource(z_source); | ||
| 343 | auto swizzle_w = MaxwellToVK::SwizzleSource(w_source); | ||
| 344 | |||
| 345 | if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { | ||
| 346 | // A1B5G5R5 is implemented as A1R5G5B5, so the red and blue swizzles have to be swapped here. | ||
| 347 | std::swap(swizzle_x, swizzle_z); | ||
| 348 | } | ||
| 349 | |||
| 350 | // Games can sample depth or stencil values from textures; which one is selected is decided by | ||
| 351 | // the swizzle value on hardware. To emulate this on Vulkan we encode the choice in the aspect. | ||
| 352 | vk::ImageAspectFlags aspect = aspect_mask; | ||
| 353 | if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { | ||
| 354 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); | ||
| 355 | const bool is_first = x_source == SwizzleSource::R; | ||
| 356 | switch (params.pixel_format) { | ||
| 357 | case VideoCore::Surface::PixelFormat::Z24S8: | ||
| 358 | case VideoCore::Surface::PixelFormat::Z32FS8: | ||
| 359 | aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil; | ||
| 360 | break; | ||
| 361 | case VideoCore::Surface::PixelFormat::S8Z24: | ||
| 362 | aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth; | ||
| 363 | break; | ||
| 364 | default: | ||
| 365 | aspect = vk::ImageAspectFlagBits::eDepth; | ||
| 366 | UNIMPLEMENTED(); | ||
| 367 | } | ||
| 368 | |||
| 369 | // Vulkan doesn't seem to support swizzling a depth/stencil image, so use the identity swizzle | ||
| 370 | swizzle_x = vk::ComponentSwizzle::eR; | ||
| 371 | swizzle_y = vk::ComponentSwizzle::eG; | ||
| 372 | swizzle_z = vk::ComponentSwizzle::eB; | ||
| 373 | swizzle_w = vk::ComponentSwizzle::eA; | ||
| 374 | } | ||
| 375 | |||
| 376 | const vk::ImageViewCreateInfo image_view_ci( | ||
| 377 | {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(), | ||
| 378 | {swizzle_x, swizzle_y, swizzle_z, swizzle_w}, | ||
| 379 | {aspect, base_level, num_levels, base_layer, num_layers}); | ||
| 380 | |||
| 381 | const auto dev = device.GetLogical(); | ||
| 382 | image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader()); | ||
| 383 | return last_image_view = *image_view; | ||
| 384 | } | ||
| 385 | |||
| 386 | VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 387 | const VKDevice& device, VKResourceManager& resource_manager, | ||
| 388 | VKMemoryManager& memory_manager, VKScheduler& scheduler, | ||
| 389 | VKStagingBufferPool& staging_pool) | ||
| 390 | : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager}, | ||
| 391 | memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {} | ||
| 392 | |||
| 393 | VKTextureCache::~VKTextureCache() = default; | ||
| 394 | |||
| 395 | Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { | ||
| 396 | return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager, | ||
| 397 | scheduler, staging_pool, gpu_addr, params); | ||
| 398 | } | ||
| 399 | |||
| 400 | void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, | ||
| 401 | const VideoCommon::CopyParams& copy_params) { | ||
| 402 | const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; | ||
| 403 | const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; | ||
| 404 | UNIMPLEMENTED_IF(src_3d); | ||
| 405 | |||
| 406 | // The texture cache handles depth in OpenGL terms; here it has to be translated into either a | ||
| 407 | // subresource (array layers) or a dimension (Z offset and extent), depending on the target. | ||
| 408 | const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; | ||
| 409 | const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; | ||
| 410 | |||
| 411 | const u32 extent_z = dst_3d ? copy_params.depth : 1; | ||
| 412 | const u32 num_layers = dst_3d ? 1 : copy_params.depth; | ||
| 413 | |||
| 414 | // We can't copy inside a renderpass | ||
| 415 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 416 | |||
| 417 | src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, | ||
| 418 | vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, | ||
| 419 | vk::ImageLayout::eTransferSrcOptimal); | ||
| 420 | dst_surface->Transition( | ||
| 421 | dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, | ||
| 422 | vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); | ||
| 423 | |||
| 424 | const auto& dld{device.GetDispatchLoader()}; | ||
| 425 | const vk::ImageSubresourceLayers src_subresource( | ||
| 426 | src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); | ||
| 427 | const vk::ImageSubresourceLayers dst_subresource( | ||
| 428 | dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers); | ||
| 429 | const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0); | ||
| 430 | const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z); | ||
| 431 | const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z); | ||
| 432 | const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent); | ||
| 433 | const vk::Image src_image = src_surface->GetImageHandle(); | ||
| 434 | const vk::Image dst_image = dst_surface->GetImageHandle(); | ||
| 435 | scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) { | ||
| 436 | cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, | ||
| 437 | vk::ImageLayout::eTransferDstOptimal, {copy}, dld); | ||
| 438 | }); | ||
| 439 | } | ||
| 440 | |||
| 441 | void VKTextureCache::ImageBlit(View& src_view, View& dst_view, | ||
| 442 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 443 | // We can't blit inside a renderpass | ||
| 444 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 445 | |||
| 446 | src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer, | ||
| 447 | vk::AccessFlagBits::eTransferRead); | ||
| 448 | dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer, | ||
| 449 | vk::AccessFlagBits::eTransferWrite); | ||
| 450 | |||
| 451 | const auto& cfg = copy_config; | ||
| 452 | const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0); | ||
| 453 | const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1); | ||
| 454 | const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0); | ||
| 455 | const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1); | ||
| 456 | const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right}, | ||
| 457 | dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); | ||
| 458 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | ||
| 459 | |||
| 460 | const auto& dld{device.GetDispatchLoader()}; | ||
| 461 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, | ||
| 462 | is_linear](auto cmdbuf, auto& dld) { | ||
| 463 | cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, | ||
| 464 | vk::ImageLayout::eTransferDstOptimal, {blit}, | ||
| 465 | is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld); | ||
| 466 | }); | ||
| 467 | } | ||
| 468 | |||
| 469 | void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { | ||
| 470 | // Currently unimplemented. PBO copies should be dropped and we should use a render pass to | ||
| 471 | // convert from color to depth and vice versa. | ||
| 472 | LOG_WARNING(Render_Vulkan, "Unimplemented"); | ||
| 473 | } | ||
| 474 | |||
| 475 | } // namespace Vulkan | ||
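CachedSurfaceView::GetHandle above picks the image aspect from the X swizzle when a combined depth/stencil format is sampled: swizzle R selects the first stored component and swizzle G the second, so Z24S8/Z32FS8 and S8Z24 resolve to opposite aspects. A minimal standalone sketch of that mapping, using hypothetical plain enums in place of the real VideoCore and Vulkan types:

#include <cassert>

enum class PixelFormat { Z24S8, S8Z24, Z32FS8 };
enum class SwizzleSource { R, G };
enum class Aspect { Depth, Stencil };

// Swizzle R selects the first stored component of the guest format,
// swizzle G the second one.
Aspect AspectFromSwizzle(PixelFormat format, SwizzleSource x_source) {
    const bool is_first = x_source == SwizzleSource::R;
    switch (format) {
    case PixelFormat::Z24S8:
    case PixelFormat::Z32FS8:
        return is_first ? Aspect::Depth : Aspect::Stencil;
    case PixelFormat::S8Z24:
        return is_first ? Aspect::Stencil : Aspect::Depth;
    }
    return Aspect::Depth;
}

int main() {
    assert(AspectFromSwizzle(PixelFormat::Z24S8, SwizzleSource::R) == Aspect::Depth);
    assert(AspectFromSwizzle(PixelFormat::S8Z24, SwizzleSource::R) == Aspect::Stencil);
}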
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h new file mode 100644 index 000000000..d3edbe80c --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -0,0 +1,239 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "common/math_util.h" | ||
| 14 | #include "video_core/gpu.h" | ||
| 15 | #include "video_core/rasterizer_cache.h" | ||
| 16 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_image.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 20 | #include "video_core/texture_cache/surface_base.h" | ||
| 21 | #include "video_core/texture_cache/texture_cache.h" | ||
| 22 | #include "video_core/textures/decoders.h" | ||
| 23 | |||
| 24 | namespace Core { | ||
| 25 | class System; | ||
| 26 | } | ||
| 27 | |||
| 28 | namespace VideoCore { | ||
| 29 | class RasterizerInterface; | ||
| 30 | } | ||
| 31 | |||
| 32 | namespace Vulkan { | ||
| 33 | |||
| 34 | class RasterizerVulkan; | ||
| 35 | class VKDevice; | ||
| 36 | class VKResourceManager; | ||
| 37 | class VKScheduler; | ||
| 38 | class VKStagingBufferPool; | ||
| 39 | |||
| 40 | class CachedSurfaceView; | ||
| 41 | class CachedSurface; | ||
| 42 | |||
| 43 | using Surface = std::shared_ptr<CachedSurface>; | ||
| 44 | using View = std::shared_ptr<CachedSurfaceView>; | ||
| 45 | using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; | ||
| 46 | |||
| 47 | using VideoCommon::SurfaceParams; | ||
| 48 | using VideoCommon::ViewParams; | ||
| 49 | |||
| 50 | class CachedSurface final : public VideoCommon::SurfaceBase<View> { | ||
| 51 | friend CachedSurfaceView; | ||
| 52 | |||
| 53 | public: | ||
| 54 | explicit CachedSurface(Core::System& system, const VKDevice& device, | ||
| 55 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, | ||
| 56 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool, | ||
| 57 | GPUVAddr gpu_addr, const SurfaceParams& params); | ||
| 58 | ~CachedSurface(); | ||
| 59 | |||
| 60 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | ||
| 61 | void DownloadTexture(std::vector<u8>& staging_buffer) override; | ||
| 62 | |||
| 63 | void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, | ||
| 64 | vk::ImageLayout new_layout) { | ||
| 65 | image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, | ||
| 66 | new_stage_mask, new_access, new_layout); | ||
| 67 | } | ||
| 68 | |||
| 69 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | ||
| 70 | vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, | ||
| 71 | vk::ImageLayout new_layout) { | ||
| 72 | image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | ||
| 73 | new_access, new_layout); | ||
| 74 | } | ||
| 75 | |||
| 76 | VKImage& GetImage() { | ||
| 77 | return *image; | ||
| 78 | } | ||
| 79 | |||
| 80 | const VKImage& GetImage() const { | ||
| 81 | return *image; | ||
| 82 | } | ||
| 83 | |||
| 84 | vk::Image GetImageHandle() const { | ||
| 85 | return image->GetHandle(); | ||
| 86 | } | ||
| 87 | |||
| 88 | vk::ImageAspectFlags GetAspectMask() const { | ||
| 89 | return image->GetAspectMask(); | ||
| 90 | } | ||
| 91 | |||
| 92 | vk::BufferView GetBufferViewHandle() const { | ||
| 93 | return *buffer_view; | ||
| 94 | } | ||
| 95 | |||
| 96 | protected: | ||
| 97 | void DecorateSurfaceName(); | ||
| 98 | |||
| 99 | View CreateView(const ViewParams& params) override; | ||
| 100 | View CreateViewInner(const ViewParams& params, bool is_proxy); | ||
| 101 | |||
| 102 | private: | ||
| 103 | void UploadBuffer(const std::vector<u8>& staging_buffer); | ||
| 104 | |||
| 105 | void UploadImage(const std::vector<u8>& staging_buffer); | ||
| 106 | |||
| 107 | vk::BufferImageCopy GetBufferImageCopy(u32 level) const; | ||
| 108 | |||
| 109 | vk::ImageSubresourceRange GetImageSubresourceRange() const; | ||
| 110 | |||
| 111 | Core::System& system; | ||
| 112 | const VKDevice& device; | ||
| 113 | VKResourceManager& resource_manager; | ||
| 114 | VKMemoryManager& memory_manager; | ||
| 115 | VKScheduler& scheduler; | ||
| 116 | VKStagingBufferPool& staging_pool; | ||
| 117 | |||
| 118 | std::optional<VKImage> image; | ||
| 119 | UniqueBuffer buffer; | ||
| 120 | UniqueBufferView buffer_view; | ||
| 121 | VKMemoryCommit commit; | ||
| 122 | |||
| 123 | vk::Format format; | ||
| 124 | }; | ||
| 125 | |||
| 126 | class CachedSurfaceView final : public VideoCommon::ViewBase { | ||
| 127 | public: | ||
| 128 | explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, | ||
| 129 | const ViewParams& params, bool is_proxy); | ||
| 130 | ~CachedSurfaceView(); | ||
| 131 | |||
| 132 | vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source, | ||
| 133 | Tegra::Texture::SwizzleSource y_source, | ||
| 134 | Tegra::Texture::SwizzleSource z_source, | ||
| 135 | Tegra::Texture::SwizzleSource w_source); | ||
| 136 | |||
| 137 | bool IsSameSurface(const CachedSurfaceView& rhs) const { | ||
| 138 | return &surface == &rhs.surface; | ||
| 139 | } | ||
| 140 | |||
| 141 | vk::ImageView GetHandle() { | ||
| 142 | return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, | ||
| 143 | Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); | ||
| 144 | } | ||
| 145 | |||
| 146 | u32 GetWidth() const { | ||
| 147 | return params.GetMipWidth(base_level); | ||
| 148 | } | ||
| 149 | |||
| 150 | u32 GetHeight() const { | ||
| 151 | return params.GetMipHeight(base_level); | ||
| 152 | } | ||
| 153 | |||
| 154 | bool IsBufferView() const { | ||
| 155 | return buffer_view; | ||
| 156 | } | ||
| 157 | |||
| 158 | vk::Image GetImage() const { | ||
| 159 | return image; | ||
| 160 | } | ||
| 161 | |||
| 162 | vk::BufferView GetBufferView() const { | ||
| 163 | return buffer_view; | ||
| 164 | } | ||
| 165 | |||
| 166 | vk::ImageSubresourceRange GetImageSubresourceRange() const { | ||
| 167 | return {aspect_mask, base_level, num_levels, base_layer, num_layers}; | ||
| 168 | } | ||
| 169 | |||
| 170 | vk::ImageSubresourceLayers GetImageSubresourceLayers() const { | ||
| 171 | return {surface.GetAspectMask(), base_level, base_layer, num_layers}; | ||
| 172 | } | ||
| 173 | |||
| 174 | void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask, | ||
| 175 | vk::AccessFlags new_access) const { | ||
| 176 | surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | ||
| 177 | new_access, new_layout); | ||
| 178 | } | ||
| 179 | |||
| 180 | void MarkAsModified(u64 tick) { | ||
| 181 | surface.MarkAsModified(true, tick); | ||
| 182 | } | ||
| 183 | |||
| 184 | private: | ||
| 185 | static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 186 | Tegra::Texture::SwizzleSource y_source, | ||
| 187 | Tegra::Texture::SwizzleSource z_source, | ||
| 188 | Tegra::Texture::SwizzleSource w_source) { | ||
| 189 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 190 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 191 | } | ||
| 192 | |||
| 193 | // Store a copy of these values to avoid double dereference when reading them | ||
| 194 | const SurfaceParams params; | ||
| 195 | const vk::Image image; | ||
| 196 | const vk::BufferView buffer_view; | ||
| 197 | const vk::ImageAspectFlags aspect_mask; | ||
| 198 | |||
| 199 | const VKDevice& device; | ||
| 200 | CachedSurface& surface; | ||
| 201 | const u32 base_layer; | ||
| 202 | const u32 num_layers; | ||
| 203 | const u32 base_level; | ||
| 204 | const u32 num_levels; | ||
| 205 | const vk::ImageViewType image_view_type; | ||
| 206 | |||
| 207 | vk::ImageView last_image_view; | ||
| 208 | u32 last_swizzle{}; | ||
| 209 | |||
| 210 | std::unordered_map<u32, UniqueImageView> view_cache; | ||
| 211 | }; | ||
| 212 | |||
| 213 | class VKTextureCache final : public TextureCacheBase { | ||
| 214 | public: | ||
| 215 | explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 216 | const VKDevice& device, VKResourceManager& resource_manager, | ||
| 217 | VKMemoryManager& memory_manager, VKScheduler& scheduler, | ||
| 218 | VKStagingBufferPool& staging_pool); | ||
| 219 | ~VKTextureCache(); | ||
| 220 | |||
| 221 | private: | ||
| 222 | Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; | ||
| 223 | |||
| 224 | void ImageCopy(Surface& src_surface, Surface& dst_surface, | ||
| 225 | const VideoCommon::CopyParams& copy_params) override; | ||
| 226 | |||
| 227 | void ImageBlit(View& src_view, View& dst_view, | ||
| 228 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||
| 229 | |||
| 230 | void BufferCopy(Surface& src_surface, Surface& dst_surface) override; | ||
| 231 | |||
| 232 | const VKDevice& device; | ||
| 233 | VKResourceManager& resource_manager; | ||
| 234 | VKMemoryManager& memory_manager; | ||
| 235 | VKScheduler& scheduler; | ||
| 236 | VKStagingBufferPool& staging_pool; | ||
| 237 | }; | ||
| 238 | |||
| 239 | } // namespace Vulkan | ||
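EncodeSwizzle in the header above packs the four swizzle sources into a single u32 so one integer can key the per-view cache consulted by GetHandle. A standalone sketch of that keying scheme, with hypothetical enum values and a std::unordered_map standing in for the real view cache:

#include <cassert>
#include <cstdint>
#include <unordered_map>

// Hypothetical source values; the real Tegra enum differs.
enum class SwizzleSource : std::uint32_t { Zero = 0, R = 1, G = 2, B = 3, A = 4 };

// One byte per component: x in the top byte, w in the bottom byte.
std::uint32_t EncodeSwizzle(SwizzleSource x, SwizzleSource y, SwizzleSource z, SwizzleSource w) {
    return (static_cast<std::uint32_t>(x) << 24) | (static_cast<std::uint32_t>(y) << 16) |
           (static_cast<std::uint32_t>(z) << 8) | static_cast<std::uint32_t>(w);
}

int main() {
    std::unordered_map<std::uint32_t, int> view_cache; // int stands in for a cached image view
    const std::uint32_t key =
        EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
    assert(view_cache.try_emplace(key, 0).second);  // first request creates the view
    assert(!view_cache.try_emplace(key, 1).second); // identical swizzle reuses the cached one
}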
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp new file mode 100644 index 000000000..0e577b9ff --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <variant> | ||
| 6 | #include <boost/container/static_vector.hpp> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/logging/log.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 14 | |||
| 15 | namespace Vulkan { | ||
| 16 | |||
| 17 | VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler) | ||
| 18 | : device{device}, scheduler{scheduler} {} | ||
| 19 | |||
| 20 | VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; | ||
| 21 | |||
| 22 | void VKUpdateDescriptorQueue::TickFrame() { | ||
| 23 | payload.clear(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void VKUpdateDescriptorQueue::Acquire() { | ||
| 27 | entries.clear(); | ||
| 28 | } | ||
| 29 | |||
| 30 | void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template, | ||
| 31 | vk::DescriptorSet set) { | ||
| 32 | if (payload.size() + entries.size() >= payload.max_size()) { | ||
| 33 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); | ||
| 34 | scheduler.WaitWorker(); | ||
| 35 | payload.clear(); | ||
| 36 | } | ||
| 37 | |||
| 38 | const auto payload_start = payload.data() + payload.size(); | ||
| 39 | for (const auto& entry : entries) { | ||
| 40 | if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) { | ||
| 41 | payload.push_back(*image); | ||
| 42 | } else if (const auto buffer = std::get_if<Buffer>(&entry)) { | ||
| 43 | payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); | ||
| 44 | } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) { | ||
| 45 | payload.push_back(*texel); | ||
| 46 | } else { | ||
| 47 | UNREACHABLE(); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | scheduler.Record([dev = device.GetLogical(), payload_start, set, | ||
| 52 | update_template]([[maybe_unused]] auto cmdbuf, auto& dld) { | ||
| 53 | dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld); | ||
| 54 | }); | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace Vulkan | ||
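The essential constraint in Send above is that only a pointer into payload is captured for later execution, so the entries must keep a stable address until the worker thread runs the recorded command; that is why a fixed-capacity container is used and cleared only once per frame (or after an explicit WaitWorker on overflow). A simplified standalone sketch of this deferred-consumption pattern, with a std::array plus size counter standing in for boost's static_vector and a plain callback list standing in for the scheduler:

#include <array>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

struct Entry { int value; }; // stands in for vk::DescriptorImageInfo and friends

std::array<Entry, 0x400> payload; // fixed capacity: appending never reallocates
std::size_t payload_size = 0;
std::vector<std::function<void()>> recorded; // stands in for scheduler.Record

void Send(const std::vector<Entry>& entries) {
    const Entry* payload_start = payload.data() + payload_size;
    for (const Entry& entry : entries) {
        payload[payload_size++] = entry;
    }
    const std::size_t count = entries.size();
    // Only the pointer is captured; the data it points to must not move or be
    // cleared before the callback runs.
    recorded.push_back([payload_start, count] {
        for (std::size_t i = 0; i < count; ++i) {
            std::cout << payload_start[i].value << '\n';
        }
    });
}

int main() {
    Send({{1}, {2}});
    Send({{3}});
    for (auto& fn : recorded) {
        fn(); // the "worker thread" consumes the payload later
    }
    payload_size = 0; // analogous to TickFrame()
}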
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h new file mode 100644 index 000000000..8c825aa29 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include <variant> | ||
| 9 | #include <boost/container/static_vector.hpp> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 13 | |||
| 14 | namespace Vulkan { | ||
| 15 | |||
| 16 | class VKDevice; | ||
| 17 | class VKScheduler; | ||
| 18 | |||
| 19 | class DescriptorUpdateEntry { | ||
| 20 | public: | ||
| 21 | explicit DescriptorUpdateEntry() : image{} {} | ||
| 22 | |||
| 23 | DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {} | ||
| 24 | |||
| 25 | DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) | ||
| 26 | : buffer{buffer, offset, size} {} | ||
| 27 | |||
| 28 | DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {} | ||
| 29 | |||
| 30 | private: | ||
| 31 | union { | ||
| 32 | vk::DescriptorImageInfo image; | ||
| 33 | vk::DescriptorBufferInfo buffer; | ||
| 34 | vk::BufferView texel_buffer; | ||
| 35 | }; | ||
| 36 | }; | ||
| 37 | |||
| 38 | class VKUpdateDescriptorQueue final { | ||
| 39 | public: | ||
| 40 | explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler); | ||
| 41 | ~VKUpdateDescriptorQueue(); | ||
| 42 | |||
| 43 | void TickFrame(); | ||
| 44 | |||
| 45 | void Acquire(); | ||
| 46 | |||
| 47 | void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set); | ||
| 48 | |||
| 49 | void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) { | ||
| 50 | entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}}); | ||
| 51 | } | ||
| 52 | |||
| 53 | void AddImage(vk::ImageView image_view) { | ||
| 54 | entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}}); | ||
| 55 | } | ||
| 56 | |||
| 57 | void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) { | ||
| 58 | entries.push_back(Buffer{buffer, offset, size}); | ||
| 59 | } | ||
| 60 | |||
| 61 | void AddTexelBuffer(vk::BufferView texel_buffer) { | ||
| 62 | entries.emplace_back(texel_buffer); | ||
| 63 | } | ||
| 64 | |||
| 65 | vk::ImageLayout* GetLastImageLayout() { | ||
| 66 | return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout; | ||
| 67 | } | ||
| 68 | |||
| 69 | private: | ||
| 70 | struct Buffer { | ||
| 71 | const vk::Buffer* buffer{}; | ||
| 72 | u64 offset{}; | ||
| 73 | std::size_t size{}; | ||
| 74 | }; | ||
| 75 | using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>; | ||
| 76 | // Old gcc versions don't consider this trivially copyable. | ||
| 77 | // static_assert(std::is_trivially_copyable_v<Variant>); | ||
| 78 | |||
| 79 | const VKDevice& device; | ||
| 80 | VKScheduler& scheduler; | ||
| 81 | |||
| 82 | boost::container::static_vector<Variant, 0x400> entries; | ||
| 83 | boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; | ||
| 84 | }; | ||
| 85 | |||
| 86 | } // namespace Vulkan | ||
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index b427ac873..0229733b6 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -65,7 +65,7 @@ struct BlockInfo { | |||
| 65 | 65 | ||
| 66 | struct CFGRebuildState { | 66 | struct CFGRebuildState { |
| 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) | 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) |
| 68 | : program_code{program_code}, start{start}, locker{locker} {} | 68 | : program_code{program_code}, locker{locker}, start{start} {} |
| 69 | 69 | ||
| 70 | const ProgramCode& program_code; | 70 | const ProgramCode& program_code; |
| 71 | ConstBufferLocker& locker; | 71 | ConstBufferLocker& locker; |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index c934d0719..7591a715f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include <fmt/format.h> | 7 | #include <fmt/format.h> |
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | ||
| 9 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| @@ -15,6 +16,8 @@ | |||
| 15 | 16 | ||
| 16 | namespace VideoCommon::Shader { | 17 | namespace VideoCommon::Shader { |
| 17 | 18 | ||
| 19 | using Tegra::Shader::AtomicOp; | ||
| 20 | using Tegra::Shader::AtomicType; | ||
| 18 | using Tegra::Shader::Attribute; | 21 | using Tegra::Shader::Attribute; |
| 19 | using Tegra::Shader::Instruction; | 22 | using Tegra::Shader::Instruction; |
| 20 | using Tegra::Shader::OpCode; | 23 | using Tegra::Shader::OpCode; |
| @@ -22,34 +25,39 @@ using Tegra::Shader::Register; | |||
| 22 | 25 | ||
| 23 | namespace { | 26 | namespace { |
| 24 | 27 | ||
| 25 | u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { | 28 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { |
| 29 | return uniform_type == Tegra::Shader::UniformType::UnsignedByte || | ||
| 30 | uniform_type == Tegra::Shader::UniformType::UnsignedShort; | ||
| 31 | } | ||
| 32 | |||
| 33 | u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { | ||
| 26 | switch (uniform_type) { | 34 | switch (uniform_type) { |
| 27 | case Tegra::Shader::UniformType::UnsignedByte: | 35 | case Tegra::Shader::UniformType::UnsignedByte: |
| 28 | case Tegra::Shader::UniformType::Single: | 36 | return 0b11; |
| 29 | return 1; | 37 | case Tegra::Shader::UniformType::UnsignedShort: |
| 30 | case Tegra::Shader::UniformType::Double: | 38 | return 0b10; |
| 31 | return 2; | ||
| 32 | case Tegra::Shader::UniformType::Quad: | ||
| 33 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 34 | return 4; | ||
| 35 | default: | 39 | default: |
| 36 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | 40 | UNREACHABLE(); |
| 37 | return 1; | 41 | return 0; |
| 38 | } | 42 | } |
| 39 | } | 43 | } |
| 40 | 44 | ||
| 41 | u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { | 45 | u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { |
| 42 | switch (uniform_type) { | 46 | switch (uniform_type) { |
| 47 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 48 | return 8; | ||
| 49 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 50 | return 16; | ||
| 43 | case Tegra::Shader::UniformType::Single: | 51 | case Tegra::Shader::UniformType::Single: |
| 44 | return 1; | 52 | return 32; |
| 45 | case Tegra::Shader::UniformType::Double: | 53 | case Tegra::Shader::UniformType::Double: |
| 46 | return 2; | 54 | return 64; |
| 47 | case Tegra::Shader::UniformType::Quad: | 55 | case Tegra::Shader::UniformType::Quad: |
| 48 | case Tegra::Shader::UniformType::UnsignedQuad: | 56 | case Tegra::Shader::UniformType::UnsignedQuad: |
| 49 | return 4; | 57 | return 128; |
| 50 | default: | 58 | default: |
| 51 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | 59 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); |
| 52 | return 1; | 60 | return 32; |
| 53 | } | 61 | } |
| 54 | } | 62 | } |
| 55 | 63 | ||
| @@ -184,9 +192,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 184 | }(); | 192 | }(); |
| 185 | 193 | ||
| 186 | const auto [real_address_base, base_address, descriptor] = | 194 | const auto [real_address_base, base_address, descriptor] = |
| 187 | TrackGlobalMemory(bb, instr, false); | 195 | TrackGlobalMemory(bb, instr, true, false); |
| 188 | 196 | ||
| 189 | const u32 count = GetLdgMemorySize(type); | 197 | const u32 size = GetMemorySize(type); |
| 198 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 190 | if (!real_address_base || !base_address) { | 199 | if (!real_address_base || !base_address) { |
| 191 | // Tracking failed, load zeroes. | 200 | // Tracking failed, load zeroes. |
| 192 | for (u32 i = 0; i < count; ++i) { | 201 | for (u32 i = 0; i < count; ++i) { |
| @@ -200,14 +209,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 200 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | 209 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 201 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 210 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 202 | 211 | ||
| 203 | if (type == Tegra::Shader::UniformType::UnsignedByte) { | 212 | // To handle unaligned loads get the bytes used to dereference global memory and extract |
| 204 | // To handle unaligned loads get the byte used to dereferenced global memory | 213 | // those bytes from the loaded u32. |
| 205 | // and extract that byte from the loaded uint32. | 214 | if (IsUnaligned(type)) { |
| 206 | Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); | 215 | Node mask = Immediate(GetUnalignedMask(type)); |
| 207 | byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); | 216 | Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); |
| 217 | offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); | ||
| 208 | 218 | ||
| 209 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), | 219 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), |
| 210 | Immediate(8)); | 220 | std::move(offset), Immediate(size)); |
| 211 | } | 221 | } |
| 212 | 222 | ||
| 213 | SetTemporary(bb, i, gmem); | 223 | SetTemporary(bb, i, gmem); |
| @@ -295,23 +305,53 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 295 | } | 305 | } |
| 296 | }(); | 306 | }(); |
| 297 | 307 | ||
| 308 | // Unaligned stores have to read the memory they overwrite, so mark the region as read too. | ||
| 309 | const bool is_read = IsUnaligned(type); | ||
| 298 | const auto [real_address_base, base_address, descriptor] = | 310 | const auto [real_address_base, base_address, descriptor] = |
| 299 | TrackGlobalMemory(bb, instr, true); | 311 | TrackGlobalMemory(bb, instr, is_read, true); |
| 300 | if (!real_address_base || !base_address) { | 312 | if (!real_address_base || !base_address) { |
| 301 | // Tracking failed, skip the store. | 313 | // Tracking failed, skip the store. |
| 302 | break; | 314 | break; |
| 303 | } | 315 | } |
| 304 | 316 | ||
| 305 | const u32 count = GetStgMemorySize(type); | 317 | const u32 size = GetMemorySize(type); |
| 318 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 306 | for (u32 i = 0; i < count; ++i) { | 319 | for (u32 i = 0; i < count; ++i) { |
| 307 | const Node it_offset = Immediate(i * 4); | 320 | const Node it_offset = Immediate(i * 4); |
| 308 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | 321 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 309 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 322 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 310 | const Node value = GetRegister(instr.gpr0.Value() + i); | 323 | Node value = GetRegister(instr.gpr0.Value() + i); |
| 324 | |||
| 325 | if (IsUnaligned(type)) { | ||
| 326 | Node mask = Immediate(GetUnalignedMask(type)); | ||
| 327 | Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); | ||
| 328 | offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); | ||
| 329 | |||
| 330 | value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, | ||
| 331 | Immediate(size)); | ||
| 332 | } | ||
| 333 | |||
| 311 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | 334 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); |
| 312 | } | 335 | } |
| 313 | break; | 336 | break; |
| 314 | } | 337 | } |
| 338 | case OpCode::Id::ATOMS: { | ||
| 339 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", | ||
| 340 | static_cast<int>(instr.atoms.operation.Value())); | ||
| 341 | UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", | ||
| 342 | static_cast<int>(instr.atoms.type.Value())); | ||
| 343 | |||
| 344 | const s32 offset = instr.atoms.GetImmediateOffset(); | ||
| 345 | Node address = GetRegister(instr.gpr8); | ||
| 346 | address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); | ||
| 347 | |||
| 348 | Node memory = GetSharedMemory(std::move(address)); | ||
| 349 | Node data = GetRegister(instr.gpr20); | ||
| 350 | |||
| 351 | Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); | ||
| 352 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 353 | break; | ||
| 354 | } | ||
| 315 | case OpCode::Id::AL2P: { | 355 | case OpCode::Id::AL2P: { |
| 316 | // Ignore al2p.direction since we don't care about it. | 356 | // Ignore al2p.direction since we don't care about it. |
| 317 | 357 | ||
| @@ -336,7 +376,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 336 | 376 | ||
| 337 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, | 377 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, |
| 338 | Instruction instr, | 378 | Instruction instr, |
| 339 | bool is_write) { | 379 | bool is_read, bool is_write) { |
| 340 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 380 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 341 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 381 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 342 | 382 | ||
| @@ -351,11 +391,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& | |||
| 351 | const GlobalMemoryBase descriptor{index, offset}; | 391 | const GlobalMemoryBase descriptor{index, offset}; |
| 352 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 392 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); |
| 353 | auto& usage = entry->second; | 393 | auto& usage = entry->second; |
| 354 | if (is_write) { | 394 | usage.is_written |= is_write; |
| 355 | usage.is_written = true; | 395 | usage.is_read |= is_read; |
| 356 | } else { | ||
| 357 | usage.is_read = true; | ||
| 358 | } | ||
| 359 | 396 | ||
| 360 | const auto real_address = | 397 | const auto real_address = |
| 361 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); | 398 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); |
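The unaligned LDG/STG path above loads the full 32-bit word containing the value and then extracts (or, for stores, inserts) the addressed byte or halfword: the low address bits give the byte position, and multiplying by 8 yields the bit offset for the bitfield operation, while GetMemorySize and AlignUp decide how many 32-bit words are touched. A standalone sketch of that arithmetic on host integers (hypothetical helper names; the shader IR emits equivalent operations instead of executing them):

#include <cassert>
#include <cstdint>

// mask selects the sub-word address bits: 0b11 for 8-bit accesses, 0b10 for 16-bit accesses.
std::uint32_t LoadUnaligned(std::uint32_t word, std::uint32_t address, std::uint32_t mask,
                            std::uint32_t size_in_bits) {
    const std::uint32_t bit_offset = (address & mask) * 8;       // UBitwiseAnd + shift left by 3
    return (word >> bit_offset) & ((1u << size_in_bits) - 1);    // UBitfieldExtract
}

std::uint32_t StoreUnaligned(std::uint32_t word, std::uint32_t value, std::uint32_t address,
                             std::uint32_t mask, std::uint32_t size_in_bits) {
    const std::uint32_t bit_offset = (address & mask) * 8;
    const std::uint32_t field_mask = ((1u << size_in_bits) - 1) << bit_offset;
    return (word & ~field_mask) | ((value << bit_offset) & field_mask); // UBitfieldInsert
}

int main() {
    // Byte 2 of the word 0xDDCCBBAA is 0xCC.
    assert(LoadUnaligned(0xDDCCBBAA, 0x1002, 0b11, 8) == 0xCC);
    // Halfword 1 of the same word is 0xDDCC.
    assert(LoadUnaligned(0xDDCCBBAA, 0x1002, 0b10, 16) == 0xDDCC);
    // Writing 0xEE into byte 2 leaves the other bytes untouched.
    assert(StoreUnaligned(0xDDCCBBAA, 0xEE, 0x1002, 0b11, 8) == 0xDDEEBBAA);
}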
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4b14cdf58..cd984f763 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | |||
| 794 | 794 | ||
| 795 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | 795 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, |
| 796 | bool is_tld4) { | 796 | bool is_tld4) { |
| 797 | const auto [coord_offsets, size, wrap_value, | 797 | const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; |
| 798 | diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { | 798 | const u32 size = is_tld4 ? 6 : 4; |
| 799 | if (is_tld4) { | 799 | const s32 wrap_value = is_tld4 ? 32 : 8; |
| 800 | return {{0, 8, 16}, 6, 32, 64}; | 800 | const s32 diff_value = is_tld4 ? 64 : 16; |
| 801 | } else { | ||
| 802 | return {{0, 4, 8}, 4, 8, 16}; | ||
| 803 | } | ||
| 804 | }(); | ||
| 805 | const u32 mask = (1U << size) - 1; | 801 | const u32 mask = (1U << size) - 1; |
| 806 | 802 | ||
| 807 | std::vector<Node> aoffi; | 803 | std::vector<Node> aoffi; |
| @@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor | |||
| 814 | LOG_WARNING(HW_GPU, | 810 | LOG_WARNING(HW_GPU, |
| 815 | "AOFFI constant folding failed, some hardware might have graphical issues"); | 811 | "AOFFI constant folding failed, some hardware might have graphical issues"); |
| 816 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | 812 | for (std::size_t coord = 0; coord < coord_count; ++coord) { |
| 817 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); | 813 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); |
| 818 | const Node condition = | 814 | const Node condition = |
| 819 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | 815 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); |
| 820 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | 816 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); |
| @@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor | |||
| 824 | } | 820 | } |
| 825 | 821 | ||
| 826 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | 822 | for (std::size_t coord = 0; coord < coord_count; ++coord) { |
| 827 | s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; | 823 | s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; |
| 828 | if (value >= wrap_value) { | 824 | if (value >= wrap_value) { |
| 829 | value -= diff_value; | 825 | value -= diff_value; |
| 830 | } | 826 | } |
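GetAoffiCoordinates above decodes packed texture offsets: each component is an unsigned size-bit field that is wrapped back into the signed range once it reaches wrap_value (8 for the 4-bit TEX fields, 32 for the 6-bit TLD4 fields). A standalone sketch of that decode for the non-TLD4 case:

#include <array>
#include <cassert>
#include <cstdint>

// Extracts one packed AOFFI component and wraps values of wrap_value or more
// back into the negative range.
int DecodeAoffiComponent(std::uint32_t packed, unsigned bit_offset, unsigned size) {
    const std::uint32_t mask = (1u << size) - 1;
    int value = static_cast<int>((packed >> bit_offset) & mask);
    const int wrap_value = 1 << (size - 1); // 8 for 4-bit fields, 32 for 6-bit fields
    const int diff_value = 1 << size;       // 16 for 4-bit fields, 64 for 6-bit fields
    if (value >= wrap_value) {
        value -= diff_value;
    }
    return value;
}

int main() {
    // Offsets (+3, -1) packed as two 4-bit fields: 0xF3.
    const std::uint32_t packed = 0xF3;
    const std::array offsets{0u, 4u};
    assert(DecodeAoffiComponent(packed, offsets[0], 4) == 3);
    assert(DecodeAoffiComponent(packed, offsets[1], 4) == -1);
}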
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4d2f4d6a8..075c7d07c 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -162,6 +162,8 @@ enum class OperationCode { | |||
| 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void |
| 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void |
| 164 | 164 | ||
| 165 | UAtomicAdd, /// (smem, uint) -> uint | ||
| 166 | |||
| 165 | Branch, /// (uint branch_target) -> void | 167 | Branch, /// (uint branch_target) -> void |
| 166 | BranchIndirect, /// (uint branch_target) -> void | 168 | BranchIndirect, /// (uint branch_target) -> void |
| 167 | PushFlowStack, /// (uint branch_target) -> void | 169 | PushFlowStack, /// (uint branch_target) -> void |
| @@ -392,8 +394,30 @@ struct MetaImage { | |||
| 392 | using Meta = | 394 | using Meta = |
| 393 | std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; | 395 | std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; |
| 394 | 396 | ||
| 397 | class AmendNode { | ||
| 398 | public: | ||
| 399 | std::optional<std::size_t> GetAmendIndex() const { | ||
| 400 | if (amend_index == amend_null_index) { | ||
| 401 | return std::nullopt; | ||
| 402 | } | ||
| 403 | return {amend_index}; | ||
| 404 | } | ||
| 405 | |||
| 406 | void SetAmendIndex(std::size_t index) { | ||
| 407 | amend_index = index; | ||
| 408 | } | ||
| 409 | |||
| 410 | void ClearAmend() { | ||
| 411 | amend_index = amend_null_index; | ||
| 412 | } | ||
| 413 | |||
| 414 | private: | ||
| 415 | static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; | ||
| 416 | std::size_t amend_index{amend_null_index}; | ||
| 417 | }; | ||
| 418 | |||
| 395 | /// Holds any kind of operation that can be done in the IR | 419 | /// Holds any kind of operation that can be done in the IR |
| 396 | class OperationNode final { | 420 | class OperationNode final : public AmendNode { |
| 397 | public: | 421 | public: |
| 398 | explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} | 422 | explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} |
| 399 | 423 | ||
| @@ -433,7 +457,7 @@ private: | |||
| 433 | }; | 457 | }; |
| 434 | 458 | ||
| 435 | /// Encloses inside any kind of node that returns a boolean conditionally-executed code | 459 | /// Encloses inside any kind of node that returns a boolean conditionally-executed code |
| 436 | class ConditionalNode final { | 460 | class ConditionalNode final : public AmendNode { |
| 437 | public: | 461 | public: |
| 438 | explicit ConditionalNode(Node condition, std::vector<Node>&& code) | 462 | explicit ConditionalNode(Node condition, std::vector<Node>&& code) |
| 439 | : condition{std::move(condition)}, code{std::move(code)} {} | 463 | : condition{std::move(condition)}, code{std::move(code)} {} |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 1d9825c76..31eecb3f4 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | |||
| 446 | Immediate(bits)); | 446 | Immediate(bits)); |
| 447 | } | 447 | } |
| 448 | 448 | ||
| 449 | std::size_t ShaderIR::DeclareAmend(Node new_amend) { | ||
| 450 | const std::size_t id = amend_code.size(); | ||
| 451 | amend_code.push_back(new_amend); | ||
| 452 | return id; | ||
| 453 | } | ||
| 454 | |||
| 449 | } // namespace VideoCommon::Shader | 455 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index baed06ccd..ba1db4c11 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -176,6 +176,10 @@ public: | |||
| 176 | /// Returns a condition code evaluated from internal flags | 176 | /// Returns a condition code evaluated from internal flags |
| 177 | Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; | 177 | Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; |
| 178 | 178 | ||
| 179 | const Node& GetAmendNode(std::size_t index) const { | ||
| 180 | return amend_code[index]; | ||
| 181 | } | ||
| 182 | |||
| 179 | private: | 183 | private: |
| 180 | friend class ASTDecoder; | 184 | friend class ASTDecoder; |
| 181 | 185 | ||
| @@ -390,7 +394,10 @@ private: | |||
| 390 | 394 | ||
| 391 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, | 395 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, |
| 392 | Tegra::Shader::Instruction instr, | 396 | Tegra::Shader::Instruction instr, |
| 393 | bool is_write); | 397 | bool is_read, bool is_write); |
| 398 | |||
| 399 | /// Register new amending code and obtain the reference id. | ||
| 400 | std::size_t DeclareAmend(Node new_amend); | ||
| 394 | 401 | ||
| 395 | const ProgramCode& program_code; | 402 | const ProgramCode& program_code; |
| 396 | const u32 main_offset; | 403 | const u32 main_offset; |
| @@ -406,6 +413,7 @@ private: | |||
| 406 | std::map<u32, NodeBlock> basic_blocks; | 413 | std::map<u32, NodeBlock> basic_blocks; |
| 407 | NodeBlock global_code; | 414 | NodeBlock global_code; |
| 408 | ASTManager program_manager{true, true}; | 415 | ASTManager program_manager{true, true}; |
| 416 | std::vector<Node> amend_code; | ||
| 409 | 417 | ||
| 410 | std::set<u32> used_registers; | 418 | std::set<u32> used_registers; |
| 411 | std::set<Tegra::Shader::Pred> used_predicates; | 419 | std::set<Tegra::Shader::Pred> used_predicates; |
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 271e67533..81fb9f633 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -95,7 +95,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{ | |||
| 95 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, | 95 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, |
| 96 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, | 96 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, |
| 97 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | 97 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, |
| 98 | {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8}, | 98 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, |
| 99 | 99 | ||
| 100 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, | 100 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, |
| 101 | {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, | 101 | {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 992b5c022..9256fd6d9 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -209,6 +209,11 @@ public: | |||
| 209 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | 209 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; |
| 210 | } | 210 | } |
| 211 | 211 | ||
| 212 | /// Returns the number of layers in the surface. | ||
| 213 | std::size_t GetNumLayers() const { | ||
| 214 | return is_layered ? depth : 1; | ||
| 215 | } | ||
| 216 | |||
| 212 | /// Returns the debug name of the texture for use in graphic debuggers. | 217 | /// Returns the debug name of the texture for use in graphic debuggers. |
| 213 | std::string TargetName() const; | 218 | std::string TargetName() const; |
| 214 | 219 | ||
| @@ -287,10 +292,6 @@ private: | |||
| 287 | /// Returns the size of a layer | 292 | /// Returns the size of a layer |
| 288 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; | 293 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; |
| 289 | 294 | ||
| 290 | std::size_t GetNumLayers() const { | ||
| 291 | return is_layered ? depth : 1; | ||
| 292 | } | ||
| 293 | |||
| 294 | /// Returns true if these parameters are from a layered surface. | 295 | /// Returns true if these parameters are from a layered surface. |
| 295 | bool IsLayered() const; | 296 | bool IsLayered() const; |
| 296 | }; | 297 | }; |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 07a720494..7490fb718 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -215,18 +215,11 @@ void GRenderWindow::moveContext() { | |||
| 215 | } | 215 | } |
| 216 | 216 | ||
| 217 | void GRenderWindow::SwapBuffers() { | 217 | void GRenderWindow::SwapBuffers() { |
| 218 | // In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`, | ||
| 219 | // since we never call `doneCurrent` in this thread. | ||
| 220 | // However: | ||
| 221 | // - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called | ||
| 222 | // since the last time `swapBuffers` was executed; | ||
| 223 | // - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks. | ||
| 224 | context->makeCurrent(child); | ||
| 225 | |||
| 226 | context->swapBuffers(child); | 218 | context->swapBuffers(child); |
| 219 | |||
| 227 | if (!first_frame) { | 220 | if (!first_frame) { |
| 228 | emit FirstFrameDisplayed(); | ||
| 229 | first_frame = true; | 221 | first_frame = true; |
| 222 | emit FirstFrameDisplayed(); | ||
| 230 | } | 223 | } |
| 231 | } | 224 | } |
| 232 | 225 | ||
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp index daedbc33e..e43e84d39 100644 --- a/src/yuzu/configuration/configure_gamelist.cpp +++ b/src/yuzu/configuration/configure_gamelist.cpp | |||
| @@ -21,10 +21,8 @@ constexpr std::array default_icon_sizes{ | |||
| 21 | }; | 21 | }; |
| 22 | 22 | ||
| 23 | constexpr std::array row_text_names{ | 23 | constexpr std::array row_text_names{ |
| 24 | QT_TR_NOOP("Filename"), | 24 | QT_TR_NOOP("Filename"), QT_TR_NOOP("Filetype"), QT_TR_NOOP("Title ID"), |
| 25 | QT_TR_NOOP("Filetype"), | 25 | QT_TR_NOOP("Title Name"), QT_TR_NOOP("None"), |
| 26 | QT_TR_NOOP("Title ID"), | ||
| 27 | QT_TR_NOOP("Title Name"), | ||
| 28 | }; | 26 | }; |
| 29 | } // Anonymous namespace | 27 | } // Anonymous namespace |
| 30 | 28 | ||
| @@ -46,6 +44,12 @@ ConfigureGameList::ConfigureGameList(QWidget* parent) | |||
| 46 | &ConfigureGameList::RequestGameListUpdate); | 44 | &ConfigureGameList::RequestGameListUpdate); |
| 47 | connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, | 45 | connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, |
| 48 | &ConfigureGameList::RequestGameListUpdate); | 46 | &ConfigureGameList::RequestGameListUpdate); |
| 47 | |||
| 48 | // Update text ComboBoxes after user interaction. | ||
| 49 | connect(ui->row_1_text_combobox, QOverload<int>::of(&QComboBox::activated), | ||
| 50 | [=]() { ConfigureGameList::UpdateSecondRowComboBox(); }); | ||
| 51 | connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::activated), | ||
| 52 | [=]() { ConfigureGameList::UpdateFirstRowComboBox(); }); | ||
| 49 | } | 53 | } |
| 50 | 54 | ||
| 51 | ConfigureGameList::~ConfigureGameList() = default; | 55 | ConfigureGameList::~ConfigureGameList() = default; |
| @@ -68,10 +72,6 @@ void ConfigureGameList::SetConfiguration() { | |||
| 68 | ui->show_add_ons->setChecked(UISettings::values.show_add_ons); | 72 | ui->show_add_ons->setChecked(UISettings::values.show_add_ons); |
| 69 | ui->icon_size_combobox->setCurrentIndex( | 73 | ui->icon_size_combobox->setCurrentIndex( |
| 70 | ui->icon_size_combobox->findData(UISettings::values.icon_size)); | 74 | ui->icon_size_combobox->findData(UISettings::values.icon_size)); |
| 71 | ui->row_1_text_combobox->setCurrentIndex( | ||
| 72 | ui->row_1_text_combobox->findData(UISettings::values.row_1_text_id)); | ||
| 73 | ui->row_2_text_combobox->setCurrentIndex( | ||
| 74 | ui->row_2_text_combobox->findData(UISettings::values.row_2_text_id)); | ||
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | void ConfigureGameList::changeEvent(QEvent* event) { | 77 | void ConfigureGameList::changeEvent(QEvent* event) { |
| @@ -104,10 +104,43 @@ void ConfigureGameList::InitializeIconSizeComboBox() { | |||
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | void ConfigureGameList::InitializeRowComboBoxes() { | 106 | void ConfigureGameList::InitializeRowComboBoxes() { |
| 107 | for (std::size_t i = 0; i < row_text_names.size(); ++i) { | 107 | UpdateFirstRowComboBox(true); |
| 108 | const QString row_text_name = QString::fromUtf8(row_text_names[i]); | 108 | UpdateSecondRowComboBox(true); |
| 109 | } | ||
| 110 | |||
| 111 | void ConfigureGameList::UpdateFirstRowComboBox(bool init) { | ||
| 112 | const int currentIndex = | ||
| 113 | init ? UISettings::values.row_1_text_id | ||
| 114 | : ui->row_1_text_combobox->findData(ui->row_1_text_combobox->currentData()); | ||
| 109 | 115 | ||
| 116 | ui->row_1_text_combobox->clear(); | ||
| 117 | |||
| 118 | for (std::size_t i = 0; i < row_text_names.size(); i++) { | ||
| 119 | const QString row_text_name = QString::fromUtf8(row_text_names[i]); | ||
| 110 | ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i)); | 120 | ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i)); |
| 121 | } | ||
| 122 | |||
| 123 | ui->row_1_text_combobox->setCurrentIndex(ui->row_1_text_combobox->findData(currentIndex)); | ||
| 124 | |||
| 125 | ui->row_1_text_combobox->removeItem(4); // None | ||
| 126 | ui->row_1_text_combobox->removeItem( | ||
| 127 | ui->row_1_text_combobox->findData(ui->row_2_text_combobox->currentData())); | ||
| 128 | } | ||
| 129 | |||
| 130 | void ConfigureGameList::UpdateSecondRowComboBox(bool init) { | ||
| 131 | const int currentIndex = | ||
| 132 | init ? UISettings::values.row_2_text_id | ||
| 133 | : ui->row_2_text_combobox->findData(ui->row_2_text_combobox->currentData()); | ||
| 134 | |||
| 135 | ui->row_2_text_combobox->clear(); | ||
| 136 | |||
| 137 | for (std::size_t i = 0; i < row_text_names.size(); ++i) { | ||
| 138 | const QString row_text_name = QString::fromUtf8(row_text_names[i]); | ||
| 111 | ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i)); | 139 | ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i)); |
| 112 | } | 140 | } |
| 141 | |||
| 142 | ui->row_2_text_combobox->setCurrentIndex(ui->row_2_text_combobox->findData(currentIndex)); | ||
| 143 | |||
| 144 | ui->row_2_text_combobox->removeItem( | ||
| 145 | ui->row_2_text_combobox->findData(ui->row_1_text_combobox->currentData())); | ||
| 113 | } | 146 | } |
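The configure_gamelist.cpp hunk above keeps the two row combo boxes mutually exclusive: each box is rebuilt from the full option list, its previous selection is restored, and the entry currently chosen in the other box (plus "None" for row 1) is removed, with QComboBox::activated used so the refresh only runs on user interaction. Below is a minimal standalone sketch of that pattern, not the yuzu code itself; the option names and widget layout are hypothetical stand-ins for row_text_names and the generated Ui class.

// Standalone sketch of the mutually exclusive combo-box pattern (assumed names).
#include <algorithm>
#include <array>
#include <QApplication>
#include <QComboBox>
#include <QHBoxLayout>
#include <QWidget>

int main(int argc, char* argv[]) {
    QApplication app(argc, argv);

    // Hypothetical option table standing in for row_text_names.
    const std::array<const char*, 4> options{"Filename", "Filetype", "Title ID", "Title Name"};

    QWidget window;
    auto* layout = new QHBoxLayout(&window);
    auto* first = new QComboBox(&window);
    auto* second = new QComboBox(&window);
    layout->addWidget(first);
    layout->addWidget(second);

    // Rebuild `box` from the full list, restore its previous selection, then
    // drop whatever the other box currently has selected.
    const auto rebuild = [&options](QComboBox* box, QComboBox* other) {
        const QVariant previous = box->currentData();
        box->clear();
        for (std::size_t i = 0; i < options.size(); ++i) {
            box->addItem(QString::fromUtf8(options[i]), QVariant::fromValue(i));
        }
        box->setCurrentIndex(std::max(0, box->findData(previous)));
        box->removeItem(box->findData(other->currentData())); // no-op if not found
    };

    rebuild(first, second);
    rebuild(second, first);

    // QComboBox::activated fires only on user interaction, so refreshing the
    // *other* box here cannot re-trigger itself the way currentIndexChanged would.
    QObject::connect(first, QOverload<int>::of(&QComboBox::activated),
                     [&] { rebuild(second, first); });
    QObject::connect(second, QOverload<int>::of(&QComboBox::activated),
                     [&] { rebuild(first, second); });

    window.show();
    return app.exec();
}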
diff --git a/src/yuzu/configuration/configure_gamelist.h b/src/yuzu/configuration/configure_gamelist.h index e11822919..ecd3fa174 100644 --- a/src/yuzu/configuration/configure_gamelist.h +++ b/src/yuzu/configuration/configure_gamelist.h | |||
| @@ -31,5 +31,8 @@ private: | |||
| 31 | void InitializeIconSizeComboBox(); | 31 | void InitializeIconSizeComboBox(); |
| 32 | void InitializeRowComboBoxes(); | 32 | void InitializeRowComboBoxes(); |
| 33 | 33 | ||
| 34 | void UpdateFirstRowComboBox(bool init = false); | ||
| 35 | void UpdateSecondRowComboBox(bool init = false); | ||
| 36 | |||
| 34 | std::unique_ptr<Ui::ConfigureGameList> ui; | 37 | std::unique_ptr<Ui::ConfigureGameList> ui; |
| 35 | }; | 38 | }; |
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp index 3ea0b8d67..fa9052136 100644 --- a/src/yuzu/configuration/configure_hotkeys.cpp +++ b/src/yuzu/configuration/configure_hotkeys.cpp | |||
| @@ -48,6 +48,7 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) { | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | ui->hotkey_list->expandAll(); | 50 | ui->hotkey_list->expandAll(); |
| 51 | ui->hotkey_list->resizeColumnToContents(0); | ||
| 51 | } | 52 | } |
| 52 | 53 | ||
| 53 | void ConfigureHotkeys::changeEvent(QEvent* event) { | 54 | void ConfigureHotkeys::changeEvent(QEvent* event) { |
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 1c2b37afd..7cde72d1b 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h | |||
| @@ -108,11 +108,14 @@ public: | |||
| 108 | }}; | 108 | }}; |
| 109 | 109 | ||
| 110 | const auto& row1 = row_data.at(UISettings::values.row_1_text_id); | 110 | const auto& row1 = row_data.at(UISettings::values.row_1_text_id); |
| 111 | const auto& row2 = row_data.at(UISettings::values.row_2_text_id); | 111 | const int row2_id = UISettings::values.row_2_text_id; |
| 112 | 112 | ||
| 113 | if (row1.isEmpty() || row1 == row2) | 113 | if (row2_id == 4) // None |
| 114 | return row2; | 114 | return row1; |
| 115 | if (row2.isEmpty()) | 115 | |
| 116 | const auto& row2 = row_data.at(row2_id); | ||
| 117 | |||
| 118 | if (row1 == row2) | ||
| 116 | return row1; | 119 | return row1; |
| 117 | 120 | ||
| 118 | return QString(row1 + QStringLiteral("\n ") + row2); | 121 | return QString(row1 + QStringLiteral("\n ") + row2); |
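The game_list_p.h hunk changes how the two configured rows are composed into the game list entry text: a row-2 id of 4 ("None") now shows row 1 alone, identical rows collapse to one line, and anything else is joined with a newline and indent. A small hedged sketch of that rule follows, in plain C++ rather than the actual GameListItemPath code; the data values and the none_id constant are illustrative assumptions.

// Sketch of the row-composition rule (assumed data; none_id mirrors the "None" entry).
#include <array>
#include <iostream>
#include <string>

constexpr int none_id = 4; // assumed to match the combo box's "None" item

std::string ComposeRows(const std::array<std::string, 4>& row_data, int row_1_id, int row_2_id) {
    const std::string& row1 = row_data.at(row_1_id);
    if (row_2_id == none_id) {
        return row1; // second row disabled entirely
    }
    const std::string& row2 = row_data.at(row_2_id);
    if (row1 == row2) {
        return row1; // avoid printing the same field twice
    }
    return row1 + "\n " + row2;
}

int main() {
    const std::array<std::string, 4> rows{"game.nsp", "NSP", "0100000000010000", "Example Title"};
    std::cout << ComposeRows(rows, 3, none_id) << '\n'; // "Example Title"
    std::cout << ComposeRows(rows, 3, 0) << '\n';       // "Example Title" + newline + " game.nsp"
}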
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b21fbf826..b5dd3e0d6 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -526,19 +526,30 @@ void GMainWindow::InitializeHotkeys() { | |||
| 526 | 526 | ||
| 527 | const QString main_window = QStringLiteral("Main Window"); | 527 | const QString main_window = QStringLiteral("Main Window"); |
| 528 | const QString load_file = QStringLiteral("Load File"); | 528 | const QString load_file = QStringLiteral("Load File"); |
| 529 | const QString load_amiibo = QStringLiteral("Load Amiibo"); | ||
| 529 | const QString exit_yuzu = QStringLiteral("Exit yuzu"); | 530 | const QString exit_yuzu = QStringLiteral("Exit yuzu"); |
| 531 | const QString restart_emulation = QStringLiteral("Restart Emulation"); | ||
| 530 | const QString stop_emulation = QStringLiteral("Stop Emulation"); | 532 | const QString stop_emulation = QStringLiteral("Stop Emulation"); |
| 531 | const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar"); | 533 | const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar"); |
| 532 | const QString toggle_status_bar = QStringLiteral("Toggle Status Bar"); | 534 | const QString toggle_status_bar = QStringLiteral("Toggle Status Bar"); |
| 533 | const QString fullscreen = QStringLiteral("Fullscreen"); | 535 | const QString fullscreen = QStringLiteral("Fullscreen"); |
| 536 | const QString capture_screenshot = QStringLiteral("Capture Screenshot"); | ||
| 534 | 537 | ||
| 535 | ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file)); | 538 | ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file)); |
| 536 | ui.action_Load_File->setShortcutContext( | 539 | ui.action_Load_File->setShortcutContext( |
| 537 | hotkey_registry.GetShortcutContext(main_window, load_file)); | 540 | hotkey_registry.GetShortcutContext(main_window, load_file)); |
| 538 | 541 | ||
| 542 | ui.action_Load_Amiibo->setShortcut(hotkey_registry.GetKeySequence(main_window, load_amiibo)); | ||
| 543 | ui.action_Load_Amiibo->setShortcutContext( | ||
| 544 | hotkey_registry.GetShortcutContext(main_window, load_amiibo)); | ||
| 545 | |||
| 539 | ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu)); | 546 | ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu)); |
| 540 | ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu)); | 547 | ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu)); |
| 541 | 548 | ||
| 549 | ui.action_Restart->setShortcut(hotkey_registry.GetKeySequence(main_window, restart_emulation)); | ||
| 550 | ui.action_Restart->setShortcutContext( | ||
| 551 | hotkey_registry.GetShortcutContext(main_window, restart_emulation)); | ||
| 552 | |||
| 542 | ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation)); | 553 | ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation)); |
| 543 | ui.action_Stop->setShortcutContext( | 554 | ui.action_Stop->setShortcutContext( |
| 544 | hotkey_registry.GetShortcutContext(main_window, stop_emulation)); | 555 | hotkey_registry.GetShortcutContext(main_window, stop_emulation)); |
| @@ -553,6 +564,11 @@ void GMainWindow::InitializeHotkeys() { | |||
| 553 | ui.action_Show_Status_Bar->setShortcutContext( | 564 | ui.action_Show_Status_Bar->setShortcutContext( |
| 554 | hotkey_registry.GetShortcutContext(main_window, toggle_status_bar)); | 565 | hotkey_registry.GetShortcutContext(main_window, toggle_status_bar)); |
| 555 | 566 | ||
| 567 | ui.action_Capture_Screenshot->setShortcut( | ||
| 568 | hotkey_registry.GetKeySequence(main_window, capture_screenshot)); | ||
| 569 | ui.action_Capture_Screenshot->setShortcutContext( | ||
| 570 | hotkey_registry.GetShortcutContext(main_window, capture_screenshot)); | ||
| 571 | |||
| 556 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this), | 572 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this), |
| 557 | &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile); | 573 | &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile); |
| 558 | connect( | 574 | connect( |
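The main.cpp hunk extends the existing per-action wiring to "Load Amiibo", "Restart Emulation" and "Capture Screenshot": each action gets a key sequence and shortcut context resolved from the hotkey registry by (group, name). The sketch below shows only that Qt-level wiring with a hypothetical map-based lookup standing in for hotkey_registry; it is not yuzu's HotkeyRegistry and the key bindings are invented for illustration.

// Stand-in sketch of resolving and applying a per-action hotkey (assumed names).
#include <QAction>
#include <QApplication>
#include <QKeySequence>
#include <QMainWindow>
#include <QMap>
#include <QMenu>
#include <QMenuBar>
#include <QMessageBox>
#include <QString>

// Hypothetical lookup standing in for hotkey_registry.GetKeySequence().
static QKeySequence GetKeySequence(const QString& group, const QString& name) {
    static const QMap<QString, QKeySequence> table{
        {QStringLiteral("Main Window/Load Amiibo"), QKeySequence(QStringLiteral("F2"))},
        {QStringLiteral("Main Window/Capture Screenshot"), QKeySequence(QStringLiteral("Ctrl+P"))},
    };
    return table.value(group + QLatin1Char('/') + name);
}

int main(int argc, char* argv[]) {
    QApplication app(argc, argv);
    QMainWindow window;

    auto* action_screenshot = new QAction(QStringLiteral("Capture Screenshot"), &window);
    window.menuBar()->addMenu(QStringLiteral("&Tools"))->addAction(action_screenshot);

    // Same two calls the patch makes per action: key sequence, then shortcut context.
    const QString main_window = QStringLiteral("Main Window");
    action_screenshot->setShortcut(
        GetKeySequence(main_window, QStringLiteral("Capture Screenshot")));
    action_screenshot->setShortcutContext(Qt::WindowShortcut);

    // Both the menu item and Ctrl+P arrive through the action's triggered() signal.
    QObject::connect(action_screenshot, &QAction::triggered, [&window] {
        QMessageBox::information(&window, QStringLiteral("Screenshot"),
                                 QStringLiteral("Capture requested"));
    });

    window.show();
    return app.exec();
}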
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 21f422500..a2c9e4547 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | </property> | 15 | </property> |
| 16 | <property name="windowIcon"> | 16 | <property name="windowIcon"> |
| 17 | <iconset> | 17 | <iconset> |
| 18 | <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset> | 18 | <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset> |
| 19 | </property> | 19 | </property> |
| 20 | <property name="tabShape"> | 20 | <property name="tabShape"> |
| 21 | <enum>QTabWidget::Rounded</enum> | 21 | <enum>QTabWidget::Rounded</enum> |
| @@ -98,6 +98,7 @@ | |||
| 98 | <addaction name="action_Display_Dock_Widget_Headers"/> | 98 | <addaction name="action_Display_Dock_Widget_Headers"/> |
| 99 | <addaction name="action_Show_Filter_Bar"/> | 99 | <addaction name="action_Show_Filter_Bar"/> |
| 100 | <addaction name="action_Show_Status_Bar"/> | 100 | <addaction name="action_Show_Status_Bar"/> |
| 101 | <addaction name="separator"/> | ||
| 101 | <addaction name="menu_View_Debugging"/> | 102 | <addaction name="menu_View_Debugging"/> |
| 102 | </widget> | 103 | </widget> |
| 103 | <widget class="QMenu" name="menu_Tools"> | 104 | <widget class="QMenu" name="menu_Tools"> |