Diffstat (limited to 'src')
-rw-r--r--  src/common/CMakeLists.txt | 5
-rw-r--r--  src/common/telemetry.cpp | 15
-rw-r--r--  src/common/x64/cpu_detect.cpp | 68
-rw-r--r--  src/common/x64/cpu_detect.h | 31
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic.cpp | 1
-rw-r--r--  src/core/hle/kernel/physical_memory.h | 5
-rw-r--r--  src/core/hle/kernel/process.cpp | 4
-rw-r--r--  src/core/hle/kernel/vm_manager.cpp | 37
-rw-r--r--  src/core/hle/service/nifm/nifm.cpp | 20
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp | 8
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.h | 3
-rw-r--r--  src/core/hle/service/vi/display/vi_display.cpp | 27
-rw-r--r--  src/core/hle/service/vi/display/vi_display.h | 9
-rw-r--r--  src/core/hle/service/vi/vi.cpp | 14
-rw-r--r--  src/core/loader/elf.cpp | 3
-rw-r--r--  src/core/loader/kip.cpp | 5
-rw-r--r--  src/core/loader/nso.cpp | 12
-rw-r--r--  src/core/memory.cpp | 36
-rw-r--r--  src/core/memory.h | 16
-rw-r--r--  src/video_core/CMakeLists.txt | 24
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 14
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 37
-rw-r--r--  src/video_core/gpu.cpp | 7
-rw-r--r--  src/video_core/gpu.h | 5
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 11
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 50
-rw-r--r--  src/video_core/renderer_opengl/gl_state.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_state.h | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 16
-rw-r--r--  src/video_core/renderer_opengl/utils.cpp | 17
-rw-r--r--  src/video_core/renderer_opengl/utils.h | 14
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 18
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 10
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 9
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 72
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 201
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 107
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 339
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 77
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 112
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h | 66
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 89
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h | 56
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 271
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 90
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.cpp | 158
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.h | 72
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 395
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 200
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1141
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 263
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | 100
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.h | 97
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.cpp | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.h | 17
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 142
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h | 44
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 475
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h | 239
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 57
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 86
-rw-r--r--  src/video_core/shader/control_flow.cpp | 2
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 103
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 16
-rw-r--r--  src/video_core/shader/node.h | 28
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 6
-rw-r--r--  src/video_core/shader/shader_ir.h | 10
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp | 2
-rw-r--r--  src/video_core/texture_cache/surface_params.h | 9
-rw-r--r--  src/yuzu/bootmanager.cpp | 11
-rw-r--r--  src/yuzu/configuration/configure_gamelist.cpp | 53
-rw-r--r--  src/yuzu/configuration/configure_gamelist.h | 3
-rw-r--r--  src/yuzu/configuration/configure_hotkeys.cpp | 1
-rw-r--r--  src/yuzu/game_list_p.h | 11
-rw-r--r--  src/yuzu/main.cpp | 16
-rw-r--r--  src/yuzu/main.ui | 3
81 files changed, 5208 insertions(+), 646 deletions(-)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9b0c3db68..9afc6105d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -15,6 +15,10 @@ endif ()
 if (DEFINED ENV{DISPLAYVERSION})
     set(DISPLAY_VERSION $ENV{DISPLAYVERSION})
 endif ()
+
+# Pass the path to git to the GenerateSCMRev.cmake as well
+find_package(Git QUIET)
+
 add_custom_command(OUTPUT scm_rev.cpp
     COMMAND ${CMAKE_COMMAND}
       -DSRC_DIR="${CMAKE_SOURCE_DIR}"
@@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp
       -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}"
       -DBUILD_TAG="${BUILD_TAG}"
       -DBUILD_ID="${DISPLAY_VERSION}"
+      -DGIT_EXECUTABLE="${GIT_EXECUTABLE}"
       -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
     DEPENDS
       # WARNING! It was too much work to try and make a common location for this list,
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index f53a8d193..200c6489a 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -44,20 +44,6 @@ template class Field<std::string>;
 template class Field<const char*>;
 template class Field<std::chrono::microseconds>;
 
-#ifdef ARCHITECTURE_x86_64
-static const char* CpuVendorToStr(Common::CPUVendor vendor) {
-    switch (vendor) {
-    case Common::CPUVendor::INTEL:
-        return "Intel";
-    case Common::CPUVendor::AMD:
-        return "Amd";
-    case Common::CPUVendor::OTHER:
-        return "Other";
-    }
-    UNREACHABLE();
-}
-#endif
-
 void AppendBuildInfo(FieldCollection& fc) {
     const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
     fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
     fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
     fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 2dfcd39c8..c9349a6b4 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -3,8 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
-#include <string>
-#include <thread>
 #include "common/common_types.h"
 #include "common/x64/cpu_detect.h"
 
@@ -51,8 +49,6 @@ namespace Common {
 static CPUCaps Detect() {
     CPUCaps caps = {};
 
-    caps.num_cores = std::thread::hardware_concurrency();
-
     // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
     // yuzu at all anyway
 
@@ -70,12 +66,6 @@ static CPUCaps Detect() {
     __cpuid(cpu_id, 0x80000000);
 
     u32 max_ex_fn = cpu_id[0];
-    if (!strcmp(caps.brand_string, "GenuineIntel"))
-        caps.vendor = CPUVendor::INTEL;
-    else if (!strcmp(caps.brand_string, "AuthenticAMD"))
-        caps.vendor = CPUVendor::AMD;
-    else
-        caps.vendor = CPUVendor::OTHER;
 
     // Set reasonable default brand string even if brand string not available
     strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
         caps.sse4_1 = true;
     if ((cpu_id[2] >> 20) & 1)
         caps.sse4_2 = true;
-    if ((cpu_id[2] >> 22) & 1)
-        caps.movbe = true;
     if ((cpu_id[2] >> 25) & 1)
         caps.aes = true;
 
-    if ((cpu_id[3] >> 24) & 1) {
-        caps.fxsave_fxrstor = true;
-    }
-
    // AVX support requires 3 separate checks:
    //  - Is the AVX bit set in CPUID?
    //  - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
         }
     }
 
-    caps.flush_to_zero = caps.sse;
-
     if (max_ex_fn >= 0x80000004) {
         // Extract CPU model string
         __cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
     if (max_ex_fn >= 0x80000001) {
         // Check for more features
         __cpuid(cpu_id, 0x80000001);
-        if (cpu_id[2] & 1)
-            caps.lahf_sahf_64 = true;
-        if ((cpu_id[2] >> 5) & 1)
-            caps.lzcnt = true;
         if ((cpu_id[2] >> 16) & 1)
             caps.fma4 = true;
-        if ((cpu_id[3] >> 29) & 1)
-            caps.long_mode = true;
     }
 
     return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
     return caps;
 }
 
-std::string GetCPUCapsString() {
-    auto caps = GetCPUCaps();
-
-    std::string sum(caps.cpu_string);
-    sum += " (";
-    sum += caps.brand_string;
-    sum += ")";
-
-    if (caps.sse)
-        sum += ", SSE";
-    if (caps.sse2) {
-        sum += ", SSE2";
-        if (!caps.flush_to_zero)
-            sum += " (without DAZ)";
-    }
-
-    if (caps.sse3)
-        sum += ", SSE3";
-    if (caps.ssse3)
-        sum += ", SSSE3";
-    if (caps.sse4_1)
-        sum += ", SSE4.1";
-    if (caps.sse4_2)
-        sum += ", SSE4.2";
-    if (caps.avx)
-        sum += ", AVX";
-    if (caps.avx2)
-        sum += ", AVX2";
-    if (caps.bmi1)
-        sum += ", BMI1";
-    if (caps.bmi2)
-        sum += ", BMI2";
-    if (caps.fma)
-        sum += ", FMA";
-    if (caps.aes)
-        sum += ", AES";
-    if (caps.movbe)
-        sum += ", MOVBE";
-    if (caps.long_mode)
-        sum += ", 64-bit support";
-
-    return sum;
-}
-
 } // namespace Common
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 0af3a8adb..20f2ba234 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -4,23 +4,12 @@
 
 #pragma once
 
-#include <string>
-
 namespace Common {
 
-/// x86/x64 CPU vendors that may be detected by this module
-enum class CPUVendor {
-    INTEL,
-    AMD,
-    OTHER,
-};
-
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
-    CPUVendor vendor;
     char cpu_string[0x21];
     char brand_string[0x41];
-    int num_cores;
     bool sse;
     bool sse2;
     bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
     bool fma;
     bool fma4;
     bool aes;
-
-    // Support for the FXSAVE and FXRSTOR instructions
-    bool fxsave_fxrstor;
-
-    bool movbe;
-
-    // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
-    // are automatically set to (signed) zero.
-    bool flush_to_zero;
-
-    // Support for LAHF and SAHF instructions in 64-bit mode
-    bool lahf_sahf_64;
-
-    bool long_mode;
 };
 
 /**
@@ -57,10 +32,4 @@ struct CPUCaps {
 */
 const CPUCaps& GetCPUCaps();
 
-/**
- * Gets a string summary of the name and supported capabilities of the host CPU
- * @return String summary
- */
-std::string GetCPUCapsString();
-
 } // namespace Common
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index f8c7f0efd..e825c0526 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
     config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
     config.page_table_address_space_bits = address_space_bits;
     config.silently_mirror_page_table = false;
+    config.absolute_offset_page_table = true;
 
     // Multi-process state
     config.processor_id = core_index;
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
index 090565310..b689e8e8b 100644
--- a/src/core/hle/kernel/physical_memory.h
+++ b/src/core/hle/kernel/physical_memory.h
@@ -14,6 +14,9 @@ namespace Kernel {
 // - Second to ensure all host backing memory used is aligned to 256 bytes due
 //   to strict alignment restrictions on GPU memory.
 
-using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+class PhysicalMemory final : public PhysicalMemoryVector {
+    using PhysicalMemoryVector::PhysicalMemoryVector;
+};
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 12ea4ebe3..b9035a0be 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
+    code_memory_size += module_.memory.size();
+
     const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
 
     const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
     MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
     MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
     MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
-
-    code_memory_size += module_.memory.size();
 }
 
 Process::Process(Core::System& system)
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index a9a20ef76..0b3500fce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <cstring>
 #include <iterator>
 #include <utility>
 #include "common/alignment.h"
@@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
     // If necessary, expand backing vector to cover new heap extents in
     // the case of allocating. Otherwise, shrink the backing memory,
     // if a smaller heap has been requested.
-    const u64 old_heap_size = GetCurrentHeapSize();
-    if (size > old_heap_size) {
-        const u64 alloc_size = size - old_heap_size;
-
-        heap_memory->insert(heap_memory->end(), alloc_size, 0);
-        RefreshMemoryBlockMappings(heap_memory.get());
-    } else if (size < old_heap_size) {
-        heap_memory->resize(size);
-        heap_memory->shrink_to_fit();
-
-        RefreshMemoryBlockMappings(heap_memory.get());
-    }
+    heap_memory->resize(size);
+    heap_memory->shrink_to_fit();
+    RefreshMemoryBlockMappings(heap_memory.get());
 
     heap_end = heap_region_base + size;
     ASSERT(GetCurrentHeapSize() == heap_memory->size());
@@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
     // Always merge allocated memory blocks, even when they don't share the same backing block.
     if (left.type == VMAType::AllocatedMemoryBlock &&
         (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
-        const auto right_begin = right.backing_block->begin() + right.offset;
-        const auto right_end = right_begin + right.size;
 
         // Check if we can save work.
         if (left.offset == 0 && left.size == left.backing_block->size()) {
             // Fast case: left is an entire backing block.
-            left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
+            left.backing_block->resize(left.size + right.size);
+            std::memcpy(left.backing_block->data() + left.size,
+                        right.backing_block->data() + right.offset, right.size);
         } else {
             // Slow case: make a new memory block for left and right.
-            const auto left_begin = left.backing_block->begin() + left.offset;
-            const auto left_end = left_begin + left.size;
-            const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
-            const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
-
             auto new_memory = std::make_shared<PhysicalMemory>();
-            new_memory->reserve(left_size + right_size);
-            new_memory->insert(new_memory->end(), left_begin, left_end);
-            new_memory->insert(new_memory->end(), right_begin, right_end);
+            new_memory->resize(left.size + right.size);
+            std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size);
+            std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset,
+                        right.size);
 
             left.backing_block = std::move(new_memory);
             left.offset = 0;
@@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
         memory.UnmapRegion(page_table, vma.base, vma.size);
         break;
     case VMAType::AllocatedMemoryBlock:
-        memory.MapMemoryRegion(page_table, vma.base, vma.size,
-                               vma.backing_block->data() + vma.offset);
+        memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);
        break;
    case VMAType::BackingMemory:
        memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 2e53b3221..767158444 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -9,6 +9,7 @@
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/nifm/nifm.h"
 #include "core/hle/service/service.h"
+#include "core/settings.h"
 
 namespace Service::NIFM {
 
@@ -86,7 +87,12 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.PushEnum(RequestState::Connected);
+
+        if (Settings::values.bcat_backend == "none") {
+            rb.PushEnum(RequestState::NotSubmitted);
+        } else {
+            rb.PushEnum(RequestState::Connected);
+        }
     }
 
     void GetResult(Kernel::HLERequestContext& ctx) {
@@ -194,14 +200,22 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u8>(1);
+        if (Settings::values.bcat_backend == "none") {
+            rb.Push<u8>(0);
+        } else {
+            rb.Push<u8>(1);
+        }
     }
     void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_NIFM, "(STUBBED) called");
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u8>(1);
+        if (Settings::values.bcat_backend == "none") {
+            rb.Push<u8>(0);
+        } else {
+            rb.Push<u8>(1);
+        }
     }
     Core::System& system;
};
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 52623cf89..62752e419 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
     return layer_id;
 }
 
+void NVFlinger::CloseLayer(u64 layer_id) {
+    for (auto& display : displays) {
+        display.CloseLayer(layer_id);
+    }
+}
+
 std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
     const auto* const layer = FindLayer(display_id, layer_id);
 
@@ -192,7 +198,7 @@ void NVFlinger::Compose() {
 
         const auto& igbp_buffer = buffer->get().igbp_buffer;
 
-        const auto& gpu = system.GPU();
+        auto& gpu = system.GPU();
         const auto& multi_fence = buffer->get().multi_fence;
         for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
             const auto& fence = multi_fence.fences[fence_id];
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index e3cc14bdc..57a21f33b 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -54,6 +54,9 @@ public:
     /// If an invalid display ID is specified, then an empty optional is returned.
     std::optional<u64> CreateLayer(u64 display_id);
 
+    /// Closes a layer on all displays for the given layer ID.
+    void CloseLayer(u64 layer_id);
+
     /// Finds the buffer queue ID of the specified layer in the specified display.
     ///
     /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index cd18c1610..5a202ac81 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
 Display::~Display() = default;
 
 Layer& Display::GetLayer(std::size_t index) {
-    return layers.at(index);
+    return *layers.at(index);
 }
 
 const Layer& Display::GetLayer(std::size_t index) const {
-    return layers.at(index);
+    return *layers.at(index);
 }
 
 std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
     // TODO(Subv): Support more than 1 layer.
     ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
 
-    layers.emplace_back(id, buffer_queue);
+    layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
+}
+
+void Display::CloseLayer(u64 id) {
+    layers.erase(
+        std::remove_if(layers.begin(), layers.end(),
+                       [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
+        layers.end());
 }
 
 Layer* Display::FindLayer(u64 id) {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+    const auto itr =
+        std::find_if(layers.begin(), layers.end(),
+                     [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
 
     if (itr == layers.end()) {
         return nullptr;
     }
 
-    return &*itr;
+    return itr->get();
 }
 
 const Layer* Display::FindLayer(u64 id) const {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+    const auto itr =
+        std::find_if(layers.begin(), layers.end(),
+                     [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
 
     if (itr == layers.end()) {
         return nullptr;
     }
 
-    return &*itr;
+    return itr->get();
 }
 
 } // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
index 8bb966a85..a3855d8cd 100644
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -69,6 +70,12 @@ public:
     ///
     void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
 
+    /// Closes and removes a layer from this display with the given ID.
+    ///
+    /// @param id The ID assigned to the layer to close.
+    ///
+    void CloseLayer(u64 id);
+
     /// Attempts to find a layer with the given ID.
     ///
     /// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
     u64 id;
     std::string name;
 
-    std::vector<Layer> layers;
+    std::vector<std::shared_ptr<Layer>> layers;
     Kernel::EventPair vsync_event;
 };
 
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 651c89dc0..519da74e0 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -1066,6 +1066,18 @@ private:
         rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
     }
 
+    void CloseLayer(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto layer_id{rp.Pop<u64>()};
+
+        LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
+
+        nv_flinger->CloseLayer(layer_id);
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
     void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
         const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
         {1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
         {1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
         {2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
-        {2021, nullptr, "CloseLayer"},
+        {2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
         {2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
         {2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
         {2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index f1795fdd6..8908e5328 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
             codeset_segment->addr = segment_addr;
             codeset_segment->size = aligned_size;
 
-            memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz);
+            std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i),
+                        p->p_filesz);
             current_image_position += aligned_size;
         }
     }
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 474b55cb1..092103abe 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cstring>
 #include "core/file_sys/kernel_executable.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/gdbstub/gdbstub.h"
@@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
         segment.addr = offset;
         segment.offset = offset;
         segment.size = PageAlignSize(static_cast<u32>(data.size()));
-        program_image.resize(offset);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(offset + data.size());
+        std::memcpy(program_image.data() + offset, data.data(), data.size());
     };
 
     load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset());
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index f629892ae..515c5accb 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <cinttypes>
+#include <cstring>
 #include <vector>
 
 #include "common/common_funcs.h"
@@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
         if (nso_header.IsSegmentCompressed(i)) {
             data = DecompressSegment(data, nso_header.segments[i]);
         }
-        program_image.resize(nso_header.segments[i].location);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(nso_header.segments[i].location + data.size());
+        std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
+                    data.size());
         codeset.segments[i].addr = nso_header.segments[i].location;
         codeset.segments[i].offset = nso_header.segments[i].location;
         codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
@@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
         std::vector<u8> pi_header;
         pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
                          reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
-        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
-                         program_image.end());
+        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(),
+                         program_image.data() + program_image.size());
 
         pi_header = pm->PatchNSO(pi_header, file.GetName());
 
-        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
     }
 
     // Apply cheats if they exist and the program has a valid title ID
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 91bf07a92..f0888327f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -14,6 +14,7 @@
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
@@ -38,6 +39,11 @@ struct Memory::Impl {
         system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
     }
 
+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset) {
+        MapMemoryRegion(page_table, base, size, memory.data() + offset);
+    }
+
     void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
         ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
         ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
@@ -146,7 +152,7 @@ struct Memory::Impl {
     u8* GetPointer(const VAddr vaddr) {
         u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
         if (page_pointer != nullptr) {
-            return page_pointer + (vaddr & PAGE_MASK);
+            return page_pointer + vaddr;
         }
 
         if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +235,8 @@ struct Memory::Impl {
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
+            const u8* const src_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             std::memcpy(dest_buffer, src_ptr, copy_amount);
             break;
         }
@@ -276,7 +283,8 @@ struct Memory::Impl {
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
+            u8* const dest_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             std::memcpy(dest_ptr, src_buffer, copy_amount);
             break;
         }
@@ -322,7 +330,8 @@ struct Memory::Impl {
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
+            u8* dest_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             std::memset(dest_ptr, 0, copy_amount);
             break;
         }
@@ -368,7 +377,8 @@ struct Memory::Impl {
         }
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
-            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
+            const u8* src_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             WriteBlock(process, dest_addr, src_ptr, copy_amount);
             break;
         }
@@ -446,7 +456,8 @@ struct Memory::Impl {
                 page_type = Common::PageType::Unmapped;
             } else {
                 page_type = Common::PageType::Memory;
-                current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
+                current_page_table->pointers[vaddr >> PAGE_BITS] =
+                    pointer - (vaddr & ~PAGE_MASK);
            }
            break;
        }
@@ -493,7 +504,9 @@ struct Memory::Impl {
                       memory);
        } else {
            while (base != end) {
-                page_table.pointers[base] = memory;
+                page_table.pointers[base] = memory - (base << PAGE_BITS);
+                ASSERT_MSG(page_table.pointers[base],
+                           "memory mapping base yield a nullptr within the table");
 
                base += 1;
                memory += PAGE_SIZE;
@@ -518,7 +531,7 @@ struct Memory::Impl {
        if (page_pointer != nullptr) {
            // NOTE: Avoid adding any extra logic to this fast-path block
            T value;
-            std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
+            std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
            return value;
        }
 
@@ -559,7 +572,7 @@ struct Memory::Impl {
        u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
        if (page_pointer != nullptr) {
            // NOTE: Avoid adding any extra logic to this fast-path block
-            std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
+            std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
            return;
        }
 
@@ -594,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {
    impl->SetCurrentPageTable(process);
}
 
+void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                             Kernel::PhysicalMemory& memory, VAddr offset) {
+    impl->MapMemoryRegion(page_table, base, size, memory, offset);
+}
+
void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
    impl->MapMemoryRegion(page_table, base, size, target);
}
diff --git a/src/core/memory.h b/src/core/memory.h
index 1428a6d60..8913a9da4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -19,8 +19,9 @@ class System;
 }
 
 namespace Kernel {
+class PhysicalMemory;
 class Process;
-}
+} // namespace Kernel
 
 namespace Memory {
 
@@ -66,6 +67,19 @@ public:
     void SetCurrentPageTable(Kernel::Process& process);
 
     /**
+     * Maps an physical buffer onto a region of the emulated process address space.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The address to start mapping at. Must be page-aligned.
+     * @param size       The amount of bytes to map. Must be page-aligned.
+     * @param memory     Physical buffer with the memory backing the mapping. Must be of length
+     *                   at least `size + offset`.
+     * @param offset     The offset within the physical memory. Must be page-aligned.
+     */
+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset);
+
+    /**
     * Maps an allocated buffer onto a region of the emulated process address space.
     *
     * @param page_table The page table of the emulated process.
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 65d7b9f93..12c46e86f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -153,14 +153,29 @@ if (ENABLE_VULKAN)
         renderer_vulkan/fixed_pipeline_state.h
         renderer_vulkan/maxwell_to_vk.cpp
         renderer_vulkan/maxwell_to_vk.h
+        renderer_vulkan/renderer_vulkan.h
         renderer_vulkan/vk_buffer_cache.cpp
         renderer_vulkan/vk_buffer_cache.h
+        renderer_vulkan/vk_compute_pass.cpp
+        renderer_vulkan/vk_compute_pass.h
+        renderer_vulkan/vk_compute_pipeline.cpp
+        renderer_vulkan/vk_compute_pipeline.h
+        renderer_vulkan/vk_descriptor_pool.cpp
+        renderer_vulkan/vk_descriptor_pool.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_graphics_pipeline.cpp
+        renderer_vulkan/vk_graphics_pipeline.h
        renderer_vulkan/vk_image.cpp
        renderer_vulkan/vk_image.h
        renderer_vulkan/vk_memory_manager.cpp
        renderer_vulkan/vk_memory_manager.h
+        renderer_vulkan/vk_pipeline_cache.cpp
+        renderer_vulkan/vk_pipeline_cache.h
+        renderer_vulkan/vk_rasterizer.cpp
+        renderer_vulkan/vk_rasterizer.h
+        renderer_vulkan/vk_renderpass_cache.cpp
+        renderer_vulkan/vk_renderpass_cache.h
        renderer_vulkan/vk_resource_manager.cpp
        renderer_vulkan/vk_resource_manager.h
        renderer_vulkan/vk_sampler_cache.cpp
@@ -169,12 +184,19 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_scheduler.h
        renderer_vulkan/vk_shader_decompiler.cpp
        renderer_vulkan/vk_shader_decompiler.h
+        renderer_vulkan/vk_shader_util.cpp
+        renderer_vulkan/vk_shader_util.h
        renderer_vulkan/vk_staging_buffer_pool.cpp
        renderer_vulkan/vk_staging_buffer_pool.h
        renderer_vulkan/vk_stream_buffer.cpp
        renderer_vulkan/vk_stream_buffer.h
        renderer_vulkan/vk_swapchain.cpp
-        renderer_vulkan/vk_swapchain.h)
+        renderer_vulkan/vk_swapchain.h
+        renderer_vulkan/vk_texture_cache.cpp
+        renderer_vulkan/vk_texture_cache.h
+        renderer_vulkan/vk_update_descriptor.cpp
+        renderer_vulkan/vk_update_descriptor.h
+    )
 
    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index a35e7a195..ee79260fc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1018,7 +1018,14 @@ public:
                 }
             } instanced_arrays;
 
-            INSERT_UNION_PADDING_WORDS(0x6);
+            INSERT_UNION_PADDING_WORDS(0x4);
+
+            union {
+                BitField<0, 1, u32> enable;
+                BitField<4, 8, u32> unk4;
+            } vp_point_size;
+
+            INSERT_UNION_PADDING_WORDS(1);
 
             Cull cull;
 
@@ -1271,8 +1278,6 @@ public:
 
     } dirty{};
 
-    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
-
     /// Reads a register value located at the input method address
     u32 GetRegisterValue(u32 method) const;
 
@@ -1367,6 +1372,8 @@ private:
 
     bool execute_on{true};
 
+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
@@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
+ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 57b57c647..6f98bd827 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
     Trunc = 11,
 };
 
+enum class AtomicOp : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+};
+
 enum class UniformType : u64 {
     UnsignedByte = 0,
     SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
     Bits128 = 6,
 };
 
+enum class AtomicType : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    S64 = 3,
+};
+
 enum class IMinMaxExchange : u64 {
     None = 0,
     XLo = 1,
@@ -939,6 +958,16 @@ union Instruction {
     } stg;
 
     union {
+        BitField<52, 4, AtomicOp> operation;
+        BitField<28, 2, AtomicType> type;
+        BitField<30, 22, s64> offset;
+
+        s32 GetImmediateOffset() const {
+            return static_cast<s32>(offset << 2);
+        }
+    } atoms;
+
+    union {
         BitField<32, 1, PhysicalAttributeDirection> direction;
         BitField<47, 3, AttributeSize> size;
         BitField<20, 11, u64> address;
@@ -1659,9 +1688,10 @@ public:
         ST_A,
         ST_L,
         ST_S,
-        ST,    // Store in generic memory
-        STG,   // Store in global memory
-        AL2P,  // Transforms attribute memory into physical memory
+        ST,    // Store in generic memory
+        STG,   // Store in global memory
+        ATOMS, // Atomic operation on shared memory
+        AL2P,  // Transforms attribute memory into physical memory
         TEX,
         TEX_B, // Texture Load Bindless
         TXQ,   // Texture Query
@@ -1964,6 +1994,7 @@ private:
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
            INST("101-------------", Id::ST, Type::Memory, "ST"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 095660115..b9c5c41a2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
     return *dma_pusher;
 }
 
-void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+void GPU::WaitFence(u32 syncpoint_id, u32 value) {
     // Synced GPU, is always in sync
     if (!is_async) {
         return;
     }
     MICROPROFILE_SCOPE(GPU_wait);
-    while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
-    }
+    std::unique_lock lock{sync_mutex};
+    sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
 }
 
 void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
     syncpoints[syncpoint_id]++;
     std::lock_guard lock{sync_mutex};
+    sync_cv.notify_all();
     if (!syncpt_interrupts[syncpoint_id].empty()) {
         u32 value = syncpoints[syncpoint_id].load();
         auto it = syncpt_interrupts[syncpoint_id].begin();
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ecc338ae9..b648317bb 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <atomic>
+#include <condition_variable>
 #include <list>
 #include <memory>
 #include <mutex>
@@ -181,7 +182,7 @@ public:
     virtual void WaitIdle() const = 0;
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
-    void WaitFence(u32 syncpoint_id, u32 value) const;
+    void WaitFence(u32 syncpoint_id, u32 value);
 
     void IncrementSyncPoint(u32 syncpoint_id);
 
@@ -312,6 +313,8 @@ private:
 
     std::mutex sync_mutex;
 
+    std::condition_variable sync_cv;
+
     const bool is_async;
 };
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 672051102..c428f06e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
     const auto& regs = system.GPU().Maxwell3D().regs;
     // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
     // in OpenGL).
+    state.point.program_control = regs.vp_point_size.enable != 0;
     state.point.size = std::max(1.0f, regs.point_size);
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index de742d11c..a4acb3796 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR;
34 34
35namespace { 35namespace {
36 36
37// One UBO is always reserved for emulation values on staged shaders
38constexpr u32 STAGE_RESERVED_UBOS = 1;
39
40constexpr u32 STAGE_MAIN_OFFSET = 10; 37constexpr u32 STAGE_MAIN_OFFSET = 10;
41constexpr u32 KERNEL_MAIN_OFFSET = 0; 38constexpr u32 KERNEL_MAIN_OFFSET = 0;
42 39
@@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
243 if (!code_b.empty()) { 240 if (!code_b.empty()) {
244 ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); 241 ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
245 } 242 }
246 const auto entries = GLShader::GetEntries(ir);
247 243
248 std::string source = fmt::format(R"(// {} 244 std::string source = fmt::format(R"(// {}
249#version 430 core 245#version 430 core
@@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {
314 310
315CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, 311CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
316 GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) 312 GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
317 : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache}, 313 : RasterizerCacheObject{params.host_ptr}, system{params.system},
318 device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, 314 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
319 shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} { 315 unique_identifier{params.unique_identifier}, shader_type{shader_type},
316 entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
320 if (!params.precompiled_variants) { 317 if (!params.precompiled_variants) {
321 return; 318 return;
322 } 319 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a311dbcfe..2996aaf08 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -751,6 +751,9 @@ private:
751 751
752 Expression Visit(const Node& node) { 752 Expression Visit(const Node& node) {
753 if (const auto operation = std::get_if<OperationNode>(&*node)) { 753 if (const auto operation = std::get_if<OperationNode>(&*node)) {
754 if (const auto amend_index = operation->GetAmendIndex()) {
755 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
756 }
754 const auto operation_index = static_cast<std::size_t>(operation->GetCode()); 757 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
755 if (operation_index >= operation_decompilers.size()) { 758 if (operation_index >= operation_decompilers.size()) {
756 UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); 759 UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -872,6 +875,9 @@ private:
872 } 875 }
873 876
874 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 877 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
878 if (const auto amend_index = conditional->GetAmendIndex()) {
879 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
880 }
875 // It's invalid to call conditional on nested nodes, use an operation instead 881 // It's invalid to call conditional on nested nodes, use an operation instead
876 code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); 882 code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
877 ++code.scope; 883 ++code.scope;
@@ -1850,6 +1856,16 @@ private:
1850 Type::Uint}; 1856 Type::Uint};
1851 } 1857 }
1852 1858
1859 template <const std::string_view& opname, Type type>
1860 Expression Atomic(Operation operation) {
1861 ASSERT(stage == ShaderType::Compute);
1862 auto& smem = std::get<SmemNode>(*operation[0]);
1863
1864 return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
1865 Visit(operation[1]).As(type)),
1866 type};
1867 }
1868
1853 Expression Branch(Operation operation) { 1869 Expression Branch(Operation operation) {
1854 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 1870 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
1855 UNIMPLEMENTED_IF(!target); 1871 UNIMPLEMENTED_IF(!target);
@@ -2188,6 +2204,8 @@ private:
2188 &GLSLDecompiler::AtomicImage<Func::Xor>, 2204 &GLSLDecompiler::AtomicImage<Func::Xor>,
2189 &GLSLDecompiler::AtomicImage<Func::Exchange>, 2205 &GLSLDecompiler::AtomicImage<Func::Exchange>,
2190 2206
2207 &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
2208
2191 &GLSLDecompiler::Branch, 2209 &GLSLDecompiler::Branch,
2192 &GLSLDecompiler::BranchIndirect, 2210 &GLSLDecompiler::BranchIndirect,
2193 &GLSLDecompiler::PushFlowStack, 2211 &GLSLDecompiler::PushFlowStack,
@@ -2307,7 +2325,7 @@ public:
2307 explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} 2325 explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
2308 2326
2309 void operator()(const ExprAnd& expr) { 2327 void operator()(const ExprAnd& expr) {
2310 inner += "( "; 2328 inner += '(';
2311 std::visit(*this, *expr.operand1); 2329 std::visit(*this, *expr.operand1);
2312 inner += " && "; 2330 inner += " && ";
2313 std::visit(*this, *expr.operand2); 2331 std::visit(*this, *expr.operand2);
@@ -2315,7 +2333,7 @@ public:
2315 } 2333 }
2316 2334
2317 void operator()(const ExprOr& expr) { 2335 void operator()(const ExprOr& expr) {
2318 inner += "( "; 2336 inner += '(';
2319 std::visit(*this, *expr.operand1); 2337 std::visit(*this, *expr.operand1);
2320 inner += " || "; 2338 inner += " || ";
2321 std::visit(*this, *expr.operand2); 2339 std::visit(*this, *expr.operand2);
@@ -2333,28 +2351,7 @@ public:
2333 } 2351 }
2334 2352
2335 void operator()(const ExprCondCode& expr) { 2353 void operator()(const ExprCondCode& expr) {
2336 const Node cc = decomp.ir.GetConditionCode(expr.cc); 2354 inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
2337 std::string target;
2338
2339 if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
2340 const auto index = pred->GetIndex();
2341 switch (index) {
2342 case Tegra::Shader::Pred::NeverExecute:
2343 target = "false";
2344 break;
2345 case Tegra::Shader::Pred::UnusedIndex:
2346 target = "true";
2347 break;
2348 default:
2349 target = decomp.GetPredicate(index);
2350 break;
2351 }
2352 } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
2353 target = decomp.GetInternalFlag(flag->GetFlag());
2354 } else {
2355 UNREACHABLE();
2356 }
2357 inner += target;
2358 } 2355 }
2359 2356
2360 void operator()(const ExprVar& expr) { 2357 void operator()(const ExprVar& expr) {
@@ -2366,8 +2363,7 @@ public:
2366 } 2363 }
2367 2364
2368 void operator()(VideoCommon::Shader::ExprGprEqual& expr) { 2365 void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
2369 inner += 2366 inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
2370 "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
2371 } 2367 }
2372 2368
2373 const std::string& GetResult() const { 2369 const std::string& GetResult() const {
@@ -2375,8 +2371,8 @@ public:
2375 } 2371 }
2376 2372
2377private: 2373private:
2378 std::string inner;
2379 GLSLDecompiler& decomp; 2374 GLSLDecompiler& decomp;
2375 std::string inner;
2380}; 2376};
2381 2377
2382class ASTDecompiler { 2378class ASTDecompiler {
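
The new GLSLDecompiler::Atomic helper above is a function template parameterised on a reference to a constexpr std::string_view, so one body can emit atomicAdd/atomicMin/... GLSL calls without runtime branching. A stand-alone sketch of that technique (Add, Min and EmitSharedAtomic are illustrative names, not yuzu's):

#include <string>
#include <string_view>

#include <fmt/format.h>

// A constexpr string_view with static storage duration can be bound to a
// reference non-type template parameter (C++17), baking the opcode name
// into each instantiation at compile time.
constexpr std::string_view Add = "Add";
constexpr std::string_view Min = "Min";

template <const std::string_view& opname>
std::string EmitSharedAtomic(std::string_view address, std::string_view value) {
    // Mirrors the decompiler's formatting: shared memory is an array of
    // 32-bit words, hence the ">> 2" applied to the byte address.
    return fmt::format("atomic{}(smem[{} >> 2], {})", opname, address, value);
}

// EmitSharedAtomic<Add>("addr", "val") yields "atomicAdd(smem[addr >> 2], val)".
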
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index df2e2395a..cc185e9e1 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
127} 127}
128 128
129void OpenGLState::ApplyPointSize() { 129void OpenGLState::ApplyPointSize() {
130 Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
130 if (UpdateValue(cur_state.point.size, point.size)) { 131 if (UpdateValue(cur_state.point.size, point.size)) {
131 glPointSize(point.size); 132 glPointSize(point.size);
132 } 133 }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index fb180f302..678e5cd89 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -131,7 +131,8 @@ public:
131 std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; 131 std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
132 132
133 struct { 133 struct {
134 float size = 1.0f; // GL_POINT_SIZE 134 bool program_control = false; // GL_PROGRAM_POINT_SIZE
135 GLfloat size = 1.0f; // GL_POINT_SIZE
135 } point; 136 } point;
136 137
137 struct { 138 struct {
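
The gl_rasterizer.cpp/gl_state.cpp/gl_state.h hunks track the guest's vp_point_size.enable register and toggle GL_PROGRAM_POINT_SIZE from it, so a gl_PointSize written by the vertex shader takes effect instead of the fixed glPointSize value. A minimal sketch of that state sync, assuming a loaded GL context and an illustrative register struct:

#include <glad/glad.h>

struct PointRegs {
    bool program_point_size; // illustrative stand-in for regs.vp_point_size.enable
    float point_size;        // stand-in for regs.point_size
};

void SyncPointState(const PointRegs& regs) {
    // When enabled, the vertex shader's gl_PointSize output is honoured;
    // otherwise the fixed glPointSize value below is used.
    if (regs.program_point_size) {
        glEnable(GL_PROGRAM_POINT_SIZE);
    } else {
        glDisable(GL_PROGRAM_POINT_SIZE);
    }
    // Clamp to 1.0f: a point size of 0 is invalid in OpenGL.
    glPointSize(regs.point_size > 1.0f ? regs.point_size : 1.0f);
}
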
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b790b0ef4..e95eb069e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {
44 44
45constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ 45constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
46 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U 46 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
47 {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S 47 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
48 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI 48 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
49 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U 49 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
50 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U 50 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
83 {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F 83 {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
84 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB 84 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
85 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U 85 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
86 {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S 86 {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
87 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI 87 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
88 {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F 88 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
89 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI 89 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
90 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 90 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
91 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 91 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
253 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); 253 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
254 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 254 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
255 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); 255 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
256 u8* const mip_data = staging_buffer.data() + mip_offset;
257 const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
256 if (is_compressed) { 258 if (is_compressed) {
257 glGetCompressedTextureImage(texture.handle, level, 259 glGetCompressedTextureImage(texture.handle, level, size, mip_data);
258 static_cast<GLsizei>(params.GetHostMipmapSize(level)),
259 staging_buffer.data() + mip_offset);
260 } else { 260 } else {
261 glGetTextureImage(texture.handle, level, format, type, 261 glGetTextureImage(texture.handle, level, format, type, size, mip_data);
262 static_cast<GLsizei>(params.GetHostMipmapSize(level)),
263 staging_buffer.data() + mip_offset);
264 } 262 }
265 } 263 }
266} 264}
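
The gl_texture_cache.cpp hunk folds the repeated size and pointer expressions into locals while keeping the direct-state-access download entry points, which take an explicit destination size. A compact sketch of the same download step, with the mip geometry assumed to come from the caller:

#include <cstddef>
#include <vector>

#include <glad/glad.h>

// Downloads one mip level of 'texture' into 'staging' at 'offset'.
// 'compressed', 'format' and 'type' describe the host pixel layout.
void DownloadMip(GLuint texture, GLint level, bool compressed, GLenum format,
                 GLenum type, std::size_t offset, std::size_t mip_size,
                 std::vector<unsigned char>& staging) {
    unsigned char* const mip_data = staging.data() + offset;
    const GLsizei size = static_cast<GLsizei>(mip_size);
    if (compressed) {
        // DSA variant: the buffer size is passed explicitly so the driver
        // can bound-check the write.
        glGetCompressedTextureImage(texture, level, size, mip_data);
    } else {
        glGetTextureImage(texture, level, format, type, size, mip_data);
    }
}
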
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 9770dda1c..ac99e6385 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -6,16 +6,20 @@
6#include <vector> 6#include <vector>
7 7
8#include <fmt/format.h> 8#include <fmt/format.h>
9
10#include <glad/glad.h> 9#include <glad/glad.h>
11 10
12#include "common/assert.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/scope_exit.h"
15#include "video_core/renderer_opengl/utils.h" 12#include "video_core/renderer_opengl/utils.h"
16 13
17namespace OpenGL { 14namespace OpenGL {
18 15
16struct VertexArrayPushBuffer::Entry {
17 GLuint binding_index{};
18 const GLuint* buffer{};
19 GLintptr offset{};
20 GLsizei stride{};
21};
22
19VertexArrayPushBuffer::VertexArrayPushBuffer() = default; 23VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
20 24
21VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; 25VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
@@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() {
47 } 51 }
48} 52}
49 53
54struct BindBuffersRangePushBuffer::Entry {
55 GLuint binding;
56 const GLuint* buffer;
57 GLintptr offset;
58 GLsizeiptr size;
59};
60
50BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} 61BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
51 62
52BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; 63BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index d56153fe7..3ad7c02d4 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -26,12 +26,7 @@ public:
26 void Bind(); 26 void Bind();
27 27
28private: 28private:
29 struct Entry { 29 struct Entry;
30 GLuint binding_index{};
31 const GLuint* buffer{};
32 GLintptr offset{};
33 GLsizei stride{};
34 };
35 30
36 GLuint vao{}; 31 GLuint vao{};
37 const GLuint* index_buffer{}; 32 const GLuint* index_buffer{};
@@ -50,12 +45,7 @@ public:
50 void Bind(); 45 void Bind();
51 46
52private: 47private:
53 struct Entry { 48 struct Entry;
54 GLuint binding;
55 const GLuint* buffer;
56 GLintptr offset;
57 GLsizeiptr size;
58 };
59 49
60 GLenum target; 50 GLenum target;
61 std::vector<Entry> entries; 51 std::vector<Entry> entries;
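
The utils.h/utils.cpp hunks shrink the header by forward-declaring the nested Entry structs and defining them in the source file; since C++17, std::vector may be instantiated with an incomplete element type as long as the type is complete wherever the vector's members are actually used, which is why the push buffers keep their out-of-line constructors and destructors. A minimal sketch of that layout (PushBuffer is an illustrative name):

// push_buffer.h -- only a declaration of Entry is visible to includers.
#include <vector>

class PushBuffer {
public:
    PushBuffer();  // defined out of line, where Entry is complete, so the
    ~PushBuffer(); // vector<Entry> ctor/dtor only instantiate in the .cpp

    void Push(int binding, int handle);

private:
    struct Entry; // definition lives in the .cpp, keeping GL types out of the header
    std::vector<Entry> entries;
};

// push_buffer.cpp -- Entry is completed before any vector member is used.
struct PushBuffer::Entry {
    int binding{};
    int handle{};
};

PushBuffer::PushBuffer() = default;
PushBuffer::~PushBuffer() = default;

void PushBuffer::Push(int binding, int handle) {
    entries.push_back(Entry{binding, handle});
}
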
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5a490f6ef..4e3ff231e 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
109 const auto topology = static_cast<std::size_t>(regs.draw.topology.Value()); 109 const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
110 const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; 110 const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
111 111
112 const auto& clip = regs.view_volume_clip_control;
113 const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
114
112 Maxwell::Cull::FrontFace front_face = regs.cull.front_face; 115 Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
113 if (regs.screen_y_control.triangle_rast_flip != 0 && 116 if (regs.screen_y_control.triangle_rast_flip != 0 &&
114 regs.viewport_transform[0].scale_y > 0.0f) { 117 regs.viewport_transform[0].scale_y > 0.0f) {
@@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
119 } 122 }
120 123
121 const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; 124 const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
122 return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc, 125 return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
123 regs.cull.cull_face, front_face); 126 depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
127 front_face);
124} 128}
125 129
126} // Anonymous namespace 130} // Anonymous namespace
@@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const
222std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { 226std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
223 return static_cast<std::size_t>(cull_enable) ^ 227 return static_cast<std::size_t>(cull_enable) ^
224 (static_cast<std::size_t>(depth_bias_enable) << 1) ^ 228 (static_cast<std::size_t>(depth_bias_enable) << 1) ^
225 (static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^ 229 (static_cast<std::size_t>(depth_clamp_enable) << 2) ^
230 (static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
226 (static_cast<std::size_t>(cull_face) << 24) ^ 231 (static_cast<std::size_t>(cull_face) << 24) ^
227 (static_cast<std::size_t>(front_face) << 48); 232 (static_cast<std::size_t>(front_face) << 48);
228} 233}
229 234
230bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { 235bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
231 return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) == 236 return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
232 std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face, 237 cull_face, front_face) ==
233 rhs.front_face); 238 std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
239 rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
234} 240}
235 241
236std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { 242std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 04152c0d4..87056ef37 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -170,15 +170,17 @@ struct FixedPipelineState {
170 }; 170 };
171 171
172 struct Rasterizer { 172 struct Rasterizer {
173 constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one, 173 constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
174 Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face) 174 bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
175 Maxwell::Cull::FrontFace front_face)
175 : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, 176 : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
176 ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{ 177 depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
177 front_face} {} 178 cull_face{cull_face}, front_face{front_face} {}
178 Rasterizer() = default; 179 Rasterizer() = default;
179 180
180 bool cull_enable; 181 bool cull_enable;
181 bool depth_bias_enable; 182 bool depth_bias_enable;
183 bool depth_clamp_enable;
182 bool ndc_minus_one_to_one; 184 bool ndc_minus_one_to_one;
183 Maxwell::Cull::CullFace cull_face; 185 Maxwell::Cull::CullFace cull_face;
184 Maxwell::Cull::FrontFace front_face; 186 Maxwell::Cull::FrontFace front_face;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 000e3616d..331808113 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
44 return {}; 44 return {};
45} 45}
46 46
47vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, 47vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
48 Tegra::Texture::TextureFilter filter) { 48 Tegra::Texture::TextureFilter filter) {
49 switch (wrap_mode) { 49 switch (wrap_mode) {
50 case Tegra::Texture::WrapMode::Wrap: 50 case Tegra::Texture::WrapMode::Wrap:
@@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
56 case Tegra::Texture::WrapMode::Border: 56 case Tegra::Texture::WrapMode::Border:
57 return vk::SamplerAddressMode::eClampToBorder; 57 return vk::SamplerAddressMode::eClampToBorder;
58 case Tegra::Texture::WrapMode::Clamp: 58 case Tegra::Texture::WrapMode::Clamp:
59 // TODO(Rodrigo): Emulate GL_CLAMP properly 59 if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
60 // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
61 // by sending an invalid enumeration.
62 return static_cast<vk::SamplerAddressMode>(0xcafe);
63 }
64 // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
60 switch (filter) { 65 switch (filter) {
61 case Tegra::Texture::TextureFilter::Nearest: 66 case Tegra::Texture::TextureFilter::Nearest:
62 return vk::SamplerAddressMode::eClampToEdge; 67 return vk::SamplerAddressMode::eClampToEdge;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 1534b738b..7e9678b7b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);
22 22
23vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); 23vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
24 24
25vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, 25vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
26 Tegra::Texture::TextureFilter filter); 26 Tegra::Texture::TextureFilter filter);
27 27
28vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); 28vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
new file mode 100644
index 000000000..a472c5dc9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -0,0 +1,72 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <vector>
9#include "video_core/renderer_base.h"
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Core {
13class System;
14}
15
16namespace Vulkan {
17
18class VKBlitScreen;
19class VKDevice;
20class VKFence;
21class VKMemoryManager;
22class VKResourceManager;
23class VKSwapchain;
24class VKScheduler;
25class VKImage;
26
27struct VKScreenInfo {
28 VKImage* image{};
29 u32 width{};
30 u32 height{};
31 bool is_srgb{};
32};
33
34class RendererVulkan final : public VideoCore::RendererBase {
35public:
36 explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
37 ~RendererVulkan() override;
38
39 /// Swap buffers (render frame)
40 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
41
42 /// Initialize the renderer
43 bool Init() override;
44
45 /// Shutdown the renderer
46 void ShutDown() override;
47
48private:
49 std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
50 const vk::DispatchLoaderDynamic& dldi);
51
52 bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
53
54 void Report() const;
55
56 Core::System& system;
57
58 vk::Instance instance;
59 vk::SurfaceKHR surface;
60
61 VKScreenInfo screen_info;
62
63 UniqueDebugUtilsMessengerEXT debug_callback;
64 std::unique_ptr<VKDevice> device;
65 std::unique_ptr<VKSwapchain> swapchain;
66 std::unique_ptr<VKMemoryManager> memory_manager;
67 std::unique_ptr<VKResourceManager> resource_manager;
68 std::unique_ptr<VKScheduler> scheduler;
69 std::unique_ptr<VKBlitScreen> blit_screen;
70};
71
72} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 46da81aaa..1ba544943 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -2,124 +2,145 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <cstring> 6#include <cstring>
6#include <memory> 7#include <memory>
7#include <optional> 8#include <optional>
8#include <tuple> 9#include <tuple>
9 10
10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "core/memory.h" 12#include "common/bit_util.h"
13#include "video_core/memory_manager.h" 13#include "core/core.h"
14#include "video_core/renderer_vulkan/declarations.h" 14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_buffer_cache.h" 15#include "video_core/renderer_vulkan/vk_buffer_cache.h"
16#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
17#include "video_core/renderer_vulkan/vk_stream_buffer.h" 18#include "video_core/renderer_vulkan/vk_stream_buffer.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
20 21
21CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, 22namespace {
22 std::size_t alignment, u8* host_ptr)
23 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
24 alignment{alignment} {}
25
26VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
27 Memory::Memory& cpu_memory_,
28 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
29 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
30 : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
31 cpu_memory_} {
32 const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
33 vk::BufferUsageFlagBits::eIndexBuffer |
34 vk::BufferUsageFlagBits::eUniformBuffer;
35 const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
36 vk::AccessFlagBits::eUniformRead;
37 stream_buffer =
38 std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
39 vk::PipelineStageFlagBits::eAllCommands);
40 buffer_handle = stream_buffer->GetBuffer();
41}
42 23
43VKBufferCache::~VKBufferCache() = default; 24const auto BufferUsage =
25 vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
26 vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
27
28const auto UploadPipelineStage =
29 vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
30 vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
31 vk::PipelineStageFlagBits::eComputeShader;
44 32
45u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) { 33const auto UploadAccessBarriers =
46 const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; 34 vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
47 ASSERT_MSG(cpu_addr, "Invalid GPU address"); 35 vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
48 36 vk::AccessFlagBits::eIndexRead;
49 // Cache management is a big overhead, so only cache entries with a given size. 37
50 // TODO: Figure out which size is the best for given games. 38auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
51 cache &= size >= 2048; 39 return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
52
53 u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
54 if (cache) {
55 const auto entry = TryGet(host_ptr);
56 if (entry) {
57 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
58 return entry->GetOffset();
59 }
60 Unregister(entry);
61 }
62 }
63
64 AlignBuffer(alignment);
65 const u64 uploaded_offset = buffer_offset;
66
67 if (host_ptr == nullptr) {
68 return uploaded_offset;
69 }
70
71 std::memcpy(buffer_ptr, host_ptr, size);
72 buffer_ptr += size;
73 buffer_offset += size;
74
75 if (cache) {
76 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
77 alignment, host_ptr);
78 Register(entry);
79 }
80
81 return uploaded_offset;
82} 40}
83 41
84u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { 42} // Anonymous namespace
85 AlignBuffer(alignment); 43
86 std::memcpy(buffer_ptr, raw_pointer, size); 44CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
87 const u64 uploaded_offset = buffer_offset; 45 CacheAddr cache_addr, std::size_t size)
46 : VideoCommon::BufferBlock{cache_addr, size} {
47 const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
48 BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
49 vk::BufferUsageFlagBits::eTransferDst,
50 vk::SharingMode::eExclusive, 0, nullptr);
88 51
89 buffer_ptr += size; 52 const auto& dld{device.GetDispatchLoader()};
90 buffer_offset += size; 53 const auto dev{device.GetLogical()};
91 return uploaded_offset; 54 buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
55 buffer.commit = memory_manager.Commit(*buffer.handle, false);
92} 56}
93 57
94std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { 58CachedBufferBlock::~CachedBufferBlock() = default;
95 AlignBuffer(alignment); 59
96 u8* const uploaded_ptr = buffer_ptr; 60VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
97 const u64 uploaded_offset = buffer_offset; 61 const VKDevice& device, VKMemoryManager& memory_manager,
62 VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
63 : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
64 CreateStreamBuffer(device,
65 scheduler)},
66 device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
67 staging_pool} {}
98 68
99 buffer_ptr += size; 69VKBufferCache::~VKBufferCache() = default;
100 buffer_offset += size; 70
101 return {uploaded_ptr, uploaded_offset}; 71Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
72 return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
102} 73}
103 74
104void VKBufferCache::Reserve(std::size_t max_size) { 75const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
105 bool invalidate; 76 return buffer->GetHandle();
106 std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); 77}
107 buffer_offset = buffer_offset_base; 78
79const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
80 size = std::max(size, std::size_t(4));
81 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
82 scheduler.RequestOutsideRenderPassOperationContext();
83 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
84 cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
85 });
86 return &*empty.handle;
87}
108 88
109 if (invalidate) { 89void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
110 InvalidateAll(); 90 const u8* data) {
111 } 91 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
92 std::memcpy(staging.commit->Map(size), data, size);
93
94 scheduler.RequestOutsideRenderPassOperationContext();
95 scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
96 size](auto cmdbuf, auto& dld) {
97 cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
98 cmdbuf.pipelineBarrier(
99 vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
100 {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
101 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
102 offset, size)},
103 {}, dld);
104 });
112} 105}
113 106
114void VKBufferCache::Send() { 107void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
115 stream_buffer->Send(buffer_offset - buffer_offset_base); 108 u8* data) {
109 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
110 scheduler.RequestOutsideRenderPassOperationContext();
111 scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
112 size](auto cmdbuf, auto& dld) {
113 cmdbuf.pipelineBarrier(
114 vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
115 vk::PipelineStageFlagBits::eComputeShader,
116 vk::PipelineStageFlagBits::eTransfer, {}, {},
117 {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
118 vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
119 VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
120 {}, dld);
121 cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
122 });
123 scheduler.Finish();
124
125 std::memcpy(data, staging.commit->Map(size), size);
116} 126}
117 127
118void VKBufferCache::AlignBuffer(std::size_t alignment) { 128void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
119 // Align the offset, not the mapped pointer 129 std::size_t dst_offset, std::size_t size) {
120 const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); 130 scheduler.RequestOutsideRenderPassOperationContext();
121 buffer_ptr += offset_aligned - buffer_offset; 131 scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
122 buffer_offset = offset_aligned; 132 dst_offset, size](auto cmdbuf, auto& dld) {
133 cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
134 cmdbuf.pipelineBarrier(
135 vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
136 {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
137 vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
138 VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
139 vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
140 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
141 dst_offset, size)},
142 {}, dld);
143 });
123} 144}
124 145
125} // namespace Vulkan 146} // namespace Vulkan
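
The rewritten VKBufferCache above uploads through a host-visible staging buffer: the bytes are memcpy'd into mapped staging memory, a copyBuffer into the device-local block is recorded, and a buffer memory barrier makes the transfer visible to later vertex/index/uniform/storage reads. A condensed sketch of that recording step using the Vulkan C++ bindings; the command buffer, buffer handles and dispatch loader are assumed to already exist:

#include <vulkan/vulkan.hpp>

// Records a "staging -> buffer" copy plus the barrier that makes the data
// visible to subsequent shader and vertex-input reads.
void RecordUpload(vk::CommandBuffer cmdbuf, vk::Buffer staging, vk::Buffer buffer,
                  vk::DeviceSize offset, vk::DeviceSize size,
                  const vk::DispatchLoaderDynamic& dld) {
    cmdbuf.copyBuffer(staging, buffer, {vk::BufferCopy(0, offset, size)}, dld);

    const vk::BufferMemoryBarrier barrier(
        vk::AccessFlagBits::eTransferWrite,
        vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eUniformRead |
            vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead,
        VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, offset, size);
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                           vk::PipelineStageFlagBits::eVertexInput |
                               vk::PipelineStageFlagBits::eVertexShader |
                               vk::PipelineStageFlagBits::eFragmentShader |
                               vk::PipelineStageFlagBits::eComputeShader,
                           {}, {}, {barrier}, {}, dld);
}
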
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index daa8ccf66..3f38eed0c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,105 +5,74 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <tuple> 8#include <unordered_map>
9#include <vector>
9 10
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "video_core/gpu.h" 12#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/rasterizer_cache.h" 13#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_vulkan/declarations.h" 14#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h" 15#include "video_core/renderer_vulkan/vk_memory_manager.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h"
17#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
18#include "video_core/renderer_vulkan/vk_stream_buffer.h"
15 19
16namespace Memory { 20namespace Core {
17class Memory; 21class System;
18}
19
20namespace Tegra {
21class MemoryManager;
22} 22}
23 23
24namespace Vulkan { 24namespace Vulkan {
25 25
26class VKDevice; 26class VKDevice;
27class VKFence;
28class VKMemoryManager; 27class VKMemoryManager;
29class VKStreamBuffer; 28class VKScheduler;
30 29
31class CachedBufferEntry final : public RasterizerCacheObject { 30class CachedBufferBlock final : public VideoCommon::BufferBlock {
32public: 31public:
33 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, 32 explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
34 u8* host_ptr); 33 CacheAddr cache_addr, std::size_t size);
34 ~CachedBufferBlock();
35 35
36 VAddr GetCpuAddr() const override { 36 const vk::Buffer* GetHandle() const {
37 return cpu_addr; 37 return &*buffer.handle;
38 }
39
40 std::size_t GetSizeInBytes() const override {
41 return size;
42 }
43
44 std::size_t GetSize() const {
45 return size;
46 }
47
48 u64 GetOffset() const {
49 return offset;
50 }
51
52 std::size_t GetAlignment() const {
53 return alignment;
54 } 38 }
55 39
56private: 40private:
57 VAddr cpu_addr{}; 41 VKBuffer buffer;
58 std::size_t size{};
59 u64 offset{};
60 std::size_t alignment{};
61}; 42};
62 43
63class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 44using Buffer = std::shared_ptr<CachedBufferBlock>;
45
46class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
64public: 47public:
65 explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_, 48 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
66 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, 49 const VKDevice& device, VKMemoryManager& memory_manager,
67 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); 50 VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
68 ~VKBufferCache(); 51 ~VKBufferCache();
69 52
70 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 53 const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
71 /// allocated.
72 u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
73 54
74 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 55protected:
75 u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); 56 void WriteBarrier() override {}
76 57
77 /// Reserves memory to be used by host's CPU. Returns mapped address and offset. 58 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
78 std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
79 59
80 /// Reserves a region of memory to be used in subsequent upload/reserve operations. 60 const vk::Buffer* ToHandle(const Buffer& buffer) override;
81 void Reserve(std::size_t max_size);
82 61
83 /// Ensures that the set data is sent to the device. 62 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
84 void Send(); 63 const u8* data) override;
85 64
86 /// Returns the buffer cache handle. 65 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
87 vk::Buffer GetBuffer() const { 66 u8* data) override;
88 return buffer_handle;
89 }
90 67
91protected: 68 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
92 // We do not have to flush this cache as things in it are never modified by us. 69 std::size_t dst_offset, std::size_t size) override;
93 void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
94 70
95private: 71private:
96 void AlignBuffer(std::size_t alignment); 72 const VKDevice& device;
97 73 VKMemoryManager& memory_manager;
98 Tegra::MemoryManager& tegra_memory_manager; 74 VKScheduler& scheduler;
99 Memory::Memory& cpu_memory; 75 VKStagingBufferPool& staging_pool;
100
101 std::unique_ptr<VKStreamBuffer> stream_buffer;
102 vk::Buffer buffer_handle;
103
104 u8* buffer_ptr = nullptr;
105 u64 buffer_offset = 0;
106 u64 buffer_offset_base = 0;
107}; 76};
108 77
109} // namespace Vulkan 78} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
new file mode 100644
index 000000000..7bdda3d79
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -0,0 +1,339 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <optional>
8#include <utility>
9#include <vector>
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_compute_pass.h"
15#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
16#include "video_core/renderer_vulkan/vk_device.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20
21namespace Vulkan {
22
23namespace {
24
25// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
26constexpr u8 quad_array[] = {
27 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
28 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
29 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
30 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
31 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
32 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
33 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
34 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
35 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
36 0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
37 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
38 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
39 0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
40 0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
41 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
42 0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
43 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
44 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
45 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
46 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
47 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
48 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
49 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
50 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
51 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
52 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
53 0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
54 0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
55 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
56 0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
57 0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
58 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
59 0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
60 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
61 0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
62 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
63 0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
64 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
65 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
66 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
67 0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
68 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
69 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
70 0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
71 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
72 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
73 0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
74 0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
75 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
76 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
77 0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
78 0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
79 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
80 0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
81 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
82 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
83 0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
84 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
85 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
86 0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
87 0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
88 0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
89 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
90 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
91 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
92 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
93 0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
94 0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
95 0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
96 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
97 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
98 0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
99 0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
100 0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
101 0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
102 0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
103 0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
104 0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
105 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
106 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
107 0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
108 0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
109 0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
110 0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
111 0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
112 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
113 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
114 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
115 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
116
117// Uint8 SPIR-V module. Generated from the "shaders/" directory.
118constexpr u8 uint8_pass[] = {
119 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
120 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
121 0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
122 0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
123 0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
124 0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
125 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
126 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
127 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
128 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
129 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
130 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
131 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
132 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
133 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
134 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
135 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
136 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
137 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
138 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
139 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
140 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
141 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
142 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
143 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
144 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
145 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
146 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
147 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
148 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
149 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
150 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
151 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
152 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
153 0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
154 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
155 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
156 0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
157 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
158 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
159 0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
160 0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
161 0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
162 0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
163 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
164 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
165 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
166 0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
167 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
168 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
169 0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
170 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
171 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
172 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
173 0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
174 0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
175 0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
176 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
177 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
178 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
179 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
180 0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
181 0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
182 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
183 0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
184 0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
185 0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
186 0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
187 0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
188 0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
189 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
190 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
191 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
192 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
193
194} // Anonymous namespace
195
196VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
197 const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
198 const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
199 const std::vector<vk::PushConstantRange> push_constants,
200 std::size_t code_size, const u8* code) {
201 const auto dev = device.GetLogical();
202 const auto& dld = device.GetDispatchLoader();
203
204 const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
205 {}, static_cast<u32>(bindings.size()), bindings.data());
206 descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
207
208 const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout,
209 static_cast<u32>(push_constants.size()),
210 push_constants.data());
211 layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
212
213 if (!templates.empty()) {
214 const vk::DescriptorUpdateTemplateCreateInfo template_ci(
215 {}, static_cast<u32>(templates.size()), templates.data(),
216 vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
217 vk::PipelineBindPoint::eGraphics, *layout, 0);
218 descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
219
220 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
221 }
222
223 auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
224 std::memcpy(code_copy.get(), code, code_size);
225 const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get());
226 module = dev.createShaderModuleUnique(module_ci, nullptr, dld);
227
228 const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module,
229 "main", nullptr);
230
231 const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0);
232 pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld);
233}
234
235VKComputePass::~VKComputePass() = default;
236
237vk::DescriptorSet VKComputePass::CommitDescriptorSet(
238 VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) {
239 if (!descriptor_template) {
240 return {};
241 }
242 const auto set = descriptor_allocator->Commit(fence);
243 update_descriptor_queue.Send(*descriptor_template, set);
244 return set;
245}
246
247QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
248 VKDescriptorPool& descriptor_pool,
249 VKStagingBufferPool& staging_buffer_pool,
250 VKUpdateDescriptorQueue& update_descriptor_queue)
251 : VKComputePass(device, descriptor_pool,
252 {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
253 vk::ShaderStageFlagBits::eCompute, nullptr)},
254 {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer,
255 0, sizeof(DescriptorUpdateEntry))},
256 {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))},
257 std::size(quad_array), quad_array),
258 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
259 update_descriptor_queue{update_descriptor_queue} {}
260
261QuadArrayPass::~QuadArrayPass() = default;
262
263std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
264 const u32 num_triangle_vertices = num_vertices * 6 / 4;
265 const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
266 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
267
268 update_descriptor_queue.Acquire();
269 update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
270 const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
271
272 scheduler.RequestOutsideRenderPassOperationContext();
273
274 ASSERT(num_vertices % 4 == 0);
275 const u32 num_quads = num_vertices / 4;
276 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
277 first, set](auto cmdbuf, auto& dld) {
278 constexpr u32 dispatch_size = 1024;
279 cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
280 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
281 cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first,
282 dld);
283 cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld);
284
285 const vk::BufferMemoryBarrier barrier(
286 vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
287 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
288 static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32));
289 cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
290 vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
291 });
292 return {*buffer.handle, 0};
293}
294
295Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
296 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
297 VKUpdateDescriptorQueue& update_descriptor_queue)
298 : VKComputePass(device, descriptor_pool,
299 {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
300 vk::ShaderStageFlagBits::eCompute, nullptr),
301 vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1,
302 vk::ShaderStageFlagBits::eCompute, nullptr)},
303 {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer,
304 0, sizeof(DescriptorUpdateEntry))},
305 {}, std::size(uint8_pass), uint8_pass),
306 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
307 update_descriptor_queue{update_descriptor_queue} {}
308
309Uint8Pass::~Uint8Pass() = default;
310
311std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer,
312 u64 src_offset) {
313 const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
314 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
315
316 update_descriptor_queue.Acquire();
317 update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
318 update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
319 const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
320
321 scheduler.RequestOutsideRenderPassOperationContext();
322 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
323 num_vertices](auto cmdbuf, auto& dld) {
324 constexpr u32 dispatch_size = 1024;
325 cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
326 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
327 cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld);
328
329 const vk::BufferMemoryBarrier barrier(
330 vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
331 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
332 static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16));
333 cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
334 vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
335 });
336 return {&*buffer.handle, 0};
337}
338
339} // namespace Vulkan
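
The two passes above only fill and barrier a staging buffer; the draw itself is issued by the caller (the rasterizer added elsewhere in this change, not reproduced here). A minimal caller-side sketch, assuming the same scheduler and command-buffer plumbing used above; the names quad_array_pass, count and first are placeholders, not code from this commit:

// Sketch: turn a quad-primitive draw into an indexed triangle draw using QuadArrayPass.
const auto assembled = quad_array_pass.Assemble(count, first);
const vk::Buffer index_buffer = assembled.first;
const vk::DeviceSize index_offset = assembled.second;
const u32 num_indices = count / 4 * 6; // Six triangle indices per input quad.
scheduler.Record([index_buffer, index_offset, num_indices](auto cmdbuf, auto& dld) {
    cmdbuf.bindIndexBuffer(index_buffer, index_offset, vk::IndexType::eUint32, dld);
    cmdbuf.drawIndexed(num_indices, 1, 0, 0, 0, dld);
});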
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
new file mode 100644
index 000000000..7057eb837
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -0,0 +1,77 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <utility>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
13
14namespace Vulkan {
15
16class VKDevice;
17class VKFence;
18class VKScheduler;
19class VKStagingBufferPool;
20class VKUpdateDescriptorQueue;
21
22class VKComputePass {
23public:
24 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
25 const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
26 const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
27 const std::vector<vk::PushConstantRange> push_constants,
28 std::size_t code_size, const u8* code);
29 ~VKComputePass();
30
31protected:
32 vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
33 VKFence& fence);
34
35 UniqueDescriptorUpdateTemplate descriptor_template;
36 UniquePipelineLayout layout;
37 UniquePipeline pipeline;
38
39private:
40 UniqueDescriptorSetLayout descriptor_set_layout;
41 std::optional<DescriptorAllocator> descriptor_allocator;
42 UniqueShaderModule module;
43};
44
45class QuadArrayPass final : public VKComputePass {
46public:
47 explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
48 VKDescriptorPool& descriptor_pool,
49 VKStagingBufferPool& staging_buffer_pool,
50 VKUpdateDescriptorQueue& update_descriptor_queue);
51 ~QuadArrayPass();
52
53 std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first);
54
55private:
56 VKScheduler& scheduler;
57 VKStagingBufferPool& staging_buffer_pool;
58 VKUpdateDescriptorQueue& update_descriptor_queue;
59};
60
61class Uint8Pass final : public VKComputePass {
62public:
63 explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
64 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
65 VKUpdateDescriptorQueue& update_descriptor_queue);
66 ~Uint8Pass();
67
68 std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer,
69 u64 src_offset);
70
71private:
72 VKScheduler& scheduler;
73 VKStagingBufferPool& staging_buffer_pool;
74 VKUpdateDescriptorQueue& update_descriptor_queue;
75};
76
77} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
new file mode 100644
index 000000000..9d5b8de7a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -0,0 +1,112 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "video_core/renderer_vulkan/declarations.h"
9#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
10#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
13#include "video_core/renderer_vulkan/vk_resource_manager.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
17
18namespace Vulkan {
19
20VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
21 VKDescriptorPool& descriptor_pool,
22 VKUpdateDescriptorQueue& update_descriptor_queue,
23 const SPIRVShader& shader)
24 : device{device}, scheduler{scheduler}, entries{shader.entries},
25 descriptor_set_layout{CreateDescriptorSetLayout()},
26 descriptor_allocator{descriptor_pool, *descriptor_set_layout},
27 update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
28 descriptor_template{CreateDescriptorUpdateTemplate()},
29 shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
30
31VKComputePipeline::~VKComputePipeline() = default;
32
33vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
34 if (!descriptor_template) {
35 return {};
36 }
37 const auto set = descriptor_allocator.Commit(scheduler.GetFence());
38 update_descriptor_queue.Send(*descriptor_template, set);
39 return set;
40}
41
42UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
43 std::vector<vk::DescriptorSetLayoutBinding> bindings;
44 u32 binding = 0;
45 const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
46 // TODO(Rodrigo): Maybe make individual bindings here?
47 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
48 bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute,
49 nullptr);
50 }
51 };
52 AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
53 AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
54 AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
55 AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
56 AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
57
58 const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
59 {}, static_cast<u32>(bindings.size()), bindings.data());
60
61 const auto dev = device.GetLogical();
62 const auto& dld = device.GetDispatchLoader();
63 return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
64}
65
66UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
67 const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr);
68 const auto dev = device.GetLogical();
69 return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader());
70}
71
72UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
73 std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
74 u32 binding = 0;
75 u32 offset = 0;
76 FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
77 if (template_entries.empty()) {
78 // If the shader doesn't use descriptor sets, skip template creation.
79 return UniqueDescriptorUpdateTemplate{};
80 }
81
82 const vk::DescriptorUpdateTemplateCreateInfo template_ci(
83 {}, static_cast<u32>(template_entries.size()), template_entries.data(),
84 vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
85 vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
86
87 const auto dev = device.GetLogical();
88 const auto& dld = device.GetDispatchLoader();
89 return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
90}
91
92UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
93 const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data());
94 const auto dev = device.GetLogical();
95 return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader());
96}
97
98UniquePipeline VKComputePipeline::CreatePipeline() const {
99 vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute,
100 *shader_module, "main", nullptr);
101 vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
102 subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
103 if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
104 shader_stage_ci.pNext = &subgroup_size_ci;
105 }
106
107 const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0);
108 const auto dev = device.GetLogical();
109 return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader());
110}
111
112} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
new file mode 100644
index 000000000..22235c6c9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -0,0 +1,66 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
12#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
13
14namespace Vulkan {
15
16class VKDevice;
17class VKScheduler;
18class VKUpdateDescriptorQueue;
19
20class VKComputePipeline final {
21public:
22 explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
23 VKDescriptorPool& descriptor_pool,
24 VKUpdateDescriptorQueue& update_descriptor_queue,
25 const SPIRVShader& shader);
26 ~VKComputePipeline();
27
28 vk::DescriptorSet CommitDescriptorSet();
29
30 vk::Pipeline GetHandle() const {
31 return *pipeline;
32 }
33
34 vk::PipelineLayout GetLayout() const {
35 return *layout;
36 }
37
38 const ShaderEntries& GetEntries() {
39 return entries;
40 }
41
42private:
43 UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;
44
45 UniquePipelineLayout CreatePipelineLayout() const;
46
47 UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;
48
49 UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;
50
51 UniquePipeline CreatePipeline() const;
52
53 const VKDevice& device;
54 VKScheduler& scheduler;
55 ShaderEntries entries;
56
57 UniqueDescriptorSetLayout descriptor_set_layout;
58 DescriptorAllocator descriptor_allocator;
59 VKUpdateDescriptorQueue& update_descriptor_queue;
60 UniquePipelineLayout layout;
61 UniqueDescriptorUpdateTemplate descriptor_template;
62 UniqueShaderModule shader_module;
63 UniquePipeline pipeline;
64};
65
66} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
new file mode 100644
index 000000000..cc7c281a0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -0,0 +1,89 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "common/common_types.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/renderer_vulkan/vk_resource_manager.h"
13
14namespace Vulkan {
15
16// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
17constexpr std::size_t SETS_GROW_RATE = 0x20;
18
19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
20 vk::DescriptorSetLayout layout)
21 : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
22
23DescriptorAllocator::~DescriptorAllocator() = default;
24
25vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
26 return *descriptors[CommitResource(fence)];
27}
28
29void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
30 auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin);
31 descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()),
32 std::make_move_iterator(new_sets.end()));
33}
34
35VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
36 : device{device}, active_pool{AllocateNewPool()} {}
37
38VKDescriptorPool::~VKDescriptorPool() = default;
39
40vk::DescriptorPool VKDescriptorPool::AllocateNewPool() {
41 static constexpr u32 num_sets = 0x20000;
42 static constexpr vk::DescriptorPoolSize pool_sizes[] = {
43 {vk::DescriptorType::eUniformBuffer, num_sets * 90},
44 {vk::DescriptorType::eStorageBuffer, num_sets * 60},
45 {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64},
46 {vk::DescriptorType::eCombinedImageSampler, num_sets * 64},
47 {vk::DescriptorType::eStorageImage, num_sets * 40}};
48
49 const vk::DescriptorPoolCreateInfo create_info(
50 vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets,
51 static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes));
52 const auto dev = device.GetLogical();
53 return *pools.emplace_back(
54 dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader()));
55}
56
57std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors(
58 vk::DescriptorSetLayout layout, std::size_t count) {
59 std::vector layout_copies(count, layout);
60 vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count),
61 layout_copies.data());
62
63 std::vector<vk::DescriptorSet> sets(count);
64 const auto dev = device.GetLogical();
65 const auto& dld = device.GetDispatchLoader();
66 switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) {
67 case vk::Result::eSuccess:
68 break;
69 case vk::Result::eErrorOutOfPoolMemory:
70 active_pool = AllocateNewPool();
71 allocate_info.descriptorPool = active_pool;
72 if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) {
73 break;
74 }
75 [[fallthrough]];
76 default:
77 vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
78 }
79
80 vk::PoolFree deleter(dev, active_pool, dld);
81 std::vector<UniqueDescriptorSet> unique_sets;
82 unique_sets.reserve(count);
83 for (const auto set : sets) {
84 unique_sets.push_back(UniqueDescriptorSet{set, deleter});
85 }
86 return unique_sets;
87}
88
89} // namespace Vulkan
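
The usage pattern is the one already followed by VKComputePass above: one DescriptorAllocator per descriptor set layout, and one freshly committed set per bind. A minimal sketch, assuming a descriptor pool, set layout, scheduler, update queue and update template are in scope, and assuming VKFencedPool reuses entries whose fences have already signalled:

// Sketch: commit a descriptor set guarded by the current fence, then push updates through
// the descriptor update template. The pool grows in SETS_GROW_RATE batches when exhausted.
DescriptorAllocator allocator(descriptor_pool, *descriptor_set_layout);
const vk::DescriptorSet set = allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);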
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
new file mode 100644
index 000000000..a441dbc0f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -0,0 +1,56 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_resource_manager.h"
13
14namespace Vulkan {
15
16class VKDescriptorPool;
17
18class DescriptorAllocator final : public VKFencedPool {
19public:
20 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout);
21 ~DescriptorAllocator() override;
22
23 DescriptorAllocator(const DescriptorAllocator&) = delete;
24
25 vk::DescriptorSet Commit(VKFence& fence);
26
27protected:
28 void Allocate(std::size_t begin, std::size_t end) override;
29
30private:
31 VKDescriptorPool& descriptor_pool;
32 const vk::DescriptorSetLayout layout;
33
34 std::vector<UniqueDescriptorSet> descriptors;
35};
36
37class VKDescriptorPool final {
38 friend DescriptorAllocator;
39
40public:
41 explicit VKDescriptorPool(const VKDevice& device);
42 ~VKDescriptorPool();
43
44private:
45 vk::DescriptorPool AllocateNewPool();
46
47 std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout,
48 std::size_t count);
49
50 const VKDevice& device;
51
52 std::vector<UniqueDescriptorPool> pools;
53 vk::DescriptorPool active_pool;
54};
55
56} // namespace Vulkan
\ No newline at end of file

diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
new file mode 100644
index 000000000..2e0536bf6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -0,0 +1,271 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/microprofile.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
12#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
13#include "video_core/renderer_vulkan/vk_device.h"
14#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
15#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
16#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
19
20namespace Vulkan {
21
22MICROPROFILE_DECLARE(Vulkan_PipelineCache);
23
24namespace {
25
26vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
27 return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
28 MaxwellToVK::StencilOp(face.action_depth_pass),
29 MaxwellToVK::StencilOp(face.action_depth_fail),
30 MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
31}
32
33bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
34 static constexpr std::array unsupported_topologies = {
35 vk::PrimitiveTopology::ePointList,
36 vk::PrimitiveTopology::eLineList,
37 vk::PrimitiveTopology::eTriangleList,
38 vk::PrimitiveTopology::eLineListWithAdjacency,
39 vk::PrimitiveTopology::eTriangleListWithAdjacency,
40 vk::PrimitiveTopology::ePatchList};
41 return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
42 topology) == std::end(unsupported_topologies);
43}
44
45} // Anonymous namespace
46
47VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
48 VKDescriptorPool& descriptor_pool,
49 VKUpdateDescriptorQueue& update_descriptor_queue,
50 VKRenderPassCache& renderpass_cache,
51 const GraphicsPipelineCacheKey& key,
52 const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
53 const SPIRVProgram& program)
54 : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
55 descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
56 descriptor_allocator{descriptor_pool, *descriptor_set_layout},
57 update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
58 descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
59 program)},
60 renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
61 key.renderpass_params,
62 program)} {}
63
64VKGraphicsPipeline::~VKGraphicsPipeline() = default;
65
66vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
67 if (!descriptor_template) {
68 return {};
69 }
70 const auto set = descriptor_allocator.Commit(scheduler.GetFence());
71 update_descriptor_queue.Send(*descriptor_template, set);
72 return set;
73}
74
75UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
76 const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
77 const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
78 {}, static_cast<u32>(bindings.size()), bindings.data());
79
80 const auto dev = device.GetLogical();
81 const auto& dld = device.GetDispatchLoader();
82 return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
83}
84
85UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
86 const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
87 nullptr);
88 const auto dev = device.GetLogical();
89 const auto& dld = device.GetDispatchLoader();
90 return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
91}
92
93UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
94 const SPIRVProgram& program) const {
95 std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
96 u32 binding = 0;
97 u32 offset = 0;
98 for (const auto& stage : program) {
99 if (stage) {
100 FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
101 template_entries);
102 }
103 }
104 if (template_entries.empty()) {
105 // If the shader doesn't use descriptor sets, skip template creation.
106 return UniqueDescriptorUpdateTemplate{};
107 }
108
109 const vk::DescriptorUpdateTemplateCreateInfo template_ci(
110 {}, static_cast<u32>(template_entries.size()), template_entries.data(),
111 vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
112 vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
113
114 const auto dev = device.GetLogical();
115 const auto& dld = device.GetDispatchLoader();
116 return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
117}
118
119std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
120 const SPIRVProgram& program) const {
121 std::vector<UniqueShaderModule> modules;
122 const auto dev = device.GetLogical();
123 const auto& dld = device.GetDispatchLoader();
124 for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
125 const auto& stage = program[i];
126 if (!stage) {
127 continue;
128 }
129 const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
130 stage->code.data());
131 modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
132 }
133 return modules;
134}
135
136UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
137 const SPIRVProgram& program) const {
138 const auto& vi = fixed_state.vertex_input;
139 const auto& ia = fixed_state.input_assembly;
140 const auto& ds = fixed_state.depth_stencil;
141 const auto& cd = fixed_state.color_blending;
142 const auto& ts = fixed_state.tessellation;
143 const auto& rs = fixed_state.rasterizer;
144
145 std::vector<vk::VertexInputBindingDescription> vertex_bindings;
146 std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
147 for (std::size_t i = 0; i < vi.num_bindings; ++i) {
148 const auto& binding = vi.bindings[i];
149 const bool instanced = binding.divisor != 0;
150 const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
151 vertex_bindings.emplace_back(binding.index, binding.stride, rate);
152 if (instanced) {
153 vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
154 }
155 }
156
157 std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
158 const auto& input_attributes = program[0]->entries.attributes;
159 for (std::size_t i = 0; i < vi.num_attributes; ++i) {
160 const auto& attribute = vi.attributes[i];
161 if (input_attributes.find(attribute.index) == input_attributes.end()) {
162 // Skip attributes not used by the vertex shaders.
163 continue;
164 }
165 vertex_attributes.emplace_back(attribute.index, attribute.buffer,
166 MaxwellToVK::VertexFormat(attribute.type, attribute.size),
167 attribute.offset);
168 }
169
170 vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
171 {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
172 static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
173
174 const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
175 static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
176 if (!vertex_binding_divisors.empty()) {
177 vertex_input_ci.pNext = &vertex_input_divisor_ci;
178 }
179
180 const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
181 const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
182 {}, primitive_topology,
183 ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
184
185 const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
186
187 const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
188 Maxwell::NumViewports, nullptr);
189
190 // TODO(Rodrigo): Find out what's the default register value for front face
191 const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
192 {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
193 rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
194 rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
195 rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
196
197 const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
198 {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
199
200 const vk::CompareOp depth_test_compare = ds.depth_test_enable
201 ? MaxwellToVK::ComparisonOp(ds.depth_test_function)
202 : vk::CompareOp::eAlways;
203
204 const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
205 {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
206 ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
207 GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
208
209 std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
210 const std::size_t num_attachments =
211 std::min(cd.attachments_count, renderpass_params.color_attachments.size());
212 for (std::size_t i = 0; i < num_attachments; ++i) {
213 constexpr std::array component_table{
214 vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
215 vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
216 const auto& blend = cd.attachments[i];
217
218 vk::ColorComponentFlags color_components{};
219 for (std::size_t j = 0; j < component_table.size(); ++j) {
220 if (blend.components[j])
221 color_components |= component_table[j];
222 }
223
224 cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
225 blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
226 MaxwellToVK::BlendFactor(blend.dst_rgb_func),
227 MaxwellToVK::BlendEquation(blend.rgb_equation),
228 MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
229 MaxwellToVK::BlendEquation(blend.a_equation), color_components);
230 }
231 const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
232 static_cast<u32>(num_attachments),
233 cb_attachments.data(), {});
234
235 constexpr std::array dynamic_states = {
236 vk::DynamicState::eViewport, vk::DynamicState::eScissor,
237 vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants,
238 vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask,
239 vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
240 const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
241 {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
242
243 vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
244 subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
245
246 std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
247 std::size_t module_index = 0;
248 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
249 if (!program[stage]) {
250 continue;
251 }
252 const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
253 const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
254 auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
255 *modules[module_index++], "main", nullptr);
256 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
257 stage_ci.pNext = &subgroup_size_ci;
258 }
259 }
260
261 const vk::GraphicsPipelineCreateInfo create_info(
262 {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
263 &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
264 &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
265
266 const auto dev = device.GetLogical();
267 const auto& dld = device.GetDispatchLoader();
268 return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
269}
270
271} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
new file mode 100644
index 000000000..4f5e4ea2d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -0,0 +1,90 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <optional>
10#include <unordered_map>
11#include <vector>
12
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
16#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
17#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
18#include "video_core/renderer_vulkan/vk_resource_manager.h"
19#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
20
21namespace Vulkan {
22
23using Maxwell = Tegra::Engines::Maxwell3D::Regs;
24
25struct GraphicsPipelineCacheKey;
26
27class VKDescriptorPool;
28class VKDevice;
29class VKRenderPassCache;
30class VKScheduler;
31class VKUpdateDescriptorQueue;
32
33using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
34
35class VKGraphicsPipeline final {
36public:
37 explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
38 VKDescriptorPool& descriptor_pool,
39 VKUpdateDescriptorQueue& update_descriptor_queue,
40 VKRenderPassCache& renderpass_cache,
41 const GraphicsPipelineCacheKey& key,
42 const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
43 const SPIRVProgram& program);
44 ~VKGraphicsPipeline();
45
46 vk::DescriptorSet CommitDescriptorSet();
47
48 vk::Pipeline GetHandle() const {
49 return *pipeline;
50 }
51
52 vk::PipelineLayout GetLayout() const {
53 return *layout;
54 }
55
56 vk::RenderPass GetRenderPass() const {
57 return renderpass;
58 }
59
60private:
61 UniqueDescriptorSetLayout CreateDescriptorSetLayout(
62 const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;
63
64 UniquePipelineLayout CreatePipelineLayout() const;
65
66 UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
67 const SPIRVProgram& program) const;
68
69 std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
70
71 UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
72 const SPIRVProgram& program) const;
73
74 const VKDevice& device;
75 VKScheduler& scheduler;
76 const FixedPipelineState fixed_state;
77 const u64 hash;
78
79 UniqueDescriptorSetLayout descriptor_set_layout;
80 DescriptorAllocator descriptor_allocator;
81 VKUpdateDescriptorQueue& update_descriptor_queue;
82 UniquePipelineLayout layout;
83 UniqueDescriptorUpdateTemplate descriptor_template;
84 std::vector<UniqueShaderModule> modules;
85
86 vk::RenderPass renderpass;
87 UniquePipeline pipeline;
88};
89
90} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 0451babbf..9cc9979d0 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -6,6 +6,7 @@
6#include <optional> 6#include <optional>
7#include <tuple> 7#include <tuple>
8#include <vector> 8#include <vector>
9
9#include "common/alignment.h" 10#include "common/alignment.h"
10#include "common/assert.h" 11#include "common/assert.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -16,34 +17,32 @@
16 17
17namespace Vulkan { 18namespace Vulkan {
18 19
19// TODO(Rodrigo): Fine tune this number 20namespace {
20constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024; 21
22u64 GetAllocationChunkSize(u64 required_size) {
23 static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
24 auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
25 return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
26}
27
28} // Anonymous namespace
21 29
22class VKMemoryAllocation final { 30class VKMemoryAllocation final {
23public: 31public:
24 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, 32 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
25 vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type) 33 vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
26 : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size}, 34 : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
27 shifted_type{ShiftType(type)}, is_mappable{properties & 35 shifted_type{ShiftType(type)} {}
28 vk::MemoryPropertyFlagBits::eHostVisible} {
29 if (is_mappable) {
30 const auto dev = device.GetLogical();
31 const auto& dld = device.GetDispatchLoader();
32 base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
33 }
34 }
35 36
36 ~VKMemoryAllocation() { 37 ~VKMemoryAllocation() {
37 const auto dev = device.GetLogical(); 38 const auto dev = device.GetLogical();
38 const auto& dld = device.GetDispatchLoader(); 39 const auto& dld = device.GetDispatchLoader();
39 if (is_mappable)
40 dev.unmapMemory(memory, dld);
41 dev.free(memory, nullptr, dld); 40 dev.free(memory, nullptr, dld);
42 } 41 }
43 42
44 VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { 43 VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
45 auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size), 44 auto found = TryFindFreeSection(free_iterator, allocation_size,
46 static_cast<u64>(alignment)); 45 static_cast<u64>(commit_size), static_cast<u64>(alignment));
47 if (!found) { 46 if (!found) {
48 found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size), 47 found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
49 static_cast<u64>(alignment)); 48 static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
52 return nullptr; 51 return nullptr;
53 } 52 }
54 } 53 }
55 u8* address = is_mappable ? base_address + *found : nullptr; 54 auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
56 auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
57 *found + commit_size); 55 *found + commit_size);
58 commits.push_back(commit.get()); 56 commits.push_back(commit.get());
59 57
@@ -65,12 +63,10 @@ public:
65 63
66 void Free(const VKMemoryCommitImpl* commit) { 64 void Free(const VKMemoryCommitImpl* commit) {
67 ASSERT(commit); 65 ASSERT(commit);
68 const auto it = 66
69 std::find_if(commits.begin(), commits.end(), 67 const auto it = std::find(std::begin(commits), std::end(commits), commit);
70 [&](const auto& stored_commit) { return stored_commit == commit; });
71 if (it == commits.end()) { 68 if (it == commits.end()) {
72 LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!"); 69 UNREACHABLE_MSG("Freeing unallocated commit!");
73 UNREACHABLE();
74 return; 70 return;
75 } 71 }
76 commits.erase(it); 72 commits.erase(it);
@@ -88,11 +84,11 @@ private:
88 } 84 }
89 85
90 /// A memory allocator, it may return a free region between "start" and "end" with the solicited 86 /// A memory allocator, it may return a free region between "start" and "end" with the solicited
91 /// requeriments. 87 /// requirements.
92 std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { 88 std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
93 u64 iterator = start; 89 u64 iterator = Common::AlignUp(start, alignment);
94 while (iterator + size < end) { 90 while (iterator + size <= end) {
95 const u64 try_left = Common::AlignUp(iterator, alignment); 91 const u64 try_left = iterator;
96 const u64 try_right = try_left + size; 92 const u64 try_right = try_left + size;
97 93
98 bool overlap = false; 94 bool overlap = false;
@@ -100,7 +96,7 @@ private:
100 const auto [commit_left, commit_right] = commit->interval; 96 const auto [commit_left, commit_right] = commit->interval;
101 if (try_left < commit_right && commit_left < try_right) { 97 if (try_left < commit_right && commit_left < try_right) {
102 // There's an overlap, continue the search where the overlapping commit ends. 98 // There's an overlap, continue the search where the overlapping commit ends.
103 iterator = commit_right; 99 iterator = Common::AlignUp(commit_right, alignment);
104 overlap = true; 100 overlap = true;
105 break; 101 break;
106 } 102 }
@@ -110,6 +106,7 @@ private:
110 return try_left; 106 return try_left;
111 } 107 }
112 } 108 }
109
 113        // No free regions were found, return an empty optional.  110        // No free regions were found, return an empty optional.
114 return std::nullopt; 111 return std::nullopt;
115 } 112 }
@@ -117,12 +114,8 @@ private:
117 const VKDevice& device; ///< Vulkan device. 114 const VKDevice& device; ///< Vulkan device.
118 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. 115 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
119 const vk::MemoryPropertyFlags properties; ///< Vulkan properties. 116 const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
120 const u64 alloc_size; ///< Size of this allocation. 117 const u64 allocation_size; ///< Size of this allocation.
121 const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. 118 const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
122 const bool is_mappable; ///< Whether the allocation is mappable.
123
124 /// Base address of the mapped pointer.
125 u8* base_address{};
126 119
127 /// Hints where the next free region is likely going to be. 120 /// Hints where the next free region is likely going to be.
128 u64 free_iterator{}; 121 u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
132}; 125};
133 126
134VKMemoryManager::VKMemoryManager(const VKDevice& device) 127VKMemoryManager::VKMemoryManager(const VKDevice& device)
135 : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())}, 128 : device{device}, properties{device.GetPhysical().getMemoryProperties(
136 is_memory_unified{GetMemoryUnified(props)} {} 129 device.GetDispatchLoader())},
130 is_memory_unified{GetMemoryUnified(properties)} {}
137 131
138VKMemoryManager::~VKMemoryManager() = default; 132VKMemoryManager::~VKMemoryManager() = default;
139 133
140VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) { 134VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
141 ASSERT(reqs.size < ALLOC_CHUNK_SIZE); 135 bool host_visible) {
136 const u64 chunk_size = GetAllocationChunkSize(requirements.size);
142 137
143 // When a host visible commit is asked, search for host visible and coherent, otherwise search 138 // When a host visible commit is asked, search for host visible and coherent, otherwise search
144 // for a fast device local type. 139 // for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
147 ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent 142 ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
148 : vk::MemoryPropertyFlagBits::eDeviceLocal; 143 : vk::MemoryPropertyFlagBits::eDeviceLocal;
149 144
150 const auto TryCommit = [&]() -> VKMemoryCommit { 145 if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
151 for (auto& alloc : allocs) {
152 if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
153 continue;
154
155 if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
156 return commit;
157 }
158 }
159 return {};
160 };
161
162 if (auto commit = TryCommit(); commit) {
163 return commit; 146 return commit;
164 } 147 }
165 148
166 // Commit has failed, allocate more memory. 149 // Commit has failed, allocate more memory.
167 if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) { 150 if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
168 // TODO(Rodrigo): Try to use host memory. 151 // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
169 LOG_CRITICAL(Render_Vulkan, "Ran out of memory!"); 152 // Allocation has failed, panic.
170 UNREACHABLE(); 153 UNREACHABLE_MSG("Ran out of VRAM!");
154 return {};
171 } 155 }
172 156
173 // Commit again, this time it won't fail since there's a fresh allocation above. If it does, 157 // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
174 // there's a bug. 158 // there's a bug.
175 auto commit = TryCommit(); 159 auto commit = TryAllocCommit(requirements, wanted_properties);
176 ASSERT(commit); 160 ASSERT(commit);
177 return commit; 161 return commit;
178} 162}
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
180VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { 164VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
181 const auto dev = device.GetLogical(); 165 const auto dev = device.GetLogical();
182 const auto& dld = device.GetDispatchLoader(); 166 const auto& dld = device.GetDispatchLoader();
183 const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld); 167 auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
184 auto commit = Commit(requeriments, host_visible);
185 dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); 168 dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
186 return commit; 169 return commit;
187} 170}
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
189VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { 172VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
190 const auto dev = device.GetLogical(); 173 const auto dev = device.GetLogical();
191 const auto& dld = device.GetDispatchLoader(); 174 const auto& dld = device.GetDispatchLoader();
192 const auto requeriments = dev.getImageMemoryRequirements(image, dld); 175 auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
193 auto commit = Commit(requeriments, host_visible);
194 dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); 176 dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
195 return commit; 177 return commit;
196} 178}
197 179
198bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, 180bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
199 u64 size) { 181 u64 size) {
200 const u32 type = [&]() { 182 const u32 type = [&] {
201 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { 183 for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
202 const auto flags = props.memoryTypes[type_index].propertyFlags; 184 const auto flags = properties.memoryTypes[type_index].propertyFlags;
203 if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { 185 if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
204 // The type matches in type and in the wanted properties. 186 // The type matches in type and in the wanted properties.
205 return type_index; 187 return type_index;
206 } 188 }
207 } 189 }
208 LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!"); 190 UNREACHABLE_MSG("Couldn't find a compatible memory type!");
209 UNREACHABLE(); 191 return 0U;
210 return 0u;
211 }(); 192 }();
212 193
213 const auto dev = device.GetLogical(); 194 const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
216 // Try to allocate found type. 197 // Try to allocate found type.
217 const vk::MemoryAllocateInfo memory_ai(size, type); 198 const vk::MemoryAllocateInfo memory_ai(size, type);
218 vk::DeviceMemory memory; 199 vk::DeviceMemory memory;
219 if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); 200 if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
220 res != vk::Result::eSuccess) { 201 res != vk::Result::eSuccess) {
221 LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); 202 LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
222 return false; 203 return false;
223 } 204 }
224 allocs.push_back( 205 allocations.push_back(
225 std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); 206 std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
226 return true; 207 return true;
227} 208}
228 209
229/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) { 210VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
230 for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) { 211 vk::MemoryPropertyFlags wanted_properties) {
231 if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { 212 for (auto& allocation : allocations) {
213 if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
214 continue;
215 }
216 if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
217 return commit;
218 }
219 }
220 return {};
221}
222
223/*static*/ bool VKMemoryManager::GetMemoryUnified(
224 const vk::PhysicalDeviceMemoryProperties& properties) {
225 for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
226 if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
232 // Memory is considered unified when heaps are device local only. 227 // Memory is considered unified when heaps are device local only.
233 return false; 228 return false;
234 } 229 }
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
236 return true; 231 return true;
237} 232}
238 233
239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, 234VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
240 u8* data, u64 begin, u64 end) 235 vk::DeviceMemory memory, u64 begin, u64 end)
241 : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} 236 : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
242 237
243VKMemoryCommitImpl::~VKMemoryCommitImpl() { 238VKMemoryCommitImpl::~VKMemoryCommitImpl() {
244 allocation->Free(this); 239 allocation->Free(this);
245} 240}
246 241
247u8* VKMemoryCommitImpl::GetData() const { 242MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
248 ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit."); 243 const auto dev = device.GetLogical();
249 return data; 244 const auto address = reinterpret_cast<u8*>(
245 dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
246 return MemoryMap{this, address};
247}
248
249void VKMemoryCommitImpl::Unmap() const {
250 const auto dev = device.GetLogical();
251 dev.unmapMemory(memory, device.GetDispatchLoader());
252}
253
254MemoryMap VKMemoryCommitImpl::Map() const {
255 return Map(interval.second - interval.first);
250} 256}
251 257
252} // namespace Vulkan 258} // namespace Vulkan
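
The fixed 64 MiB allocation chunk is replaced by a size-dependent policy. A standalone restatement of that policy with worked values, for illustration only (not code from this change):

// Sketch: same selection logic as GetAllocationChunkSize above.
#include <algorithm>
#include <cstdint>

constexpr std::uint64_t MiB = 1ULL << 20;

std::uint64_t ChunkSize(std::uint64_t required_size) {
    constexpr std::uint64_t sizes[] = {16 * MiB, 32 * MiB, 64 * MiB, 128 * MiB};
    const auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
    if (it != std::end(sizes)) {
        return *it; // Smallest predefined chunk that fits the request.
    }
    // Oversized requests are rounded up to a multiple of 256 MiB.
    return (required_size + 256 * MiB - 1) / (256 * MiB) * (256 * MiB);
}

// ChunkSize(10 * MiB)  == 16 * MiB
// ChunkSize(70 * MiB)  == 128 * MiB
// ChunkSize(300 * MiB) == 512 * MiB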
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 073597b35..cd00bb91b 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -12,6 +12,7 @@
12 12
13namespace Vulkan { 13namespace Vulkan {
14 14
15class MemoryMap;
15class VKDevice; 16class VKDevice;
16class VKMemoryAllocation; 17class VKMemoryAllocation;
17class VKMemoryCommitImpl; 18class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
21class VKMemoryManager final { 22class VKMemoryManager final {
22public: 23public:
23 explicit VKMemoryManager(const VKDevice& device); 24 explicit VKMemoryManager(const VKDevice& device);
25 VKMemoryManager(const VKMemoryManager&) = delete;
24 ~VKMemoryManager(); 26 ~VKMemoryManager();
25 27
26 /** 28 /**
27 * Commits memory with the specified requirements. 29 * Commits memory with the specified requirements.
28 * @param reqs Requeriments returned from a Vulkan call. 30 * @param requirements Requirements returned from a Vulkan call.
29 * @param host_visible Signals the allocator that it *must* use host visible and coherent 31 * @param host_visible Signals the allocator that it *must* use host visible and coherent
30 * memory. When passing false, it will try to allocate device local memory. 32 * memory. When passing false, it will try to allocate device local memory.
31 * @returns A memory commit. 33 * @returns A memory commit.
32 */ 34 */
33 VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); 35 VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
47 /// Allocates a chunk of memory. 49 /// Allocates a chunk of memory.
48 bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); 50 bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
49 51
52 /// Tries to allocate a memory commit.
53 VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
54 vk::MemoryPropertyFlags wanted_properties);
55
50 /// Returns true if the device uses a unified memory model. 56 /// Returns true if the device uses a unified memory model.
51 static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props); 57 static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
52 58
53 const VKDevice& device; ///< Device handler. 59 const VKDevice& device; ///< Device handler.
54 const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties. 60 const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
55 const bool is_memory_unified; ///< True if memory model is unified. 61 const bool is_memory_unified; ///< True if memory model is unified.
56 std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations. 62 std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
57}; 63};
58 64
59class VKMemoryCommitImpl final { 65class VKMemoryCommitImpl final {
60 friend VKMemoryAllocation; 66 friend VKMemoryAllocation;
67 friend MemoryMap;
61 68
62public: 69public:
63 explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, 70 explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
64 u64 begin, u64 end); 71 vk::DeviceMemory memory, u64 begin, u64 end);
65 ~VKMemoryCommitImpl(); 72 ~VKMemoryCommitImpl();
66 73
67 /// Returns the writeable memory map. The commit has to be mappable. 74 /// Maps a memory region and returns a pointer to it.
68 u8* GetData() const; 75 /// It's illegal to have more than one memory map at the same time.
76 MemoryMap Map(u64 size, u64 offset = 0) const;
77
78 /// Maps the whole commit and returns a pointer to it.
79 /// It's illegal to have more than one memory map at the same time.
80 MemoryMap Map() const;
69 81
70 /// Returns the Vulkan memory handler. 82 /// Returns the Vulkan memory handler.
71 vk::DeviceMemory GetMemory() const { 83 vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
78 } 90 }
79 91
80private: 92private:
93 /// Unmaps memory.
94 void Unmap() const;
95
96 const VKDevice& device; ///< Vulkan device.
81 std::pair<u64, u64> interval{}; ///< Interval where the commit exists. 97 std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
82 vk::DeviceMemory memory; ///< Vulkan device memory handler. 98 vk::DeviceMemory memory; ///< Vulkan device memory handler.
83 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. 99 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
84 u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included. 100};
101
102/// Holds ownership of a memory map.
103class MemoryMap final {
104public:
105 explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
106 : commit{commit}, address{address} {}
107
108 ~MemoryMap() {
109 if (commit) {
110 commit->Unmap();
111 }
112 }
113
114 /// Prematurely releases the memory map.
115 void Release() {
116 commit->Unmap();
117 commit = nullptr;
118 }
119
120 /// Returns the address of the memory map.
121 u8* GetAddress() const {
122 return address;
123 }
124
125 /// Returns the address of the memory map.
126 operator u8*() const {
127 return address;
128 }
129
130private:
131 const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
132 u8* address{}; ///< Address to the mapped memory.
85}; 133};
86 134
87} // namespace Vulkan 135} // namespace Vulkan
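As a rough usage sketch of the mapping API declared above (illustrative only, not part of the patch; memory_manager, requirements, src and size are hypothetical names):

    // Commit host-visible memory, map the whole commit, copy data in, and let
    // ~MemoryMap() call Unmap() when the map goes out of scope.
    VKMemoryCommit commit = memory_manager.Commit(requirements, /*host_visible=*/true);
    {
        MemoryMap map = commit->Map();   // maps the full [begin, end) interval
        std::memcpy(map.GetAddress(), src, static_cast<std::size_t>(size));
    }                                    // unmapped here by the destructor
    // A sub-range can also be mapped and released early:
    MemoryMap partial = commit->Map(size, /*offset=*/0);
    std::memcpy(partial, src, static_cast<std::size_t>(size)); // uses operator u8*()
    partial.Release();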
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
new file mode 100644
index 000000000..48e23d4cd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -0,0 +1,395 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7#include <memory>
8#include <vector>
9
10#include "common/microprofile.h"
11#include "core/core.h"
12#include "core/memory.h"
13#include "video_core/engines/kepler_compute.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/memory_manager.h"
16#include "video_core/renderer_vulkan/declarations.h"
17#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
18#include "video_core/renderer_vulkan/maxwell_to_vk.h"
19#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
20#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
21#include "video_core/renderer_vulkan/vk_device.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_resource_manager.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h"
28#include "video_core/renderer_vulkan/vk_update_descriptor.h"
29#include "video_core/shader/compiler_settings.h"
30
31namespace Vulkan {
32
33MICROPROFILE_DECLARE(Vulkan_PipelineCache);
34
35using Tegra::Engines::ShaderType;
36
37namespace {
38
39constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
40 VideoCommon::Shader::CompileDepth::FullDecompile};
41
42/// Gets the address for the specified shader stage program
43GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
44 const auto& gpu{system.GPU().Maxwell3D()};
45 const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
46 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
47}
48
49/// Returns whether the current instruction offset is a scheduler instruction
50constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
51 // Sched instructions appear once every 4 instructions.
52 constexpr std::size_t SchedPeriod = 4;
53 const std::size_t absolute_offset = offset - main_offset;
54 return (absolute_offset % SchedPeriod) == 0;
55}
56
57/// Calculates the size of a program stream
58std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
59 const std::size_t start_offset = is_compute ? 0 : 10;
60 // This is the encoded version of BRA that jumps to itself. All Nvidia
61 // shaders end with one.
62 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
63 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
64 std::size_t offset = start_offset;
65 while (offset < program.size()) {
66 const u64 instruction = program[offset];
67 if (!IsSchedInstruction(offset, start_offset)) {
68 if ((instruction & mask) == self_jumping_branch) {
69 // End on Maxwell's "nop" instruction
70 break;
71 }
72 if (instruction == 0) {
73 break;
74 }
75 }
76 ++offset;
77 }
78 // The last instruction is included in the program size
79 return std::min(offset + 1, program.size());
80}
81
82/// Gets the shader program code from memory for the specified address
83ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
84 const u8* host_ptr, bool is_compute) {
85 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
86 ASSERT_OR_EXECUTE(host_ptr != nullptr, {
87 std::fill(program_code.begin(), program_code.end(), 0);
88 return program_code;
89 });
90 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
91 program_code.size() * sizeof(u64));
92 program_code.resize(CalculateProgramSize(program_code, is_compute));
93 return program_code;
94}
95
96constexpr std::size_t GetStageFromProgram(std::size_t program) {
97 return program == 0 ? 0 : program - 1;
98}
99
100constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
101 return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
102}
103
104ShaderType GetShaderType(Maxwell::ShaderProgram program) {
105 switch (program) {
106 case Maxwell::ShaderProgram::VertexB:
107 return ShaderType::Vertex;
108 case Maxwell::ShaderProgram::TesselationControl:
109 return ShaderType::TesselationControl;
110 case Maxwell::ShaderProgram::TesselationEval:
111 return ShaderType::TesselationEval;
112 case Maxwell::ShaderProgram::Geometry:
113 return ShaderType::Geometry;
114 case Maxwell::ShaderProgram::Fragment:
115 return ShaderType::Fragment;
116 default:
117 UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
118 return ShaderType::Vertex;
119 }
120}
121
122u32 FillDescriptorLayout(const ShaderEntries& entries,
123 std::vector<vk::DescriptorSetLayoutBinding>& bindings,
124 Maxwell::ShaderProgram program_type, u32 base_binding) {
125 const ShaderType stage = GetStageFromProgram(program_type);
126 const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
127
128 u32 binding = base_binding;
129 const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
130 for (std::size_t i = 0; i < num_entries; ++i) {
131 bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
132 }
133 };
134 AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
135 AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
136 AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
137 AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
138 AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
139 return binding;
140}
141
142} // Anonymous namespace
143
144CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
145 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
146 ProgramCode program_code, u32 main_offset)
147 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
148 program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
149 shader_ir{this->program_code, main_offset, compiler_settings, locker},
150 entries{GenerateShaderEntries(shader_ir)} {}
151
152CachedShader::~CachedShader() = default;
153
154Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
155 Core::System& system, Tegra::Engines::ShaderType stage) {
156 if (stage == Tegra::Engines::ShaderType::Compute) {
157 return system.GPU().KeplerCompute();
158 } else {
159 return system.GPU().Maxwell3D();
160 }
161}
162
163VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
164 const VKDevice& device, VKScheduler& scheduler,
165 VKDescriptorPool& descriptor_pool,
166 VKUpdateDescriptorQueue& update_descriptor_queue)
167 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
168 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
169 renderpass_cache(device) {}
170
171VKPipelineCache::~VKPipelineCache() = default;
172
173std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
174 const auto& gpu = system.GPU().Maxwell3D();
175 auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
176 if (!dirty) {
177 return last_shaders;
178 }
179 dirty = false;
180
181 std::array<Shader, Maxwell::MaxShaderProgram> shaders;
182 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
183 const auto& shader_config = gpu.regs.shader_config[index];
184 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
185
186 // Skip stages that are not enabled
187 if (!gpu.regs.IsShaderConfigEnabled(index)) {
188 continue;
189 }
190
191 auto& memory_manager{system.GPU().MemoryManager()};
192 const GPUVAddr program_addr{GetShaderAddress(system, program)};
193 const auto host_ptr{memory_manager.GetPointer(program_addr)};
194 auto shader = TryGet(host_ptr);
195 if (!shader) {
196 // No shader found - create a new one
197 constexpr u32 stage_offset = 10;
198 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
199 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
200
201 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
202 ASSERT(cpu_addr);
203
204 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
205 host_ptr, std::move(code), stage_offset);
206 Register(shader);
207 }
208 shaders[index] = std::move(shader);
209 }
210 return last_shaders = shaders;
211}
212
213VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
214 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
215
216 if (last_graphics_pipeline && last_graphics_key == key) {
217 return *last_graphics_pipeline;
218 }
219 last_graphics_key = key;
220
221 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
222 auto& entry = pair->second;
223 if (is_cache_miss) {
224 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
225 const auto [program, bindings] = DecompileShaders(key);
226 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
227 update_descriptor_queue, renderpass_cache, key,
228 bindings, program);
229 }
230 return *(last_graphics_pipeline = entry.get());
231}
232
233VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
234 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
235
236 const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
237 auto& entry = pair->second;
238 if (!is_cache_miss) {
239 return *entry;
240 }
241 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
242
243 auto& memory_manager = system.GPU().MemoryManager();
244 const auto program_addr = key.shader;
245 const auto host_ptr = memory_manager.GetPointer(program_addr);
246
247 auto shader = TryGet(host_ptr);
248 if (!shader) {
249 // No shader found - create a new one
250 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
251 ASSERT(cpu_addr);
252
253 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
254 constexpr u32 kernel_main_offset = 0;
255 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
256 program_addr, *cpu_addr, host_ptr, std::move(code),
257 kernel_main_offset);
258 Register(shader);
259 }
260
261 Specialization specialization;
262 specialization.workgroup_size = key.workgroup_size;
263 specialization.shared_memory_size = key.shared_memory_size;
264
265 const SPIRVShader spirv_shader{
266 Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
267 shader->GetEntries()};
268 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
269 update_descriptor_queue, spirv_shader);
270 return *entry;
271}
272
273void VKPipelineCache::Unregister(const Shader& shader) {
274 bool finished = false;
275 const auto Finish = [&] {
276 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
277 // flush.
278 if (finished) {
279 return;
280 }
281 finished = true;
282 scheduler.Finish();
283 };
284
285 const GPUVAddr invalidated_addr = shader->GetGpuAddr();
286 for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
287 auto& entry = it->first;
288 if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
289 entry.shaders.end()) {
290 ++it;
291 continue;
292 }
293 Finish();
294 it = graphics_cache.erase(it);
295 }
296 for (auto it = compute_cache.begin(); it != compute_cache.end();) {
297 auto& entry = it->first;
298 if (entry.shader != invalidated_addr) {
299 ++it;
300 continue;
301 }
302 Finish();
303 it = compute_cache.erase(it);
304 }
305
306 RasterizerCache::Unregister(shader);
307}
308
309std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
310VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
311 const auto& fixed_state = key.fixed_state;
312 auto& memory_manager = system.GPU().MemoryManager();
313 const auto& gpu = system.GPU().Maxwell3D();
314
315 Specialization specialization;
316 specialization.primitive_topology = fixed_state.input_assembly.topology;
317 if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
318 ASSERT(fixed_state.input_assembly.point_size != 0.0f);
319 specialization.point_size = fixed_state.input_assembly.point_size;
320 }
321 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
322 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
323 }
324 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
325 specialization.tessellation.primitive = fixed_state.tessellation.primitive;
326 specialization.tessellation.spacing = fixed_state.tessellation.spacing;
327 specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
328 for (const auto& rt : key.renderpass_params.color_attachments) {
329 specialization.enabled_rendertargets.set(rt.index);
330 }
331
332 SPIRVProgram program;
333 std::vector<vk::DescriptorSetLayoutBinding> bindings;
334
335 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
336 const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
337
338 // Skip stages that are not enabled
339 if (!gpu.regs.IsShaderConfigEnabled(index)) {
340 continue;
341 }
342
343 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
344 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
345 const auto shader = TryGet(host_ptr);
346 ASSERT(shader);
347
348 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
349 const auto program_type = GetShaderType(program_enum);
350 const auto& entries = shader->GetEntries();
351 program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
352 entries};
353
354 if (program_enum == Maxwell::ShaderProgram::VertexA) {
355 // VertexB was combined with VertexA, so we skip the VertexB iteration
356 ++index;
357 }
358
359 const u32 old_binding = specialization.base_binding;
360 specialization.base_binding =
361 FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
362 ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
363 }
364 return {std::move(program), std::move(bindings)};
365}
366
367void FillDescriptorUpdateTemplateEntries(
368 const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
369 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
370 static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
371 const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
372 const u32 count = static_cast<u32>(count_);
373 if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
374 device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
375 // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
376 // crash.
377 for (u32 i = 0; i < count; ++i) {
378 template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
379 offset + i * entry_size, entry_size);
380 }
381 } else if (count != 0) {
382 template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
383 }
384 offset += count * entry_size;
385 binding += count;
386 };
387
388 AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
389 AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
390 AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
391 AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
392 AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
393}
394
395} // namespace Vulkan
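A worked note on the binding layout (illustrative, not part of the patch): FillDescriptorLayout and FillDescriptorUpdateTemplateEntries walk a stage's entries in the same fixed order, which is what keeps the descriptor set layout and the update template in sync. With hypothetical entry counts of 2 const buffers, 1 global buffer, 0 texel buffers, 3 samplers and 1 image, and base_binding = 0:

    // bindings 0-1 -> eUniformBuffer         (const_buffers)
    // binding  2   -> eStorageBuffer         (global_buffers)
    // bindings 3-5 -> eCombinedImageSampler  (samplers)
    // binding  6   -> eStorageImage          (images)
    // FillDescriptorLayout returns 7, which becomes the next stage's base_binding;
    // this is the invariant checked by
    // ASSERT(old_binding + entries.NumBindings() == specialization.base_binding).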
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
new file mode 100644
index 000000000..8678fc9c3
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -0,0 +1,200 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <tuple>
11#include <type_traits>
12#include <unordered_map>
13#include <utility>
14#include <vector>
15
16#include <boost/functional/hash.hpp>
17
18#include "common/common_types.h"
19#include "video_core/engines/const_buffer_engine_interface.h"
20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/rasterizer_cache.h"
22#include "video_core/renderer_vulkan/declarations.h"
23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
24#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_resource_manager.h"
27#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
28#include "video_core/shader/const_buffer_locker.h"
29#include "video_core/shader/shader_ir.h"
30#include "video_core/surface.h"
31
32namespace Core {
33class System;
34}
35
36namespace Vulkan {
37
38class RasterizerVulkan;
39class VKComputePipeline;
40class VKDescriptorPool;
41class VKDevice;
42class VKFence;
43class VKScheduler;
44class VKUpdateDescriptorQueue;
45
46class CachedShader;
47using Shader = std::shared_ptr<CachedShader>;
48using Maxwell = Tegra::Engines::Maxwell3D::Regs;
49
50using ProgramCode = std::vector<u64>;
51
52struct GraphicsPipelineCacheKey {
53 FixedPipelineState fixed_state;
54 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
55 RenderPassParams renderpass_params;
56
57 std::size_t Hash() const noexcept {
58 std::size_t hash = fixed_state.Hash();
59 for (const auto& shader : shaders) {
60 boost::hash_combine(hash, shader);
61 }
62 boost::hash_combine(hash, renderpass_params.Hash());
63 return hash;
64 }
65
66 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
67 return std::tie(fixed_state, shaders, renderpass_params) ==
68 std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
69 }
70};
71
72struct ComputePipelineCacheKey {
73 GPUVAddr shader{};
74 u32 shared_memory_size{};
75 std::array<u32, 3> workgroup_size{};
76
77 std::size_t Hash() const noexcept {
78 return static_cast<std::size_t>(shader) ^
79 ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
80 static_cast<std::size_t>(workgroup_size[0]) ^
81 (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
82 (static_cast<std::size_t>(workgroup_size[2]) << 24);
83 }
84
85 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
86 return std::tie(shader, shared_memory_size, workgroup_size) ==
87 std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
88 }
89};
90
91} // namespace Vulkan
92
93namespace std {
94
95template <>
96struct hash<Vulkan::GraphicsPipelineCacheKey> {
97 std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
98 return k.Hash();
99 }
100};
101
102template <>
103struct hash<Vulkan::ComputePipelineCacheKey> {
104 std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
105 return k.Hash();
106 }
107};
108
109} // namespace std
110
111namespace Vulkan {
112
113class CachedShader final : public RasterizerCacheObject {
114public:
115 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
116 VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
117 ~CachedShader();
118
119 GPUVAddr GetGpuAddr() const {
120 return gpu_addr;
121 }
122
123 VAddr GetCpuAddr() const override {
124 return cpu_addr;
125 }
126
127 std::size_t GetSizeInBytes() const override {
128 return program_code.size() * sizeof(u64);
129 }
130
131 VideoCommon::Shader::ShaderIR& GetIR() {
132 return shader_ir;
133 }
134
135 const VideoCommon::Shader::ShaderIR& GetIR() const {
136 return shader_ir;
137 }
138
139 const ShaderEntries& GetEntries() const {
140 return entries;
141 }
142
143private:
144 static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
145 Tegra::Engines::ShaderType stage);
146
147 GPUVAddr gpu_addr{};
148 VAddr cpu_addr{};
149 ProgramCode program_code;
150 VideoCommon::Shader::ConstBufferLocker locker;
151 VideoCommon::Shader::ShaderIR shader_ir;
152 ShaderEntries entries;
153};
154
155class VKPipelineCache final : public RasterizerCache<Shader> {
156public:
157 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
158 const VKDevice& device, VKScheduler& scheduler,
159 VKDescriptorPool& descriptor_pool,
160 VKUpdateDescriptorQueue& update_descriptor_queue);
161 ~VKPipelineCache();
162
163 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
164
165 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
166
167 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
168
169protected:
170 void Unregister(const Shader& shader) override;
171
172 void FlushObjectInner(const Shader& object) override {}
173
174private:
175 std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
176 const GraphicsPipelineCacheKey& key);
177
178 Core::System& system;
179 const VKDevice& device;
180 VKScheduler& scheduler;
181 VKDescriptorPool& descriptor_pool;
182 VKUpdateDescriptorQueue& update_descriptor_queue;
183
184 VKRenderPassCache renderpass_cache;
185
186 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
187
188 GraphicsPipelineCacheKey last_graphics_key;
189 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
190
191 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
192 graphics_cache;
193 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
194};
195
196void FillDescriptorUpdateTemplateEntries(
197 const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
198 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
199
200} // namespace Vulkan
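Because std::hash is specialized for both key types above, the keys can be used directly in std::unordered_map, which is how graphics_cache and compute_cache are declared. A minimal sketch of building a compute key (the values are made up for illustration):

    // Mirrors what DispatchCompute/GetComputePipeline do with real launch state.
    ComputePipelineCacheKey key{};
    key.shader = 0xDEADB000;          // hypothetical GPU address of the shader
    key.shared_memory_size = 0x400;   // hypothetical shared memory size
    key.workgroup_size = {8, 8, 1};   // hypothetical block dimensions
    std::unordered_map<ComputePipelineCacheKey, int> cache;
    cache.try_emplace(key, 0);        // uses ComputePipelineCacheKey::Hash() and operator==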
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file mode 100644
index 000000000..d2c6b1189
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,1141 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <memory>
8#include <mutex>
9#include <vector>
10
11#include <boost/container/static_vector.hpp>
12#include <boost/functional/hash.hpp>
13
14#include "common/alignment.h"
15#include "common/assert.h"
16#include "common/logging/log.h"
17#include "common/microprofile.h"
18#include "core/core.h"
19#include "core/memory.h"
20#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/declarations.h"
23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
24#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_buffer_cache.h"
27#include "video_core/renderer_vulkan/vk_compute_pass.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
29#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
30#include "video_core/renderer_vulkan/vk_device.h"
31#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
32#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
33#include "video_core/renderer_vulkan/vk_rasterizer.h"
34#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
35#include "video_core/renderer_vulkan/vk_resource_manager.h"
36#include "video_core/renderer_vulkan/vk_sampler_cache.h"
37#include "video_core/renderer_vulkan/vk_scheduler.h"
38#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
39#include "video_core/renderer_vulkan/vk_texture_cache.h"
40#include "video_core/renderer_vulkan/vk_update_descriptor.h"
41
42namespace Vulkan {
43
44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45
46MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
47MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
48MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
49MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
50MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
51MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
52MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
53MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
54MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
55MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
56MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
57
58namespace {
59
60constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute);
61
62vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
63 const auto& viewport = regs.viewport_transform[index];
64 const float x = viewport.translate_x - viewport.scale_x;
65 const float y = viewport.translate_y - viewport.scale_y;
66 const float width = viewport.scale_x * 2.0f;
67 const float height = viewport.scale_y * 2.0f;
68
69 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
70 float near = viewport.translate_z - viewport.scale_z * reduce_z;
71 float far = viewport.translate_z + viewport.scale_z;
72 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
73 near = std::clamp(near, 0.0f, 1.0f);
74 far = std::clamp(far, 0.0f, 1.0f);
75 }
76
77 return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far);
78}
79
80constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) {
81 const auto& scissor = regs.scissor_test[index];
82 if (!scissor.enable) {
83 return {{0, 0}, {INT32_MAX, INT32_MAX}};
84 }
85 const u32 width = scissor.max_x - scissor.min_x;
86 const u32 height = scissor.max_y - scissor.min_y;
87 return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}};
88}
89
90std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
91 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
92 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
93 for (std::size_t i = 0; i < std::size(addresses); ++i) {
94 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
95 }
96 return addresses;
97}
98
99void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage,
100 vk::AccessFlags access) {
101 for (auto& [view, layout] : views) {
102 view->Transition(*layout, pipeline_stage, access);
103 }
104}
105
106template <typename Engine, typename Entry>
107Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
108 std::size_t stage) {
109 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
110 if (entry.IsBindless()) {
111 const Tegra::Texture::TextureHandle tex_handle =
112 engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
113 return engine.GetTextureInfo(tex_handle);
114 }
115 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
116 return engine.GetStageTexture(stage_type, entry.GetOffset());
117 } else {
118 return engine.GetTexture(entry.GetOffset());
119 }
120}
121
122} // Anonymous namespace
123
124class BufferBindings final {
125public:
126 void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) {
127 vertex.buffer_ptrs[vertex.num_buffers] = buffer;
128 vertex.offsets[vertex.num_buffers] = offset;
129 ++vertex.num_buffers;
130 }
131
132 void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) {
133 index.buffer = buffer;
134 index.offset = offset;
135 index.type = type;
136 }
137
138 void Bind(VKScheduler& scheduler) const {
139 // Use this large switch case to avoid capturing more memory in the record lambda than
140 // we need. It looks horrible, but it's the best we can do in standard C++.
141 switch (vertex.num_buffers) {
142 case 0:
143 return BindStatic<0>(scheduler);
144 case 1:
145 return BindStatic<1>(scheduler);
146 case 2:
147 return BindStatic<2>(scheduler);
148 case 3:
149 return BindStatic<3>(scheduler);
150 case 4:
151 return BindStatic<4>(scheduler);
152 case 5:
153 return BindStatic<5>(scheduler);
154 case 6:
155 return BindStatic<6>(scheduler);
156 case 7:
157 return BindStatic<7>(scheduler);
158 case 8:
159 return BindStatic<8>(scheduler);
160 case 9:
161 return BindStatic<9>(scheduler);
162 case 10:
163 return BindStatic<10>(scheduler);
164 case 11:
165 return BindStatic<11>(scheduler);
166 case 12:
167 return BindStatic<12>(scheduler);
168 case 13:
169 return BindStatic<13>(scheduler);
170 case 14:
171 return BindStatic<14>(scheduler);
172 case 15:
173 return BindStatic<15>(scheduler);
174 case 16:
175 return BindStatic<16>(scheduler);
176 case 17:
177 return BindStatic<17>(scheduler);
178 case 18:
179 return BindStatic<18>(scheduler);
180 case 19:
181 return BindStatic<19>(scheduler);
182 case 20:
183 return BindStatic<20>(scheduler);
184 case 21:
185 return BindStatic<21>(scheduler);
186 case 22:
187 return BindStatic<22>(scheduler);
188 case 23:
189 return BindStatic<23>(scheduler);
190 case 24:
191 return BindStatic<24>(scheduler);
192 case 25:
193 return BindStatic<25>(scheduler);
194 case 26:
195 return BindStatic<26>(scheduler);
196 case 27:
197 return BindStatic<27>(scheduler);
198 case 28:
199 return BindStatic<28>(scheduler);
200 case 29:
201 return BindStatic<29>(scheduler);
202 case 30:
203 return BindStatic<30>(scheduler);
204 case 31:
205 return BindStatic<31>(scheduler);
206 case 32:
207 return BindStatic<32>(scheduler);
208 }
209 UNREACHABLE();
210 }
211
212private:
213 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
214 struct {
215 std::size_t num_buffers = 0;
216 std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs;
217 std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets;
218 } vertex;
219
220 struct {
221 const vk::Buffer* buffer = nullptr;
222 vk::DeviceSize offset;
223 vk::IndexType type;
224 } index;
225
226 template <std::size_t N>
227 void BindStatic(VKScheduler& scheduler) const {
228 if (index.buffer != nullptr) {
229 BindStatic<N, true>(scheduler);
230 } else {
231 BindStatic<N, false>(scheduler);
232 }
233 }
234
235 template <std::size_t N, bool is_indexed>
236 void BindStatic(VKScheduler& scheduler) const {
237 static_assert(N <= Maxwell::NumVertexArrays);
238 if constexpr (N == 0) {
239 return;
240 }
241
242 std::array<vk::Buffer, N> buffers;
243 std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
244 [](const auto ptr) { return *ptr; });
245
246 std::array<vk::DeviceSize, N> offsets;
247 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
248
249 if constexpr (is_indexed) {
250 // Indexed draw
251 scheduler.Record([buffers, offsets, index_buffer = *index.buffer,
252 index_offset = index.offset,
253 index_type = index.type](auto cmdbuf, auto& dld) {
254 cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld);
255 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
256 dld);
257 });
258 } else {
259 // Array draw
260 scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) {
261 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
262 dld);
263 });
264 }
265 }
266};
267
268void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
269 const vk::DispatchLoaderDynamic& dld) const {
270 if (is_indexed) {
271 cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld);
272 } else {
273 cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld);
274 }
275}
276
277RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
278 VKScreenInfo& screen_info, const VKDevice& device,
279 VKResourceManager& resource_manager,
280 VKMemoryManager& memory_manager, VKScheduler& scheduler)
281 : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
282 screen_info{screen_info}, device{device}, resource_manager{resource_manager},
283 memory_manager{memory_manager}, scheduler{scheduler},
284 staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
285 update_descriptor_queue(device, scheduler),
286 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
287 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
288 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
289 staging_pool),
290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
292 sampler_cache(device) {}
293
294RasterizerVulkan::~RasterizerVulkan() = default;
295
296bool RasterizerVulkan::DrawBatch(bool is_indexed) {
297 Draw(is_indexed, false);
298 return true;
299}
300
301bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) {
302 Draw(is_indexed, true);
303 return true;
304}
305
306void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
307 MICROPROFILE_SCOPE(Vulkan_Drawing);
308
309 FlushWork();
310
311 const auto& gpu = system.GPU().Maxwell3D();
312 GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
313
314 buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
315
316 BufferBindings buffer_bindings;
317 const DrawParameters draw_params =
318 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
319
320 update_descriptor_queue.Acquire();
321 sampled_views.clear();
322 image_views.clear();
323
324 const auto shaders = pipeline_cache.GetShaders();
325 key.shaders = GetShaderAddresses(shaders);
326 SetupShaderDescriptors(shaders);
327
328 buffer_cache.Unmap();
329
330 const auto texceptions = UpdateAttachments();
331 SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
332
333 key.renderpass_params = GetRenderPassParams(texceptions);
334
335 auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
336 scheduler.BindGraphicsPipeline(pipeline.GetHandle());
337
338 const auto renderpass = pipeline.GetRenderPass();
339 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
340 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
341
342 UpdateDynamicStates();
343
344 buffer_bindings.Bind(scheduler);
345
346 if (device.IsNvDeviceDiagnosticCheckpoints()) {
347 scheduler.Record(
348 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
349 }
350
351 const auto pipeline_layout = pipeline.GetLayout();
352 const auto descriptor_set = pipeline.CommitDescriptorSet();
353 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
354 if (descriptor_set) {
355 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout,
356 DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld);
357 }
358 draw_params.Draw(cmdbuf, dld);
359 });
360}
361
362void RasterizerVulkan::Clear() {
363 MICROPROFILE_SCOPE(Vulkan_Clearing);
364
365 const auto& gpu = system.GPU().Maxwell3D();
366 if (!system.GPU().Maxwell3D().ShouldExecute()) {
367 return;
368 }
369
370 const auto& regs = gpu.regs;
371 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
372 regs.clear_buffers.A;
373 const bool use_depth = regs.clear_buffers.Z;
374 const bool use_stencil = regs.clear_buffers.S;
375 if (!use_color && !use_depth && !use_stencil) {
376 return;
377 }
378 // Clearing images requires being outside of a renderpass
379 scheduler.RequestOutsideRenderPassOperationContext();
380
381 // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass.
382
383 if (use_color) {
384 View color_view;
385 {
386 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
387 color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
388 }
389
390 color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
391 vk::PipelineStageFlagBits::eTransfer,
392 vk::AccessFlagBits::eTransferWrite);
393
394 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
395 regs.clear_color[2], regs.clear_color[3]};
396 const vk::ClearColorValue clear(clear_color);
397 scheduler.Record([image = color_view->GetImage(),
398 subresource = color_view->GetImageSubresourceRange(),
399 clear](auto cmdbuf, auto& dld) {
400 cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
401 dld);
402 });
403 }
404 if (use_depth || use_stencil) {
405 View zeta_surface;
406 {
407 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
408 zeta_surface = texture_cache.GetDepthBufferSurface(false);
409 }
410
411 zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
412 vk::PipelineStageFlagBits::eTransfer,
413 vk::AccessFlagBits::eTransferWrite);
414
415 const vk::ClearDepthStencilValue clear(regs.clear_depth,
416 static_cast<u32>(regs.clear_stencil));
417 scheduler.Record([image = zeta_surface->GetImage(),
418 subresource = zeta_surface->GetImageSubresourceRange(),
419 clear](auto cmdbuf, auto& dld) {
420 cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
421 subresource, dld);
422 });
423 }
424}
425
426void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
427 MICROPROFILE_SCOPE(Vulkan_Compute);
428 update_descriptor_queue.Acquire();
429 sampled_views.clear();
430 image_views.clear();
431
432 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
433 const ComputePipelineCacheKey key{
434 code_addr,
435 launch_desc.shared_alloc,
436 {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
437 auto& pipeline = pipeline_cache.GetComputePipeline(key);
438
439 // Compute dispatches can't be executed inside a renderpass
440 scheduler.RequestOutsideRenderPassOperationContext();
441
442 buffer_cache.Map(CalculateComputeStreamBufferSize());
443
444 const auto& entries = pipeline.GetEntries();
445 SetupComputeConstBuffers(entries);
446 SetupComputeGlobalBuffers(entries);
447 SetupComputeTexelBuffers(entries);
448 SetupComputeTextures(entries);
449 SetupComputeImages(entries);
450
451 buffer_cache.Unmap();
452
453 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader,
454 vk::AccessFlagBits::eShaderRead);
455 TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader,
456 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
457
458 if (device.IsNvDeviceDiagnosticCheckpoints()) {
459 scheduler.Record(
460 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); });
461 }
462
463 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
464 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
465 layout = pipeline.GetLayout(),
466 descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) {
467 cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld);
468 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1,
469 &descriptor_set, 0, nullptr, dld);
470 cmdbuf.dispatch(grid_x, grid_y, grid_z, dld);
471 });
472}
473
474void RasterizerVulkan::FlushAll() {}
475
476void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
477 texture_cache.FlushRegion(addr, size);
478 buffer_cache.FlushRegion(addr, size);
479}
480
481void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
482 texture_cache.InvalidateRegion(addr, size);
483 pipeline_cache.InvalidateRegion(addr, size);
484 buffer_cache.InvalidateRegion(addr, size);
485}
486
487void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
488 FlushRegion(addr, size);
489 InvalidateRegion(addr, size);
490}
491
492void RasterizerVulkan::FlushCommands() {
493 if (draw_counter > 0) {
494 draw_counter = 0;
495 scheduler.Flush();
496 }
497}
498
499void RasterizerVulkan::TickFrame() {
500 draw_counter = 0;
501 update_descriptor_queue.TickFrame();
502 buffer_cache.TickFrame();
503 staging_pool.TickFrame();
504}
505
506bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
507 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
508 const Tegra::Engines::Fermi2D::Config& copy_config) {
509 texture_cache.DoFermiCopy(src, dst, copy_config);
510 return true;
511}
512
513bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
514 VAddr framebuffer_addr, u32 pixel_stride) {
515 if (!framebuffer_addr) {
516 return false;
517 }
518
519 const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
520 const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
521 if (!surface) {
522 return false;
523 }
524
525 // Verify that the cached surface is the same size and format as the requested framebuffer
526 const auto& params{surface->GetSurfaceParams()};
527 const auto& pixel_format{
528 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
529 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
530 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
531
532 screen_info.image = &surface->GetImage();
533 screen_info.width = params.width;
534 screen_info.height = params.height;
535 screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
536 return true;
537}
538
539void RasterizerVulkan::FlushWork() {
540 static constexpr u32 DRAWS_TO_DISPATCH = 4096;
541
542 // Only check multiples of 8 draws
543 static_assert(DRAWS_TO_DISPATCH % 8 == 0);
544 if ((++draw_counter & 7) != 7) {
545 return;
546 }
547
548 if (draw_counter < DRAWS_TO_DISPATCH) {
549 // Send recorded tasks to the worker thread
550 scheduler.DispatchWork();
551 return;
552 }
553
554 // Otherwise (every DRAWS_TO_DISPATCH draws) flush execution.
555 // This submits commands to the Vulkan driver.
556 scheduler.Flush();
557 draw_counter = 0;
558}
559
560RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
561 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
562 auto& dirty = system.GPU().Maxwell3D().dirty;
563 const bool update_rendertargets = dirty.render_settings;
564 dirty.render_settings = false;
565
566 texture_cache.GuardRenderTargets(true);
567
568 Texceptions texceptions;
569 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
570 if (update_rendertargets) {
571 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
572 }
573 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
574 texceptions.set(rt);
575 }
576 }
577
578 if (update_rendertargets) {
579 zeta_attachment = texture_cache.GetDepthBufferSurface(true);
580 }
581 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
582 texceptions.set(ZETA_TEXCEPTION_INDEX);
583 }
584
585 texture_cache.GuardRenderTargets(false);
586
587 return texceptions;
588}
589
590bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
591 bool overlap = false;
592 for (auto& [view, layout] : sampled_views) {
593 if (!attachment.IsSameSurface(*view)) {
594 continue;
595 }
596 overlap = true;
597 *layout = vk::ImageLayout::eGeneral;
598 }
599 return overlap;
600}
601
602std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
603 vk::RenderPass renderpass) {
604 FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
605 std::numeric_limits<u32>::max()};
606
607 const auto MarkAsModifiedAndPush = [&](const View& view) {
608 if (view == nullptr) {
609 return false;
610 }
611 key.views.push_back(view->GetHandle());
612 key.width = std::min(key.width, view->GetWidth());
613 key.height = std::min(key.height, view->GetHeight());
614 return true;
615 };
616
617 for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
618 if (MarkAsModifiedAndPush(color_attachments[index])) {
619 texture_cache.MarkColorBufferInUse(index);
620 }
621 }
622 if (MarkAsModifiedAndPush(zeta_attachment)) {
623 texture_cache.MarkDepthBufferInUse();
624 }
625
626 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
627 auto& framebuffer = fbentry->second;
628 if (is_cache_miss) {
629 const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass,
630 static_cast<u32>(key.views.size()),
631 key.views.data(), key.width, key.height, 1);
632 const auto dev = device.GetLogical();
633 const auto& dld = device.GetDispatchLoader();
634 framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
635 }
636
637 return {*framebuffer, vk::Extent2D{key.width, key.height}};
638}
639
640RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
641 BufferBindings& buffer_bindings,
642 bool is_indexed,
643 bool is_instanced) {
644 MICROPROFILE_SCOPE(Vulkan_Geometry);
645
646 const auto& gpu = system.GPU().Maxwell3D();
647 const auto& regs = gpu.regs;
648
649 SetupVertexArrays(fixed_state.vertex_input, buffer_bindings);
650
651 const u32 base_instance = regs.vb_base_instance;
652 const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
653 const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
654 const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
655
656 DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
657 SetupIndexBuffer(buffer_bindings, params, is_indexed);
658
659 return params;
660}
661
662void RasterizerVulkan::SetupShaderDescriptors(
663 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
664 texture_cache.GuardSamplers(true);
665
666 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
667 // Skip VertexA stage
668 const auto& shader = shaders[stage + 1];
669 if (!shader) {
670 continue;
671 }
672 const auto& entries = shader->GetEntries();
673 SetupGraphicsConstBuffers(entries, stage);
674 SetupGraphicsGlobalBuffers(entries, stage);
675 SetupGraphicsTexelBuffers(entries, stage);
676 SetupGraphicsTextures(entries, stage);
677 SetupGraphicsImages(entries, stage);
678 }
679 texture_cache.GuardSamplers(false);
680}
681
682void RasterizerVulkan::SetupImageTransitions(
683 Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
684 const View& zeta_attachment) {
685 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics,
686 vk::AccessFlagBits::eShaderRead);
687 TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics,
688 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
689
690 for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) {
691 const auto color_attachment = color_attachments[rt];
692 if (color_attachment == nullptr) {
693 continue;
694 }
695 const auto image_layout =
696 texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal;
697 color_attachment->Transition(
698 image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput,
699 vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite);
700 }
701
702 if (zeta_attachment != nullptr) {
703 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
704 ? vk::ImageLayout::eGeneral
705 : vk::ImageLayout::eDepthStencilAttachmentOptimal;
706 zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests,
707 vk::AccessFlagBits::eDepthStencilAttachmentRead |
708 vk::AccessFlagBits::eDepthStencilAttachmentWrite);
709 }
710}
711
712void RasterizerVulkan::UpdateDynamicStates() {
713 auto& gpu = system.GPU().Maxwell3D();
714 UpdateViewportsState(gpu);
715 UpdateScissorsState(gpu);
716 UpdateDepthBias(gpu);
717 UpdateBlendConstants(gpu);
718 UpdateDepthBounds(gpu);
719 UpdateStencilFaces(gpu);
720}
721
722void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
723 BufferBindings& buffer_bindings) {
724 const auto& regs = system.GPU().Maxwell3D().regs;
725
726 for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) {
727 const auto& attrib = regs.vertex_attrib_format[index];
728 if (!attrib.IsValid()) {
729 continue;
730 }
731
732 const auto& buffer = regs.vertex_array[attrib.buffer];
733 ASSERT(buffer.IsEnabled());
734
735 vertex_input.attributes[vertex_input.num_attributes++] =
736 FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size,
737 attrib.offset);
738 }
739
740 for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) {
741 const auto& vertex_array = regs.vertex_array[index];
742 if (!vertex_array.IsEnabled()) {
743 continue;
744 }
745
746 const GPUVAddr start{vertex_array.StartAddress()};
747 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
748
749 ASSERT(end > start);
750 const std::size_t size{end - start + 1};
751 const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
752
753 vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding(
754 index, vertex_array.stride,
755 regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
756 buffer_bindings.AddVertexBinding(buffer, offset);
757 }
758}
759
760void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
761 bool is_indexed) {
762 const auto& regs = system.GPU().Maxwell3D().regs;
763 switch (regs.draw.topology) {
764 case Maxwell::PrimitiveTopology::Quads:
765 if (params.is_indexed) {
766 UNIMPLEMENTED();
767 } else {
768 const auto [buffer, offset] =
769 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
770 buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32);
771 params.base_vertex = 0;
772 params.num_vertices = params.num_vertices * 6 / 4;
773 params.is_indexed = true;
774 }
775 break;
776 default: {
777 if (!is_indexed) {
778 break;
779 }
780 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
781 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
782
783 auto format = regs.index_array.format;
784 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
785 if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
786 std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset);
787 format = Maxwell::IndexFormat::UnsignedShort;
788 }
789
790 buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
791 break;
792 }
793 }
794}
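A minimal CPU-side sketch of the expansion QuadArrayPass::Assemble is assumed to perform on the GPU; MakeQuadIndices is hypothetical and only illustrates where the num_vertices * 6 / 4 factor above comes from:

#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

using u32 = std::uint32_t; // matches common/common_types.h

std::vector<u32> MakeQuadIndices(u32 num_vertices, u32 base_vertex) {
    // Each quad (v, v+1, v+2, v+3) becomes the triangles (v, v+1, v+2) and (v, v+2, v+3),
    // so four vertices expand into six indices.
    static constexpr std::array<u32, 6> pattern{0, 1, 2, 0, 2, 3};
    std::vector<u32> indices;
    indices.reserve(static_cast<std::size_t>(num_vertices) / 4 * pattern.size());
    for (u32 quad = 0; quad < num_vertices / 4; ++quad) {
        for (const u32 offset : pattern) {
            indices.push_back(base_vertex + quad * 4 + offset);
        }
    }
    return indices;
}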
795
796void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
797 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
798 const auto& gpu = system.GPU().Maxwell3D();
799 const auto& shader_stage = gpu.state.shader_stages[stage];
800 for (const auto& entry : entries.const_buffers) {
801 SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
802 }
803}
804
805void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
806 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
807 auto& gpu{system.GPU()};
808 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
809
810 for (const auto& entry : entries.global_buffers) {
811 const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
812 SetupGlobalBuffer(entry, addr);
813 }
814}
815
816void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
817 MICROPROFILE_SCOPE(Vulkan_Textures);
818 const auto& gpu = system.GPU().Maxwell3D();
819 for (const auto& entry : entries.texel_buffers) {
820 const auto image = GetTextureInfo(gpu, entry, stage).tic;
821 SetupTexelBuffer(image, entry);
822 }
823}
824
825void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
826 MICROPROFILE_SCOPE(Vulkan_Textures);
827 const auto& gpu = system.GPU().Maxwell3D();
828 for (const auto& entry : entries.samplers) {
829 const auto texture = GetTextureInfo(gpu, entry, stage);
830 SetupTexture(texture, entry);
831 }
832}
833
834void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
835 MICROPROFILE_SCOPE(Vulkan_Images);
836    const auto& gpu = system.GPU().Maxwell3D();
837 for (const auto& entry : entries.images) {
838 const auto tic = GetTextureInfo(gpu, entry, stage).tic;
839 SetupImage(tic, entry);
840 }
841}
842
843void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
844 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
845 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
846 for (const auto& entry : entries.const_buffers) {
847 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
848 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
849 Tegra::Engines::ConstBufferInfo buffer;
850 buffer.address = config.Address();
851 buffer.size = config.size;
852 buffer.enabled = mask[entry.GetIndex()];
853 SetupConstBuffer(entry, buffer);
854 }
855}
856
857void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
858 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
859 const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
860 for (const auto& entry : entries.global_buffers) {
861 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
862 SetupGlobalBuffer(entry, addr);
863 }
864}
865
866void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
867 MICROPROFILE_SCOPE(Vulkan_Textures);
868 const auto& gpu = system.GPU().KeplerCompute();
869 for (const auto& entry : entries.texel_buffers) {
870 const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
871 SetupTexelBuffer(image, entry);
872 }
873}
874
875void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
876 MICROPROFILE_SCOPE(Vulkan_Textures);
877 const auto& gpu = system.GPU().KeplerCompute();
878 for (const auto& entry : entries.samplers) {
879 const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex);
880 SetupTexture(texture, entry);
881 }
882}
883
884void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
885 MICROPROFILE_SCOPE(Vulkan_Images);
886 const auto& gpu = system.GPU().KeplerCompute();
887 for (const auto& entry : entries.images) {
888 const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
889 SetupImage(tic, entry);
890 }
891}
892
893void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
894 const Tegra::Engines::ConstBufferInfo& buffer) {
895 // Align the size to avoid bad std140 interactions
896 const std::size_t size =
897 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
898 ASSERT(size <= MaxConstbufferSize);
899
900 const auto [buffer_handle, offset] =
901 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
902
903 update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
904}
905
906void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
907 auto& memory_manager{system.GPU().MemoryManager()};
908 const auto actual_addr = memory_manager.Read<u64>(address);
909 const auto size = memory_manager.Read<u32>(address + 8);
910
911 if (size == 0) {
912 // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because
913 // Vulkan doesn't like empty buffers.
914 constexpr std::size_t dummy_size = 4;
915 const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
916 update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
917 return;
918 }
919
920 const auto [buffer, offset] = buffer_cache.UploadMemory(
921 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
922 update_descriptor_queue.AddBuffer(buffer, offset, size);
923}
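SetupGlobalBuffer above reads an eight-byte GPU address followed by a four-byte size from guest memory; a sketch of that assumed descriptor layout (the struct name is illustrative, only the offsets come from the reads above):

#include <cstddef>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

struct GlobalBufferDescriptor {
    u64 gpu_address; // read above with memory_manager.Read<u64>(address)
    u32 size;        // read above with memory_manager.Read<u32>(address + 8)
};
static_assert(offsetof(GlobalBufferDescriptor, size) == 8, "size is expected at offset 8");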
924
925void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
926 const TexelBufferEntry& entry) {
927 const auto view = texture_cache.GetTextureSurface(tic, entry);
928 ASSERT(view->IsBufferView());
929
930 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
931}
932
933void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
934 const SamplerEntry& entry) {
935 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
936 ASSERT(!view->IsBufferView());
937
938 const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
939 texture.tic.z_source, texture.tic.w_source);
940 const auto sampler = sampler_cache.GetSampler(texture.tsc);
941 update_descriptor_queue.AddSampledImage(sampler, image_view);
942
943 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
944 *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal;
945 sampled_views.push_back(ImageView{std::move(view), image_layout});
946}
947
948void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
949 auto view = texture_cache.GetImageSurface(tic, entry);
950
951 if (entry.IsWritten()) {
952 view->MarkAsModified(texture_cache.Tick());
953 }
954
955 UNIMPLEMENTED_IF(tic.IsBuffer());
956
957 const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
958 update_descriptor_queue.AddImage(image_view);
959
960 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
961 *image_layout = vk::ImageLayout::eGeneral;
962 image_views.push_back(ImageView{std::move(view), image_layout});
963}
964
965void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
966 if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) {
967 return;
968 }
969 gpu.dirty.viewport_transform = false;
970 const auto& regs = gpu.regs;
971 const std::array viewports{
972 GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
973 GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
974 GetViewportState(device, regs, 4), GetViewportState(device, regs, 5),
975 GetViewportState(device, regs, 6), GetViewportState(device, regs, 7),
976 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
977 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
978 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
979 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
980 scheduler.Record([viewports](auto cmdbuf, auto& dld) {
981 cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld);
982 });
983}
984
985void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
986 if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) {
987 return;
988 }
989 gpu.dirty.scissor_test = false;
990 const auto& regs = gpu.regs;
991 const std::array scissors = {
992 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
993 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
994 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
995 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
996 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
997 GetScissorState(regs, 15)};
998 scheduler.Record([scissors](auto cmdbuf, auto& dld) {
999 cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld);
1000 });
1001}
1002
1003void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) {
1004 if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) {
1005 return;
1006 }
1007 gpu.dirty.polygon_offset = false;
1008 const auto& regs = gpu.regs;
1009 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
1010 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
1011 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
1012 });
1013}
1014
1015void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) {
1016 if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) {
1017 return;
1018 }
1019 gpu.dirty.blend_state = false;
1020 const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g,
1021 gpu.regs.blend_color.b, gpu.regs.blend_color.a};
1022 scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
1023 cmdbuf.setBlendConstants(blend_color.data(), dld);
1024 });
1025}
1026
1027void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) {
1028 if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) {
1029 return;
1030 }
1031 gpu.dirty.depth_bounds_values = false;
1032 const auto& regs = gpu.regs;
1033 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
1034 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
1035}
1036
1037void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) {
1038 if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) {
1039 return;
1040 }
1041 gpu.dirty.stencil_test = false;
1042 const auto& regs = gpu.regs;
1043 if (regs.stencil_two_side_enable) {
1044 // Separate values per face
1045 scheduler.Record(
1046 [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask,
1047 front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref,
1048 back_write_mask = regs.stencil_back_mask,
1049 back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) {
1050 // Front face
1051 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld);
1052 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld);
1053 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld);
1054
1055 // Back face
1056 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld);
1057 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld);
1058 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld);
1059 });
1060 } else {
1061 // Front face defines both faces
1062 scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask,
1063 test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) {
1064 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld);
1065 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld);
1066 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld);
1067 });
1068 }
1069}
1070
1071std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1072 std::size_t size = CalculateVertexArraysSize();
1073 if (is_indexed) {
1074 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1075 }
1076 size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
1077 return size;
1078}
1079
1080std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1081 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1082 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1083}
1084
1085std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1086 const auto& regs = system.GPU().Maxwell3D().regs;
1087
1088 std::size_t size = 0;
1089 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1090 // This implementation assumes that all attributes are used in the shader.
1091 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1092 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1093 DEBUG_ASSERT(end > start);
1094
1095 size += (end - start + 1) * regs.vertex_array[index].enable;
1096 }
1097 return size;
1098}
1099
1100std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1101 const auto& regs = system.GPU().Maxwell3D().regs;
1102 return static_cast<std::size_t>(regs.index_array.count) *
1103 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
1104}
1105
1106std::size_t RasterizerVulkan::CalculateConstBufferSize(
1107 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1108 if (entry.IsIndirect()) {
1109 // Buffer is accessed indirectly, so upload the entire thing
1110 return buffer.size;
1111 } else {
1112 // Buffer is accessed directly, upload just what we use
1113 return entry.GetSize();
1114 }
1115}
1116
1117RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
1118 using namespace VideoCore::Surface;
1119
1120 const auto& regs = system.GPU().Maxwell3D().regs;
1121 RenderPassParams renderpass_params;
1122
1123 for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
1124 const auto& rendertarget = regs.rt[rt];
1125 if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE)
1126 continue;
1127 renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
1128 static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
1129 texceptions.test(rt)});
1130 }
1131
1132 renderpass_params.has_zeta = regs.zeta_enable;
1133 if (renderpass_params.has_zeta) {
1134 renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
1135 renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
1136 }
1137
1138 return renderpass_params;
1139}
1140
1141} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
new file mode 100644
index 000000000..7be71e734
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -0,0 +1,263 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <memory>
10#include <utility>
11#include <vector>
12
13#include <boost/container/static_vector.hpp>
14#include <boost/functional/hash.hpp>
15
16#include "common/common_types.h"
17#include "video_core/memory_manager.h"
18#include "video_core/rasterizer_accelerated.h"
19#include "video_core/rasterizer_interface.h"
20#include "video_core/renderer_vulkan/declarations.h"
21#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
22#include "video_core/renderer_vulkan/vk_buffer_cache.h"
23#include "video_core/renderer_vulkan/vk_compute_pass.h"
24#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
25#include "video_core/renderer_vulkan/vk_memory_manager.h"
26#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
27#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
28#include "video_core/renderer_vulkan/vk_resource_manager.h"
29#include "video_core/renderer_vulkan/vk_sampler_cache.h"
30#include "video_core/renderer_vulkan/vk_scheduler.h"
31#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
32#include "video_core/renderer_vulkan/vk_texture_cache.h"
33#include "video_core/renderer_vulkan/vk_update_descriptor.h"
34
35namespace Core {
36class System;
37}
38
39namespace Core::Frontend {
40class EmuWindow;
41}
42
43namespace Tegra::Engines {
44class Maxwell3D;
45}
46
47namespace Vulkan {
48
49struct VKScreenInfo;
50
51using ImageViewsPack =
52 boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>;
53
54struct FramebufferCacheKey {
55 vk::RenderPass renderpass{};
56 u32 width = 0;
57 u32 height = 0;
58 ImageViewsPack views;
59
60 std::size_t Hash() const noexcept {
61 std::size_t hash = 0;
62 boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
63 for (const auto& view : views) {
64 boost::hash_combine(hash, static_cast<VkImageView>(view));
65 }
66 boost::hash_combine(hash, width);
67 boost::hash_combine(hash, height);
68 return hash;
69 }
70
71 bool operator==(const FramebufferCacheKey& rhs) const noexcept {
72 return std::tie(renderpass, views, width, height) ==
73 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
74 }
75};
76
77} // namespace Vulkan
78
79namespace std {
80
81template <>
82struct hash<Vulkan::FramebufferCacheKey> {
83 std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
84 return k.Hash();
85 }
86};
87
88} // namespace std
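A hypothetical helper showing how FramebufferCacheKey drives a cache like the framebuffer_cache member declared further below (assumes <functional> and <unordered_map> are available; the real create-on-miss logic lives in RasterizerVulkan::ConfigureFramebuffers and may differ):

vk::Framebuffer GetOrCreateFramebuffer(
    std::unordered_map<FramebufferCacheKey, UniqueFramebuffer>& cache, FramebufferCacheKey key,
    const std::function<UniqueFramebuffer(const FramebufferCacheKey&)>& create) {
    const auto [entry, is_miss] = cache.try_emplace(std::move(key));
    if (is_miss) {
        // Build the framebuffer from entry->first (render pass, extent and attachment views).
        entry->second = create(entry->first);
    }
    return *entry->second;
}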
89
90namespace Vulkan {
91
92class BufferBindings;
93
94struct ImageView {
95 View view;
96 vk::ImageLayout* layout = nullptr;
97};
98
99class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
100public:
101 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
102 VKScreenInfo& screen_info, const VKDevice& device,
103 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
104 VKScheduler& scheduler);
105 ~RasterizerVulkan() override;
106
107 bool DrawBatch(bool is_indexed) override;
108 bool DrawMultiBatch(bool is_indexed) override;
109 void Clear() override;
110 void DispatchCompute(GPUVAddr code_addr) override;
111 void FlushAll() override;
112 void FlushRegion(CacheAddr addr, u64 size) override;
113 void InvalidateRegion(CacheAddr addr, u64 size) override;
114 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
115 void FlushCommands() override;
116 void TickFrame() override;
117 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
118 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
119 const Tegra::Engines::Fermi2D::Config& copy_config) override;
120 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
121 u32 pixel_stride) override;
122
123 /// Maximum supported size that a constbuffer can have in bytes.
124 static constexpr std::size_t MaxConstbufferSize = 0x10000;
125 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
126 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
127
128private:
129 struct DrawParameters {
130 void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const;
131
132 u32 base_instance = 0;
133 u32 num_instances = 0;
134 u32 base_vertex = 0;
135 u32 num_vertices = 0;
136        bool is_indexed = false;
137 };
138
139 using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
140
141 static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
142
143 void Draw(bool is_indexed, bool is_instanced);
144
145 void FlushWork();
146
147 Texceptions UpdateAttachments();
148
149 std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass);
150
151    /// Sets up geometry buffers and state.
152 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
153 bool is_indexed, bool is_instanced);
154
155 /// Setup descriptors in the graphics pipeline.
156 void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
157
158 void SetupImageTransitions(Texceptions texceptions,
159 const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
160 const View& zeta_attachment);
161
162 void UpdateDynamicStates();
163
164 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
165
166 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
167 BufferBindings& buffer_bindings);
168
169 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
170
171 /// Setup constant buffers in the graphics pipeline.
172 void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
173
174 /// Setup global buffers in the graphics pipeline.
175 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
176
177 /// Setup texel buffers in the graphics pipeline.
178 void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
179
180 /// Setup textures in the graphics pipeline.
181 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
182
183 /// Setup images in the graphics pipeline.
184 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
185
186 /// Setup constant buffers in the compute pipeline.
187 void SetupComputeConstBuffers(const ShaderEntries& entries);
188
189 /// Setup global buffers in the compute pipeline.
190 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
191
192 /// Setup texel buffers in the compute pipeline.
193 void SetupComputeTexelBuffers(const ShaderEntries& entries);
194
195 /// Setup textures in the compute pipeline.
196 void SetupComputeTextures(const ShaderEntries& entries);
197
198 /// Setup images in the compute pipeline.
199 void SetupComputeImages(const ShaderEntries& entries);
200
201 void SetupConstBuffer(const ConstBufferEntry& entry,
202 const Tegra::Engines::ConstBufferInfo& buffer);
203
204 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
205
206 void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
207
208 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
209
210 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
211
212 void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu);
213 void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu);
214 void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu);
215 void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu);
216 void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu);
217 void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu);
218
219 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
220
221 std::size_t CalculateComputeStreamBufferSize() const;
222
223 std::size_t CalculateVertexArraysSize() const;
224
225 std::size_t CalculateIndexBufferSize() const;
226
227 std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
228 const Tegra::Engines::ConstBufferInfo& buffer) const;
229
230 RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
231
232 Core::System& system;
233 Core::Frontend::EmuWindow& render_window;
234 VKScreenInfo& screen_info;
235 const VKDevice& device;
236 VKResourceManager& resource_manager;
237 VKMemoryManager& memory_manager;
238 VKScheduler& scheduler;
239
240 VKStagingBufferPool staging_pool;
241 VKDescriptorPool descriptor_pool;
242 VKUpdateDescriptorQueue update_descriptor_queue;
243 QuadArrayPass quad_array_pass;
244 Uint8Pass uint8_pass;
245
246 VKTextureCache texture_cache;
247 VKPipelineCache pipeline_cache;
248 VKBufferCache buffer_cache;
249 VKSamplerCache sampler_cache;
250
251 std::array<View, Maxwell::NumRenderTargets> color_attachments;
252 View zeta_attachment;
253
254 std::vector<ImageView> sampled_views;
255 std::vector<ImageView> image_views;
256
257 u32 draw_counter = 0;
258
259 // TODO(Rodrigo): Invalidate on image destruction
260 std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache;
261};
262
263} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
new file mode 100644
index 000000000..93f5d7ba0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -0,0 +1,100 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
13
14namespace Vulkan {
15
16VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
17
18VKRenderPassCache::~VKRenderPassCache() = default;
19
20vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
21 const auto [pair, is_cache_miss] = cache.try_emplace(params);
22 auto& entry = pair->second;
23 if (is_cache_miss) {
24 entry = CreateRenderPass(params);
25 }
26 return *entry;
27}
28
29UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
30 std::vector<vk::AttachmentDescription> descriptors;
31 std::vector<vk::AttachmentReference> color_references;
32
33 for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
34 const auto attachment = params.color_attachments[rt];
35 const auto format =
36 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
37 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
38 static_cast<u32>(attachment.pixel_format));
39
40 // TODO(Rodrigo): Add eMayAlias when it's needed.
41 const auto color_layout = attachment.is_texception
42 ? vk::ImageLayout::eGeneral
43 : vk::ImageLayout::eColorAttachmentOptimal;
44 descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
45 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
46 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
47 vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
48 color_references.emplace_back(static_cast<u32>(rt), color_layout);
49 }
50
51 vk::AttachmentReference zeta_attachment_ref;
52 if (params.has_zeta) {
53 const auto format =
54 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
55 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
56 static_cast<u32>(params.zeta_pixel_format));
57
58 const auto zeta_layout = params.zeta_texception
59 ? vk::ImageLayout::eGeneral
60 : vk::ImageLayout::eDepthStencilAttachmentOptimal;
61 descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
62 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
63 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
64 vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
65 zeta_attachment_ref =
66 vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout);
67 }
68
69 const vk::SubpassDescription subpass_description(
70 {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
71 color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0,
72 nullptr);
73
74 vk::AccessFlags access;
75 vk::PipelineStageFlags stage;
76 if (!color_references.empty()) {
77 access |=
78 vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
79 stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
80 }
81
82 if (params.has_zeta) {
83 access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
84 vk::AccessFlagBits::eDepthStencilAttachmentWrite;
85 stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
86 }
87
88 const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
89 {});
90
91 const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
92 descriptors.data(), 1, &subpass_description, 1,
93 &subpass_dependency);
94
95 const auto dev = device.GetLogical();
96 const auto& dld = device.GetDispatchLoader();
97 return dev.createRenderPassUnique(create_info, nullptr, dld);
98}
99
100} // namespace Vulkan
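A hypothetical call site for the cache; the parameters mirror what RasterizerVulkan::GetRenderPassParams builds, and PixelFormat::ABGR8U is only an example format:

vk::RenderPass GetSingleTargetRenderPass(VKRenderPassCache& cache) {
    RenderPassParams params;
    params.color_attachments.push_back({0, VideoCore::Surface::PixelFormat::ABGR8U, false});
    params.has_zeta = false;
    // Equal parameter sets map to the same cached VkRenderPass, so repeated calls are cheap.
    return cache.GetRenderPass(params);
}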
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
new file mode 100644
index 000000000..b49b2db48
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
@@ -0,0 +1,97 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <tuple>
9#include <unordered_map>
10
11#include <boost/container/static_vector.hpp>
12#include <boost/functional/hash.hpp>
13
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/renderer_vulkan/declarations.h"
16#include "video_core/surface.h"
17
18namespace Vulkan {
19
20class VKDevice;
21
22// TODO(Rodrigo): Optimize this structure for faster hashing
23
24struct RenderPassParams {
25 struct ColorAttachment {
26 u32 index = 0;
27 VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
28 bool is_texception = false;
29
30 std::size_t Hash() const noexcept {
31 return static_cast<std::size_t>(pixel_format) |
32 static_cast<std::size_t>(is_texception) << 6 |
33 static_cast<std::size_t>(index) << 7;
34 }
35
36 bool operator==(const ColorAttachment& rhs) const noexcept {
37 return std::tie(index, pixel_format, is_texception) ==
38 std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
39 }
40 };
41
42 boost::container::static_vector<ColorAttachment,
43 Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
44 color_attachments{};
45 // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
46 VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
47 bool has_zeta = false;
48 bool zeta_texception = false;
49
50 std::size_t Hash() const noexcept {
51 std::size_t hash = 0;
52 for (const auto& rt : color_attachments) {
53 boost::hash_combine(hash, rt.Hash());
54 }
55 boost::hash_combine(hash, zeta_pixel_format);
56 boost::hash_combine(hash, has_zeta);
57 boost::hash_combine(hash, zeta_texception);
58 return hash;
59 }
60
61 bool operator==(const RenderPassParams& rhs) const {
62 return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
63 std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
64 rhs.zeta_texception);
65 }
66};
67
68} // namespace Vulkan
69
70namespace std {
71
72template <>
73struct hash<Vulkan::RenderPassParams> {
74 std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
75 return k.Hash();
76 }
77};
78
79} // namespace std
80
81namespace Vulkan {
82
83class VKRenderPassCache final {
84public:
85 explicit VKRenderPassCache(const VKDevice& device);
86 ~VKRenderPassCache();
87
88 vk::RenderPass GetRenderPass(const RenderPassParams& params);
89
90private:
91 UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;
92
93 const VKDevice& device;
94 std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
95};
96
97} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 1ce583f75..0a8ec8398 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
46 {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), 46 {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
47 MaxwellToVK::Sampler::Filter(tsc.min_filter), 47 MaxwellToVK::Sampler::Filter(tsc.min_filter),
48 MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), 48 MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
49 MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter), 49 MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
50 MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter), 50 MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
51 MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), 51 MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
52 has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, 52 has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
53 MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), 53 MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
54 tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), 54 tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a8baf91de..0cf97cafa 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -954,6 +954,10 @@ private:
954 954
955 Expression Visit(const Node& node) { 955 Expression Visit(const Node& node) {
956 if (const auto operation = std::get_if<OperationNode>(&*node)) { 956 if (const auto operation = std::get_if<OperationNode>(&*node)) {
957 if (const auto amend_index = operation->GetAmendIndex()) {
958 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
959 ASSERT(type == Type::Void);
960 }
957 const auto operation_index = static_cast<std::size_t>(operation->GetCode()); 961 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
958 const auto decompiler = operation_decompilers[operation_index]; 962 const auto decompiler = operation_decompilers[operation_index];
959 if (decompiler == nullptr) { 963 if (decompiler == nullptr) {
@@ -1142,6 +1146,10 @@ private:
1142 } 1146 }
1143 1147
1144 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 1148 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1149 if (const auto amend_index = conditional->GetAmendIndex()) {
1150 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
1151 ASSERT(type == Type::Void);
1152 }
1145 // It's invalid to call conditional on nested nodes, use an operation instead 1153 // It's invalid to call conditional on nested nodes, use an operation instead
1146 const Id true_label = OpLabel(); 1154 const Id true_label = OpLabel();
1147 const Id skip_label = OpLabel(); 1155 const Id skip_label = OpLabel();
@@ -1788,6 +1796,11 @@ private:
1788 return {}; 1796 return {};
1789 } 1797 }
1790 1798
1799 Expression UAtomicAdd(Operation) {
1800 UNIMPLEMENTED();
1801 return {};
1802 }
1803
1791 Expression Branch(Operation operation) { 1804 Expression Branch(Operation operation) {
1792 const auto& target = std::get<ImmediateNode>(*operation[0]); 1805 const auto& target = std::get<ImmediateNode>(*operation[0]);
1793 OpStore(jmp_to, Constant(t_uint, target.GetValue())); 1806 OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -2365,6 +2378,8 @@ private:
2365 &SPIRVDecompiler::AtomicImageXor, 2378 &SPIRVDecompiler::AtomicImageXor,
2366 &SPIRVDecompiler::AtomicImageExchange, 2379 &SPIRVDecompiler::AtomicImageExchange,
2367 2380
2381 &SPIRVDecompiler::UAtomicAdd,
2382
2368 &SPIRVDecompiler::Branch, 2383 &SPIRVDecompiler::Branch,
2369 &SPIRVDecompiler::BranchIndirect, 2384 &SPIRVDecompiler::BranchIndirect,
2370 &SPIRVDecompiler::PushFlowStack, 2385 &SPIRVDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
new file mode 100644
index 000000000..b97c4cb3d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -0,0 +1,34 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <vector>
8#include "common/alignment.h"
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_shader_util.h"
14
15namespace Vulkan {
16
17UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
18 // Avoid undefined behavior by copying to a staging allocation
19 ASSERT(code_size % sizeof(u32) == 0);
20 const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
21 std::memcpy(data.get(), code_data, code_size);
22
23 const auto dev = device.GetLogical();
24 const auto& dld = device.GetDispatchLoader();
25 const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get());
26 vk::ShaderModule shader_module;
27 if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) {
28 UNREACHABLE_MSG("Shader module failed to build!");
29 }
30
31 return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld));
32}
33
34} // namespace Vulkan
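A hypothetical helper built on BuildShader; spirv_words is assumed to hold a valid SPIR-V binary (for example, decompiler output), so the byte size stays a multiple of sizeof(u32) as the ASSERT requires:

UniqueShaderModule MakeShaderModule(const VKDevice& device, const std::vector<u32>& spirv_words) {
    // BuildShader copies the data into a u32-aligned allocation, so only the size matters here.
    return BuildShader(device, spirv_words.size() * sizeof(u32),
                       reinterpret_cast<const u8*>(spirv_words.data()));
}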
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
new file mode 100644
index 000000000..c06d65970
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -0,0 +1,17 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include "common/common_types.h"
9#include "video_core/renderer_vulkan/declarations.h"
10
11namespace Vulkan {
12
13class VKDevice;
14
15UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
16
17} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 02310375f..4d9488f49 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -13,6 +13,7 @@
13 13
14#include "video_core/renderer_vulkan/declarations.h" 14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_memory_manager.h" 15#include "video_core/renderer_vulkan/vk_memory_manager.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h"
16 17
17namespace Vulkan { 18namespace Vulkan {
18 19
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 62f1427f5..d48d3b44c 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,86 +3,144 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <memory>
7#include <optional> 6#include <optional>
7#include <tuple>
8#include <vector> 8#include <vector>
9 9
10#include "common/alignment.h"
10#include "common/assert.h" 11#include "common/assert.h"
11#include "video_core/renderer_vulkan/declarations.h" 12#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h" 13#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_memory_manager.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h" 14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h" 15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_stream_buffer.h" 16#include "video_core/renderer_vulkan/vk_stream_buffer.h"
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
20namespace {
21
20constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; 22constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
21constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; 23constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
22 24
23VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, 25constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
24 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, 26
25 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) 27std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
26 : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ 28 vk::MemoryPropertyFlags wanted) {
27 pipeline_stage} { 29 const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
28 CreateBuffers(memory_manager, usage); 30 for (u32 i = 0; i < properties.memoryTypeCount; i++) {
29 ReserveWatches(WATCHES_INITIAL_RESERVE); 31 if (!(filter & (1 << i))) {
32 continue;
33 }
34 if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
35 return i;
36 }
37 }
38 return {};
39}
40
41} // Anonymous namespace
42
43VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
44 vk::BufferUsageFlags usage)
45 : device{device}, scheduler{scheduler} {
46 CreateBuffers(usage);
47 ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
48 ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
30} 49}
31 50
32VKStreamBuffer::~VKStreamBuffer() = default; 51VKStreamBuffer::~VKStreamBuffer() = default;
33 52
34std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { 53std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
35 ASSERT(size <= buffer_size); 54 ASSERT(size <= STREAM_BUFFER_SIZE);
36 mapped_size = size; 55 mapped_size = size;
37 56
38 if (offset + size > buffer_size) { 57 if (alignment > 0) {
39 // The buffer would overflow, save the amount of used buffers, signal an invalidation and 58 offset = Common::AlignUp(offset, alignment);
40 // reset the state. 59 }
41 invalidation_mark = used_watches; 60
42 used_watches = 0; 61 WaitPendingOperations(offset);
62
63 bool invalidated = false;
64 if (offset + size > STREAM_BUFFER_SIZE) {
65 // The buffer would overflow, save the amount of used watches and reset the state.
66 invalidation_mark = current_watch_cursor;
67 current_watch_cursor = 0;
43 offset = 0; 68 offset = 0;
69
70 // Swap watches and reset waiting cursors.
71 std::swap(previous_watches, current_watches);
72 wait_cursor = 0;
73 wait_bound = 0;
74
75 // Ensure that we don't wait for uncommitted fences.
76 scheduler.Flush();
77
78 invalidated = true;
44 } 79 }
45 80
46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; 81 const auto dev = device.GetLogical();
82 const auto& dld = device.GetDispatchLoader();
83 const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
84 return {pointer, offset, invalidated};
47} 85}
48 86
49void VKStreamBuffer::Send(u64 size) { 87void VKStreamBuffer::Unmap(u64 size) {
50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); 88 ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
51 89
52 if (invalidation_mark) { 90 const auto dev = device.GetLogical();
53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. 91 dev.unmapMemory(*memory, device.GetDispatchLoader());
54 scheduler.Flush(); 92
55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark, 93 offset += size;
56 [&](auto& resource) { resource->Wait(); });
57 invalidation_mark = std::nullopt;
58 }
59 94
60 if (used_watches + 1 >= watches.size()) { 95 if (current_watch_cursor + 1 >= current_watches.size()) {
61 // Ensure that there are enough watches. 96 // Ensure that there are enough watches.
62 ReserveWatches(WATCHES_RESERVE_CHUNK); 97 ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
63 } 98 }
64 // Add a watch for this allocation. 99 auto& watch = current_watches[current_watch_cursor++];
65 watches[used_watches++]->Watch(scheduler.GetFence()); 100 watch.upper_bound = offset;
66 101 watch.fence.Watch(scheduler.GetFence());
67 offset += size;
68} 102}
69 103
70void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { 104void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
71 const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, 105 const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
72 nullptr); 106 0, nullptr);
73
74 const auto dev = device.GetLogical(); 107 const auto dev = device.GetLogical();
75 const auto& dld = device.GetDispatchLoader(); 108 const auto& dld = device.GetDispatchLoader();
76 buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); 109 buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
77 commit = memory_manager.Commit(*buffer, true); 110
78 mapped_pointer = commit->GetData(); 111 const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
112 // Prefer device local host visible allocations (this should hit AMD's pinned memory).
113 auto type = FindMemoryType(device, requirements.memoryTypeBits,
114 vk::MemoryPropertyFlagBits::eHostVisible |
115 vk::MemoryPropertyFlagBits::eHostCoherent |
116 vk::MemoryPropertyFlagBits::eDeviceLocal);
117 if (!type) {
118 // Otherwise search for a host visible allocation.
119 type = FindMemoryType(device, requirements.memoryTypeBits,
120 vk::MemoryPropertyFlagBits::eHostVisible |
121 vk::MemoryPropertyFlagBits::eHostCoherent);
122 ASSERT_MSG(type, "No host visible and coherent memory type found");
123 }
124 const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
125 memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
126
127 dev.bindBufferMemory(*buffer, *memory, 0, dld);
79} 128}
80 129
81void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { 130void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
82 const std::size_t previous_size = watches.size(); 131 watches.resize(watches.size() + grow_size);
83 watches.resize(previous_size + grow_size); 132}
84 std::generate(watches.begin() + previous_size, watches.end(), 133
85 []() { return std::make_unique<VKFenceWatch>(); }); 134void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
135 if (!invalidation_mark) {
136 return;
137 }
138 while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
139 auto& watch = previous_watches[wait_cursor];
140 wait_bound = watch.upper_bound;
141 watch.fence.Wait();
142 ++wait_cursor;
143 }
86} 144}
87 145
88} // namespace Vulkan 146} // namespace Vulkan
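A minimal sketch of the reworked Map/Unmap flow (assumes <cstring> is included); a real caller would also check `invalidated` and re-emit descriptor bindings when the buffer space is reset:

u64 StageToStreamBuffer(VKStreamBuffer& stream_buffer, const u8* data, u64 size) {
    const auto [pointer, offset, invalidated] = stream_buffer.Map(size, 4 /* alignment */);
    std::memcpy(pointer, data, size);
    stream_buffer.Unmap(size);
    // `invalidated` is intentionally unused in this sketch; the returned offset is the binding
    // offset into stream_buffer.GetHandle() for the staged data.
    return offset;
}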
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 842e54162..187c0c612 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -4,28 +4,24 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
8#include <optional> 7#include <optional>
9#include <tuple> 8#include <tuple>
10#include <vector> 9#include <vector>
11 10
12#include "common/common_types.h" 11#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h" 12#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_memory_manager.h"
15 13
16namespace Vulkan { 14namespace Vulkan {
17 15
18class VKDevice; 16class VKDevice;
19class VKFence; 17class VKFence;
20class VKFenceWatch; 18class VKFenceWatch;
21class VKResourceManager;
22class VKScheduler; 19class VKScheduler;
23 20
24class VKStreamBuffer { 21class VKStreamBuffer final {
25public: 22public:
26 explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, 23 explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
27 VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, 24 vk::BufferUsageFlags usage);
28 vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
29 ~VKStreamBuffer(); 25 ~VKStreamBuffer();
30 26
31 /** 27 /**
@@ -34,39 +30,47 @@ public:
34 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer 30 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
35 * offset and a boolean that's true when buffer has been invalidated. 31 * offset and a boolean that's true when buffer has been invalidated.
36 */ 32 */
37 std::tuple<u8*, u64, bool> Reserve(u64 size); 33 std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
38 34
39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
40 void Send(u64 size); 36 void Unmap(u64 size);
41 37
42 vk::Buffer GetBuffer() const { 38 vk::Buffer GetHandle() const {
43 return *buffer; 39 return *buffer;
44 } 40 }
45 41
46private: 42private:
43 struct Watch final {
44 VKFenceWatch fence;
45 u64 upper_bound{};
46 };
47
47    /// Creates Vulkan buffer handles committing the required memory. 48    /// Creates Vulkan buffer handles committing the required memory.
48 void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); 49 void CreateBuffers(vk::BufferUsageFlags usage);
49 50
50 /// Increases the amount of watches available. 51 /// Increases the amount of watches available.
51 void ReserveWatches(std::size_t grow_size); 52 void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
53
54 void WaitPendingOperations(u64 requested_upper_bound);
52 55
53 const VKDevice& device; ///< Vulkan device manager. 56 const VKDevice& device; ///< Vulkan device manager.
54 VKScheduler& scheduler; ///< Command scheduler. 57 VKScheduler& scheduler; ///< Command scheduler.
55 const u64 buffer_size; ///< Total size of the stream buffer.
56 const vk::AccessFlags access; ///< Access usage of this stream buffer. 58 const vk::AccessFlags access; ///< Access usage of this stream buffer.
57 const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. 59 const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
58 60
59 UniqueBuffer buffer; ///< Mapped buffer. 61 UniqueBuffer buffer; ///< Mapped buffer.
60 VKMemoryCommit commit; ///< Memory commit. 62 UniqueDeviceMemory memory; ///< Memory allocation.
61 u8* mapped_pointer{}; ///< Pointer to the host visible commit
62 63
63 u64 offset{}; ///< Buffer iterator. 64 u64 offset{}; ///< Buffer iterator.
64 u64 mapped_size{}; ///< Size reserved for the current copy. 65 u64 mapped_size{}; ///< Size reserved for the current copy.
65 66
66 std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches 67 std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
67 std::size_t used_watches{}; ///< Count of watches, reset on invalidation. 68 std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
68 std::optional<std::size_t> 69 std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
69 invalidation_mark{}; ///< Number of watches used in the current invalidation. 70
71 std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
72 std::size_t wait_cursor{}; ///< Last watch being waited for completion.
73 u64 wait_bound{}; ///< Highest offset being watched for completion.
70}; 74};
71 75
72} // namespace Vulkan 76} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
new file mode 100644
index 000000000..51b0d38a6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -0,0 +1,475 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <cstring>
9#include <memory>
10#include <variant>
11#include <vector>
12
13#include "common/alignment.h"
14#include "common/assert.h"
15#include "common/common_types.h"
16#include "core/core.h"
17#include "core/memory.h"
18#include "video_core/engines/maxwell_3d.h"
19#include "video_core/morton.h"
20#include "video_core/renderer_vulkan/declarations.h"
21#include "video_core/renderer_vulkan/maxwell_to_vk.h"
22#include "video_core/renderer_vulkan/vk_device.h"
23#include "video_core/renderer_vulkan/vk_memory_manager.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
26#include "video_core/renderer_vulkan/vk_texture_cache.h"
27#include "video_core/surface.h"
28#include "video_core/textures/convert.h"
29
30namespace Vulkan {
31
32using VideoCore::MortonSwizzle;
33using VideoCore::MortonSwizzleMode;
34
35using Tegra::Texture::SwizzleSource;
36using VideoCore::Surface::PixelFormat;
37using VideoCore::Surface::SurfaceCompression;
38using VideoCore::Surface::SurfaceTarget;
39
40namespace {
41
42vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
43 switch (target) {
44 case SurfaceTarget::Texture1D:
45 case SurfaceTarget::Texture1DArray:
46 return vk::ImageType::e1D;
47 case SurfaceTarget::Texture2D:
48 case SurfaceTarget::Texture2DArray:
49 case SurfaceTarget::TextureCubemap:
50 case SurfaceTarget::TextureCubeArray:
51 return vk::ImageType::e2D;
52 case SurfaceTarget::Texture3D:
53 return vk::ImageType::e3D;
54 }
55 UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
56 return {};
57}
58
59vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) {
60 if (pixel_format < PixelFormat::MaxColorFormat) {
61 return vk::ImageAspectFlagBits::eColor;
62 } else if (pixel_format < PixelFormat::MaxDepthFormat) {
63 return vk::ImageAspectFlagBits::eDepth;
64 } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
65 return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
66 } else {
67 UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format));
68 return vk::ImageAspectFlagBits::eColor;
69 }
70}
71
72vk::ImageViewType GetImageViewType(SurfaceTarget target) {
73 switch (target) {
74 case SurfaceTarget::Texture1D:
75 return vk::ImageViewType::e1D;
76 case SurfaceTarget::Texture2D:
77 return vk::ImageViewType::e2D;
78 case SurfaceTarget::Texture3D:
79 return vk::ImageViewType::e3D;
80 case SurfaceTarget::Texture1DArray:
81 return vk::ImageViewType::e1DArray;
82 case SurfaceTarget::Texture2DArray:
83 return vk::ImageViewType::e2DArray;
84 case SurfaceTarget::TextureCubemap:
85 return vk::ImageViewType::eCube;
86 case SurfaceTarget::TextureCubeArray:
87 return vk::ImageViewType::eCubeArray;
88 case SurfaceTarget::TextureBuffer:
89 break;
90 }
91 UNREACHABLE();
92 return {};
93}
94
95UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
96 // TODO(Rodrigo): Move texture buffer creation to the buffer cache
97 const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(),
98 vk::BufferUsageFlagBits::eUniformTexelBuffer |
99 vk::BufferUsageFlagBits::eTransferSrc |
100 vk::BufferUsageFlagBits::eTransferDst,
101 vk::SharingMode::eExclusive, 0, nullptr);
102 const auto dev = device.GetLogical();
103 const auto& dld = device.GetDispatchLoader();
104 return dev.createBufferUnique(buffer_ci, nullptr, dld);
105}
106
107vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
108 const SurfaceParams& params,
109 vk::Buffer buffer) {
110 ASSERT(params.IsBuffer());
111
112 const auto format =
113 MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
114 return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes());
115}
116
117vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
118 constexpr auto sample_count = vk::SampleCountFlagBits::e1;
119 constexpr auto tiling = vk::ImageTiling::eOptimal;
120
121 ASSERT(!params.IsBuffer());
122
123 const auto [format, attachable, storage] =
124 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
125
126 auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst |
127 vk::ImageUsageFlagBits::eTransferSrc;
128 if (attachable) {
129 image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment
130 : vk::ImageUsageFlagBits::eColorAttachment;
131 }
132 if (storage) {
133 image_usage |= vk::ImageUsageFlagBits::eStorage;
134 }
135
136 vk::ImageCreateFlags flags;
137 vk::Extent3D extent;
138 switch (params.target) {
139 case SurfaceTarget::TextureCubemap:
140 case SurfaceTarget::TextureCubeArray:
141 flags |= vk::ImageCreateFlagBits::eCubeCompatible;
142 [[fallthrough]];
143 case SurfaceTarget::Texture1D:
144 case SurfaceTarget::Texture1DArray:
145 case SurfaceTarget::Texture2D:
146 case SurfaceTarget::Texture2DArray:
147 extent = vk::Extent3D(params.width, params.height, 1);
148 break;
149 case SurfaceTarget::Texture3D:
150 extent = vk::Extent3D(params.width, params.height, params.depth);
151 break;
152 case SurfaceTarget::TextureBuffer:
153 UNREACHABLE();
154 }
155
156 return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent,
157 params.num_levels, static_cast<u32>(params.GetNumLayers()),
158 sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0,
159 nullptr, vk::ImageLayout::eUndefined);
160}
161
162} // Anonymous namespace
163
164CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
165 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
166 VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
167 GPUVAddr gpu_addr, const SurfaceParams& params)
168 : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
169 resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
170 staging_pool{staging_pool} {
171 if (params.IsBuffer()) {
172 buffer = CreateBuffer(device, params);
173 commit = memory_manager.Commit(*buffer, false);
174
175 const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
176 format = buffer_view_ci.format;
177
178 const auto dev = device.GetLogical();
179 const auto& dld = device.GetDispatchLoader();
180 buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld);
181 } else {
182 const auto image_ci = GenerateImageCreateInfo(device, params);
183 format = image_ci.format;
184
185 image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
186 commit = memory_manager.Commit(image->GetHandle(), false);
187 }
188
189 // TODO(Rodrigo): Move this to a virtual function.
190 main_view = CreateViewInner(
191 ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
192 true);
193}
194
195CachedSurface::~CachedSurface() = default;
196
197void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
198 // To upload data we have to be outside of a renderpass
199 scheduler.RequestOutsideRenderPassOperationContext();
200
201 if (params.IsBuffer()) {
202 UploadBuffer(staging_buffer);
203 } else {
204 UploadImage(staging_buffer);
205 }
206}
207
208void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
209 UNIMPLEMENTED_IF(params.IsBuffer());
210
211 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
212 LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
213 }
214
215 // We can't copy images to buffers inside a renderpass
216 scheduler.RequestOutsideRenderPassOperationContext();
217
218 FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
219 vk::ImageLayout::eTransferSrcOptimal);
220
221 const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
222 // TODO(Rodrigo): Do this in a single copy
223 for (u32 level = 0; level < params.num_levels; ++level) {
224 scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle,
225 copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) {
226 cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy},
227 dld);
228 });
229 }
230 scheduler.Finish();
231
232    // TODO(Rodrigo): Use an internal buffer for staging and avoid this unnecessary memcpy.
233 std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size);
234}
235
236void CachedSurface::DecorateSurfaceName() {
237 // TODO(Rodrigo): Add name decorations
238}
239
240View CachedSurface::CreateView(const ViewParams& params) {
241 return CreateViewInner(params, false);
242}
243
244View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
245 // TODO(Rodrigo): Add name decorations
246 return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
247}
248
249void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
250 const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
251 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
252
253 scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
254 size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
255 const vk::BufferCopy copy(0, 0, size);
256 cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
257
258 cmdbuf.pipelineBarrier(
259 vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {},
260 {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite,
261 vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)},
262 {}, dld);
263 });
264}
265
266void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
267 const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
268 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
269
270 FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite,
271 vk::ImageLayout::eTransferDstOptimal);
272
273 for (u32 level = 0; level < params.num_levels; ++level) {
274 vk::BufferImageCopy copy = GetBufferImageCopy(level);
275 const auto& dld = device.GetDispatchLoader();
276 if (image->GetAspectMask() ==
277 (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
278 vk::BufferImageCopy depth = copy;
279 vk::BufferImageCopy stencil = copy;
280 depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
281 stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
282 scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth,
283 stencil](auto cmdbuf, auto& dld) {
284 cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
285 {depth, stencil}, dld);
286 });
287 } else {
288 scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(),
289 copy](auto cmdbuf, auto& dld) {
290 cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
291 {copy}, dld);
292 });
293 }
294 }
295}
296
297vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
298 const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
299 const auto compression_type = params.GetCompressionType();
300 const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
301 ? params.GetConvertedMipmapOffset(level)
302 : params.GetHostMipmapLevelOffset(level);
303
304 return vk::BufferImageCopy(
305 mip_offset, 0, 0,
306 {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0},
307 {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth});
308}
309
310vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
311 return {image->GetAspectMask(), 0, params.num_levels, 0,
312 static_cast<u32>(params.GetNumLayers())};
313}
314
315CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
316 const ViewParams& params, bool is_proxy)
317 : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
318 image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
319 aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
320 base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
321 num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
322 : vk::ImageViewType{}} {}
323
324CachedSurfaceView::~CachedSurfaceView() = default;
325
326vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
327 SwizzleSource z_source, SwizzleSource w_source) {
328 const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
329 if (last_image_view && last_swizzle == swizzle) {
330 return last_image_view;
331 }
332 last_swizzle = swizzle;
333
334 const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle);
335 auto& image_view = entry->second;
336 if (!is_cache_miss) {
337 return last_image_view = *image_view;
338 }
339
340 auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
341 auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
342 auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
343 auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
344
345 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
346        // A1B5G5R5 is implemented as A1R5G5B5, so we have to change the swizzle here.
347 std::swap(swizzle_x, swizzle_z);
348 }
349
350    // Games can sample either depth or stencil values from a texture; which one is read is decided
351    // by the swizzle value on hardware. To emulate this on Vulkan we specify it in the image aspect.
352 vk::ImageAspectFlags aspect = aspect_mask;
353 if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
354 UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
355 const bool is_first = x_source == SwizzleSource::R;
356 switch (params.pixel_format) {
357 case VideoCore::Surface::PixelFormat::Z24S8:
358 case VideoCore::Surface::PixelFormat::Z32FS8:
359 aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil;
360 break;
361 case VideoCore::Surface::PixelFormat::S8Z24:
362 aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth;
363 break;
364 default:
365 aspect = vk::ImageAspectFlagBits::eDepth;
366 UNIMPLEMENTED();
367 }
368
369        // Vulkan doesn't seem to understand swizzling of a depth-stencil image, so use the identity swizzle
370 swizzle_x = vk::ComponentSwizzle::eR;
371 swizzle_y = vk::ComponentSwizzle::eG;
372 swizzle_z = vk::ComponentSwizzle::eB;
373 swizzle_w = vk::ComponentSwizzle::eA;
374 }
375
376 const vk::ImageViewCreateInfo image_view_ci(
377 {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(),
378 {swizzle_x, swizzle_y, swizzle_z, swizzle_w},
379 {aspect, base_level, num_levels, base_layer, num_layers});
380
381 const auto dev = device.GetLogical();
382 image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader());
383 return last_image_view = *image_view;
384}
385
386VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
387 const VKDevice& device, VKResourceManager& resource_manager,
388 VKMemoryManager& memory_manager, VKScheduler& scheduler,
389 VKStagingBufferPool& staging_pool)
390 : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
391 memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
392
393VKTextureCache::~VKTextureCache() = default;
394
395Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
396 return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
397 scheduler, staging_pool, gpu_addr, params);
398}
399
400void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
401 const VideoCommon::CopyParams& copy_params) {
402 const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
403 const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
404 UNIMPLEMENTED_IF(src_3d);
405
406    // The texture cache handles depth in OpenGL terms; depending on the target it maps to either a
407    // subresource (array layers) or a dimension (3D depth), respectively.
408 const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z;
409 const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0;
410
411 const u32 extent_z = dst_3d ? copy_params.depth : 1;
412 const u32 num_layers = dst_3d ? 1 : copy_params.depth;
413
414 // We can't copy inside a renderpass
415 scheduler.RequestOutsideRenderPassOperationContext();
416
417 src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
418 vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
419 vk::ImageLayout::eTransferSrcOptimal);
420 dst_surface->Transition(
421 dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
422 vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
423
424 const auto& dld{device.GetDispatchLoader()};
425 const vk::ImageSubresourceLayers src_subresource(
426 src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
427 const vk::ImageSubresourceLayers dst_subresource(
428 dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers);
429 const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0);
430 const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z);
431 const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z);
432 const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent);
433 const vk::Image src_image = src_surface->GetImageHandle();
434 const vk::Image dst_image = dst_surface->GetImageHandle();
435 scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) {
436 cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
437 vk::ImageLayout::eTransferDstOptimal, {copy}, dld);
438 });
439}
440
441void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
442 const Tegra::Engines::Fermi2D::Config& copy_config) {
443 // We can't blit inside a renderpass
444 scheduler.RequestOutsideRenderPassOperationContext();
445
446 src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer,
447 vk::AccessFlagBits::eTransferRead);
448 dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer,
449 vk::AccessFlagBits::eTransferWrite);
450
451 const auto& cfg = copy_config;
452 const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0);
453 const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1);
454 const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0);
455 const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1);
456 const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right},
457 dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
458 const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
459
460 const auto& dld{device.GetDispatchLoader()};
461 scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
462 is_linear](auto cmdbuf, auto& dld) {
463 cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
464 vk::ImageLayout::eTransferDstOptimal, {blit},
465 is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld);
466 });
467}
468
469void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
470 // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
471    // convert from color to depth and vice versa.
472 LOG_WARNING(Render_Vulkan, "Unimplemented");
473}
474
475} // namespace Vulkan
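CachedSurfaceView::GetHandle above memoizes one image view per swizzle combination, keyed by the four swizzle sources packed into a u32, with a one-entry fast path for the last swizzle used. The following is a minimal, self-contained sketch of that caching pattern; the Vulkan handle is replaced by an integer and the names (SwizzledViewCache, ViewHandle, create_view) are illustrative, not taken from the repository.

#include <cstdint>
#include <functional>
#include <unordered_map>

enum class Swizzle : std::uint8_t { Zero, One, R, G, B, A };

using ViewHandle = std::uint64_t; // stand-in for vk::ImageView

class SwizzledViewCache {
public:
    explicit SwizzledViewCache(std::function<ViewHandle(std::uint32_t)> create)
        : create_view{std::move(create)} {}

    ViewHandle Get(Swizzle x, Swizzle y, Swizzle z, Swizzle w) {
        const std::uint32_t key = Encode(x, y, z, w);
        if (last_view != 0 && key == last_key) {
            return last_view; // fast path: same swizzle as the previous lookup
        }
        last_key = key;
        const auto [it, is_miss] = cache.try_emplace(key, ViewHandle{});
        if (is_miss) {
            it->second = create_view(key); // create the view only once per swizzle
        }
        return last_view = it->second;
    }

private:
    static std::uint32_t Encode(Swizzle x, Swizzle y, Swizzle z, Swizzle w) {
        return (static_cast<std::uint32_t>(x) << 24) | (static_cast<std::uint32_t>(y) << 16) |
               (static_cast<std::uint32_t>(z) << 8) | static_cast<std::uint32_t>(w);
    }

    std::function<ViewHandle(std::uint32_t)> create_view;
    std::unordered_map<std::uint32_t, ViewHandle> cache;
    std::uint32_t last_key = 0;
    ViewHandle last_view = 0;
};

In the code above the swizzle also influences which image aspect (depth or stencil) the created view exposes, which is why views cannot simply be created once per surface.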
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
new file mode 100644
index 000000000..d3edbe80c
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -0,0 +1,239 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "common/math_util.h"
14#include "video_core/gpu.h"
15#include "video_core/rasterizer_cache.h"
16#include "video_core/renderer_vulkan/declarations.h"
17#include "video_core/renderer_vulkan/vk_image.h"
18#include "video_core/renderer_vulkan/vk_memory_manager.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/texture_cache/surface_base.h"
21#include "video_core/texture_cache/texture_cache.h"
22#include "video_core/textures/decoders.h"
23
24namespace Core {
25class System;
26}
27
28namespace VideoCore {
29class RasterizerInterface;
30}
31
32namespace Vulkan {
33
34class RasterizerVulkan;
35class VKDevice;
36class VKResourceManager;
37class VKScheduler;
38class VKStagingBufferPool;
39
40class CachedSurfaceView;
41class CachedSurface;
42
43using Surface = std::shared_ptr<CachedSurface>;
44using View = std::shared_ptr<CachedSurfaceView>;
45using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
46
47using VideoCommon::SurfaceParams;
48using VideoCommon::ViewParams;
49
50class CachedSurface final : public VideoCommon::SurfaceBase<View> {
51 friend CachedSurfaceView;
52
53public:
54 explicit CachedSurface(Core::System& system, const VKDevice& device,
55 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
56 VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
57 GPUVAddr gpu_addr, const SurfaceParams& params);
58 ~CachedSurface();
59
60 void UploadTexture(const std::vector<u8>& staging_buffer) override;
61 void DownloadTexture(std::vector<u8>& staging_buffer) override;
62
63 void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
64 vk::ImageLayout new_layout) {
65 image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
66 new_stage_mask, new_access, new_layout);
67 }
68
69 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
70 vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
71 vk::ImageLayout new_layout) {
72 image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
73 new_access, new_layout);
74 }
75
76 VKImage& GetImage() {
77 return *image;
78 }
79
80 const VKImage& GetImage() const {
81 return *image;
82 }
83
84 vk::Image GetImageHandle() const {
85 return image->GetHandle();
86 }
87
88 vk::ImageAspectFlags GetAspectMask() const {
89 return image->GetAspectMask();
90 }
91
92 vk::BufferView GetBufferViewHandle() const {
93 return *buffer_view;
94 }
95
96protected:
97 void DecorateSurfaceName();
98
99 View CreateView(const ViewParams& params) override;
100 View CreateViewInner(const ViewParams& params, bool is_proxy);
101
102private:
103 void UploadBuffer(const std::vector<u8>& staging_buffer);
104
105 void UploadImage(const std::vector<u8>& staging_buffer);
106
107 vk::BufferImageCopy GetBufferImageCopy(u32 level) const;
108
109 vk::ImageSubresourceRange GetImageSubresourceRange() const;
110
111 Core::System& system;
112 const VKDevice& device;
113 VKResourceManager& resource_manager;
114 VKMemoryManager& memory_manager;
115 VKScheduler& scheduler;
116 VKStagingBufferPool& staging_pool;
117
118 std::optional<VKImage> image;
119 UniqueBuffer buffer;
120 UniqueBufferView buffer_view;
121 VKMemoryCommit commit;
122
123 vk::Format format;
124};
125
126class CachedSurfaceView final : public VideoCommon::ViewBase {
127public:
128 explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
129 const ViewParams& params, bool is_proxy);
130 ~CachedSurfaceView();
131
132 vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
133 Tegra::Texture::SwizzleSource y_source,
134 Tegra::Texture::SwizzleSource z_source,
135 Tegra::Texture::SwizzleSource w_source);
136
137 bool IsSameSurface(const CachedSurfaceView& rhs) const {
138 return &surface == &rhs.surface;
139 }
140
141 vk::ImageView GetHandle() {
142 return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
143 Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
144 }
145
146 u32 GetWidth() const {
147 return params.GetMipWidth(base_level);
148 }
149
150 u32 GetHeight() const {
151 return params.GetMipHeight(base_level);
152 }
153
154 bool IsBufferView() const {
155 return buffer_view;
156 }
157
158 vk::Image GetImage() const {
159 return image;
160 }
161
162 vk::BufferView GetBufferView() const {
163 return buffer_view;
164 }
165
166 vk::ImageSubresourceRange GetImageSubresourceRange() const {
167 return {aspect_mask, base_level, num_levels, base_layer, num_layers};
168 }
169
170 vk::ImageSubresourceLayers GetImageSubresourceLayers() const {
171 return {surface.GetAspectMask(), base_level, base_layer, num_layers};
172 }
173
174 void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask,
175 vk::AccessFlags new_access) const {
176 surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
177 new_access, new_layout);
178 }
179
180 void MarkAsModified(u64 tick) {
181 surface.MarkAsModified(true, tick);
182 }
183
184private:
185 static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
186 Tegra::Texture::SwizzleSource y_source,
187 Tegra::Texture::SwizzleSource z_source,
188 Tegra::Texture::SwizzleSource w_source) {
189 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
190 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
191 }
192
193 // Store a copy of these values to avoid double dereference when reading them
194 const SurfaceParams params;
195 const vk::Image image;
196 const vk::BufferView buffer_view;
197 const vk::ImageAspectFlags aspect_mask;
198
199 const VKDevice& device;
200 CachedSurface& surface;
201 const u32 base_layer;
202 const u32 num_layers;
203 const u32 base_level;
204 const u32 num_levels;
205 const vk::ImageViewType image_view_type;
206
207 vk::ImageView last_image_view;
208 u32 last_swizzle{};
209
210 std::unordered_map<u32, UniqueImageView> view_cache;
211};
212
213class VKTextureCache final : public TextureCacheBase {
214public:
215 explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
216 const VKDevice& device, VKResourceManager& resource_manager,
217 VKMemoryManager& memory_manager, VKScheduler& scheduler,
218 VKStagingBufferPool& staging_pool);
219 ~VKTextureCache();
220
221private:
222 Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
223
224 void ImageCopy(Surface& src_surface, Surface& dst_surface,
225 const VideoCommon::CopyParams& copy_params) override;
226
227 void ImageBlit(View& src_view, View& dst_view,
228 const Tegra::Engines::Fermi2D::Config& copy_config) override;
229
230 void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
231
232 const VKDevice& device;
233 VKResourceManager& resource_manager;
234 VKMemoryManager& memory_manager;
235 VKScheduler& scheduler;
236 VKStagingBufferPool& staging_pool;
237};
238
239} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
new file mode 100644
index 000000000..0e577b9ff
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -0,0 +1,57 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <variant>
6#include <boost/container/static_vector.hpp>
7
8#include "common/assert.h"
9#include "common/logging/log.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/renderer_vulkan/vk_scheduler.h"
13#include "video_core/renderer_vulkan/vk_update_descriptor.h"
14
15namespace Vulkan {
16
17VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler)
18 : device{device}, scheduler{scheduler} {}
19
20VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
21
22void VKUpdateDescriptorQueue::TickFrame() {
23 payload.clear();
24}
25
26void VKUpdateDescriptorQueue::Acquire() {
27 entries.clear();
28}
29
30void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
31 vk::DescriptorSet set) {
32 if (payload.size() + entries.size() >= payload.max_size()) {
33 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
34 scheduler.WaitWorker();
35 payload.clear();
36 }
37
38 const auto payload_start = payload.data() + payload.size();
39 for (const auto& entry : entries) {
40 if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) {
41 payload.push_back(*image);
42 } else if (const auto buffer = std::get_if<Buffer>(&entry)) {
43 payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
44 } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) {
45 payload.push_back(*texel);
46 } else {
47 UNREACHABLE();
48 }
49 }
50
51 scheduler.Record([dev = device.GetLogical(), payload_start, set,
52 update_template]([[maybe_unused]] auto cmdbuf, auto& dld) {
53 dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld);
54 });
55}
56
57} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
new file mode 100644
index 000000000..8c825aa29
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -0,0 +1,86 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <variant>
9#include <boost/container/static_vector.hpp>
10
11#include "common/common_types.h"
12#include "video_core/renderer_vulkan/declarations.h"
13
14namespace Vulkan {
15
16class VKDevice;
17class VKScheduler;
18
19class DescriptorUpdateEntry {
20public:
21 explicit DescriptorUpdateEntry() : image{} {}
22
23 DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {}
24
25 DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size)
26 : buffer{buffer, offset, size} {}
27
28 DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {}
29
30private:
31 union {
32 vk::DescriptorImageInfo image;
33 vk::DescriptorBufferInfo buffer;
34 vk::BufferView texel_buffer;
35 };
36};
37
38class VKUpdateDescriptorQueue final {
39public:
40 explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler);
41 ~VKUpdateDescriptorQueue();
42
43 void TickFrame();
44
45 void Acquire();
46
47 void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set);
48
49 void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) {
50 entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}});
51 }
52
53 void AddImage(vk::ImageView image_view) {
54 entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}});
55 }
56
57 void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) {
58 entries.push_back(Buffer{buffer, offset, size});
59 }
60
61 void AddTexelBuffer(vk::BufferView texel_buffer) {
62 entries.emplace_back(texel_buffer);
63 }
64
65 vk::ImageLayout* GetLastImageLayout() {
66 return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout;
67 }
68
69private:
70 struct Buffer {
71 const vk::Buffer* buffer{};
72 u64 offset{};
73 std::size_t size{};
74 };
75 using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>;
76 // Old gcc versions don't consider this trivially copyable.
77 // static_assert(std::is_trivially_copyable_v<Variant>);
78
79 const VKDevice& device;
80 VKScheduler& scheduler;
81
82 boost::container::static_vector<Variant, 0x400> entries;
83 boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
84};
85
86} // namespace Vulkan
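VKUpdateDescriptorQueue above stages per-draw descriptor data into entries and later flattens it into a contiguous payload whose layout matches a descriptor update template, so the actual template update can be deferred to the scheduler's worker thread. A rough, self-contained sketch of that flattening step follows; ImageInfo, BufferInfo, TexelView and Flatten are invented stand-ins for the Vulkan descriptor info types.

#include <cstddef>
#include <cstdint>
#include <variant>
#include <vector>

struct ImageInfo { std::uint64_t sampler, view, layout; };
struct BufferInfo { std::uint64_t buffer, offset, size; };
struct TexelView { std::uint64_t view; };

// Every flattened entry occupies the same stride, which is what a descriptor
// update template expects from its pData array.
union FlatEntry {
    ImageInfo image;
    BufferInfo buffer;
    TexelView texel;
};

using Entry = std::variant<ImageInfo, BufferInfo, TexelView>;

// Appends the staged entries to 'payload' and returns a pointer to the first
// element written; that pointer plays the role of pData in the template update.
// Capacity must be reserved up front for the pointer to stay valid.
inline const FlatEntry* Flatten(const std::vector<Entry>& entries,
                                std::vector<FlatEntry>& payload) {
    const std::size_t start = payload.size();
    for (const Entry& entry : entries) {
        FlatEntry flat{};
        if (const auto* image = std::get_if<ImageInfo>(&entry)) {
            flat.image = *image;
        } else if (const auto* buffer = std::get_if<BufferInfo>(&entry)) {
            flat.buffer = *buffer;
        } else {
            flat.texel = std::get<TexelView>(entry);
        }
        payload.push_back(flat);
    }
    return payload.data() + start;
}

The fixed-capacity static_vector in the real code is what keeps the payload pointer captured by the deferred lambda valid; a plain std::vector, as in this sketch, only offers the same guarantee if its capacity is reserved ahead of time.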
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index b427ac873..0229733b6 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -65,7 +65,7 @@ struct BlockInfo {
65 65
66struct CFGRebuildState { 66struct CFGRebuildState {
67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) 67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
68 : program_code{program_code}, start{start}, locker{locker} {} 68 : program_code{program_code}, locker{locker}, start{start} {}
69 69
70 const ProgramCode& program_code; 70 const ProgramCode& program_code;
71 ConstBufferLocker& locker; 71 ConstBufferLocker& locker;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index c934d0719..7591a715f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -6,6 +6,7 @@
6#include <vector> 6#include <vector>
7#include <fmt/format.h> 7#include <fmt/format.h>
8 8
9#include "common/alignment.h"
9#include "common/assert.h" 10#include "common/assert.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "common/logging/log.h" 12#include "common/logging/log.h"
@@ -15,6 +16,8 @@
15 16
16namespace VideoCommon::Shader { 17namespace VideoCommon::Shader {
17 18
19using Tegra::Shader::AtomicOp;
20using Tegra::Shader::AtomicType;
18using Tegra::Shader::Attribute; 21using Tegra::Shader::Attribute;
19using Tegra::Shader::Instruction; 22using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode; 23using Tegra::Shader::OpCode;
@@ -22,34 +25,39 @@ using Tegra::Shader::Register;
22 25
23namespace { 26namespace {
24 27
25u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { 28bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
29 return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
30 uniform_type == Tegra::Shader::UniformType::UnsignedShort;
31}
32
33u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
26 switch (uniform_type) { 34 switch (uniform_type) {
27 case Tegra::Shader::UniformType::UnsignedByte: 35 case Tegra::Shader::UniformType::UnsignedByte:
28 case Tegra::Shader::UniformType::Single: 36 return 0b11;
29 return 1; 37 case Tegra::Shader::UniformType::UnsignedShort:
30 case Tegra::Shader::UniformType::Double: 38 return 0b10;
31 return 2;
32 case Tegra::Shader::UniformType::Quad:
33 case Tegra::Shader::UniformType::UnsignedQuad:
34 return 4;
35 default: 39 default:
36 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); 40 UNREACHABLE();
37 return 1; 41 return 0;
38 } 42 }
39} 43}
40 44
41u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { 45u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
42 switch (uniform_type) { 46 switch (uniform_type) {
47 case Tegra::Shader::UniformType::UnsignedByte:
48 return 8;
49 case Tegra::Shader::UniformType::UnsignedShort:
50 return 16;
43 case Tegra::Shader::UniformType::Single: 51 case Tegra::Shader::UniformType::Single:
44 return 1; 52 return 32;
45 case Tegra::Shader::UniformType::Double: 53 case Tegra::Shader::UniformType::Double:
46 return 2; 54 return 64;
47 case Tegra::Shader::UniformType::Quad: 55 case Tegra::Shader::UniformType::Quad:
48 case Tegra::Shader::UniformType::UnsignedQuad: 56 case Tegra::Shader::UniformType::UnsignedQuad:
49 return 4; 57 return 128;
50 default: 58 default:
51 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); 59 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
52 return 1; 60 return 32;
53 } 61 }
54} 62}
55 63
@@ -184,9 +192,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
184 }(); 192 }();
185 193
186 const auto [real_address_base, base_address, descriptor] = 194 const auto [real_address_base, base_address, descriptor] =
187 TrackGlobalMemory(bb, instr, false); 195 TrackGlobalMemory(bb, instr, true, false);
188 196
189 const u32 count = GetLdgMemorySize(type); 197 const u32 size = GetMemorySize(type);
198 const u32 count = Common::AlignUp(size, 32) / 32;
190 if (!real_address_base || !base_address) { 199 if (!real_address_base || !base_address) {
191 // Tracking failed, load zeroes. 200 // Tracking failed, load zeroes.
192 for (u32 i = 0; i < count; ++i) { 201 for (u32 i = 0; i < count; ++i) {
@@ -200,14 +209,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
200 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); 209 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
201 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 210 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
202 211
203 if (type == Tegra::Shader::UniformType::UnsignedByte) { 212 // To handle unaligned loads get the bytes used to dereference global memory and extract
204 // To handle unaligned loads get the byte used to dereferenced global memory 213 // those bytes from the loaded u32.
205 // and extract that byte from the loaded uint32. 214 if (IsUnaligned(type)) {
206 Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); 215 Node mask = Immediate(GetUnalignedMask(type));
207 byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); 216 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
217 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
208 218
209 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), 219 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
210 Immediate(8)); 220 std::move(offset), Immediate(size));
211 } 221 }
212 222
213 SetTemporary(bb, i, gmem); 223 SetTemporary(bb, i, gmem);
@@ -295,23 +305,53 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
295 } 305 }
296 }(); 306 }();
297 307
308 // For unaligned reads we have to read memory too.
309 const bool is_read = IsUnaligned(type);
298 const auto [real_address_base, base_address, descriptor] = 310 const auto [real_address_base, base_address, descriptor] =
299 TrackGlobalMemory(bb, instr, true); 311 TrackGlobalMemory(bb, instr, is_read, true);
300 if (!real_address_base || !base_address) { 312 if (!real_address_base || !base_address) {
301 // Tracking failed, skip the store. 313 // Tracking failed, skip the store.
302 break; 314 break;
303 } 315 }
304 316
305 const u32 count = GetStgMemorySize(type); 317 const u32 size = GetMemorySize(type);
318 const u32 count = Common::AlignUp(size, 32) / 32;
306 for (u32 i = 0; i < count; ++i) { 319 for (u32 i = 0; i < count; ++i) {
307 const Node it_offset = Immediate(i * 4); 320 const Node it_offset = Immediate(i * 4);
308 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); 321 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
309 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 322 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
310 const Node value = GetRegister(instr.gpr0.Value() + i); 323 Node value = GetRegister(instr.gpr0.Value() + i);
324
325 if (IsUnaligned(type)) {
326 Node mask = Immediate(GetUnalignedMask(type));
327 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
328 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
329
330 value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
331 Immediate(size));
332 }
333
311 bb.push_back(Operation(OperationCode::Assign, gmem, value)); 334 bb.push_back(Operation(OperationCode::Assign, gmem, value));
312 } 335 }
313 break; 336 break;
314 } 337 }
338 case OpCode::Id::ATOMS: {
339 UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
340 static_cast<int>(instr.atoms.operation.Value()));
341 UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
342 static_cast<int>(instr.atoms.type.Value()));
343
344 const s32 offset = instr.atoms.GetImmediateOffset();
345 Node address = GetRegister(instr.gpr8);
346 address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
347
348 Node memory = GetSharedMemory(std::move(address));
349 Node data = GetRegister(instr.gpr20);
350
351 Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
352 SetRegister(bb, instr.gpr0, std::move(value));
353 break;
354 }
315 case OpCode::Id::AL2P: { 355 case OpCode::Id::AL2P: {
316 // Ignore al2p.direction since we don't care about it. 356 // Ignore al2p.direction since we don't care about it.
317 357
@@ -336,7 +376,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
336 376
337std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, 377std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
338 Instruction instr, 378 Instruction instr,
339 bool is_write) { 379 bool is_read, bool is_write) {
340 const auto addr_register{GetRegister(instr.gmem.gpr)}; 380 const auto addr_register{GetRegister(instr.gmem.gpr)};
341 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 381 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
342 382
@@ -351,11 +391,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
351 const GlobalMemoryBase descriptor{index, offset}; 391 const GlobalMemoryBase descriptor{index, offset};
352 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 392 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
353 auto& usage = entry->second; 393 auto& usage = entry->second;
354 if (is_write) { 394 usage.is_written |= is_write;
355 usage.is_written = true; 395 usage.is_read |= is_read;
356 } else {
357 usage.is_read = true;
358 }
359 396
360 const auto real_address = 397 const auto real_address =
361 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); 398 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
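For context on the LDG/STG changes above: unaligned byte and short accesses are emulated by operating on the enclosing aligned 32-bit word, computing a bit offset from the low address bits and then extracting (loads) or inserting (stores) the sub-word value. A plain C++ sketch of that arithmetic, with invented helper names, mirrors the UBitfieldExtract/UBitfieldInsert operations emitted by the decoder.

#include <cstdint>

// Only byte (8-bit) and short (16-bit) global accesses can be unaligned; wider
// accesses are already 32-bit aligned and take the normal path.
constexpr std::uint32_t UnalignedMask(std::uint32_t size_in_bits) {
    return size_in_bits == 8 ? 0b11u : 0b10u;
}

constexpr std::uint32_t LoadUnaligned(std::uint32_t word, std::uint32_t address,
                                      std::uint32_t size_in_bits) {
    const std::uint32_t bit_offset = (address & UnalignedMask(size_in_bits)) << 3;
    const std::uint32_t value_mask = (1u << size_in_bits) - 1;
    return (word >> bit_offset) & value_mask; // UBitfieldExtract
}

constexpr std::uint32_t StoreUnaligned(std::uint32_t word, std::uint32_t value,
                                       std::uint32_t address, std::uint32_t size_in_bits) {
    const std::uint32_t bit_offset = (address & UnalignedMask(size_in_bits)) << 3;
    const std::uint32_t field_mask = ((1u << size_in_bits) - 1) << bit_offset;
    return (word & ~field_mask) | ((value << bit_offset) & field_mask); // UBitfieldInsert
}

// Byte 1 of the little-endian word 0xAABBCCDD is 0xCC; storing 0xEE there
// rewrites only that byte.
static_assert(LoadUnaligned(0xAABBCCDDu, 0x1001, 8) == 0xCC);
static_assert(StoreUnaligned(0xAABBCCDDu, 0xEE, 0x1001, 8) == 0xAABBEEDDu);

This is also why TrackGlobalMemory now takes an is_read flag for stores: an unaligned store has to read the surrounding word before inserting the new bits.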
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 4b14cdf58..cd984f763 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
794 794
795std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, 795std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
796 bool is_tld4) { 796 bool is_tld4) {
797 const auto [coord_offsets, size, wrap_value, 797 const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
798 diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { 798 const u32 size = is_tld4 ? 6 : 4;
799 if (is_tld4) { 799 const s32 wrap_value = is_tld4 ? 32 : 8;
800 return {{0, 8, 16}, 6, 32, 64}; 800 const s32 diff_value = is_tld4 ? 64 : 16;
801 } else {
802 return {{0, 4, 8}, 4, 8, 16};
803 }
804 }();
805 const u32 mask = (1U << size) - 1; 801 const u32 mask = (1U << size) - 1;
806 802
807 std::vector<Node> aoffi; 803 std::vector<Node> aoffi;
@@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
814 LOG_WARNING(HW_GPU, 810 LOG_WARNING(HW_GPU,
815 "AOFFI constant folding failed, some hardware might have graphical issues"); 811 "AOFFI constant folding failed, some hardware might have graphical issues");
816 for (std::size_t coord = 0; coord < coord_count; ++coord) { 812 for (std::size_t coord = 0; coord < coord_count; ++coord) {
817 const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); 813 const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
818 const Node condition = 814 const Node condition =
819 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); 815 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
820 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); 816 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
@@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
824 } 820 }
825 821
826 for (std::size_t coord = 0; coord < coord_count; ++coord) { 822 for (std::size_t coord = 0; coord < coord_count; ++coord) {
827 s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; 823 s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
828 if (value >= wrap_value) { 824 if (value >= wrap_value) {
829 value -= diff_value; 825 value -= diff_value;
830 } 826 }
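The AOFFI refactor above replaces the tuple-returning lambda with plain constants but keeps the decoding itself: each offset component is a packed 4-bit field (6-bit for TLD4), and values at or above the wrap point fold back into the negative range, i.e. a manual sign extension. A small sketch of that decoding for the immediate path, using an invented helper name:

#include <cstdint>

// Decodes one packed AOFFI component from the immediate register value.
// TEX/TLD pack 4-bit fields at offsets 0/4/8; TLD4 packs 6-bit fields at 0/8/16.
constexpr std::int32_t DecodeAoffiComponent(std::uint32_t packed, std::uint32_t shift,
                                            bool is_tld4) {
    const std::uint32_t size = is_tld4 ? 6 : 4;
    const std::int32_t wrap_value = is_tld4 ? 32 : 8;
    const std::int32_t diff_value = is_tld4 ? 64 : 16;
    const std::uint32_t mask = (1u << size) - 1;
    const std::int32_t value = static_cast<std::int32_t>((packed >> shift) & mask);
    return value >= wrap_value ? value - diff_value : value; // manual sign extension
}

// 0xF in a 4-bit field decodes to -1, while 0x7 stays +7.
static_assert(DecodeAoffiComponent(0xF, 0, false) == -1);
static_assert(DecodeAoffiComponent(0x7, 0, false) == 7);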
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4d2f4d6a8..075c7d07c 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,6 +162,8 @@ enum class OperationCode {
162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void 162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void 163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
164 164
165 UAtomicAdd, /// (smem, uint) -> uint
166
165 Branch, /// (uint branch_target) -> void 167 Branch, /// (uint branch_target) -> void
166 BranchIndirect, /// (uint branch_target) -> void 168 BranchIndirect, /// (uint branch_target) -> void
167 PushFlowStack, /// (uint branch_target) -> void 169 PushFlowStack, /// (uint branch_target) -> void
@@ -392,8 +394,30 @@ struct MetaImage {
392using Meta = 394using Meta =
393 std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; 395 std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
394 396
397class AmendNode {
398public:
399 std::optional<std::size_t> GetAmendIndex() const {
400 if (amend_index == amend_null_index) {
401 return std::nullopt;
402 }
403 return {amend_index};
404 }
405
406 void SetAmendIndex(std::size_t index) {
407 amend_index = index;
408 }
409
410 void ClearAmend() {
411 amend_index = amend_null_index;
412 }
413
414private:
415 static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
416 std::size_t amend_index{amend_null_index};
417};
418
395/// Holds any kind of operation that can be done in the IR 419/// Holds any kind of operation that can be done in the IR
396class OperationNode final { 420class OperationNode final : public AmendNode {
397public: 421public:
398 explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} 422 explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
399 423
@@ -433,7 +457,7 @@ private:
433}; 457};
434 458
435/// Encloses inside any kind of node that returns a boolean conditionally-executed code 459/// Encloses inside any kind of node that returns a boolean conditionally-executed code
436class ConditionalNode final { 460class ConditionalNode final : public AmendNode {
437public: 461public:
438 explicit ConditionalNode(Node condition, std::vector<Node>&& code) 462 explicit ConditionalNode(Node condition, std::vector<Node>&& code)
439 : condition{std::move(condition)}, code{std::move(code)} {} 463 : condition{std::move(condition)}, code{std::move(code)} {}
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 1d9825c76..31eecb3f4 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
446 Immediate(bits)); 446 Immediate(bits));
447} 447}
448 448
449std::size_t ShaderIR::DeclareAmend(Node new_amend) {
450 const std::size_t id = amend_code.size();
451 amend_code.push_back(new_amend);
452 return id;
453}
454
449} // namespace VideoCommon::Shader 455} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index baed06ccd..ba1db4c11 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -176,6 +176,10 @@ public:
176 /// Returns a condition code evaluated from internal flags 176 /// Returns a condition code evaluated from internal flags
177 Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; 177 Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
178 178
179 const Node& GetAmendNode(std::size_t index) const {
180 return amend_code[index];
181 }
182
179private: 183private:
180 friend class ASTDecoder; 184 friend class ASTDecoder;
181 185
@@ -390,7 +394,10 @@ private:
390 394
391 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, 395 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
392 Tegra::Shader::Instruction instr, 396 Tegra::Shader::Instruction instr,
393 bool is_write); 397 bool is_read, bool is_write);
398
399 /// Register new amending code and obtain the reference id.
400 std::size_t DeclareAmend(Node new_amend);
394 401
395 const ProgramCode& program_code; 402 const ProgramCode& program_code;
396 const u32 main_offset; 403 const u32 main_offset;
@@ -406,6 +413,7 @@ private:
406 std::map<u32, NodeBlock> basic_blocks; 413 std::map<u32, NodeBlock> basic_blocks;
407 NodeBlock global_code; 414 NodeBlock global_code;
408 ASTManager program_manager{true, true}; 415 ASTManager program_manager{true, true};
416 std::vector<Node> amend_code;
409 417
410 std::set<u32> used_registers; 418 std::set<u32> used_registers;
411 std::set<Tegra::Shader::Pred> used_predicates; 419 std::set<Tegra::Shader::Pred> used_predicates;
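The amend mechanism introduced across node.h, shader_ir.cpp and shader_ir.h attaches optional extra code to operation and conditional nodes via a sentinel index into a side table owned by the IR, so nodes without amend code only pay for a single std::size_t. A condensed, self-contained sketch of the pattern, with simplified stand-ins for the real node types:

#include <cstddef>
#include <optional>
#include <string>
#include <vector>

class AmendSlot {
public:
    std::optional<std::size_t> GetAmendIndex() const {
        if (index == null_index) {
            return std::nullopt;
        }
        return index;
    }
    void SetAmendIndex(std::size_t new_index) { index = new_index; }
    void ClearAmend() { index = null_index; }

private:
    static constexpr std::size_t null_index = ~std::size_t{0};
    std::size_t index = null_index;
};

struct Node : AmendSlot {
    std::string op; // placeholder for the real node payload
};

class IR {
public:
    // Registers amend code and returns the id a node stores via SetAmendIndex.
    std::size_t DeclareAmend(Node amend) {
        const std::size_t id = amend_code.size();
        amend_code.push_back(std::move(amend));
        return id;
    }
    const Node& GetAmendNode(std::size_t index) const { return amend_code[index]; }

private:
    std::vector<Node> amend_code;
};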
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 271e67533..81fb9f633 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -95,7 +95,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
95 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, 95 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
96 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, 96 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
97 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, 97 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
98 {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8}, 98 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
99 99
100 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, 100 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
101 {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, 101 {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 992b5c022..9256fd6d9 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -209,6 +209,11 @@ public:
209 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; 209 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
210 } 210 }
211 211
212 /// Returns the number of layers in the surface.
213 std::size_t GetNumLayers() const {
214 return is_layered ? depth : 1;
215 }
216
212 /// Returns the debug name of the texture for use in graphic debuggers. 217 /// Returns the debug name of the texture for use in graphic debuggers.
213 std::string TargetName() const; 218 std::string TargetName() const;
214 219
@@ -287,10 +292,6 @@ private:
287 /// Returns the size of a layer 292 /// Returns the size of a layer
288 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; 293 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
289 294
290 std::size_t GetNumLayers() const {
291 return is_layered ? depth : 1;
292 }
293
294 /// Returns true if these parameters are from a layered surface. 295 /// Returns true if these parameters are from a layered surface.
295 bool IsLayered() const; 296 bool IsLayered() const;
296}; 297};
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 07a720494..7490fb718 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -215,18 +215,11 @@ void GRenderWindow::moveContext() {
215} 215}
216 216
217void GRenderWindow::SwapBuffers() { 217void GRenderWindow::SwapBuffers() {
218 // In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
219 // since we never call `doneCurrent` in this thread.
220 // However:
221 // - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
222 // since the last time `swapBuffers` was executed;
223 // - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
224 context->makeCurrent(child);
225
226 context->swapBuffers(child); 218 context->swapBuffers(child);
219
227 if (!first_frame) { 220 if (!first_frame) {
228 emit FirstFrameDisplayed();
229 first_frame = true; 221 first_frame = true;
222 emit FirstFrameDisplayed();
230 } 223 }
231} 224}
232 225
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp
index daedbc33e..e43e84d39 100644
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -21,10 +21,8 @@ constexpr std::array default_icon_sizes{
21}; 21};
22 22
23constexpr std::array row_text_names{ 23constexpr std::array row_text_names{
24 QT_TR_NOOP("Filename"), 24 QT_TR_NOOP("Filename"), QT_TR_NOOP("Filetype"), QT_TR_NOOP("Title ID"),
25 QT_TR_NOOP("Filetype"), 25 QT_TR_NOOP("Title Name"), QT_TR_NOOP("None"),
26 QT_TR_NOOP("Title ID"),
27 QT_TR_NOOP("Title Name"),
28}; 26};
29} // Anonymous namespace 27} // Anonymous namespace
30 28
@@ -46,6 +44,12 @@ ConfigureGameList::ConfigureGameList(QWidget* parent)
46 &ConfigureGameList::RequestGameListUpdate); 44 &ConfigureGameList::RequestGameListUpdate);
47 connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, 45 connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
48 &ConfigureGameList::RequestGameListUpdate); 46 &ConfigureGameList::RequestGameListUpdate);
47
48 // Update text ComboBoxes after user interaction.
49 connect(ui->row_1_text_combobox, QOverload<int>::of(&QComboBox::activated),
50 [=]() { ConfigureGameList::UpdateSecondRowComboBox(); });
51 connect(ui->row_2_text_combobox, QOverload<int>::of(&QComboBox::activated),
52 [=]() { ConfigureGameList::UpdateFirstRowComboBox(); });
49} 53}
50 54
51ConfigureGameList::~ConfigureGameList() = default; 55ConfigureGameList::~ConfigureGameList() = default;
@@ -68,10 +72,6 @@ void ConfigureGameList::SetConfiguration() {
68 ui->show_add_ons->setChecked(UISettings::values.show_add_ons); 72 ui->show_add_ons->setChecked(UISettings::values.show_add_ons);
69 ui->icon_size_combobox->setCurrentIndex( 73 ui->icon_size_combobox->setCurrentIndex(
70 ui->icon_size_combobox->findData(UISettings::values.icon_size)); 74 ui->icon_size_combobox->findData(UISettings::values.icon_size));
71 ui->row_1_text_combobox->setCurrentIndex(
72 ui->row_1_text_combobox->findData(UISettings::values.row_1_text_id));
73 ui->row_2_text_combobox->setCurrentIndex(
74 ui->row_2_text_combobox->findData(UISettings::values.row_2_text_id));
75} 75}
76 76
77void ConfigureGameList::changeEvent(QEvent* event) { 77void ConfigureGameList::changeEvent(QEvent* event) {
@@ -104,10 +104,43 @@ void ConfigureGameList::InitializeIconSizeComboBox() {
104} 104}
105 105
106void ConfigureGameList::InitializeRowComboBoxes() { 106void ConfigureGameList::InitializeRowComboBoxes() {
107 for (std::size_t i = 0; i < row_text_names.size(); ++i) { 107 UpdateFirstRowComboBox(true);
108 const QString row_text_name = QString::fromUtf8(row_text_names[i]); 108 UpdateSecondRowComboBox(true);
109}
110
111void ConfigureGameList::UpdateFirstRowComboBox(bool init) {
112 const int currentIndex =
113 init ? UISettings::values.row_1_text_id
114 : ui->row_1_text_combobox->findData(ui->row_1_text_combobox->currentData());
109 115
116 ui->row_1_text_combobox->clear();
117
118 for (std::size_t i = 0; i < row_text_names.size(); i++) {
119 const QString row_text_name = QString::fromUtf8(row_text_names[i]);
110 ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i)); 120 ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
121 }
122
123 ui->row_1_text_combobox->setCurrentIndex(ui->row_1_text_combobox->findData(currentIndex));
124
125 ui->row_1_text_combobox->removeItem(4); // None
126 ui->row_1_text_combobox->removeItem(
127 ui->row_1_text_combobox->findData(ui->row_2_text_combobox->currentData()));
128}
129
130void ConfigureGameList::UpdateSecondRowComboBox(bool init) {
131 const int currentIndex =
132 init ? UISettings::values.row_2_text_id
133 : ui->row_2_text_combobox->findData(ui->row_2_text_combobox->currentData());
134
135 ui->row_2_text_combobox->clear();
136
137 for (std::size_t i = 0; i < row_text_names.size(); ++i) {
138 const QString row_text_name = QString::fromUtf8(row_text_names[i]);
111 ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i)); 139 ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
112 } 140 }
141
142 ui->row_2_text_combobox->setCurrentIndex(ui->row_2_text_combobox->findData(currentIndex));
143
144 ui->row_2_text_combobox->removeItem(
145 ui->row_2_text_combobox->findData(ui->row_1_text_combobox->currentData()));
113} 146}
diff --git a/src/yuzu/configuration/configure_gamelist.h b/src/yuzu/configuration/configure_gamelist.h
index e11822919..ecd3fa174 100644
--- a/src/yuzu/configuration/configure_gamelist.h
+++ b/src/yuzu/configuration/configure_gamelist.h
@@ -31,5 +31,8 @@ private:
31 void InitializeIconSizeComboBox(); 31 void InitializeIconSizeComboBox();
32 void InitializeRowComboBoxes(); 32 void InitializeRowComboBoxes();
33 33
34 void UpdateFirstRowComboBox(bool init = false);
35 void UpdateSecondRowComboBox(bool init = false);
36
34 std::unique_ptr<Ui::ConfigureGameList> ui; 37 std::unique_ptr<Ui::ConfigureGameList> ui;
35}; 38};
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp
index 3ea0b8d67..fa9052136 100644
--- a/src/yuzu/configuration/configure_hotkeys.cpp
+++ b/src/yuzu/configuration/configure_hotkeys.cpp
@@ -48,6 +48,7 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
48 } 48 }
49 49
50 ui->hotkey_list->expandAll(); 50 ui->hotkey_list->expandAll();
51 ui->hotkey_list->resizeColumnToContents(0);
51} 52}
52 53
53void ConfigureHotkeys::changeEvent(QEvent* event) { 54void ConfigureHotkeys::changeEvent(QEvent* event) {
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 1c2b37afd..7cde72d1b 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -108,11 +108,14 @@ public:
108 }}; 108 }};
109 109
110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id); 110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id);
111 const auto& row2 = row_data.at(UISettings::values.row_2_text_id); 111 const int row2_id = UISettings::values.row_2_text_id;
112 112
113 if (row1.isEmpty() || row1 == row2) 113 if (row2_id == 4) // None
114 return row2; 114 return row1;
115 if (row2.isEmpty()) 115
116 const auto& row2 = row_data.at(row2_id);
117
118 if (row1 == row2)
116 return row1; 119 return row1;
117 120
118 return QString(row1 + QStringLiteral("\n ") + row2); 121 return QString(row1 + QStringLiteral("\n ") + row2);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index b21fbf826..b5dd3e0d6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -526,19 +526,30 @@ void GMainWindow::InitializeHotkeys() {
 
     const QString main_window = QStringLiteral("Main Window");
     const QString load_file = QStringLiteral("Load File");
+    const QString load_amiibo = QStringLiteral("Load Amiibo");
     const QString exit_yuzu = QStringLiteral("Exit yuzu");
+    const QString restart_emulation = QStringLiteral("Restart Emulation");
     const QString stop_emulation = QStringLiteral("Stop Emulation");
     const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar");
     const QString toggle_status_bar = QStringLiteral("Toggle Status Bar");
     const QString fullscreen = QStringLiteral("Fullscreen");
+    const QString capture_screenshot = QStringLiteral("Capture Screenshot");
 
     ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file));
     ui.action_Load_File->setShortcutContext(
         hotkey_registry.GetShortcutContext(main_window, load_file));
 
+    ui.action_Load_Amiibo->setShortcut(hotkey_registry.GetKeySequence(main_window, load_amiibo));
+    ui.action_Load_Amiibo->setShortcutContext(
+        hotkey_registry.GetShortcutContext(main_window, load_amiibo));
+
     ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu));
     ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu));
 
+    ui.action_Restart->setShortcut(hotkey_registry.GetKeySequence(main_window, restart_emulation));
+    ui.action_Restart->setShortcutContext(
+        hotkey_registry.GetShortcutContext(main_window, restart_emulation));
+
     ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation));
     ui.action_Stop->setShortcutContext(
         hotkey_registry.GetShortcutContext(main_window, stop_emulation));
@@ -553,6 +564,11 @@ void GMainWindow::InitializeHotkeys() {
     ui.action_Show_Status_Bar->setShortcutContext(
         hotkey_registry.GetShortcutContext(main_window, toggle_status_bar));
 
+    ui.action_Capture_Screenshot->setShortcut(
+        hotkey_registry.GetKeySequence(main_window, capture_screenshot));
+    ui.action_Capture_Screenshot->setShortcutContext(
+        hotkey_registry.GetShortcutContext(main_window, capture_screenshot));
+
     connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),
             &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);
     connect(
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 21f422500..a2c9e4547 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -15,7 +15,7 @@
   </property>
   <property name="windowIcon">
    <iconset>
-    <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset>
+    <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
   </property>
   <property name="tabShape">
    <enum>QTabWidget::Rounded</enum>
@@ -98,6 +98,7 @@
    <addaction name="action_Display_Dock_Widget_Headers"/>
    <addaction name="action_Show_Filter_Bar"/>
    <addaction name="action_Show_Status_Bar"/>
+   <addaction name="separator"/>
    <addaction name="menu_View_Debugging"/>
   </widget>
   <widget class="QMenu" name="menu_Tools">