Diffstat (limited to 'src')
-rw-r--r--  src/common/CMakeLists.txt                                 |   5
-rw-r--r--  src/common/telemetry.cpp                                  |  15
-rw-r--r--  src/common/x64/cpu_detect.cpp                             |  68
-rw-r--r--  src/common/x64/cpu_detect.h                               |  31
-rw-r--r--  src/core/hle/kernel/physical_memory.h                     |   5
-rw-r--r--  src/core/hle/kernel/process.cpp                           |   4
-rw-r--r--  src/core/hle/kernel/vm_manager.cpp                        |  37
-rw-r--r--  src/core/loader/elf.cpp                                   |   3
-rw-r--r--  src/core/loader/kip.cpp                                   |   5
-rw-r--r--  src/core/loader/nso.cpp                                   |  12
-rw-r--r--  src/core/memory.cpp                                       |  11
-rw-r--r--  src/core/memory.h                                         |  16
-rw-r--r--  src/video_core/CMakeLists.txt                             |   5
-rw-r--r--  src/video_core/engines/maxwell_3d.h                       |  14
-rw-r--r--  src/video_core/engines/shader_bytecode.h                  |  37
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp          |   1
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp        |  11
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp   |  12
-rw-r--r--  src/video_core/renderer_opengl/gl_state.cpp               |   1
-rw-r--r--  src/video_core/renderer_opengl/gl_state.h                 |   3
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp       |  16
-rw-r--r--  src/video_core/renderer_opengl/utils.cpp                  |  17
-rw-r--r--  src/video_core/renderer_opengl/utils.h                    |  14
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp          |   9
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h            |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.cpp       |   6
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp   |   7
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h   |   1
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp       | 475
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h         | 239
-rw-r--r--  src/video_core/shader/control_flow.cpp                    |   2
-rw-r--r--  src/video_core/shader/decode/memory.cpp                   | 103
-rw-r--r--  src/video_core/shader/decode/texture.cpp                  |  16
-rw-r--r--  src/video_core/shader/node.h                              |   2
-rw-r--r--  src/video_core/shader/shader_ir.h                         |   2
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp      |   2
-rw-r--r--  src/video_core/texture_cache/surface_params.h             |   9
-rw-r--r--  src/yuzu/configuration/configure_hotkeys.cpp              |   1
-rw-r--r--  src/yuzu/main.ui                                          |   2
39 files changed, 974 insertions(+), 247 deletions(-)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9b0c3db68..9afc6105d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -15,6 +15,10 @@ endif ()
 if (DEFINED ENV{DISPLAYVERSION})
     set(DISPLAY_VERSION $ENV{DISPLAYVERSION})
 endif ()
+
+# Pass the path to git to the GenerateSCMRev.cmake as well
+find_package(Git QUIET)
+
 add_custom_command(OUTPUT scm_rev.cpp
     COMMAND ${CMAKE_COMMAND}
       -DSRC_DIR="${CMAKE_SOURCE_DIR}"
@@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp
       -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}"
       -DBUILD_TAG="${BUILD_TAG}"
       -DBUILD_ID="${DISPLAY_VERSION}"
+      -DGIT_EXECUTABLE="${GIT_EXECUTABLE}"
       -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
     DEPENDS
       # WARNING! It was too much work to try and make a common location for this list,
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index f53a8d193..200c6489a 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -44,20 +44,6 @@ template class Field<std::string>;
 template class Field<const char*>;
 template class Field<std::chrono::microseconds>;
 
-#ifdef ARCHITECTURE_x86_64
-static const char* CpuVendorToStr(Common::CPUVendor vendor) {
-    switch (vendor) {
-    case Common::CPUVendor::INTEL:
-        return "Intel";
-    case Common::CPUVendor::AMD:
-        return "Amd";
-    case Common::CPUVendor::OTHER:
-        return "Other";
-    }
-    UNREACHABLE();
-}
-#endif
-
 void AppendBuildInfo(FieldCollection& fc) {
     const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
     fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
     fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
     fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 2dfcd39c8..c9349a6b4 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -3,8 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
-#include <string>
-#include <thread>
 #include "common/common_types.h"
 #include "common/x64/cpu_detect.h"
 
@@ -51,8 +49,6 @@ namespace Common {
 static CPUCaps Detect() {
     CPUCaps caps = {};
 
-    caps.num_cores = std::thread::hardware_concurrency();
-
     // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
     // yuzu at all anyway
 
@@ -70,12 +66,6 @@ static CPUCaps Detect() {
     __cpuid(cpu_id, 0x80000000);
 
     u32 max_ex_fn = cpu_id[0];
-    if (!strcmp(caps.brand_string, "GenuineIntel"))
-        caps.vendor = CPUVendor::INTEL;
-    else if (!strcmp(caps.brand_string, "AuthenticAMD"))
-        caps.vendor = CPUVendor::AMD;
-    else
-        caps.vendor = CPUVendor::OTHER;
 
     // Set reasonable default brand string even if brand string not available
     strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
             caps.sse4_1 = true;
         if ((cpu_id[2] >> 20) & 1)
             caps.sse4_2 = true;
-        if ((cpu_id[2] >> 22) & 1)
-            caps.movbe = true;
         if ((cpu_id[2] >> 25) & 1)
             caps.aes = true;
 
-        if ((cpu_id[3] >> 24) & 1) {
-            caps.fxsave_fxrstor = true;
-        }
-
        // AVX support requires 3 separate checks:
        //  - Is the AVX bit set in CPUID?
        //  - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
         }
     }
 
-    caps.flush_to_zero = caps.sse;
-
     if (max_ex_fn >= 0x80000004) {
         // Extract CPU model string
         __cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
     if (max_ex_fn >= 0x80000001) {
         // Check for more features
         __cpuid(cpu_id, 0x80000001);
-        if (cpu_id[2] & 1)
-            caps.lahf_sahf_64 = true;
-        if ((cpu_id[2] >> 5) & 1)
-            caps.lzcnt = true;
         if ((cpu_id[2] >> 16) & 1)
             caps.fma4 = true;
-        if ((cpu_id[3] >> 29) & 1)
-            caps.long_mode = true;
     }
 
     return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
     return caps;
 }
 
-std::string GetCPUCapsString() {
-    auto caps = GetCPUCaps();
-
-    std::string sum(caps.cpu_string);
-    sum += " (";
-    sum += caps.brand_string;
-    sum += ")";
-
-    if (caps.sse)
-        sum += ", SSE";
-    if (caps.sse2) {
-        sum += ", SSE2";
-        if (!caps.flush_to_zero)
-            sum += " (without DAZ)";
-    }
-
-    if (caps.sse3)
-        sum += ", SSE3";
-    if (caps.ssse3)
-        sum += ", SSSE3";
-    if (caps.sse4_1)
-        sum += ", SSE4.1";
-    if (caps.sse4_2)
-        sum += ", SSE4.2";
-    if (caps.avx)
-        sum += ", AVX";
-    if (caps.avx2)
-        sum += ", AVX2";
-    if (caps.bmi1)
-        sum += ", BMI1";
-    if (caps.bmi2)
-        sum += ", BMI2";
-    if (caps.fma)
-        sum += ", FMA";
-    if (caps.aes)
-        sum += ", AES";
-    if (caps.movbe)
-        sum += ", MOVBE";
-    if (caps.long_mode)
-        sum += ", 64-bit support";
-
-    return sum;
-}
-
 } // namespace Common
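
The feature bits removed and kept above all come from the same CPUID leaves. A minimal standalone sketch of the detection pattern, assuming GCC or Clang on x86-64 (this uses the <cpuid.h> intrinsic; MSVC's __cpuid intrinsic, which yuzu uses on Windows, has a different signature):

    #include <cpuid.h>
    #include <cstdio>

    int main() {
        unsigned int eax, ebx, ecx, edx;
        // CPUID leaf 1 reports the baseline feature bits tested above.
        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
            // ECX bit 19 is SSE4.1, bit 20 is SSE4.2, bit 25 is AES-NI,
            // matching the (cpu_id[2] >> n) & 1 checks in Detect().
            std::printf("SSE4.1: %s\n", (ecx >> 19) & 1 ? "yes" : "no");
            std::printf("SSE4.2: %s\n", (ecx >> 20) & 1 ? "yes" : "no");
            std::printf("AES-NI: %s\n", (ecx >> 25) & 1 ? "yes" : "no");
        }
        return 0;
    }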
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 0af3a8adb..20f2ba234 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -4,23 +4,12 @@
 
 #pragma once
 
-#include <string>
-
 namespace Common {
 
-/// x86/x64 CPU vendors that may be detected by this module
-enum class CPUVendor {
-    INTEL,
-    AMD,
-    OTHER,
-};
-
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
-    CPUVendor vendor;
     char cpu_string[0x21];
     char brand_string[0x41];
-    int num_cores;
     bool sse;
     bool sse2;
     bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
     bool fma;
     bool fma4;
     bool aes;
-
-    // Support for the FXSAVE and FXRSTOR instructions
-    bool fxsave_fxrstor;
-
-    bool movbe;
-
-    // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
-    // are automatically set to (signed) zero.
-    bool flush_to_zero;
-
-    // Support for LAHF and SAHF instructions in 64-bit mode
-    bool lahf_sahf_64;
-
-    bool long_mode;
 };
 
 /**
@@ -57,10 +32,4 @@ struct CPUCaps {
  */
 const CPUCaps& GetCPUCaps();
 
-/**
- * Gets a string summary of the name and supported capabilities of the host CPU
- * @return String summary
- */
-std::string GetCPUCapsString();
-
 } // namespace Common
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
index 090565310..b689e8e8b 100644
--- a/src/core/hle/kernel/physical_memory.h
+++ b/src/core/hle/kernel/physical_memory.h
@@ -14,6 +14,9 @@ namespace Kernel {
 // - Second to ensure all host backing memory used is aligned to 256 bytes due
 //   to strict alignment restrictions on GPU memory.
 
-using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+class PhysicalMemory final : public PhysicalMemoryVector {
+    using PhysicalMemoryVector::PhysicalMemoryVector;
+};
 
 } // namespace Kernel
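
The point of turning the alias into a derived class is that a distinct type, unlike an alias, can be forward-declared and can participate in overload resolution, which the new Memory::MapMemoryRegion overload further down relies on. A minimal sketch of the idea (the real code keeps the 256-byte-aligned allocator, which is omitted here):

    #include <cstdint>
    #include <vector>

    namespace Kernel {
    class PhysicalMemory; // legal: a class, unlike a type alias, can be forward-declared
    }

    namespace Kernel {
    using PhysicalMemoryVector = std::vector<std::uint8_t>;
    class PhysicalMemory final : public PhysicalMemoryVector {
        using PhysicalMemoryVector::PhysicalMemoryVector; // inherit the vector constructors
    };
    } // namespace Kernel

    int main() {
        Kernel::PhysicalMemory memory(4096); // still behaves like the underlying vector
        memory[0] = 0xFF;
        return memory.size() == 4096 ? 0 : 1;
    }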
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 12ea4ebe3..b9035a0be 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
+    code_memory_size += module_.memory.size();
+
     const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
 
     const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
     MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
     MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
     MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
-
-    code_memory_size += module_.memory.size();
 }
 
 Process::Process(Core::System& system)
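
Moving the accumulation above the std::move fixes a read of a moved-from container: once module_.memory has been moved into the shared_ptr, its size() is typically zero, so the old ordering under-counted code_memory_size. A standalone sketch of the hazard:

    #include <cstdio>
    #include <memory>
    #include <utility>
    #include <vector>

    int main() {
        std::vector<unsigned char> module_memory(0x1000);
        std::size_t code_memory_size = 0;
        code_memory_size += module_memory.size(); // must happen before the move
        const auto owned =
            std::make_shared<std::vector<unsigned char>>(std::move(module_memory));
        // module_memory is now in a valid but unspecified moved-from state; its
        // size() is typically 0, which is what made the old ordering wrong.
        std::printf("accumulated=%zu moved-from size=%zu\n", code_memory_size,
                    module_memory.size());
        return 0;
    }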
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index a9a20ef76..0b3500fce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <cstring>
 #include <iterator>
 #include <utility>
 #include "common/alignment.h"
@@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
     // If necessary, expand backing vector to cover new heap extents in
     // the case of allocating. Otherwise, shrink the backing memory,
     // if a smaller heap has been requested.
-    const u64 old_heap_size = GetCurrentHeapSize();
-    if (size > old_heap_size) {
-        const u64 alloc_size = size - old_heap_size;
-
-        heap_memory->insert(heap_memory->end(), alloc_size, 0);
-        RefreshMemoryBlockMappings(heap_memory.get());
-    } else if (size < old_heap_size) {
-        heap_memory->resize(size);
-        heap_memory->shrink_to_fit();
-
-        RefreshMemoryBlockMappings(heap_memory.get());
-    }
+    heap_memory->resize(size);
+    heap_memory->shrink_to_fit();
+    RefreshMemoryBlockMappings(heap_memory.get());
 
     heap_end = heap_region_base + size;
     ASSERT(GetCurrentHeapSize() == heap_memory->size());
@@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
     // Always merge allocated memory blocks, even when they don't share the same backing block.
     if (left.type == VMAType::AllocatedMemoryBlock &&
         (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
-        const auto right_begin = right.backing_block->begin() + right.offset;
-        const auto right_end = right_begin + right.size;
 
         // Check if we can save work.
         if (left.offset == 0 && left.size == left.backing_block->size()) {
             // Fast case: left is an entire backing block.
-            left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
+            left.backing_block->resize(left.size + right.size);
+            std::memcpy(left.backing_block->data() + left.size,
+                        right.backing_block->data() + right.offset, right.size);
         } else {
             // Slow case: make a new memory block for left and right.
-            const auto left_begin = left.backing_block->begin() + left.offset;
-            const auto left_end = left_begin + left.size;
-            const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
-            const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
-
             auto new_memory = std::make_shared<PhysicalMemory>();
-            new_memory->reserve(left_size + right_size);
-            new_memory->insert(new_memory->end(), left_begin, left_end);
-            new_memory->insert(new_memory->end(), right_begin, right_end);
+            new_memory->resize(left.size + right.size);
+            std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size);
+            std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset,
+                        right.size);
 
             left.backing_block = std::move(new_memory);
             left.offset = 0;
@@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
         memory.UnmapRegion(page_table, vma.base, vma.size);
         break;
     case VMAType::AllocatedMemoryBlock:
-        memory.MapMemoryRegion(page_table, vma.base, vma.size,
-                               vma.backing_block->data() + vma.offset);
+        memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);
         break;
     case VMAType::BackingMemory:
         memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
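
The merge now grows the destination block with resize() and copies raw bytes with std::memcpy instead of doing iterator-based insert(). A standalone sketch of the fast-case semantics, using a plain std::vector<u8> in place of PhysicalMemory:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    using Block = std::vector<std::uint8_t>;

    // Append the window right[offset, offset + size) onto the end of left,
    // mirroring the "fast case" above where left spans its whole backing block.
    void MergeFast(Block& left, const Block& right, std::size_t offset, std::size_t size) {
        const std::size_t old_size = left.size();
        left.resize(old_size + size); // one growth, no per-element iterator work
        std::memcpy(left.data() + old_size, right.data() + offset, size);
    }

    int main() {
        Block left(0x100, 0xAA);
        Block right(0x300, 0xBB);
        MergeFast(left, right, 0x100, 0x80); // left grows to 0x180 bytes
        return left.size() == 0x180 ? 0 : 1;
    }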
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index f1795fdd6..8908e5328 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
             codeset_segment->addr = segment_addr;
             codeset_segment->size = aligned_size;
 
-            memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz);
+            std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i),
+                        p->p_filesz);
             current_image_position += aligned_size;
         }
     }
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 474b55cb1..092103abe 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cstring>
 #include "core/file_sys/kernel_executable.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/gdbstub/gdbstub.h"
@@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
         segment.addr = offset;
         segment.offset = offset;
         segment.size = PageAlignSize(static_cast<u32>(data.size()));
-        program_image.resize(offset);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(offset + data.size());
+        std::memcpy(program_image.data() + offset, data.data(), data.size());
     };
 
     load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset());
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index f629892ae..515c5accb 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <cinttypes>
+#include <cstring>
 #include <vector>
 
 #include "common/common_funcs.h"
@@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
         if (nso_header.IsSegmentCompressed(i)) {
             data = DecompressSegment(data, nso_header.segments[i]);
         }
-        program_image.resize(nso_header.segments[i].location);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(nso_header.segments[i].location + data.size());
+        std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
+                    data.size());
         codeset.segments[i].addr = nso_header.segments[i].location;
         codeset.segments[i].offset = nso_header.segments[i].location;
         codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
@@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
         std::vector<u8> pi_header;
         pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
                          reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
-        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
-                         program_image.end());
+        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(),
+                         program_image.data() + program_image.size());
 
         pi_header = pm->PatchNSO(pi_header, file.GetName());
 
-        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
     }
 
     // Apply cheats if they exist and the program has a valid title ID
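
elf.cpp, kip.cpp and nso.cpp all switch to the same idiom: size the image buffer to cover the segment first, then memcpy the bytes into place. For the in-order segments these loaders process, both forms leave the segment data at [location, location + size) and zero-fill any gap before it; the new form writes the bytes directly instead of truncating and re-appending. A side-by-side sketch of the two idioms:

    #include <cstring>
    #include <vector>

    using Image = std::vector<unsigned char>;

    // Old idiom: cut the image to `location`, then append the segment at the end.
    void LoadSegmentOld(Image& image, const Image& data, std::size_t location) {
        image.resize(location);
        image.insert(image.end(), data.begin(), data.end());
    }

    // New idiom: size the image to cover the segment, then copy it into place.
    void LoadSegmentNew(Image& image, const Image& data, std::size_t location) {
        image.resize(location + data.size());
        std::memcpy(image.data() + location, data.data(), data.size());
    }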
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3c2a29d9b..f0888327f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -14,6 +14,7 @@
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
@@ -38,6 +39,11 @@ struct Memory::Impl {
         system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
     }
 
+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset) {
+        MapMemoryRegion(page_table, base, size, memory.data() + offset);
+    }
+
     void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
         ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
         ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
@@ -601,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {
     impl->SetCurrentPageTable(process);
 }
 
+void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                             Kernel::PhysicalMemory& memory, VAddr offset) {
+    impl->MapMemoryRegion(page_table, base, size, memory, offset);
+}
+
 void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
     impl->MapMemoryRegion(page_table, base, size, target);
 }
diff --git a/src/core/memory.h b/src/core/memory.h
index 1428a6d60..8913a9da4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -19,8 +19,9 @@ class System;
 }
 
 namespace Kernel {
+class PhysicalMemory;
 class Process;
-}
+} // namespace Kernel
 
 namespace Memory {
 
@@ -66,6 +67,19 @@ public:
     void SetCurrentPageTable(Kernel::Process& process);
 
     /**
+     * Maps an physical buffer onto a region of the emulated process address space.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The address to start mapping at. Must be page-aligned.
+     * @param size       The amount of bytes to map. Must be page-aligned.
+     * @param memory     Physical buffer with the memory backing the mapping. Must be of length
+     *                   at least `size + offset`.
+     * @param offset     The offset within the physical memory. Must be page-aligned.
+     */
+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset);
+
+    /**
      * Maps an allocated buffer onto a region of the emulated process address space.
      *
      * @param page_table The page table of the emulated process.
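
A hypothetical call site for the new overload, assuming yuzu's headers are available (the names and the address constant below are invented for illustration; the real callers live in vm_manager.cpp above):

    // Maps backing bytes [page_size, 2 * page_size) at [base, base + page_size).
    void MapSecondPage(Memory::Memory& memory, Common::PageTable& page_table,
                       Kernel::PhysicalMemory& backing) {
        constexpr u64 page_size = 0x1000;
        constexpr VAddr base = 0x10000000; // page-aligned emulated address (made up)
        memory.MapMemoryRegion(page_table, base, page_size, backing, page_size);
    }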
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 142852082..729ee4a01 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -190,8 +190,11 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_stream_buffer.h
         renderer_vulkan/vk_swapchain.cpp
         renderer_vulkan/vk_swapchain.h
+        renderer_vulkan/vk_texture_cache.cpp
+        renderer_vulkan/vk_texture_cache.h
         renderer_vulkan/vk_update_descriptor.cpp
-        renderer_vulkan/vk_update_descriptor.h)
+        renderer_vulkan/vk_update_descriptor.h
+    )
 
     target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index a35e7a195..ee79260fc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1018,7 +1018,14 @@ public:
                 }
             } instanced_arrays;
 
-            INSERT_UNION_PADDING_WORDS(0x6);
+            INSERT_UNION_PADDING_WORDS(0x4);
+
+            union {
+                BitField<0, 1, u32> enable;
+                BitField<4, 8, u32> unk4;
+            } vp_point_size;
+
+            INSERT_UNION_PADDING_WORDS(1);
 
             Cull cull;
 
@@ -1271,8 +1278,6 @@ public:
 
     } dirty{};
 
-    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
-
     /// Reads a register value located at the input method address
     u32 GetRegisterValue(u32 method) const;
 
@@ -1367,6 +1372,8 @@ private:
 
     bool execute_on{true};
 
+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
@@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
 ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
 ASSERT_REG_POSITION(instanced_arrays, 0x620);
+ASSERT_REG_POSITION(vp_point_size, 0x644);
 ASSERT_REG_POSITION(cull, 0x646);
 ASSERT_REG_POSITION(pixel_center_integer, 0x649);
 ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 57b57c647..6f98bd827 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
     Trunc = 11,
 };
 
+enum class AtomicOp : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+};
+
 enum class UniformType : u64 {
     UnsignedByte = 0,
     SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
     Bits128 = 6,
 };
 
+enum class AtomicType : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    S64 = 3,
+};
+
 enum class IMinMaxExchange : u64 {
     None = 0,
     XLo = 1,
@@ -939,6 +958,16 @@ union Instruction {
     } stg;
 
     union {
+        BitField<52, 4, AtomicOp> operation;
+        BitField<28, 2, AtomicType> type;
+        BitField<30, 22, s64> offset;
+
+        s32 GetImmediateOffset() const {
+            return static_cast<s32>(offset << 2);
+        }
+    } atoms;
+
+    union {
         BitField<32, 1, PhysicalAttributeDirection> direction;
         BitField<47, 3, AttributeSize> size;
         BitField<20, 11, u64> address;
@@ -1659,9 +1688,10 @@ public:
         ST_A,
         ST_L,
         ST_S,
         ST,    // Store in generic memory
         STG,   // Store in global memory
-        AL2P, // Transforms attribute memory into physical memory
+        ATOMS, // Atomic operation on shared memory
+        AL2P,  // Transforms attribute memory into physical memory
         TEX,
         TEX_B, // Texture Load Bindless
         TXQ,   // Texture Query
@@ -1964,6 +1994,7 @@ private:
             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
             INST("101-------------", Id::ST, Type::Memory, "ST"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
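
For reference, the atoms union can be decoded by hand with shifts and masks; a standalone sketch mirroring what BitField<position, bits, type> does, including the sign extension that the s64 offset field implies (the example instruction word is arbitrary):

    #include <cstdint>
    #include <cstdio>

    // Sign-extend the low `bits` bits of value, as BitField does for signed types.
    std::int64_t SignExtend(std::uint64_t value, unsigned bits) {
        const std::uint64_t mask = (std::uint64_t{1} << bits) - 1;
        value &= mask;
        const std::uint64_t sign = std::uint64_t{1} << (bits - 1);
        return static_cast<std::int64_t>((value ^ sign) - sign);
    }

    int main() {
        const std::uint64_t insn = 0; // a 64-bit ATOMS opcode word would go here
        const auto type      = (insn >> 28) & 0x3; // AtomicType: U32, S32, U64, S64
        const auto operation = (insn >> 52) & 0xF; // AtomicOp: Add, Min, Max, ...
        // GetImmediateOffset(): a signed 22-bit word offset, shifted to bytes.
        const auto offset = SignExtend(insn >> 30, 22) << 2;
        std::printf("type=%llu op=%llu offset=%lld\n",
                    static_cast<unsigned long long>(type),
                    static_cast<unsigned long long>(operation),
                    static_cast<long long>(offset));
        return 0;
    }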
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 672051102..926bccd42 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
     const auto& regs = system.GPU().Maxwell3D().regs;
     // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
     // in OpenGL).
+    state.point.program_control = regs.vp_point_size.enable ? GL_TRUE : GL_FALSE;
     state.point.size = std::max(1.0f, regs.point_size);
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index de742d11c..a4acb3796 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR;
 
 namespace {
 
-// One UBO is always reserved for emulation values on staged shaders
-constexpr u32 STAGE_RESERVED_UBOS = 1;
-
 constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;
 
@@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
     if (!code_b.empty()) {
         ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
     }
-    const auto entries = GLShader::GetEntries(ir);
 
     std::string source = fmt::format(R"(// {}
 #version 430 core
@@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {
 
 CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
                            GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
-    : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
-      device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
-      shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
+    : RasterizerCacheObject{params.host_ptr}, system{params.system},
+      disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
+      unique_identifier{params.unique_identifier}, shader_type{shader_type},
+      entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
     if (!params.precompiled_variants) {
         return;
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f9f7a97b5..19751939a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1856,6 +1856,16 @@ private:
                 Type::Uint};
     }
 
+    template <const std::string_view& opname, Type type>
+    Expression Atomic(Operation operation) {
+        ASSERT(stage == ShaderType::Compute);
+        auto& smem = std::get<SmemNode>(*operation[0]);
+
+        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+                            Visit(operation[1]).As(type)),
+                type};
+    }
+
     Expression Branch(Operation operation) {
         const auto target = std::get_if<ImmediateNode>(&*operation[0]);
         UNIMPLEMENTED_IF(!target);
@@ -2194,6 +2204,8 @@ private:
         &GLSLDecompiler::AtomicImage<Func::Xor>,
         &GLSLDecompiler::AtomicImage<Func::Exchange>,
 
+        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+
         &GLSLDecompiler::Branch,
         &GLSLDecompiler::BranchIndirect,
         &GLSLDecompiler::PushFlowStack,
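
What the new Atomic<opname, type> handler emits is an ordinary GLSL atomic call on the decompiler's shared-memory array. A standalone sketch of the string construction (the operand strings here are invented stand-ins for the Visit(...) results):

    #include <cstdio>
    #include <string>
    #include <string_view>

    #include <fmt/format.h>

    int main() {
        constexpr std::string_view opname = "Add";      // from Func::Add
        const std::string address = "shared_mem_addr";  // hypothetical visited address
        const std::string value = "input_value";        // hypothetical visited operand
        // smem is the decompiler's uint[] shared-memory array; addresses are byte
        // offsets, hence the ">> 2" conversion to a 32-bit word index.
        const std::string glsl =
            fmt::format("atomic{}(smem[{} >> 2], {})", opname, address, value);
        std::printf("%s\n", glsl.c_str()); // atomicAdd(smem[shared_mem_addr >> 2], input_value)
        return 0;
    }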
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index df2e2395a..cc185e9e1 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
 }
 
 void OpenGLState::ApplyPointSize() {
+    Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
     if (UpdateValue(cur_state.point.size, point.size)) {
         glPointSize(point.size);
     }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index fb180f302..71d418776 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -131,7 +131,8 @@ public:
     std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
 
     struct {
-        float size = 1.0f; // GL_POINT_SIZE
+        GLboolean program_control = GL_FALSE; // GL_PROGRAM_POINT_SIZE
+        GLfloat size = 1.0f;                  // GL_POINT_SIZE
     } point;
 
     struct {
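
Stripped of OpenGLState's redundant-state caching, the new point-size state boils down to two raw GL calls; a sketch assuming a current OpenGL context:

    #include <glad/glad.h>

    void ApplyPointSizeRaw(GLboolean program_control, GLfloat size) {
        // With GL_PROGRAM_POINT_SIZE enabled, the vertex shader's gl_PointSize
        // output is honored; otherwise the fixed glPointSize value is used.
        if (program_control == GL_TRUE) {
            glEnable(GL_PROGRAM_POINT_SIZE);
        } else {
            glDisable(GL_PROGRAM_POINT_SIZE);
        }
        glPointSize(size);
    }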
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b790b0ef4..e95eb069e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {
 
 constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},       // ABGR8U
-    {GL_RGBA8, GL_RGBA, GL_BYTE, false},                           // ABGR8S
+    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false},                     // ABGR8S
     {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false},        // ABGR8UI
     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},       // B5G6R5U
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
     {GL_RGB32F, GL_RGB, GL_FLOAT, false},                           // RGB32F
     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
     {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false},                       // RG8U
-    {GL_RG8, GL_RG, GL_BYTE, false},                                // RG8S
+    {GL_RG8_SNORM, GL_RG, GL_BYTE, false},                          // RG8S
     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},             // RG32UI
-    {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false},                   // RGBX16F
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                     // RGBX16F
     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},             // R32UI
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                   // ASTC_2D_8X8
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                   // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
         glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
         glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
         const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+        u8* const mip_data = staging_buffer.data() + mip_offset;
+        const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
         if (is_compressed) {
-            glGetCompressedTextureImage(texture.handle, level,
-                                        static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                                        staging_buffer.data() + mip_offset);
+            glGetCompressedTextureImage(texture.handle, level, size, mip_data);
         } else {
-            glGetTextureImage(texture.handle, level, format, type,
-                              static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                              staging_buffer.data() + mip_offset);
+            glGetTextureImage(texture.handle, level, format, type, size, mip_data);
         }
     }
 }
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 9770dda1c..ac99e6385 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -6,16 +6,20 @@
 #include <vector>
 
 #include <fmt/format.h>
-
 #include <glad/glad.h>
 
-#include "common/assert.h"
 #include "common/common_types.h"
-#include "common/scope_exit.h"
 #include "video_core/renderer_opengl/utils.h"
 
 namespace OpenGL {
 
+struct VertexArrayPushBuffer::Entry {
+    GLuint binding_index{};
+    const GLuint* buffer{};
+    GLintptr offset{};
+    GLsizei stride{};
+};
+
 VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
 
 VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
@@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() {
     }
 }
 
+struct BindBuffersRangePushBuffer::Entry {
+    GLuint binding;
+    const GLuint* buffer;
+    GLintptr offset;
+    GLsizeiptr size;
+};
+
 BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
 
 BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index d56153fe7..3ad7c02d4 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -26,12 +26,7 @@ public:
     void Bind();
 
 private:
-    struct Entry {
-        GLuint binding_index{};
-        const GLuint* buffer{};
-        GLintptr offset{};
-        GLsizei stride{};
-    };
+    struct Entry;
 
     GLuint vao{};
     const GLuint* index_buffer{};
@@ -50,12 +45,7 @@ public:
     void Bind();
 
 private:
-    struct Entry {
-        GLuint binding;
-        const GLuint* buffer;
-        GLintptr offset;
-        GLsizeiptr size;
-    };
+    struct Entry;
 
     GLenum target;
     std::vector<Entry> entries;
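
Hiding Entry behind a forward declaration keeps the GL type details out of the header. It works because std::vector (along with list and forward_list) supports incomplete element types since C++17, as long as the type is complete wherever the vector is actually used. A minimal sketch of the pattern:

    #include <vector>

    class PushBuffer {
    public:
        PushBuffer();
        ~PushBuffer(); // must be defined where Entry is complete
    private:
        struct Entry;               // definition hidden in the source file
        std::vector<Entry> entries; // legal with an incomplete type since C++17
    };

    // In the .cpp, where Entry becomes complete:
    struct PushBuffer::Entry {
        int binding{};
    };
    PushBuffer::PushBuffer() = default;
    PushBuffer::~PushBuffer() = default;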
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 000e3616d..331808113 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
     return {};
 }
 
-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
                                 Tegra::Texture::TextureFilter filter) {
     switch (wrap_mode) {
     case Tegra::Texture::WrapMode::Wrap:
@@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
     case Tegra::Texture::WrapMode::Border:
         return vk::SamplerAddressMode::eClampToBorder;
     case Tegra::Texture::WrapMode::Clamp:
-        // TODO(Rodrigo): Emulate GL_CLAMP properly
+        if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
+            // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
+            // by sending an invalid enumeration.
+            return static_cast<vk::SamplerAddressMode>(0xcafe);
+        }
+        // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
         switch (filter) {
         case Tegra::Texture::TextureFilter::Nearest:
             return vk::SamplerAddressMode::eClampToEdge;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 1534b738b..7e9678b7b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);
 
 vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
 
-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
                                 Tegra::Texture::TextureFilter filter);
 
 vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 1ce583f75..0a8ec8398 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
         {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
         MaxwellToVK::Sampler::Filter(tsc.min_filter),
         MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
         has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
         MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
         tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 8fe852ce8..0cf97cafa 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1796,6 +1796,11 @@ private:
         return {};
     }
 
+    Expression UAtomicAdd(Operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
     Expression Branch(Operation operation) {
         const auto& target = std::get<ImmediateNode>(*operation[0]);
         OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -2373,6 +2378,8 @@ private:
         &SPIRVDecompiler::AtomicImageXor,
         &SPIRVDecompiler::AtomicImageExchange,
 
+        &SPIRVDecompiler::UAtomicAdd,
+
         &SPIRVDecompiler::Branch,
         &SPIRVDecompiler::BranchIndirect,
         &SPIRVDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 02310375f..4d9488f49 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -13,6 +13,7 @@
 
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
 
 namespace Vulkan {
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
new file mode 100644
index 000000000..51b0d38a6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -0,0 +1,475 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <variant>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/morton.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/surface.h"
+#include "video_core/textures/convert.h"
+
+namespace Vulkan {
+
+using VideoCore::MortonSwizzle;
+using VideoCore::MortonSwizzleMode;
+
+using Tegra::Texture::SwizzleSource;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::SurfaceCompression;
+using VideoCore::Surface::SurfaceTarget;
+
+namespace {
+
+vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture1DArray:
+        return vk::ImageType::e1D;
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        return vk::ImageType::e2D;
+    case SurfaceTarget::Texture3D:
+        return vk::ImageType::e3D;
+    }
+    UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
+    return {};
+}
+
+vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) {
+    if (pixel_format < PixelFormat::MaxColorFormat) {
+        return vk::ImageAspectFlagBits::eColor;
+    } else if (pixel_format < PixelFormat::MaxDepthFormat) {
+        return vk::ImageAspectFlagBits::eDepth;
+    } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
+        return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
+    } else {
+        UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format));
+        return vk::ImageAspectFlagBits::eColor;
+    }
+}
+
+vk::ImageViewType GetImageViewType(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+        return vk::ImageViewType::e1D;
+    case SurfaceTarget::Texture2D:
+        return vk::ImageViewType::e2D;
+    case SurfaceTarget::Texture3D:
+        return vk::ImageViewType::e3D;
+    case SurfaceTarget::Texture1DArray:
+        return vk::ImageViewType::e1DArray;
+    case SurfaceTarget::Texture2DArray:
+        return vk::ImageViewType::e2DArray;
+    case SurfaceTarget::TextureCubemap:
+        return vk::ImageViewType::eCube;
+    case SurfaceTarget::TextureCubeArray:
+        return vk::ImageViewType::eCubeArray;
+    case SurfaceTarget::TextureBuffer:
+        break;
+    }
+    UNREACHABLE();
+    return {};
+}
+
+UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
+    // TODO(Rodrigo): Move texture buffer creation to the buffer cache
+    const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(),
+                                         vk::BufferUsageFlagBits::eUniformTexelBuffer |
+                                             vk::BufferUsageFlagBits::eTransferSrc |
+                                             vk::BufferUsageFlagBits::eTransferDst,
+                                         vk::SharingMode::eExclusive, 0, nullptr);
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    return dev.createBufferUnique(buffer_ci, nullptr, dld);
+}
+
+vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
+                                                      const SurfaceParams& params,
+                                                      vk::Buffer buffer) {
+    ASSERT(params.IsBuffer());
+
+    const auto format =
+        MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
+    return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes());
+}
+
+vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
+    constexpr auto sample_count = vk::SampleCountFlagBits::e1;
+    constexpr auto tiling = vk::ImageTiling::eOptimal;
+
+    ASSERT(!params.IsBuffer());
+
+    const auto [format, attachable, storage] =
+        MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
+
+    auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst |
+                       vk::ImageUsageFlagBits::eTransferSrc;
+    if (attachable) {
+        image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment
+                                                  : vk::ImageUsageFlagBits::eColorAttachment;
+    }
+    if (storage) {
+        image_usage |= vk::ImageUsageFlagBits::eStorage;
+    }
+
+    vk::ImageCreateFlags flags;
+    vk::Extent3D extent;
+    switch (params.target) {
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        flags |= vk::ImageCreateFlagBits::eCubeCompatible;
+        [[fallthrough]];
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture2DArray:
+        extent = vk::Extent3D(params.width, params.height, 1);
+        break;
+    case SurfaceTarget::Texture3D:
+        extent = vk::Extent3D(params.width, params.height, params.depth);
+        break;
+    case SurfaceTarget::TextureBuffer:
+        UNREACHABLE();
+    }
+
+    return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent,
+                               params.num_levels, static_cast<u32>(params.GetNumLayers()),
+                               sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0,
+                               nullptr, vk::ImageLayout::eUndefined);
+}
+
+} // Anonymous namespace
+
+CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
+                             VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
+                             GPUVAddr gpu_addr, const SurfaceParams& params)
+    : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
+      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
+      staging_pool{staging_pool} {
+    if (params.IsBuffer()) {
+        buffer = CreateBuffer(device, params);
+        commit = memory_manager.Commit(*buffer, false);
+
+        const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
+        format = buffer_view_ci.format;
+
+        const auto dev = device.GetLogical();
+        const auto& dld = device.GetDispatchLoader();
+        buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld);
+    } else {
+        const auto image_ci = GenerateImageCreateInfo(device, params);
+        format = image_ci.format;
+
+        image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
+        commit = memory_manager.Commit(image->GetHandle(), false);
+    }
+
+    // TODO(Rodrigo): Move this to a virtual function.
+    main_view = CreateViewInner(
+        ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
+        true);
+}
+
+CachedSurface::~CachedSurface() = default;
+
+void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
+    // To upload data we have to be outside of a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    if (params.IsBuffer()) {
+        UploadBuffer(staging_buffer);
+    } else {
+        UploadImage(staging_buffer);
+    }
+}
+
+void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
+    UNIMPLEMENTED_IF(params.IsBuffer());
+
+    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+        LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
+    }
+
+    // We can't copy images to buffers inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
+                   vk::ImageLayout::eTransferSrcOptimal);
+
+    const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+    // TODO(Rodrigo): Do this in a single copy
+    for (u32 level = 0; level < params.num_levels; ++level) {
+        scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle,
+                          copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) {
+            cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy},
+                                     dld);
+        });
+    }
230 scheduler.Finish();
231
232    // TODO(Rodrigo): Use an internal buffer for staging buffers and avoid this unnecessary memcpy.
233 std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size);
234}
235
236void CachedSurface::DecorateSurfaceName() {
237 // TODO(Rodrigo): Add name decorations
238}
239
240View CachedSurface::CreateView(const ViewParams& params) {
241 return CreateViewInner(params, false);
242}
243
244View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
245 // TODO(Rodrigo): Add name decorations
246 return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
247}
248
249void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
250 const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
251 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
252
253 scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
254 size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
255 const vk::BufferCopy copy(0, 0, size);
256 cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
257
258 cmdbuf.pipelineBarrier(
259 vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {},
260 {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite,
261 vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)},
262 {}, dld);
263 });
264}
265
266void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
267 const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
268 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
269
270 FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite,
271 vk::ImageLayout::eTransferDstOptimal);
272
273 for (u32 level = 0; level < params.num_levels; ++level) {
274 vk::BufferImageCopy copy = GetBufferImageCopy(level);
275 const auto& dld = device.GetDispatchLoader();
276 if (image->GetAspectMask() ==
277 (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
278 vk::BufferImageCopy depth = copy;
279 vk::BufferImageCopy stencil = copy;
280 depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
281 stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
282 scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth,
283 stencil](auto cmdbuf, auto& dld) {
284 cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
285 {depth, stencil}, dld);
286 });
287 } else {
288 scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(),
289 copy](auto cmdbuf, auto& dld) {
290 cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
291 {copy}, dld);
292 });
293 }
294 }
295}
296
297vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
298 const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
299 const auto compression_type = params.GetCompressionType();
300 const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
301 ? params.GetConvertedMipmapOffset(level)
302 : params.GetHostMipmapLevelOffset(level);
303
304 return vk::BufferImageCopy(
305 mip_offset, 0, 0,
306 {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0},
307 {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth});
308}
309
310vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
311 return {image->GetAspectMask(), 0, params.num_levels, 0,
312 static_cast<u32>(params.GetNumLayers())};
313}
314
315CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
316 const ViewParams& params, bool is_proxy)
317 : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
318 image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
319 aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
320 base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
321 num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
322 : vk::ImageViewType{}} {}
323
324CachedSurfaceView::~CachedSurfaceView() = default;
325
326vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
327 SwizzleSource z_source, SwizzleSource w_source) {
328 const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
329 if (last_image_view && last_swizzle == swizzle) {
330 return last_image_view;
331 }
332 last_swizzle = swizzle;
333
334 const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle);
335 auto& image_view = entry->second;
336 if (!is_cache_miss) {
337 return last_image_view = *image_view;
338 }
339
340 auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
341 auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
342 auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
343 auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
344
345 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
346        // A1B5G5R5 is implemented as A1R5G5B5, so we have to swap the red and blue swizzles here.
347 std::swap(swizzle_x, swizzle_z);
348 }
349
350    // Games can sample either the depth or the stencil value of a texture; on hardware this is
351    // selected by the swizzle value. To emulate it on Vulkan we encode the selection in the aspect.
352 vk::ImageAspectFlags aspect = aspect_mask;
353 if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
354 UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
355 const bool is_first = x_source == SwizzleSource::R;
356 switch (params.pixel_format) {
357 case VideoCore::Surface::PixelFormat::Z24S8:
358 case VideoCore::Surface::PixelFormat::Z32FS8:
359 aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil;
360 break;
361 case VideoCore::Surface::PixelFormat::S8Z24:
362 aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth;
363 break;
364 default:
365 aspect = vk::ImageAspectFlagBits::eDepth;
366 UNIMPLEMENTED();
367 }
368
369        // Vulkan doesn't seem to support swizzling a depth-stencil image, so use the identity mapping.
370 swizzle_x = vk::ComponentSwizzle::eR;
371 swizzle_y = vk::ComponentSwizzle::eG;
372 swizzle_z = vk::ComponentSwizzle::eB;
373 swizzle_w = vk::ComponentSwizzle::eA;
374 }
375
376 const vk::ImageViewCreateInfo image_view_ci(
377 {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(),
378 {swizzle_x, swizzle_y, swizzle_z, swizzle_w},
379 {aspect, base_level, num_levels, base_layer, num_layers});
380
381 const auto dev = device.GetLogical();
382 image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader());
383 return last_image_view = *image_view;
384}
385
386VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
387 const VKDevice& device, VKResourceManager& resource_manager,
388 VKMemoryManager& memory_manager, VKScheduler& scheduler,
389 VKStagingBufferPool& staging_pool)
390 : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
391 memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
392
393VKTextureCache::~VKTextureCache() = default;
394
395Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
396 return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
397 scheduler, staging_pool, gpu_addr, params);
398}
399
400void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
401 const VideoCommon::CopyParams& copy_params) {
402 const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
403 const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
404 UNIMPLEMENTED_IF(src_3d);
405
406    // The texture cache handles depth in OpenGL terms; in Vulkan it maps to either a subresource
407    // (array layers) or an image dimension (3D), respectively.
408 const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z;
409 const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0;
410
411 const u32 extent_z = dst_3d ? copy_params.depth : 1;
412 const u32 num_layers = dst_3d ? 1 : copy_params.depth;
413
414 // We can't copy inside a renderpass
415 scheduler.RequestOutsideRenderPassOperationContext();
416
417 src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
418 vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
419 vk::ImageLayout::eTransferSrcOptimal);
420 dst_surface->Transition(
421 dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
422 vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
423
424 const auto& dld{device.GetDispatchLoader()};
425 const vk::ImageSubresourceLayers src_subresource(
426 src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
427 const vk::ImageSubresourceLayers dst_subresource(
428 dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers);
429 const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0);
430 const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z);
431 const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z);
432 const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent);
433 const vk::Image src_image = src_surface->GetImageHandle();
434 const vk::Image dst_image = dst_surface->GetImageHandle();
435 scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) {
436 cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
437 vk::ImageLayout::eTransferDstOptimal, {copy}, dld);
438 });
439}
440
441void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
442 const Tegra::Engines::Fermi2D::Config& copy_config) {
443 // We can't blit inside a renderpass
444 scheduler.RequestOutsideRenderPassOperationContext();
445
446 src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer,
447 vk::AccessFlagBits::eTransferRead);
448 dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer,
449 vk::AccessFlagBits::eTransferWrite);
450
451 const auto& cfg = copy_config;
452 const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0);
453 const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1);
454 const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0);
455 const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1);
456 const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right},
457 dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
458 const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
459
460 const auto& dld{device.GetDispatchLoader()};
461 scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
462 is_linear](auto cmdbuf, auto& dld) {
463 cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
464 vk::ImageLayout::eTransferDstOptimal, {blit},
465 is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld);
466 });
467}
468
469void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
470 // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
471    // convert from color to depth and vice versa.
472 LOG_WARNING(Render_Vulkan, "Unimplemented");
473}
474
475} // namespace Vulkan
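
The aspect classification in PixelFormatToImageAspect above relies on the PixelFormat enum
being laid out as contiguous [color | depth | depth-stencil] ranges separated by Max*Format
sentinels, so two ordered comparisons classify any format. A minimal sketch of that sentinel
idiom, with made-up enumerators standing in for the real format list:

#include <cassert>

enum class PixelFormat { RGBA8, BC1, MaxColorFormat, Z32F, MaxDepthFormat, Z24S8, MaxDepthStencilFormat };
enum class Aspect { Color, Depth, DepthStencil };

Aspect ClassifyAspect(PixelFormat format) {
    if (format < PixelFormat::MaxColorFormat) {
        return Aspect::Color; // everything before the first sentinel is a color format
    }
    if (format < PixelFormat::MaxDepthFormat) {
        return Aspect::Depth; // depth-only formats sit between the first two sentinels
    }
    assert(format < PixelFormat::MaxDepthStencilFormat);
    return Aspect::DepthStencil;
}

The idiom is compact but ordering-fragile: adding a format on the wrong side of a sentinel
silently changes its aspect, which is why the real function ends in UNREACHABLE_MSG.
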
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
new file mode 100644
index 000000000..d3edbe80c
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -0,0 +1,239 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "common/math_util.h"
14#include "video_core/gpu.h"
15#include "video_core/rasterizer_cache.h"
16#include "video_core/renderer_vulkan/declarations.h"
17#include "video_core/renderer_vulkan/vk_image.h"
18#include "video_core/renderer_vulkan/vk_memory_manager.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/texture_cache/surface_base.h"
21#include "video_core/texture_cache/texture_cache.h"
22#include "video_core/textures/decoders.h"
23
24namespace Core {
25class System;
26}
27
28namespace VideoCore {
29class RasterizerInterface;
30}
31
32namespace Vulkan {
33
34class RasterizerVulkan;
35class VKDevice;
36class VKResourceManager;
37class VKScheduler;
38class VKStagingBufferPool;
39
40class CachedSurfaceView;
41class CachedSurface;
42
43using Surface = std::shared_ptr<CachedSurface>;
44using View = std::shared_ptr<CachedSurfaceView>;
45using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
46
47using VideoCommon::SurfaceParams;
48using VideoCommon::ViewParams;
49
50class CachedSurface final : public VideoCommon::SurfaceBase<View> {
51 friend CachedSurfaceView;
52
53public:
54 explicit CachedSurface(Core::System& system, const VKDevice& device,
55 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
56 VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
57 GPUVAddr gpu_addr, const SurfaceParams& params);
58 ~CachedSurface();
59
60 void UploadTexture(const std::vector<u8>& staging_buffer) override;
61 void DownloadTexture(std::vector<u8>& staging_buffer) override;
62
63 void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
64 vk::ImageLayout new_layout) {
65 image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
66 new_stage_mask, new_access, new_layout);
67 }
68
69 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
70 vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
71 vk::ImageLayout new_layout) {
72 image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
73 new_access, new_layout);
74 }
75
76 VKImage& GetImage() {
77 return *image;
78 }
79
80 const VKImage& GetImage() const {
81 return *image;
82 }
83
84 vk::Image GetImageHandle() const {
85 return image->GetHandle();
86 }
87
88 vk::ImageAspectFlags GetAspectMask() const {
89 return image->GetAspectMask();
90 }
91
92 vk::BufferView GetBufferViewHandle() const {
93 return *buffer_view;
94 }
95
96protected:
97 void DecorateSurfaceName();
98
99 View CreateView(const ViewParams& params) override;
100 View CreateViewInner(const ViewParams& params, bool is_proxy);
101
102private:
103 void UploadBuffer(const std::vector<u8>& staging_buffer);
104
105 void UploadImage(const std::vector<u8>& staging_buffer);
106
107 vk::BufferImageCopy GetBufferImageCopy(u32 level) const;
108
109 vk::ImageSubresourceRange GetImageSubresourceRange() const;
110
111 Core::System& system;
112 const VKDevice& device;
113 VKResourceManager& resource_manager;
114 VKMemoryManager& memory_manager;
115 VKScheduler& scheduler;
116 VKStagingBufferPool& staging_pool;
117
118 std::optional<VKImage> image;
119 UniqueBuffer buffer;
120 UniqueBufferView buffer_view;
121 VKMemoryCommit commit;
122
123 vk::Format format;
124};
125
126class CachedSurfaceView final : public VideoCommon::ViewBase {
127public:
128 explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
129 const ViewParams& params, bool is_proxy);
130 ~CachedSurfaceView();
131
132 vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
133 Tegra::Texture::SwizzleSource y_source,
134 Tegra::Texture::SwizzleSource z_source,
135 Tegra::Texture::SwizzleSource w_source);
136
137 bool IsSameSurface(const CachedSurfaceView& rhs) const {
138 return &surface == &rhs.surface;
139 }
140
141 vk::ImageView GetHandle() {
142 return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
143 Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
144 }
145
146 u32 GetWidth() const {
147 return params.GetMipWidth(base_level);
148 }
149
150 u32 GetHeight() const {
151 return params.GetMipHeight(base_level);
152 }
153
154 bool IsBufferView() const {
155 return buffer_view;
156 }
157
158 vk::Image GetImage() const {
159 return image;
160 }
161
162 vk::BufferView GetBufferView() const {
163 return buffer_view;
164 }
165
166 vk::ImageSubresourceRange GetImageSubresourceRange() const {
167 return {aspect_mask, base_level, num_levels, base_layer, num_layers};
168 }
169
170 vk::ImageSubresourceLayers GetImageSubresourceLayers() const {
171 return {surface.GetAspectMask(), base_level, base_layer, num_layers};
172 }
173
174 void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask,
175 vk::AccessFlags new_access) const {
176 surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
177 new_access, new_layout);
178 }
179
180 void MarkAsModified(u64 tick) {
181 surface.MarkAsModified(true, tick);
182 }
183
184private:
185 static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
186 Tegra::Texture::SwizzleSource y_source,
187 Tegra::Texture::SwizzleSource z_source,
188 Tegra::Texture::SwizzleSource w_source) {
189 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
190 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
191 }
192
193    // Store a copy of these values to avoid a double dereference when reading them
194 const SurfaceParams params;
195 const vk::Image image;
196 const vk::BufferView buffer_view;
197 const vk::ImageAspectFlags aspect_mask;
198
199 const VKDevice& device;
200 CachedSurface& surface;
201 const u32 base_layer;
202 const u32 num_layers;
203 const u32 base_level;
204 const u32 num_levels;
205 const vk::ImageViewType image_view_type;
206
207 vk::ImageView last_image_view;
208 u32 last_swizzle{};
209
210 std::unordered_map<u32, UniqueImageView> view_cache;
211};
212
213class VKTextureCache final : public TextureCacheBase {
214public:
215 explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
216 const VKDevice& device, VKResourceManager& resource_manager,
217 VKMemoryManager& memory_manager, VKScheduler& scheduler,
218 VKStagingBufferPool& staging_pool);
219 ~VKTextureCache();
220
221private:
222 Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
223
224 void ImageCopy(Surface& src_surface, Surface& dst_surface,
225 const VideoCommon::CopyParams& copy_params) override;
226
227 void ImageBlit(View& src_view, View& dst_view,
228 const Tegra::Engines::Fermi2D::Config& copy_config) override;
229
230 void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
231
232 const VKDevice& device;
233 VKResourceManager& resource_manager;
234 VKMemoryManager& memory_manager;
235 VKScheduler& scheduler;
236 VKStagingBufferPool& staging_pool;
237};
238
239} // namespace Vulkan
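
EncodeSwizzle and view_cache above form a small memoization scheme: the four swizzle sources
are packed into a single u32 key, and each distinct swizzle creates its vk::ImageView at most
once. A self-contained sketch of the same pattern, where the hypothetical MakeView() stands
in for the createImageViewUnique call:

#include <cstdint>
#include <unordered_map>

using u32 = std::uint32_t;
enum class SwizzleSource : u32 { Zero = 0, R = 2, G = 3, B = 4, A = 5 };

struct FakeView {
    u32 id{}; // stands in for a UniqueImageView
};

constexpr u32 EncodeSwizzle(SwizzleSource x, SwizzleSource y, SwizzleSource z, SwizzleSource w) {
    return (static_cast<u32>(x) << 24) | (static_cast<u32>(y) << 16) |
           (static_cast<u32>(z) << 8) | static_cast<u32>(w);
}

FakeView MakeView(u32 key) { // hypothetical stand-in for Vulkan view creation
    return FakeView{key};
}

const FakeView& GetHandle(std::unordered_map<u32, FakeView>& cache, SwizzleSource x,
                          SwizzleSource y, SwizzleSource z, SwizzleSource w) {
    const u32 key = EncodeSwizzle(x, y, z, w);
    const auto [entry, is_cache_miss] = cache.try_emplace(key);
    if (is_cache_miss) {
        entry->second = MakeView(key); // created once, reused on every later lookup
    }
    return entry->second;
}

The last_image_view/last_swizzle members in the real class add a one-entry fast path in front
of the map, since consecutive lookups usually ask for the same swizzle.
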
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index b427ac873..0229733b6 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -65,7 +65,7 @@ struct BlockInfo {
65 65
66struct CFGRebuildState { 66struct CFGRebuildState {
67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) 67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
68 : program_code{program_code}, start{start}, locker{locker} {} 68 : program_code{program_code}, locker{locker}, start{start} {}
69 69
70 const ProgramCode& program_code; 70 const ProgramCode& program_code;
71 ConstBufferLocker& locker; 71 ConstBufferLocker& locker;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index c934d0719..7591a715f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -6,6 +6,7 @@
6#include <vector> 6#include <vector>
7#include <fmt/format.h> 7#include <fmt/format.h>
8 8
9#include "common/alignment.h"
9#include "common/assert.h" 10#include "common/assert.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "common/logging/log.h" 12#include "common/logging/log.h"
@@ -15,6 +16,8 @@
15 16
16namespace VideoCommon::Shader { 17namespace VideoCommon::Shader {
17 18
19using Tegra::Shader::AtomicOp;
20using Tegra::Shader::AtomicType;
18using Tegra::Shader::Attribute; 21using Tegra::Shader::Attribute;
19using Tegra::Shader::Instruction; 22using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode; 23using Tegra::Shader::OpCode;
@@ -22,34 +25,39 @@ using Tegra::Shader::Register;
22 25
23namespace { 26namespace {
24 27
25u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { 28bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
29 return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
30 uniform_type == Tegra::Shader::UniformType::UnsignedShort;
31}
32
33u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
26 switch (uniform_type) { 34 switch (uniform_type) {
27 case Tegra::Shader::UniformType::UnsignedByte: 35 case Tegra::Shader::UniformType::UnsignedByte:
28 case Tegra::Shader::UniformType::Single: 36 return 0b11;
29 return 1; 37 case Tegra::Shader::UniformType::UnsignedShort:
30 case Tegra::Shader::UniformType::Double: 38 return 0b10;
31 return 2;
32 case Tegra::Shader::UniformType::Quad:
33 case Tegra::Shader::UniformType::UnsignedQuad:
34 return 4;
35 default: 39 default:
36 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); 40 UNREACHABLE();
37 return 1; 41 return 0;
38 } 42 }
39} 43}
40 44
41u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { 45u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
42 switch (uniform_type) { 46 switch (uniform_type) {
47 case Tegra::Shader::UniformType::UnsignedByte:
48 return 8;
49 case Tegra::Shader::UniformType::UnsignedShort:
50 return 16;
43 case Tegra::Shader::UniformType::Single: 51 case Tegra::Shader::UniformType::Single:
44 return 1; 52 return 32;
45 case Tegra::Shader::UniformType::Double: 53 case Tegra::Shader::UniformType::Double:
46 return 2; 54 return 64;
47 case Tegra::Shader::UniformType::Quad: 55 case Tegra::Shader::UniformType::Quad:
48 case Tegra::Shader::UniformType::UnsignedQuad: 56 case Tegra::Shader::UniformType::UnsignedQuad:
49 return 4; 57 return 128;
50 default: 58 default:
51 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); 59 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
52 return 1; 60 return 32;
53 } 61 }
54} 62}
55 63
@@ -184,9 +192,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
184 }(); 192 }();
185 193
186 const auto [real_address_base, base_address, descriptor] = 194 const auto [real_address_base, base_address, descriptor] =
187 TrackGlobalMemory(bb, instr, false); 195 TrackGlobalMemory(bb, instr, true, false);
188 196
189 const u32 count = GetLdgMemorySize(type); 197 const u32 size = GetMemorySize(type);
198 const u32 count = Common::AlignUp(size, 32) / 32;
190 if (!real_address_base || !base_address) { 199 if (!real_address_base || !base_address) {
191 // Tracking failed, load zeroes. 200 // Tracking failed, load zeroes.
192 for (u32 i = 0; i < count; ++i) { 201 for (u32 i = 0; i < count; ++i) {
@@ -200,14 +209,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
200 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); 209 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
201 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 210 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
202 211
203 if (type == Tegra::Shader::UniformType::UnsignedByte) { 212 // To handle unaligned loads get the bytes used to dereference global memory and extract
204 // To handle unaligned loads get the byte used to dereferenced global memory 213 // those bytes from the loaded u32.
205 // and extract that byte from the loaded uint32. 214 if (IsUnaligned(type)) {
206 Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); 215 Node mask = Immediate(GetUnalignedMask(type));
207 byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); 216 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
217 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
208 218
209 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), 219 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
210 Immediate(8)); 220 std::move(offset), Immediate(size));
211 } 221 }
212 222
213 SetTemporary(bb, i, gmem); 223 SetTemporary(bb, i, gmem);
@@ -295,23 +305,53 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
295 } 305 }
296 }(); 306 }();
297 307
 308 // For unaligned stores we have to read memory too (read-modify-write).
309 const bool is_read = IsUnaligned(type);
298 const auto [real_address_base, base_address, descriptor] = 310 const auto [real_address_base, base_address, descriptor] =
299 TrackGlobalMemory(bb, instr, true); 311 TrackGlobalMemory(bb, instr, is_read, true);
300 if (!real_address_base || !base_address) { 312 if (!real_address_base || !base_address) {
301 // Tracking failed, skip the store. 313 // Tracking failed, skip the store.
302 break; 314 break;
303 } 315 }
304 316
305 const u32 count = GetStgMemorySize(type); 317 const u32 size = GetMemorySize(type);
318 const u32 count = Common::AlignUp(size, 32) / 32;
306 for (u32 i = 0; i < count; ++i) { 319 for (u32 i = 0; i < count; ++i) {
307 const Node it_offset = Immediate(i * 4); 320 const Node it_offset = Immediate(i * 4);
308 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); 321 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
309 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 322 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
310 const Node value = GetRegister(instr.gpr0.Value() + i); 323 Node value = GetRegister(instr.gpr0.Value() + i);
324
325 if (IsUnaligned(type)) {
326 Node mask = Immediate(GetUnalignedMask(type));
327 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
328 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
329
330 value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
331 Immediate(size));
332 }
333
311 bb.push_back(Operation(OperationCode::Assign, gmem, value)); 334 bb.push_back(Operation(OperationCode::Assign, gmem, value));
312 } 335 }
313 break; 336 break;
314 } 337 }
338 case OpCode::Id::ATOMS: {
339 UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
340 static_cast<int>(instr.atoms.operation.Value()));
341 UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
342 static_cast<int>(instr.atoms.type.Value()));
343
344 const s32 offset = instr.atoms.GetImmediateOffset();
345 Node address = GetRegister(instr.gpr8);
346 address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
347
348 Node memory = GetSharedMemory(std::move(address));
349 Node data = GetRegister(instr.gpr20);
350
351 Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
352 SetRegister(bb, instr.gpr0, std::move(value));
353 break;
354 }
315 case OpCode::Id::AL2P: { 355 case OpCode::Id::AL2P: {
316 // Ignore al2p.direction since we don't care about it. 356 // Ignore al2p.direction since we don't care about it.
317 357
@@ -336,7 +376,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
336 376
337std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, 377std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
338 Instruction instr, 378 Instruction instr,
339 bool is_write) { 379 bool is_read, bool is_write) {
340 const auto addr_register{GetRegister(instr.gmem.gpr)}; 380 const auto addr_register{GetRegister(instr.gmem.gpr)};
341 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 381 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
342 382
@@ -351,11 +391,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
351 const GlobalMemoryBase descriptor{index, offset}; 391 const GlobalMemoryBase descriptor{index, offset};
352 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 392 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
353 auto& usage = entry->second; 393 auto& usage = entry->second;
354 if (is_write) { 394 usage.is_written |= is_write;
355 usage.is_written = true; 395 usage.is_read |= is_read;
356 } else {
357 usage.is_read = true;
358 }
359 396
360 const auto real_address = 397 const auto real_address =
361 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); 398 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
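
The reworked LDG/STG path emulates 8-bit and 16-bit global accesses on top of aligned 32-bit
words: GetUnalignedMask yields the low address bits that select the sub-word, multiplying by
8 turns that into a bit offset, and a bitfield extract (loads) or insert (stores) does the
rest. The same arithmetic as plain host code, assuming the word holds little-endian data:

#include <cstdint>

using u32 = std::uint32_t;

// size is 8 (UnsignedByte) or 16 (UnsignedShort); mask is 0b11 or 0b10 respectively.
u32 ExtractUnaligned(u32 word, u32 address, u32 mask, u32 size) {
    const u32 offset = (address & mask) * 8; // byte offset within the word, in bits
    return (word >> offset) & ((1u << size) - 1u);
}

u32 InsertUnaligned(u32 word, u32 value, u32 address, u32 mask, u32 size) {
    const u32 offset = (address & mask) * 8;
    const u32 field = ((1u << size) - 1u) << offset;
    return (word & ~field) | ((value << offset) & field); // read-modify-write of the word
}

This is also why TrackGlobalMemory now takes is_read and is_write separately: an unaligned
store marks the region as read as well, since it must fetch the surrounding word before
inserting into it.
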
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 4b14cdf58..cd984f763 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
794 794
795std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, 795std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
796 bool is_tld4) { 796 bool is_tld4) {
797 const auto [coord_offsets, size, wrap_value, 797 const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
798 diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { 798 const u32 size = is_tld4 ? 6 : 4;
799 if (is_tld4) { 799 const s32 wrap_value = is_tld4 ? 32 : 8;
800 return {{0, 8, 16}, 6, 32, 64}; 800 const s32 diff_value = is_tld4 ? 64 : 16;
801 } else {
802 return {{0, 4, 8}, 4, 8, 16};
803 }
804 }();
805 const u32 mask = (1U << size) - 1; 801 const u32 mask = (1U << size) - 1;
806 802
807 std::vector<Node> aoffi; 803 std::vector<Node> aoffi;
@@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
814 LOG_WARNING(HW_GPU, 810 LOG_WARNING(HW_GPU,
815 "AOFFI constant folding failed, some hardware might have graphical issues"); 811 "AOFFI constant folding failed, some hardware might have graphical issues");
816 for (std::size_t coord = 0; coord < coord_count; ++coord) { 812 for (std::size_t coord = 0; coord < coord_count; ++coord) {
817 const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); 813 const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
818 const Node condition = 814 const Node condition =
819 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); 815 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
820 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); 816 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
@@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
824 } 820 }
825 821
826 for (std::size_t coord = 0; coord < coord_count; ++coord) { 822 for (std::size_t coord = 0; coord < coord_count; ++coord) {
827 s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; 823 s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
828 if (value >= wrap_value) { 824 if (value >= wrap_value) {
829 value -= diff_value; 825 value -= diff_value;
830 } 826 }
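
The AOFFI rewrite keeps the decode itself unchanged: each texel offset is a two's-complement
bitfield (4 bits per component, or 6 bits for TLD4), so an extracted value at or above
wrap_value (half the field's range) has diff_value (the full range) subtracted, i.e. a manual
sign extension. A standalone sketch under those same field widths:

#include <cstdint>

// Decode one signed AOFFI component; fields are 6 bits wide at offsets {0, 8, 16} for TLD4,
// otherwise 4 bits wide at offsets {0, 4, 8}.
std::int32_t DecodeAoffiComponent(std::uint32_t packed, unsigned bit_offset, bool is_tld4) {
    const unsigned size = is_tld4 ? 6 : 4;
    const std::int32_t wrap_value = is_tld4 ? 32 : 8;  // half of the field's range
    const std::int32_t diff_value = is_tld4 ? 64 : 16; // the field's full range
    auto value = static_cast<std::int32_t>((packed >> bit_offset) & ((1u << size) - 1u));
    if (value >= wrap_value) {
        value -= diff_value; // two's-complement sign extension by hand
    }
    return value;
}

For example, a 4-bit field holding 0b1111 extracts as 15; since 15 >= 8 the decoded offset is
15 - 16 = -1.
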
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4e155542a..075c7d07c 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,6 +162,8 @@ enum class OperationCode {
162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void 162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void 163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
164 164
165 UAtomicAdd, /// (smem, uint) -> uint
166
165 Branch, /// (uint branch_target) -> void 167 Branch, /// (uint branch_target) -> void
166 BranchIndirect, /// (uint branch_target) -> void 168 BranchIndirect, /// (uint branch_target) -> void
167 PushFlowStack, /// (uint branch_target) -> void 169 PushFlowStack, /// (uint branch_target) -> void
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index aacd0a0da..ba1db4c11 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -394,7 +394,7 @@ private:
394 394
395 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, 395 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
396 Tegra::Shader::Instruction instr, 396 Tegra::Shader::Instruction instr,
397 bool is_write); 397 bool is_read, bool is_write);
398 398
399 /// Register new amending code and obtain the reference id. 399 /// Register new amending code and obtain the reference id.
400 std::size_t DeclareAmend(Node new_amend); 400 std::size_t DeclareAmend(Node new_amend);
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 271e67533..81fb9f633 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -95,7 +95,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
95 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, 95 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
96 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, 96 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
97 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, 97 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
98 {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8}, 98 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
99 99
100 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, 100 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
101 {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, 101 {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 992b5c022..9256fd6d9 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -209,6 +209,11 @@ public:
209 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; 209 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
210 } 210 }
211 211
212 /// Returns the number of layers in the surface.
213 std::size_t GetNumLayers() const {
214 return is_layered ? depth : 1;
215 }
216
212 /// Returns the debug name of the texture for use in graphic debuggers. 217 /// Returns the debug name of the texture for use in graphic debuggers.
213 std::string TargetName() const; 218 std::string TargetName() const;
214 219
@@ -287,10 +292,6 @@ private:
287 /// Returns the size of a layer 292 /// Returns the size of a layer
288 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; 293 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
289 294
290 std::size_t GetNumLayers() const {
291 return is_layered ? depth : 1;
292 }
293
294 /// Returns true if these parameters are from a layered surface. 295 /// Returns true if these parameters are from a layered surface.
295 bool IsLayered() const; 296 bool IsLayered() const;
296}; 297};
diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp
index 3ea0b8d67..fa9052136 100644
--- a/src/yuzu/configuration/configure_hotkeys.cpp
+++ b/src/yuzu/configuration/configure_hotkeys.cpp
@@ -48,6 +48,7 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
48 } 48 }
49 49
50 ui->hotkey_list->expandAll(); 50 ui->hotkey_list->expandAll();
51 ui->hotkey_list->resizeColumnToContents(0);
51} 52}
52 53
53void ConfigureHotkeys::changeEvent(QEvent* event) { 54void ConfigureHotkeys::changeEvent(QEvent* event) {
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 581a10ddc..a2c9e4547 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -15,7 +15,7 @@
15 </property> 15 </property>
16 <property name="windowIcon"> 16 <property name="windowIcon">
17 <iconset> 17 <iconset>
18 <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset> 18 <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
19 </property> 19 </property>
20 <property name="tabShape"> 20 <property name="tabShape">
21 <enum>QTabWidget::Rounded</enum> 21 <enum>QTabWidget::Rounded</enum>