21 files changed, 834 insertions, 150 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 21e03ae98..fa7ae835f 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -5,6 +5,10 @@ function(get_timestamp _var)
 endfunction()
 
 list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
+
+# Find the package here with the known path so that the GetGit commands can find it as well
+find_package(Git QUIET PATHS "${GIT_EXECUTABLE}")
+
 # generate git/build information
 include(GetGitRevisionDescription)
 get_git_head_revision(GIT_REF_SPEC GIT_REV)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9b0c3db68..9afc6105d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -15,6 +15,10 @@ endif ()
 if (DEFINED ENV{DISPLAYVERSION})
     set(DISPLAY_VERSION $ENV{DISPLAYVERSION})
 endif ()
+
+# Pass the path to git to the GenerateSCMRev.cmake as well
+find_package(Git QUIET)
+
 add_custom_command(OUTPUT scm_rev.cpp
     COMMAND ${CMAKE_COMMAND}
       -DSRC_DIR="${CMAKE_SOURCE_DIR}"
@@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp
       -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}"
      -DBUILD_TAG="${BUILD_TAG}"
      -DBUILD_ID="${DISPLAY_VERSION}"
+      -DGIT_EXECUTABLE="${GIT_EXECUTABLE}"
      -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
    DEPENDS
      # WARNING! It was too much work to try and make a common location for this list,
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index f53a8d193..200c6489a 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -44,20 +44,6 @@ template class Field<std::string>;
 template class Field<const char*>;
 template class Field<std::chrono::microseconds>;
 
-#ifdef ARCHITECTURE_x86_64
-static const char* CpuVendorToStr(Common::CPUVendor vendor) {
-    switch (vendor) {
-    case Common::CPUVendor::INTEL:
-        return "Intel";
-    case Common::CPUVendor::AMD:
-        return "Amd";
-    case Common::CPUVendor::OTHER:
-        return "Other";
-    }
-    UNREACHABLE();
-}
-#endif
-
 void AppendBuildInfo(FieldCollection& fc) {
     const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
     fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
     fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
     fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
     fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 2dfcd39c8..c9349a6b4 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -3,8 +3,6 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
-#include <string>
-#include <thread>
 #include "common/common_types.h"
 #include "common/x64/cpu_detect.h"
 
@@ -51,8 +49,6 @@ namespace Common {
 static CPUCaps Detect() {
     CPUCaps caps = {};
 
-    caps.num_cores = std::thread::hardware_concurrency();
-
     // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
     // yuzu at all anyway
 
@@ -70,12 +66,6 @@ static CPUCaps Detect() {
     __cpuid(cpu_id, 0x80000000);
 
     u32 max_ex_fn = cpu_id[0];
-    if (!strcmp(caps.brand_string, "GenuineIntel"))
-        caps.vendor = CPUVendor::INTEL;
-    else if (!strcmp(caps.brand_string, "AuthenticAMD"))
-        caps.vendor = CPUVendor::AMD;
-    else
-        caps.vendor = CPUVendor::OTHER;
 
     // Set reasonable default brand string even if brand string not available
     strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
         caps.sse4_1 = true;
     if ((cpu_id[2] >> 20) & 1)
         caps.sse4_2 = true;
-    if ((cpu_id[2] >> 22) & 1)
-        caps.movbe = true;
     if ((cpu_id[2] >> 25) & 1)
         caps.aes = true;
 
-    if ((cpu_id[3] >> 24) & 1) {
-        caps.fxsave_fxrstor = true;
-    }
-
     // AVX support requires 3 separate checks:
     // - Is the AVX bit set in CPUID?
     // - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
         }
     }
 
-    caps.flush_to_zero = caps.sse;
-
     if (max_ex_fn >= 0x80000004) {
         // Extract CPU model string
         __cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
     if (max_ex_fn >= 0x80000001) {
         // Check for more features
         __cpuid(cpu_id, 0x80000001);
-        if (cpu_id[2] & 1)
-            caps.lahf_sahf_64 = true;
-        if ((cpu_id[2] >> 5) & 1)
-            caps.lzcnt = true;
         if ((cpu_id[2] >> 16) & 1)
             caps.fma4 = true;
-        if ((cpu_id[3] >> 29) & 1)
-            caps.long_mode = true;
     }
 
     return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
     return caps;
 }
 
-std::string GetCPUCapsString() {
-    auto caps = GetCPUCaps();
-
-    std::string sum(caps.cpu_string);
-    sum += " (";
-    sum += caps.brand_string;
-    sum += ")";
-
-    if (caps.sse)
-        sum += ", SSE";
-    if (caps.sse2) {
-        sum += ", SSE2";
-        if (!caps.flush_to_zero)
-            sum += " (without DAZ)";
-    }
-
-    if (caps.sse3)
-        sum += ", SSE3";
-    if (caps.ssse3)
-        sum += ", SSSE3";
-    if (caps.sse4_1)
-        sum += ", SSE4.1";
-    if (caps.sse4_2)
-        sum += ", SSE4.2";
-    if (caps.avx)
-        sum += ", AVX";
-    if (caps.avx2)
-        sum += ", AVX2";
-    if (caps.bmi1)
-        sum += ", BMI1";
-    if (caps.bmi2)
-        sum += ", BMI2";
-    if (caps.fma)
-        sum += ", FMA";
-    if (caps.aes)
-        sum += ", AES";
-    if (caps.movbe)
-        sum += ", MOVBE";
-    if (caps.long_mode)
-        sum += ", 64-bit support";
-
-    return sum;
-}
-
 } // namespace Common
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 0af3a8adb..20f2ba234 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -4,23 +4,12 @@
 
 #pragma once
 
-#include <string>
-
 namespace Common {
 
-/// x86/x64 CPU vendors that may be detected by this module
-enum class CPUVendor {
-    INTEL,
-    AMD,
-    OTHER,
-};
-
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
-    CPUVendor vendor;
     char cpu_string[0x21];
     char brand_string[0x41];
-    int num_cores;
     bool sse;
     bool sse2;
     bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
     bool fma;
     bool fma4;
     bool aes;
-
-    // Support for the FXSAVE and FXRSTOR instructions
-    bool fxsave_fxrstor;
-
-    bool movbe;
-
-    // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
-    // are automatically set to (signed) zero.
-    bool flush_to_zero;
-
-    // Support for LAHF and SAHF instructions in 64-bit mode
-    bool lahf_sahf_64;
-
-    bool long_mode;
 };
 
 /**
@@ -57,10 +32,4 @@ struct CPUCaps {
  */
 const CPUCaps& GetCPUCaps();
 
-/**
- * Gets a string summary of the name and supported capabilities of the host CPU
- * @return String summary
- */
-std::string GetCPUCapsString();
-
 } // namespace Common
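The trimmed CPUCaps struct is still consumed the same way as before. As a minimal, hedged usage sketch (the surrounding function and dispatch logic are made up for illustration; only `Common::GetCPUCaps()` and the boolean flags kept above come from the change):

```cpp
#include "common/x64/cpu_detect.h"

// Illustrative only: choose a code path from the detected capabilities.
// DispatchKernel() is a hypothetical caller, not part of this commit.
void DispatchKernel() {
    const auto& caps = Common::GetCPUCaps();
    if (caps.avx2) {
        // AVX2-optimized path
    } else if (caps.sse4_1) {
        // SSE4.1 path
    } else {
        // scalar fallback
    }
}
```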
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 142852082..729ee4a01 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -190,8 +190,11 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_stream_buffer.h
         renderer_vulkan/vk_swapchain.cpp
         renderer_vulkan/vk_swapchain.h
+        renderer_vulkan/vk_texture_cache.cpp
+        renderer_vulkan/vk_texture_cache.h
         renderer_vulkan/vk_update_descriptor.cpp
-        renderer_vulkan/vk_update_descriptor.h)
+        renderer_vulkan/vk_update_descriptor.h
+    )
 
     target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 2e6c7da19..ee79260fc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1278,8 +1278,6 @@ public:
 
     } dirty{};
 
-    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
-
     /// Reads a register value located at the input method address
     u32 GetRegisterValue(u32 method) const;
 
@@ -1374,6 +1372,8 @@ private:
 
     bool execute_on{true};
 
+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 57b57c647..6f98bd827 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
     Trunc = 11,
 };
 
+enum class AtomicOp : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+};
+
 enum class UniformType : u64 {
     UnsignedByte = 0,
     SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
     Bits128 = 6,
 };
 
+enum class AtomicType : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    S64 = 3,
+};
+
 enum class IMinMaxExchange : u64 {
     None = 0,
     XLo = 1,
@@ -939,6 +958,16 @@ union Instruction {
     } stg;
 
     union {
+        BitField<52, 4, AtomicOp> operation;
+        BitField<28, 2, AtomicType> type;
+        BitField<30, 22, s64> offset;
+
+        s32 GetImmediateOffset() const {
+            return static_cast<s32>(offset << 2);
+        }
+    } atoms;
+
+    union {
         BitField<32, 1, PhysicalAttributeDirection> direction;
         BitField<47, 3, AttributeSize> size;
         BitField<20, 11, u64> address;
@@ -1659,9 +1688,10 @@ public:
         ST_A,
         ST_L,
         ST_S,
         ST,    // Store in generic memory
         STG,   // Store in global memory
-        AL2P,  // Transforms attribute memory into physical memory
+        ATOMS, // Atomic operation on shared memory
+        AL2P,  // Transforms attribute memory into physical memory
         TEX,
         TEX_B, // Texture Load Bindless
         TXQ,   // Texture Query
@@ -1964,6 +1994,7 @@ private:
             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
             INST("101-------------", Id::ST, Type::Memory, "ST"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
             INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
             INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
             INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
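The new `atoms` field packs the operation in bits 52-55, the data type in bits 28-29, and a signed 22-bit immediate at bit 30; `GetImmediateOffset()` then scales it by four, which suggests the immediate is encoded in 4-byte units. A small sketch of the same sign-extend-and-scale on a raw field value (the helper and its input are hypothetical; the real code goes through `BitField`):

```cpp
#include <cstdint>

// Hypothetical helper: 'raw22' is the already-extracted 22-bit offset field.
// BitField<30, 22, s64> sign-extends the field; multiplying by 4 mirrors the
// 'offset << 2' in GetImmediateOffset().
int32_t DecodeAtomsImmediateOffset(uint32_t raw22) {
    const int32_t sign_extended = static_cast<int32_t>(raw22 << 10) >> 10; // 22 -> 32 bits
    return sign_extended * 4;
}
```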
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f9f7a97b5..19751939a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1856,6 +1856,16 @@ private:
                 Type::Uint};
     }
 
+    template <const std::string_view& opname, Type type>
+    Expression Atomic(Operation operation) {
+        ASSERT(stage == ShaderType::Compute);
+        auto& smem = std::get<SmemNode>(*operation[0]);
+
+        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+                            Visit(operation[1]).As(type)),
+                type};
+    }
+
     Expression Branch(Operation operation) {
         const auto target = std::get_if<ImmediateNode>(&*operation[0]);
         UNIMPLEMENTED_IF(!target);
@@ -2194,6 +2204,8 @@ private:
         &GLSLDecompiler::AtomicImage<Func::Xor>,
         &GLSLDecompiler::AtomicImage<Func::Exchange>,
 
+        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+
         &GLSLDecompiler::Branch,
         &GLSLDecompiler::BranchIndirect,
         &GLSLDecompiler::PushFlowStack,
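For reference, the fmt pattern used by `Atomic` expands to a plain GLSL `atomic*` call on the shared memory array. A standalone sketch of just that formatting step, with placeholder operand strings standing in for real `Visit()` results:

```cpp
#include <fmt/format.h>

#include <iostream>
#include <string>

int main() {
    // Same format string as GLSLDecompiler::Atomic; "Add", "(offset)" and "value_0"
    // stand in for the opname template argument and the visited operands.
    const std::string expr =
        fmt::format("atomic{}(smem[{} >> 2], {})", "Add", "(offset)", "value_0");
    std::cout << expr << '\n'; // prints: atomicAdd(smem[(offset) >> 2], value_0)
    return 0;
}
```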
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b790b0ef4..e95eb069e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {
 
 constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},       // ABGR8U
-    {GL_RGBA8, GL_RGBA, GL_BYTE, false},                           // ABGR8S
+    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false},                     // ABGR8S
     {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false},        // ABGR8UI
     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},       // B5G6R5U
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
     {GL_RGB32F, GL_RGB, GL_FLOAT, false},                           // RGB32F
     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
     {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false},                       // RG8U
-    {GL_RG8, GL_RG, GL_BYTE, false},                                // RG8S
+    {GL_RG8_SNORM, GL_RG, GL_BYTE, false},                          // RG8S
     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},             // RG32UI
-    {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false},                   // RGBX16F
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                     // RGBX16F
     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},             // R32UI
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                   // ASTC_2D_8X8
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                   // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
         glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
         glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
         const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+        u8* const mip_data = staging_buffer.data() + mip_offset;
+        const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
         if (is_compressed) {
-            glGetCompressedTextureImage(texture.handle, level,
-                                        static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                                        staging_buffer.data() + mip_offset);
+            glGetCompressedTextureImage(texture.handle, level, size, mip_data);
         } else {
-            glGetTextureImage(texture.handle, level, format, type,
-                              static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                              staging_buffer.data() + mip_offset);
+            glGetTextureImage(texture.handle, level, format, type, size, mip_data);
         }
     }
 }
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 9770dda1c..ac99e6385 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -6,16 +6,20 @@
 #include <vector>
 
 #include <fmt/format.h>
-
 #include <glad/glad.h>
 
-#include "common/assert.h"
 #include "common/common_types.h"
-#include "common/scope_exit.h"
 #include "video_core/renderer_opengl/utils.h"
 
 namespace OpenGL {
 
+struct VertexArrayPushBuffer::Entry {
+    GLuint binding_index{};
+    const GLuint* buffer{};
+    GLintptr offset{};
+    GLsizei stride{};
+};
+
 VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
 
 VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
@@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() {
     }
 }
 
+struct BindBuffersRangePushBuffer::Entry {
+    GLuint binding;
+    const GLuint* buffer;
+    GLintptr offset;
+    GLsizeiptr size;
+};
+
 BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
 
 BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index d56153fe7..3ad7c02d4 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -26,12 +26,7 @@ public:
     void Bind();
 
 private:
-    struct Entry {
-        GLuint binding_index{};
-        const GLuint* buffer{};
-        GLintptr offset{};
-        GLsizei stride{};
-    };
+    struct Entry;
 
     GLuint vao{};
     const GLuint* index_buffer{};
@@ -50,12 +45,7 @@ public:
     void Bind();
 
 private:
-    struct Entry {
-        GLuint binding;
-        const GLuint* buffer;
-        GLintptr offset;
-        GLsizeiptr size;
-    };
+    struct Entry;
 
     GLenum target;
     std::vector<Entry> entries;
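Both headers now expose `Entry` only as a forward declaration while its definition moves next to the code that uses it in utils.cpp; a `std::vector` member of an incomplete type is fine as long as the special members that need the complete type are defined out of line. A hedged sketch of the same pattern with made-up names, header and .cpp sides shown in one unit:

```cpp
#include <vector>

// Hypothetical example of the pattern used above: the "header" only forward-declares
// Entry, and everything that needs the complete type (including the destructor) is
// defined where Entry is fully known.
class PushBuffer {
public:
    PushBuffer();
    ~PushBuffer(); // out of line so ~std::vector<Entry> is instantiated in the .cpp
    void Push(int binding);

private:
    struct Entry;               // complete definition lives in the .cpp
    std::vector<Entry> entries; // ok: vector may be declared with an incomplete type
};

// --- .cpp side ---
struct PushBuffer::Entry {
    int binding{};
};

PushBuffer::PushBuffer() = default;
PushBuffer::~PushBuffer() = default;

void PushBuffer::Push(int binding) {
    entries.push_back(Entry{binding});
}
```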
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 8fe852ce8..0cf97cafa 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1796,6 +1796,11 @@ private:
         return {};
     }
 
+    Expression UAtomicAdd(Operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
     Expression Branch(Operation operation) {
         const auto& target = std::get<ImmediateNode>(*operation[0]);
         OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -2373,6 +2378,8 @@ private:
         &SPIRVDecompiler::AtomicImageXor,
         &SPIRVDecompiler::AtomicImageExchange,
 
+        &SPIRVDecompiler::UAtomicAdd,
+
         &SPIRVDecompiler::Branch,
         &SPIRVDecompiler::BranchIndirect,
         &SPIRVDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 02310375f..4d9488f49 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -13,6 +13,7 @@
 
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
 
 namespace Vulkan {
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
new file mode 100644
index 000000000..51b0d38a6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -0,0 +1,475 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <variant>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/morton.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/surface.h"
+#include "video_core/textures/convert.h"
+
+namespace Vulkan {
+
+using VideoCore::MortonSwizzle;
+using VideoCore::MortonSwizzleMode;
+
+using Tegra::Texture::SwizzleSource;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::SurfaceCompression;
+using VideoCore::Surface::SurfaceTarget;
+
+namespace {
+
+vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture1DArray:
+        return vk::ImageType::e1D;
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        return vk::ImageType::e2D;
+    case SurfaceTarget::Texture3D:
+        return vk::ImageType::e3D;
+    }
+    UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
+    return {};
+}
+
+vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) {
+    if (pixel_format < PixelFormat::MaxColorFormat) {
+        return vk::ImageAspectFlagBits::eColor;
+    } else if (pixel_format < PixelFormat::MaxDepthFormat) {
+        return vk::ImageAspectFlagBits::eDepth;
+    } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
+        return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
+    } else {
+        UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format));
+        return vk::ImageAspectFlagBits::eColor;
+    }
+}
+
+vk::ImageViewType GetImageViewType(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::Texture1D:
+        return vk::ImageViewType::e1D;
+    case SurfaceTarget::Texture2D:
+        return vk::ImageViewType::e2D;
+    case SurfaceTarget::Texture3D:
+        return vk::ImageViewType::e3D;
+    case SurfaceTarget::Texture1DArray:
+        return vk::ImageViewType::e1DArray;
+    case SurfaceTarget::Texture2DArray:
+        return vk::ImageViewType::e2DArray;
+    case SurfaceTarget::TextureCubemap:
+        return vk::ImageViewType::eCube;
+    case SurfaceTarget::TextureCubeArray:
+        return vk::ImageViewType::eCubeArray;
+    case SurfaceTarget::TextureBuffer:
+        break;
+    }
+    UNREACHABLE();
+    return {};
+}
+
+UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
+    // TODO(Rodrigo): Move texture buffer creation to the buffer cache
+    const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(),
+                                         vk::BufferUsageFlagBits::eUniformTexelBuffer |
+                                             vk::BufferUsageFlagBits::eTransferSrc |
+                                             vk::BufferUsageFlagBits::eTransferDst,
+                                         vk::SharingMode::eExclusive, 0, nullptr);
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    return dev.createBufferUnique(buffer_ci, nullptr, dld);
+}
+
+vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
+                                                      const SurfaceParams& params,
+                                                      vk::Buffer buffer) {
+    ASSERT(params.IsBuffer());
+
+    const auto format =
+        MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
+    return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes());
+}
+
+vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
+    constexpr auto sample_count = vk::SampleCountFlagBits::e1;
+    constexpr auto tiling = vk::ImageTiling::eOptimal;
+
+    ASSERT(!params.IsBuffer());
+
+    const auto [format, attachable, storage] =
+        MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
+
+    auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst |
+                       vk::ImageUsageFlagBits::eTransferSrc;
+    if (attachable) {
+        image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment
+                                                  : vk::ImageUsageFlagBits::eColorAttachment;
+    }
+    if (storage) {
+        image_usage |= vk::ImageUsageFlagBits::eStorage;
+    }
+
+    vk::ImageCreateFlags flags;
+    vk::Extent3D extent;
+    switch (params.target) {
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        flags |= vk::ImageCreateFlagBits::eCubeCompatible;
+        [[fallthrough]];
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture2DArray:
+        extent = vk::Extent3D(params.width, params.height, 1);
+        break;
+    case SurfaceTarget::Texture3D:
+        extent = vk::Extent3D(params.width, params.height, params.depth);
+        break;
+    case SurfaceTarget::TextureBuffer:
+        UNREACHABLE();
+    }
+
+    return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent,
+                               params.num_levels, static_cast<u32>(params.GetNumLayers()),
+                               sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0,
+                               nullptr, vk::ImageLayout::eUndefined);
+}
+
+} // Anonymous namespace
+
+CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
+                             VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
+                             GPUVAddr gpu_addr, const SurfaceParams& params)
+    : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
+      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
+      staging_pool{staging_pool} {
+    if (params.IsBuffer()) {
+        buffer = CreateBuffer(device, params);
+        commit = memory_manager.Commit(*buffer, false);
+
+        const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
+        format = buffer_view_ci.format;
+
+        const auto dev = device.GetLogical();
+        const auto& dld = device.GetDispatchLoader();
+        buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld);
+    } else {
+        const auto image_ci = GenerateImageCreateInfo(device, params);
+        format = image_ci.format;
+
+        image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
+        commit = memory_manager.Commit(image->GetHandle(), false);
+    }
+
+    // TODO(Rodrigo): Move this to a virtual function.
+    main_view = CreateViewInner(
+        ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
+        true);
+}
+
+CachedSurface::~CachedSurface() = default;
+
+void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
+    // To upload data we have to be outside of a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    if (params.IsBuffer()) {
+        UploadBuffer(staging_buffer);
+    } else {
+        UploadImage(staging_buffer);
+    }
+}
+
+void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
+    UNIMPLEMENTED_IF(params.IsBuffer());
+
+    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+        LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
+    }
+
+    // We can't copy images to buffers inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
+                   vk::ImageLayout::eTransferSrcOptimal);
+
+    const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+    // TODO(Rodrigo): Do this in a single copy
+    for (u32 level = 0; level < params.num_levels; ++level) {
+        scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle,
+                          copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) {
+            cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy},
+                                     dld);
+        });
+    }
+    scheduler.Finish();
+
+    // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy.
+    std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size);
+}
+
+void CachedSurface::DecorateSurfaceName() {
+    // TODO(Rodrigo): Add name decorations
+}
+
+View CachedSurface::CreateView(const ViewParams& params) {
+    return CreateViewInner(params, false);
+}
+
+View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
+    // TODO(Rodrigo): Add name decorations
+    return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
+}
+
+void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
+    const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+    std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
+
+    scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
+                      size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
+        const vk::BufferCopy copy(0, 0, size);
+        cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
+
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite,
+                                     vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)},
+            {}, dld);
+    });
+}
+
+void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
+    const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+    std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
+
+    FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite,
+                   vk::ImageLayout::eTransferDstOptimal);
+
+    for (u32 level = 0; level < params.num_levels; ++level) {
+        vk::BufferImageCopy copy = GetBufferImageCopy(level);
+        const auto& dld = device.GetDispatchLoader();
+        if (image->GetAspectMask() ==
+            (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
+            vk::BufferImageCopy depth = copy;
+            vk::BufferImageCopy stencil = copy;
+            depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
+            stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
+            scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth,
+                              stencil](auto cmdbuf, auto& dld) {
+                cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
+                                         {depth, stencil}, dld);
+            });
+        } else {
+            scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(),
+                              copy](auto cmdbuf, auto& dld) {
+                cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
+                                         {copy}, dld);
+            });
+        }
+    }
+}
+
+vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
+    const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
+    const auto compression_type = params.GetCompressionType();
+    const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
+                                       ? params.GetConvertedMipmapOffset(level)
+                                       : params.GetHostMipmapLevelOffset(level);
+
+    return vk::BufferImageCopy(
+        mip_offset, 0, 0,
+        {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0},
+        {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth});
+}
+
+vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
+    return {image->GetAspectMask(), 0, params.num_levels, 0,
+            static_cast<u32>(params.GetNumLayers())};
+}
+
+CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
+                                     const ViewParams& params, bool is_proxy)
+    : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
+      image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
+      aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
+      base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
+      num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
+                                                           : vk::ImageViewType{}} {}
+
+CachedSurfaceView::~CachedSurfaceView() = default;
+
+vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
+                                           SwizzleSource z_source, SwizzleSource w_source) {
+    const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
+    if (last_image_view && last_swizzle == swizzle) {
+        return last_image_view;
+    }
+    last_swizzle = swizzle;
+
+    const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle);
+    auto& image_view = entry->second;
+    if (!is_cache_miss) {
+        return last_image_view = *image_view;
+    }
+
+    auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
+    auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
+    auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
+    auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
+
+    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+        // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
+        std::swap(swizzle_x, swizzle_z);
+    }
+
+    // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
+    // hardware. To emulate this on Vulkan we specify it in the aspect.
+    vk::ImageAspectFlags aspect = aspect_mask;
+    if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
+        UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
+        const bool is_first = x_source == SwizzleSource::R;
+        switch (params.pixel_format) {
+        case VideoCore::Surface::PixelFormat::Z24S8:
+        case VideoCore::Surface::PixelFormat::Z32FS8:
+            aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil;
+            break;
+        case VideoCore::Surface::PixelFormat::S8Z24:
+            aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth;
+            break;
+        default:
+            aspect = vk::ImageAspectFlagBits::eDepth;
+            UNIMPLEMENTED();
+        }
+
+        // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity
+        swizzle_x = vk::ComponentSwizzle::eR;
+        swizzle_y = vk::ComponentSwizzle::eG;
+        swizzle_z = vk::ComponentSwizzle::eB;
+        swizzle_w = vk::ComponentSwizzle::eA;
+    }
+
+    const vk::ImageViewCreateInfo image_view_ci(
+        {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(),
+        {swizzle_x, swizzle_y, swizzle_z, swizzle_w},
+        {aspect, base_level, num_levels, base_layer, num_layers});
+
+    const auto dev = device.GetLogical();
+    image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader());
+    return last_image_view = *image_view;
+}
+
+VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                               const VKDevice& device, VKResourceManager& resource_manager,
+                               VKMemoryManager& memory_manager, VKScheduler& scheduler,
+                               VKStagingBufferPool& staging_pool)
+    : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
+      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
+
+VKTextureCache::~VKTextureCache() = default;
+
+Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
+    return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
+                                           scheduler, staging_pool, gpu_addr, params);
+}
+
+void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
+                               const VideoCommon::CopyParams& copy_params) {
+    const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
+    const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
+    UNIMPLEMENTED_IF(src_3d);
+
+    // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and
+    // dimension respectively.
+    const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z;
+    const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0;
+
+    const u32 extent_z = dst_3d ? copy_params.depth : 1;
+    const u32 num_layers = dst_3d ? 1 : copy_params.depth;
+
+    // We can't copy inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
+                            vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
+                            vk::ImageLayout::eTransferSrcOptimal);
+    dst_surface->Transition(
+        dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
+        vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
+
+    const auto& dld{device.GetDispatchLoader()};
+    const vk::ImageSubresourceLayers src_subresource(
+        src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
+    const vk::ImageSubresourceLayers dst_subresource(
+        dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers);
+    const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0);
+    const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z);
+    const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z);
+    const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent);
+    const vk::Image src_image = src_surface->GetImageHandle();
+    const vk::Image dst_image = dst_surface->GetImageHandle();
+    scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) {
+        cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
+                         vk::ImageLayout::eTransferDstOptimal, {copy}, dld);
+    });
+}
+
+void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
+                               const Tegra::Engines::Fermi2D::Config& copy_config) {
+    // We can't blit inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer,
+                         vk::AccessFlagBits::eTransferRead);
+    dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer,
+                         vk::AccessFlagBits::eTransferWrite);
+
+    const auto& cfg = copy_config;
+    const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0);
+    const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1);
+    const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0);
+    const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1);
+    const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right},
+                             dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
+    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
+
+    const auto& dld{device.GetDispatchLoader()};
+    scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
+                      is_linear](auto cmdbuf, auto& dld) {
+        cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
+                         vk::ImageLayout::eTransferDstOptimal, {blit},
+                         is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld);
+    });
+}
+
+void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
+    // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
+    // convert from color to depth and viceversa.
+    LOG_WARNING(Render_Vulkan, "Unimplemented");
+}
+
+} // namespace Vulkan
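`GetHandle()` above caches one `vk::ImageView` per swizzle combination, keyed by `EncodeSwizzle()`. That helper is not part of this file, but the idea is simply packing the four swizzle sources into a single integer key; a hypothetical sketch of such a key (the 8-bit spacing is an assumption, not the actual encoding):

```cpp
#include <cstdint>

// Hypothetical stand-in for the EncodeSwizzle() referenced above: combine four small
// enum values into one u32 so view_cache can key on the whole swizzle combination.
std::uint32_t EncodeSwizzleKey(std::uint32_t x, std::uint32_t y, std::uint32_t z,
                               std::uint32_t w) {
    return (x << 24) | (y << 16) | (z << 8) | w;
}
```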
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
new file mode 100644
index 000000000..d3edbe80c
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -0,0 +1,239 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/texture_cache/surface_base.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/textures/decoders.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+
+class RasterizerVulkan;
+class VKDevice;
+class VKResourceManager;
+class VKScheduler;
+class VKStagingBufferPool;
+
+class CachedSurfaceView;
+class CachedSurface;
+
+using Surface = std::shared_ptr<CachedSurface>;
+using View = std::shared_ptr<CachedSurfaceView>;
+using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+
+using VideoCommon::SurfaceParams;
+using VideoCommon::ViewParams;
+
+class CachedSurface final : public VideoCommon::SurfaceBase<View> {
+    friend CachedSurfaceView;
+
+public:
+    explicit CachedSurface(Core::System& system, const VKDevice& device,
+                           VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
+                           GPUVAddr gpu_addr, const SurfaceParams& params);
+    ~CachedSurface();
+
+    void UploadTexture(const std::vector<u8>& staging_buffer) override;
+    void DownloadTexture(std::vector<u8>& staging_buffer) override;
+
+    void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                        vk::ImageLayout new_layout) {
+        image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
+                          new_stage_mask, new_access, new_layout);
+    }
+
+    void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                    vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                    vk::ImageLayout new_layout) {
+        image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
+                          new_access, new_layout);
+    }
+
+    VKImage& GetImage() {
+        return *image;
+    }
+
+    const VKImage& GetImage() const {
+        return *image;
+    }
+
+    vk::Image GetImageHandle() const {
+        return image->GetHandle();
+    }
+
+    vk::ImageAspectFlags GetAspectMask() const {
+        return image->GetAspectMask();
+    }
+
+    vk::BufferView GetBufferViewHandle() const {
+        return *buffer_view;
+    }
+
+protected:
+    void DecorateSurfaceName();
+
+    View CreateView(const ViewParams& params) override;
+    View CreateViewInner(const ViewParams& params, bool is_proxy);
+
+private:
+    void UploadBuffer(const std::vector<u8>& staging_buffer);
+
+    void UploadImage(const std::vector<u8>& staging_buffer);
+
+    vk::BufferImageCopy GetBufferImageCopy(u32 level) const;
+
+    vk::ImageSubresourceRange GetImageSubresourceRange() const;
+
+    Core::System& system;
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_pool;
+
+    std::optional<VKImage> image;
+    UniqueBuffer buffer;
+    UniqueBufferView buffer_view;
+    VKMemoryCommit commit;
+
+    vk::Format format;
+};
+
+class CachedSurfaceView final : public VideoCommon::ViewBase {
+public:
+    explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
+                               const ViewParams& params, bool is_proxy);
+    ~CachedSurfaceView();
+
+    vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
+                            Tegra::Texture::SwizzleSource y_source,
+                            Tegra::Texture::SwizzleSource z_source,
+                            Tegra::Texture::SwizzleSource w_source);
+
+    bool IsSameSurface(const CachedSurfaceView& rhs) const {
+        return &surface == &rhs.surface;
+    }
+
+    vk::ImageView GetHandle() {
+        return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
+                         Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
+    }
+
| 146 | u32 GetWidth() const { | ||
| 147 | return params.GetMipWidth(base_level); | ||
| 148 | } | ||
| 149 | |||
| 150 | u32 GetHeight() const { | ||
| 151 | return params.GetMipHeight(base_level); | ||
| 152 | } | ||
| 153 | |||
| 154 | bool IsBufferView() const { | ||
| 155 | return buffer_view; | ||
| 156 | } | ||
| 157 | |||
| 158 | vk::Image GetImage() const { | ||
| 159 | return image; | ||
| 160 | } | ||
| 161 | |||
| 162 | vk::BufferView GetBufferView() const { | ||
| 163 | return buffer_view; | ||
| 164 | } | ||
| 165 | |||
| 166 | vk::ImageSubresourceRange GetImageSubresourceRange() const { | ||
| 167 | return {aspect_mask, base_level, num_levels, base_layer, num_layers}; | ||
| 168 | } | ||
| 169 | |||
| 170 | vk::ImageSubresourceLayers GetImageSubresourceLayers() const { | ||
| 171 | return {surface.GetAspectMask(), base_level, base_layer, num_layers}; | ||
| 172 | } | ||
| 173 | |||
| 174 | void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask, | ||
| 175 | vk::AccessFlags new_access) const { | ||
| 176 | surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | ||
| 177 | new_access, new_layout); | ||
| 178 | } | ||
| 179 | |||
| 180 | void MarkAsModified(u64 tick) { | ||
| 181 | surface.MarkAsModified(true, tick); | ||
| 182 | } | ||
| 183 | |||
| 184 | private: | ||
| 185 | static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 186 | Tegra::Texture::SwizzleSource y_source, | ||
| 187 | Tegra::Texture::SwizzleSource z_source, | ||
| 188 | Tegra::Texture::SwizzleSource w_source) { | ||
| 189 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 190 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 191 | } | ||
| 192 | |||
| 193 | // Store a copy of these values to avoid double dereference when reading them | ||
| 194 | const SurfaceParams params; | ||
| 195 | const vk::Image image; | ||
| 196 | const vk::BufferView buffer_view; | ||
| 197 | const vk::ImageAspectFlags aspect_mask; | ||
| 198 | |||
| 199 | const VKDevice& device; | ||
| 200 | CachedSurface& surface; | ||
| 201 | const u32 base_layer; | ||
| 202 | const u32 num_layers; | ||
| 203 | const u32 base_level; | ||
| 204 | const u32 num_levels; | ||
| 205 | const vk::ImageViewType image_view_type; | ||
| 206 | |||
| 207 | vk::ImageView last_image_view; | ||
| 208 | u32 last_swizzle{}; | ||
| 209 | |||
| 210 | std::unordered_map<u32, UniqueImageView> view_cache; | ||
| 211 | }; | ||
| 212 | |||
| 213 | class VKTextureCache final : public TextureCacheBase { | ||
| 214 | public: | ||
| 215 | explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 216 | const VKDevice& device, VKResourceManager& resource_manager, | ||
| 217 | VKMemoryManager& memory_manager, VKScheduler& scheduler, | ||
| 218 | VKStagingBufferPool& staging_pool); | ||
| 219 | ~VKTextureCache(); | ||
| 220 | |||
| 221 | private: | ||
| 222 | Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; | ||
| 223 | |||
| 224 | void ImageCopy(Surface& src_surface, Surface& dst_surface, | ||
| 225 | const VideoCommon::CopyParams& copy_params) override; | ||
| 226 | |||
| 227 | void ImageBlit(View& src_view, View& dst_view, | ||
| 228 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||
| 229 | |||
| 230 | void BufferCopy(Surface& src_surface, Surface& dst_surface) override; | ||
| 231 | |||
| 232 | const VKDevice& device; | ||
| 233 | VKResourceManager& resource_manager; | ||
| 234 | VKMemoryManager& memory_manager; | ||
| 235 | VKScheduler& scheduler; | ||
| 236 | VKStagingBufferPool& staging_pool; | ||
| 237 | }; | ||
| 238 | |||
| 239 | } // namespace Vulkan | ||
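CachedSurfaceView::GetHandle keys its view_cache on the 32-bit value produced by EncodeSwizzle and keeps last_image_view/last_swizzle around as a fast path for repeated requests with the same swizzle. A simplified, self-contained sketch of that caching scheme, where Source and ViewHandle are stand-ins for the real Tegra::Texture::SwizzleSource and vk::ImageView types and CreateView is a placeholder:

    #include <cstdint>
    #include <unordered_map>

    enum class Source : std::uint32_t { R, G, B, A }; // simplified; the real enum has more entries

    using ViewHandle = std::uint64_t; // stand-in for vk::ImageView

    class SwizzleViewCache {
    public:
        ViewHandle GetHandle(Source x, Source y, Source z, Source w) {
            const std::uint32_t swizzle = Encode(x, y, z, w);
            if (last_handle != 0 && swizzle == last_swizzle) {
                return last_handle; // Fast path: same swizzle as the previous call
            }
            last_swizzle = swizzle;
            auto [it, inserted] = cache.try_emplace(swizzle, ViewHandle{});
            if (inserted) {
                it->second = CreateView(swizzle); // Only build a view on a cache miss
            }
            return last_handle = it->second;
        }

    private:
        static std::uint32_t Encode(Source x, Source y, Source z, Source w) {
            // Same packing as EncodeSwizzle above: one byte per component
            return (static_cast<std::uint32_t>(x) << 24) | (static_cast<std::uint32_t>(y) << 16) |
                   (static_cast<std::uint32_t>(z) << 8) | static_cast<std::uint32_t>(w);
        }

        ViewHandle CreateView(std::uint32_t) {
            return ++next_handle; // placeholder for the real image-view creation
        }

        std::unordered_map<std::uint32_t, ViewHandle> cache;
        ViewHandle last_handle = 0;
        std::uint32_t last_swizzle = 0;
        ViewHandle next_handle = 0;
    };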
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 8cc84e935..7591a715f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -16,6 +16,8 @@ | |||
| 16 | 16 | ||
| 17 | namespace VideoCommon::Shader { | 17 | namespace VideoCommon::Shader { |
| 18 | 18 | ||
| 19 | using Tegra::Shader::AtomicOp; | ||
| 20 | using Tegra::Shader::AtomicType; | ||
| 19 | using Tegra::Shader::Attribute; | 21 | using Tegra::Shader::Attribute; |
| 20 | using Tegra::Shader::Instruction; | 22 | using Tegra::Shader::Instruction; |
| 21 | using Tegra::Shader::OpCode; | 23 | using Tegra::Shader::OpCode; |
| @@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 333 | } | 335 | } |
| 334 | break; | 336 | break; |
| 335 | } | 337 | } |
| 338 | case OpCode::Id::ATOMS: { | ||
| 339 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", | ||
| 340 | static_cast<int>(instr.atoms.operation.Value())); | ||
| 341 | UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", | ||
| 342 | static_cast<int>(instr.atoms.type.Value())); | ||
| 343 | |||
| 344 | const s32 offset = instr.atoms.GetImmediateOffset(); | ||
| 345 | Node address = GetRegister(instr.gpr8); | ||
| 346 | address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); | ||
| 347 | |||
| 348 | Node memory = GetSharedMemory(std::move(address)); | ||
| 349 | Node data = GetRegister(instr.gpr20); | ||
| 350 | |||
| 351 | Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); | ||
| 352 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 353 | break; | ||
| 354 | } | ||
| 336 | case OpCode::Id::AL2P: { | 355 | case OpCode::Id::AL2P: { |
| 337 | // Ignore al2p.direction since we don't care about it. | 356 | // Ignore al2p.direction since we don't care about it. |
| 338 | 357 | ||
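The ATOMS case above builds the shared-memory address from gpr8 plus the decoded immediate offset, applies UAtomicAdd with the value in gpr20, and stores the operation's result in gpr0. Assuming the conventional fetch-and-add behaviour (the instruction yields the word's previous contents), the semantics reduce to a single atomic read-modify-write; a plain C++ analogue of just that semantics, illustrative only since the emulator lowers the node in its shader backends instead:

    #include <atomic>
    #include <cstdint>

    // Adds `data` to the shared-memory word and returns the value that was stored there
    // before the addition -- the value the decoder writes back into gpr0.
    std::uint32_t AtomicAddU32(std::atomic<std::uint32_t>& word, std::uint32_t data) {
        return word.fetch_add(data, std::memory_order_relaxed);
    }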
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4e155542a..075c7d07c 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -162,6 +162,8 @@ enum class OperationCode { | |||
| 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void |
| 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void |
| 164 | 164 | ||
| 165 | UAtomicAdd, /// (smem, uint) -> uint | ||
| 166 | |||
| 165 | Branch, /// (uint branch_target) -> void | 167 | Branch, /// (uint branch_target) -> void |
| 166 | BranchIndirect, /// (uint branch_target) -> void | 168 | BranchIndirect, /// (uint branch_target) -> void |
| 167 | PushFlowStack, /// (uint branch_target) -> void | 169 | PushFlowStack, /// (uint branch_target) -> void |
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 271e67533..81fb9f633 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -95,7 +95,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{ | |||
| 95 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, | 95 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, |
| 96 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, | 96 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, |
| 97 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | 97 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, |
| 98 | {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8}, | 98 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, |
| 99 | 99 | ||
| 100 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, | 100 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, |
| 101 | {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, | 101 | {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, |
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 992b5c022..9256fd6d9 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -209,6 +209,11 @@ public: | |||
| 209 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | 209 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; |
| 210 | } | 210 | } |
| 211 | 211 | ||
| 212 | /// Returns the number of layers in the surface. | ||
| 213 | std::size_t GetNumLayers() const { | ||
| 214 | return is_layered ? depth : 1; | ||
| 215 | } | ||
| 216 | |||
| 212 | /// Returns the debug name of the texture for use in graphic debuggers. | 217 | /// Returns the debug name of the texture for use in graphic debuggers. |
| 213 | std::string TargetName() const; | 218 | std::string TargetName() const; |
| 214 | 219 | ||
| @@ -287,10 +292,6 @@ private: | |||
| 287 | /// Returns the size of a layer | 292 | /// Returns the size of a layer |
| 288 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; | 293 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; |
| 289 | 294 | ||
| 290 | std::size_t GetNumLayers() const { | ||
| 291 | return is_layered ? depth : 1; | ||
| 292 | } | ||
| 293 | |||
| 294 | /// Returns true if these parameters are from a layered surface. | 295 | /// Returns true if these parameters are from a layered surface. |
| 295 | bool IsLayered() const; | 296 | bool IsLayered() const; |
| 296 | }; | 297 | }; |
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 21f422500..dd5371059 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | </property> | 15 | </property> |
| 16 | <property name="windowIcon"> | 16 | <property name="windowIcon"> |
| 17 | <iconset> | 17 | <iconset> |
| 18 | <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset> | 18 | <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset> |
| 19 | </property> | 19 | </property> |
| 20 | <property name="tabShape"> | 20 | <property name="tabShape"> |
| 21 | <enum>QTabWidget::Rounded</enum> | 21 | <enum>QTabWidget::Rounded</enum> |