m---------  externals/Vulkan-Headers                                    0
-rw-r--r--  src/core/hle/service/acc/acc.cpp                          124
-rw-r--r--  src/core/hle/service/acc/acc.h                              1
-rw-r--r--  src/core/hle/service/acc/acc_su.cpp                         2
-rw-r--r--  src/core/hle/service/acc/profile_manager.cpp               11
-rw-r--r--  src/core/hle/service/acc/profile_manager.h                  2
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp                      12
-rw-r--r--  src/video_core/engines/maxwell_3d.h                         8
-rw-r--r--  src/video_core/engines/shader_bytecode.h                   32
-rw-r--r--  src/video_core/macro_interpreter.cpp                        4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp   145
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp    20
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp              301
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.h                  62
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp    48
-rw-r--r--  src/video_core/shader/decode/image.cpp                      92
-rw-r--r--  src/video_core/shader/node.h                                57
-rw-r--r--  src/video_core/shader/shader_ir.h                           10
-rw-r--r--  src/video_core/surface.cpp                                  20
-rw-r--r--  src/video_core/surface.h                                     2
20 files changed, 759 insertions(+), 194 deletions(-)
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
-Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895
+Subproject fd568d51ed3d9bc6132e1639d7492453a08fe1b
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index c01ee3eda..a7c55e116 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -31,6 +31,9 @@
 
 namespace Service::Account {
 
+constexpr ResultCode ERR_INVALID_BUFFER_SIZE{ErrorModule::Account, 30};
+constexpr ResultCode ERR_FAILED_SAVE_DATA{ErrorModule::Account, 100};
+
 static std::string GetImagePath(Common::UUID uuid) {
     return FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
            "/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg";
@@ -41,20 +44,31 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {
     return static_cast<u32>(std::min(size, max_jpeg_image_size));
 }
 
-class IProfile final : public ServiceFramework<IProfile> {
+class IProfileCommon : public ServiceFramework<IProfileCommon> {
 public:
-    explicit IProfile(Common::UUID user_id, ProfileManager& profile_manager)
-        : ServiceFramework("IProfile"), profile_manager(profile_manager), user_id(user_id) {
+    explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
+                            ProfileManager& profile_manager)
+        : ServiceFramework(name), profile_manager(profile_manager), user_id(user_id) {
         static const FunctionInfo functions[] = {
-            {0, &IProfile::Get, "Get"},
-            {1, &IProfile::GetBase, "GetBase"},
-            {10, &IProfile::GetImageSize, "GetImageSize"},
-            {11, &IProfile::LoadImage, "LoadImage"},
+            {0, &IProfileCommon::Get, "Get"},
+            {1, &IProfileCommon::GetBase, "GetBase"},
+            {10, &IProfileCommon::GetImageSize, "GetImageSize"},
+            {11, &IProfileCommon::LoadImage, "LoadImage"},
         };
+
         RegisterHandlers(functions);
+
+        if (editor_commands) {
+            static const FunctionInfo editor_functions[] = {
+                {100, &IProfileCommon::Store, "Store"},
+                {101, &IProfileCommon::StoreWithImage, "StoreWithImage"},
+            };
+
+            RegisterHandlers(editor_functions);
+        }
     }
 
-private:
+protected:
     void Get(Kernel::HLERequestContext& ctx) {
         LOG_INFO(Service_ACC, "called user_id={}", user_id.Format());
         ProfileBase profile_base{};
@@ -127,10 +141,91 @@ private:
         }
     }
 
-    const ProfileManager& profile_manager;
+    void Store(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto base = rp.PopRaw<ProfileBase>();
+
+        const auto user_data = ctx.ReadBuffer();
+
+        LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}",
+                  Common::StringFromFixedZeroTerminatedBuffer(
+                      reinterpret_cast<const char*>(base.username.data()), base.username.size()),
+                  base.timestamp, base.user_uuid.Format());
+
+        if (user_data.size() < sizeof(ProfileData)) {
+            LOG_ERROR(Service_ACC, "ProfileData buffer too small!");
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_INVALID_BUFFER_SIZE);
+            return;
+        }
+
+        ProfileData data;
+        std::memcpy(&data, user_data.data(), sizeof(ProfileData));
+
+        if (!profile_manager.SetProfileBaseAndData(user_id, base, data)) {
+            LOG_ERROR(Service_ACC, "Failed to update profile data and base!");
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_FAILED_SAVE_DATA);
+            return;
+        }
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
+    void StoreWithImage(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto base = rp.PopRaw<ProfileBase>();
+
+        const auto user_data = ctx.ReadBuffer();
+        const auto image_data = ctx.ReadBuffer(1);
+
+        LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}",
+                  Common::StringFromFixedZeroTerminatedBuffer(
+                      reinterpret_cast<const char*>(base.username.data()), base.username.size()),
+                  base.timestamp, base.user_uuid.Format());
+
+        if (user_data.size() < sizeof(ProfileData)) {
+            LOG_ERROR(Service_ACC, "ProfileData buffer too small!");
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_INVALID_BUFFER_SIZE);
+            return;
+        }
+
+        ProfileData data;
+        std::memcpy(&data, user_data.data(), sizeof(ProfileData));
+
+        FileUtil::IOFile image(GetImagePath(user_id), "wb");
+
+        if (!image.IsOpen() || !image.Resize(image_data.size()) ||
+            image.WriteBytes(image_data.data(), image_data.size()) != image_data.size() ||
+            !profile_manager.SetProfileBaseAndData(user_id, base, data)) {
+            LOG_ERROR(Service_ACC, "Failed to update profile data, base, and image!");
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_FAILED_SAVE_DATA);
+            return;
+        }
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
+    ProfileManager& profile_manager;
     Common::UUID user_id; ///< The user id this profile refers to.
 };
 
+class IProfile final : public IProfileCommon {
+public:
+    IProfile(Common::UUID user_id, ProfileManager& profile_manager)
+        : IProfileCommon("IProfile", false, user_id, profile_manager) {}
+};
+
+class IProfileEditor final : public IProfileCommon {
+public:
+    IProfileEditor(Common::UUID user_id, ProfileManager& profile_manager)
+        : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
+};
+
 class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
 public:
     IManagerForApplication() : ServiceFramework("IManagerForApplication") {
@@ -322,6 +417,17 @@ void Module::Interface::IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx
     rb.Push(is_locked);
 }
 
+void Module::Interface::GetProfileEditor(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    Common::UUID user_id = rp.PopRaw<Common::UUID>();
+
+    LOG_DEBUG(Service_ACC, "called, user_id={}", user_id.Format());
+
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushIpcInterface<IProfileEditor>(user_id, *profile_manager);
+}
+
 void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) {
     LOG_DEBUG(Service_ACC, "called");
     // A u8 is passed into this function which we can safely ignore. It's to determine if we have
diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h
index f651773b7..7a7dc9ec6 100644
--- a/src/core/hle/service/acc/acc.h
+++ b/src/core/hle/service/acc/acc.h
@@ -32,6 +32,7 @@ public:
     void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx);
     void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx);
     void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx);
+    void GetProfileEditor(Kernel::HLERequestContext& ctx);
 
 private:
     ResultCode InitializeApplicationInfoBase(u64 process_id);
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp
index 1b7ec3ed0..0d1663657 100644
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -41,7 +41,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
         {202, nullptr, "CancelUserRegistration"},
         {203, nullptr, "DeleteUser"},
         {204, nullptr, "SetUserPosition"},
-        {205, nullptr, "GetProfileEditor"},
+        {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
         {206, nullptr, "CompleteUserRegistrationForcibly"},
         {210, nullptr, "CreateFloatingRegistrationRequest"},
         {230, nullptr, "AuthenticateServiceAsync"},
diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp
index 49aa5908b..8f9986326 100644
--- a/src/core/hle/service/acc/profile_manager.cpp
+++ b/src/core/hle/service/acc/profile_manager.cpp
@@ -305,6 +305,17 @@ bool ProfileManager::SetProfileBase(UUID uuid, const ProfileBase& profile_new) {
     return true;
 }
 
+bool ProfileManager::SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new,
+                                           const ProfileData& data_new) {
+    const auto index = GetUserIndex(uuid);
+    if (index.has_value() && SetProfileBase(uuid, profile_new)) {
+        profiles[*index].data = data_new;
+        return true;
+    }
+
+    return false;
+}
+
 void ProfileManager::ParseUserSaveFile() {
     FileUtil::IOFile save(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
                               ACC_SAVE_AVATORS_BASE_PATH + "profiles.dat",
diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h
index fd7abb541..5a6d28925 100644
--- a/src/core/hle/service/acc/profile_manager.h
+++ b/src/core/hle/service/acc/profile_manager.h
@@ -91,6 +91,8 @@ public:
 
     bool RemoveUser(Common::UUID uuid);
     bool SetProfileBase(Common::UUID uuid, const ProfileBase& profile_new);
+    bool SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new,
+                               const ProfileData& data_new);
 
 private:
     void ParseUserSaveFile();
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 965c4c06b..c7a3c85a0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -332,6 +332,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         ProcessMacroBind(method_call.argument);
         break;
     }
+    case MAXWELL3D_REG_INDEX(firmware[4]): {
+        ProcessFirmwareCall4();
+        break;
+    }
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -422,6 +426,14 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
     macro_positions[regs.macros.entry++] = data;
 }
 
+void Maxwell3D::ProcessFirmwareCall4() {
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+
+    // Firmware call 4 is a blob that changes some registers depending on its parameters.
+    // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
+    regs.reg_array[0xd00] = 1;
+}
+
 void Maxwell3D::ProcessQueryGet() {
     const GPUVAddr sequence_address{regs.query.QueryAddress()};
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f67a5389f..e5ec90717 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1089,7 +1089,9 @@ public:
                 INSERT_PADDING_WORDS(14);
             } shader_config[MaxShaderProgram];
 
-            INSERT_PADDING_WORDS(0x80);
+            INSERT_PADDING_WORDS(0x60);
+
+            u32 firmware[0x20];
 
             struct {
                 u32 cb_size;
@@ -1319,6 +1321,9 @@ private:
     /// Handles writes to the macro bind register.
     void ProcessMacroBind(u32 data);
 
+    /// Handles firmware blob 4
+    void ProcessFirmwareCall4();
+
     /// Handles a write to the CLEAR_BUFFERS register.
     void ProcessClearBuffers();
 
@@ -1431,6 +1436,7 @@ ASSERT_REG_POSITION(vertex_array[0], 0x700);
 ASSERT_REG_POSITION(independent_blend, 0x780);
 ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
 ASSERT_REG_POSITION(shader_config[0], 0x800);
+ASSERT_REG_POSITION(firmware, 0x8C0);
 ASSERT_REG_POSITION(const_buffer, 0x8E0);
 ASSERT_REG_POSITION(cb_bind[0], 0x904);
 ASSERT_REG_POSITION(tex_cb_index, 0x982);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index bd8c1ada0..052e6d24e 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -544,6 +544,28 @@ enum class VoteOperation : u64 {
     Eq = 2,  // allThreadsEqualNV
 };
 
+enum class ImageAtomicSize : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    F32 = 3,
+    S64 = 5,
+    SD32 = 6,
+    SD64 = 7,
+};
+
+enum class ImageAtomicOperation : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+};
+
 union Instruction {
     Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -1392,6 +1414,14 @@ union Instruction {
     } sust;
 
     union {
+        BitField<28, 1, u64> is_ba;
+        BitField<51, 3, ImageAtomicSize> size;
+        BitField<33, 3, ImageType> image_type;
+        BitField<29, 4, ImageAtomicOperation> operation;
+        BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
+    } suatom_d;
+
+    union {
         BitField<20, 24, u64> target;
         BitField<5, 1, u64> constant_buffer;
 
@@ -1543,6 +1573,7 @@ public:
         TMML_B, // Texture Mip Map Level
         TMML,   // Texture Mip Map Level
         SUST,   // Surface Store
+        SUATOM, // Surface Atomic Operation
         EXIT,
         NOP,
         IPA,
@@ -1826,6 +1857,7 @@ private:
             INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
             INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
            INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
+            INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
            INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 4e1cb98db..62afc0d11 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -131,9 +131,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
 
     // An instruction with the Exit flag will not actually
     // cause an exit if it's executed inside a delay slot.
-    // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
-    // testing on the MME code.
-    if (opcode.is_exit) {
+    if (opcode.is_exit && !is_delay_slot) {
         // Exit has a delay slot, execute the next instruction
         Step(offset, true);
         return false;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6edb2ca38..137b23740 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -706,7 +706,7 @@ private:
     void DeclareImages() {
         const auto& images{ir.GetImages()};
         for (const auto& [offset, image] : images) {
-            const std::string image_type = [&]() {
+            const char* image_type = [&] {
                 switch (image.GetType()) {
                 case Tegra::Shader::ImageType::Texture1D:
                     return "image1D";
@@ -725,6 +725,23 @@ private:
                     return "image1D";
                 }
             }();
+
+            const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> {
+                if (!image.IsSizeKnown()) {
+                    return {"", ""};
+                }
+                switch (image.GetSize()) {
+                case Tegra::Shader::ImageAtomicSize::U32:
+                    return {"u", "r32ui, "};
+                case Tegra::Shader::ImageAtomicSize::S32:
+                    return {"i", "r32i, "};
+                default:
+                    UNIMPLEMENTED_MSG("Unimplemented atomic size={}",
+                                      static_cast<u32>(image.GetSize()));
+                    return {"", ""};
+                }
+            }();
+
             std::string qualifier = "coherent volatile";
             if (image.IsRead() && !image.IsWritten()) {
                 qualifier += " readonly";
@@ -1180,6 +1197,74 @@ private:
         return expr;
     }
 
+    std::string BuildIntegerCoordinates(Operation operation) {
+        constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+        const std::size_t coords_count{operation.GetOperandsCount()};
+        std::string expr = constructors.at(coords_count - 1);
+        for (std::size_t i = 0; i < coords_count; ++i) {
+            expr += VisitOperand(operation, i).AsInt();
+            if (i + 1 < coords_count) {
+                expr += ", ";
+            }
+        }
+        expr += ')';
+        return expr;
+    }
+
+    std::string BuildImageValues(Operation operation) {
+        const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> {
+            constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"};
+            if (!meta.image.IsSizeKnown()) {
+                return {float_constructors, Type::Float};
+            }
+            switch (meta.image.GetSize()) {
+            case Tegra::Shader::ImageAtomicSize::U32:
+                return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint};
+            case Tegra::Shader::ImageAtomicSize::S32:
+                return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint};
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented image size={}",
+                                  static_cast<u32>(meta.image.GetSize()));
+                return {float_constructors, Type::Float};
+            }
+        }();
+
+        const std::size_t values_count{meta.values.size()};
+        std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
+        for (std::size_t i = 0; i < values_count; ++i) {
+            expr += Visit(meta.values.at(i)).As(type);
+            if (i + 1 < values_count) {
+                expr += ", ";
+            }
+        }
+        expr += ')';
+        return expr;
+    }
+
+    Expression AtomicImage(Operation operation, const char* opname) {
+        constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+        const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        ASSERT(meta.values.size() == 1);
+        ASSERT(meta.image.IsSizeKnown());
+
+        const auto type = [&]() {
+            switch (const auto size = meta.image.GetSize()) {
+            case Tegra::Shader::ImageAtomicSize::U32:
+                return Type::Uint;
+            case Tegra::Shader::ImageAtomicSize::S32:
+                return Type::Int;
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size));
+                return Type::Uint;
+            }
+        }();
+
+        return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image),
+                            BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)),
+                type};
+    }
+
     Expression Assign(Operation operation) {
         const Node& dest = operation[0];
         const Node& src = operation[1];
@@ -1694,36 +1779,37 @@
     }
 
     Expression ImageStore(Operation operation) {
-        constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
         const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
+                     BuildIntegerCoordinates(operation), BuildImageValues(operation));
+        return {};
+    }
 
-        std::string expr = "imageStore(";
-        expr += GetImage(meta.image);
-        expr += ", ";
+    Expression AtomicImageAdd(Operation operation) {
+        return AtomicImage(operation, "imageAtomicAdd");
+    }
 
-        const std::size_t coords_count{operation.GetOperandsCount()};
-        expr += constructors.at(coords_count - 1);
-        for (std::size_t i = 0; i < coords_count; ++i) {
-            expr += VisitOperand(operation, i).AsInt();
-            if (i + 1 < coords_count) {
-                expr += ", ";
-            }
-        }
-        expr += "), ";
+    Expression AtomicImageMin(Operation operation) {
+        return AtomicImage(operation, "imageAtomicMin");
+    }
 
-        const std::size_t values_count{meta.values.size()};
-        UNIMPLEMENTED_IF(values_count != 4);
-        expr += "vec4(";
-        for (std::size_t i = 0; i < values_count; ++i) {
-            expr += Visit(meta.values.at(i)).AsFloat();
-            if (i + 1 < values_count) {
-                expr += ", ";
-            }
-        }
-        expr += "));";
+    Expression AtomicImageMax(Operation operation) {
+        return AtomicImage(operation, "imageAtomicMax");
+    }
+    Expression AtomicImageAnd(Operation operation) {
+        return AtomicImage(operation, "imageAtomicAnd");
+    }
 
-        code.AddLine(expr);
-        return {};
+    Expression AtomicImageOr(Operation operation) {
+        return AtomicImage(operation, "imageAtomicOr");
+    }
+
+    Expression AtomicImageXor(Operation operation) {
+        return AtomicImage(operation, "imageAtomicXor");
+    }
+
+    Expression AtomicImageExchange(Operation operation) {
+        return AtomicImage(operation, "imageAtomicExchange");
     }
 
     Expression Branch(Operation operation) {
@@ -2019,6 +2105,13 @@
         &GLSLDecompiler::TexelFetch,
 
         &GLSLDecompiler::ImageStore,
+        &GLSLDecompiler::AtomicImageAdd,
+        &GLSLDecompiler::AtomicImageMin,
+        &GLSLDecompiler::AtomicImageMax,
+        &GLSLDecompiler::AtomicImageAnd,
+        &GLSLDecompiler::AtomicImageOr,
+        &GLSLDecompiler::AtomicImageXor,
+        &GLSLDecompiler::AtomicImageExchange,
 
         &GLSLDecompiler::Branch,
         &GLSLDecompiler::BranchIndirect,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 5450feedf..f141c4e3b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -341,16 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
         u64 index{};
         u32 type{};
         u8 is_bindless{};
-        u8 is_read{};
         u8 is_written{};
+        u8 is_read{};
+        u8 is_size_known{};
+        u32 size{};
         if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
             !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
-            !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
+            !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
+            !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) {
             return {};
         }
-        entry.entries.images.emplace_back(static_cast<u64>(offset), static_cast<std::size_t>(index),
-                                          static_cast<Tegra::Shader::ImageType>(type),
-                                          is_bindless != 0, is_written != 0, is_read != 0);
+        entry.entries.images.emplace_back(
+            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+            static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
+            is_read != 0,
+            is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size))
+                          : std::nullopt);
     }
 
     u32 global_memory_count{};
@@ -429,12 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
         return false;
     }
     for (const auto& image : entries.images) {
+        const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U;
         if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
             !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
             !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
             !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
             !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0))) {
+            !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) {
             return false;
         }
     }
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 3b966ddc3..897cbb4e8 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -2,9 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <map>
+#include <bitset>
 #include <optional>
 #include <set>
+#include <string_view>
 #include <vector>
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/declarations.h"
@@ -12,13 +13,32 @@
 
 namespace Vulkan {
 
+namespace {
+
+template <typename T>
+void SetNext(void**& next, T& data) {
+    *next = &data;
+    next = &data.pNext;
+}
+
+template <typename T>
+T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
+    vk::PhysicalDeviceFeatures2 features;
+    T extension_features;
+    features.pNext = &extension_features;
+    physical.getFeatures2(&features, dldi);
+    return extension_features;
+}
+
+} // Anonymous namespace
+
 namespace Alternatives {
 
-constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
-    vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
-constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
-    vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
-constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
+constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
+                                           vk::Format::eD16UnormS8Uint, vk::Format{}};
+constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
+                                           vk::Format::eD32SfloatS8Uint, vk::Format{}};
+constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
 
 } // namespace Alternatives
 
@@ -58,16 +78,53 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
 VKDevice::~VKDevice() = default;
 
 bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
-    vk::PhysicalDeviceFeatures device_features;
-    device_features.vertexPipelineStoresAndAtomics = true;
-    device_features.independentBlend = true;
-    device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
-
     const auto queue_cis = GetDeviceQueueCreateInfos();
-    const std::vector<const char*> extensions = LoadExtensions(dldi);
-    const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
-                                         0, nullptr, static_cast<u32>(extensions.size()),
-                                         extensions.data(), &device_features);
+    const std::vector extensions = LoadExtensions(dldi);
+
+    vk::PhysicalDeviceFeatures2 features2;
+    void** next = &features2.pNext;
+    auto& features = features2.features;
+    features.vertexPipelineStoresAndAtomics = true;
+    features.independentBlend = true;
+    features.depthClamp = true;
+    features.samplerAnisotropy = true;
+    features.largePoints = true;
+    features.textureCompressionASTC_LDR = is_optimal_astc_supported;
+
+    vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
+    vertex_divisor.vertexAttributeInstanceRateDivisor = true;
+    vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
+    SetNext(next, vertex_divisor);
+
+    vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
+    if (is_float16_supported) {
+        float16_int8.shaderFloat16 = true;
+        SetNext(next, float16_int8);
+    } else {
+        LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
+    }
+
+    vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
+    if (khr_uniform_buffer_standard_layout) {
+        std430_layout.uniformBufferStandardLayout = true;
+        SetNext(next, std430_layout);
+    } else {
+        LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
+    }
+
+    vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
+    if (ext_index_type_uint8) {
+        index_type_uint8.indexTypeUint8 = true;
+        SetNext(next, index_type_uint8);
+    } else {
+        LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
+    }
+
+    vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
+                                   nullptr, static_cast<u32>(extensions.size()), extensions.data(),
+                                   nullptr);
+    device_ci.pNext = &features2;
+
     vk::Device dummy_logical;
     if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
         LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
@@ -78,6 +135,17 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
     logical = UniqueDevice(
         dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
 
+    if (khr_driver_properties) {
+        vk::PhysicalDeviceDriverPropertiesKHR driver;
+        vk::PhysicalDeviceProperties2 properties;
+        properties.pNext = &driver;
+        physical.getProperties2(&properties, dld);
+        driver_id = driver.driverID;
+        LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
+    } else {
+        LOG_INFO(Render_Vulkan, "Driver: Unknown");
+    }
+
     graphics_queue = logical->getQueue(graphics_family, 0, dld);
     present_queue = logical->getQueue(present_family, 0, dld);
     return true;
@@ -92,20 +160,19 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
     // The wanted format is not supported by hardware, search for alternatives
     const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
     if (alternatives == nullptr) {
-        LOG_CRITICAL(Render_Vulkan,
-                     "Format={} with usage={} and type={} has no defined alternatives and host "
-                     "hardware does not support it",
-                     vk::to_string(wanted_format), vk::to_string(wanted_usage),
-                     static_cast<u32>(format_type));
-        UNREACHABLE();
+        UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
+                        "hardware does not support it",
+                        vk::to_string(wanted_format), vk::to_string(wanted_usage),
+                        static_cast<u32>(format_type));
         return wanted_format;
     }
 
     std::size_t i = 0;
     for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
          alternative = alternatives[++i]) {
-        if (!IsFormatSupported(alternative, wanted_usage, format_type))
+        if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
             continue;
+        }
         LOG_WARNING(Render_Vulkan,
                     "Emulating format={} with alternative format={} with usage={} and type={}",
                     static_cast<u32>(wanted_format), static_cast<u32>(alternative),
@@ -114,12 +181,10 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
     }
 
     // No alternatives found, panic
-    LOG_CRITICAL(Render_Vulkan,
-                 "Format={} with usage={} and type={} is not supported by the host hardware and "
-                 "doesn't support any of the alternatives",
-                 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
-                 static_cast<u32>(format_type));
-    UNREACHABLE();
+    UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
+                    "doesn't support any of the alternatives",
+                    static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+                    static_cast<u32>(format_type));
     return wanted_format;
 }
 
@@ -132,7 +197,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
         vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
         vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
         vk::FormatFeatureFlagBits::eTransferDst};
-    constexpr std::array<vk::Format, 9> astc_formats = {
+    constexpr std::array astc_formats = {
         vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
         vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
         vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
@@ -151,76 +216,120 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
                                  FormatType format_type) const {
     const auto it = format_properties.find(wanted_format);
     if (it == format_properties.end()) {
-        LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
-        UNREACHABLE();
+        UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format));
        return true;
     }
-    const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
+    const auto supported_usage = GetFormatFeatures(it->second, format_type);
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
 bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                           vk::SurfaceKHR surface) {
-    bool has_swapchain{};
+    LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
+    bool is_suitable = true;
+
+    constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+                                                VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
+    std::bitset<required_extensions.size()> available_extensions{};
+
     for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
-        has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+        for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+            if (available_extensions[i]) {
+                continue;
+            }
+            available_extensions[i] =
+                required_extensions[i] == std::string_view{prop.extensionName};
+        }
     }
-    if (!has_swapchain) {
-        // The device doesn't support creating swapchains.
-        return false;
+    if (!available_extensions.all()) {
+        for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+            if (available_extensions[i]) {
+                continue;
+            }
+            LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
+            is_suitable = false;
+        }
     }
 
     bool has_graphics{}, has_present{};
     const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
     for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
         const auto& family = queue_family_properties[i];
-        if (family.queueCount == 0)
+        if (family.queueCount == 0) {
             continue;
-
+        }
         has_graphics |=
             (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
         has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
     }
     if (!has_graphics || !has_present) {
-        // The device doesn't have a graphics and present queue.
-        return false;
+        LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
+        is_suitable = false;
     }
 
     // TODO(Rodrigo): Check if the device matches all requeriments.
     const auto properties{physical.getProperties(dldi)};
-    const auto limits{properties.limits};
-    if (limits.maxUniformBufferRange < 65536) {
-        return false;
+    const auto& limits{properties.limits};
+
+    constexpr u32 required_ubo_size = 65536;
+    if (limits.maxUniformBufferRange < required_ubo_size) {
+        LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
+                 limits.maxUniformBufferRange, required_ubo_size);
+        is_suitable = false;
     }
 
-    const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
-    if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
-        return false;
+    const auto features{physical.getFeatures(dldi)};
+    const std::array feature_report = {
+        std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
+        std::make_pair(features.independentBlend, "independentBlend"),
+        std::make_pair(features.depthClamp, "depthClamp"),
+        std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
+        std::make_pair(features.largePoints, "largePoints"),
+    };
+    for (const auto& [supported, name] : feature_report) {
+        if (supported) {
+            continue;
+        }
+        LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
+        is_suitable = false;
     }
 
-    // Device is suitable.
-    return true;
+    return is_suitable;
 }
 
 std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
     std::vector<const char*> extensions;
-    extensions.reserve(2);
+    extensions.reserve(7);
     extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+    extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
 
     const auto Test = [&](const vk::ExtensionProperties& extension,
                           std::optional<std::reference_wrapper<bool>> status, const char* name,
-                          u32 revision) {
-        if (extension.extensionName != std::string(name)) {
+                          bool push) {
+        if (extension.extensionName != std::string_view(name)) {
             return;
         }
-        extensions.push_back(name);
+        if (push) {
+            extensions.push_back(name);
+        }
         if (status) {
             status->get() = true;
         }
     };
 
+    bool khr_shader_float16_int8{};
     for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
-        Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
+        Test(extension, khr_uniform_buffer_standard_layout,
+             VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
+        Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+        Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
+        Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+    }
+
+    if (khr_shader_float16_int8) {
+        is_float16_supported =
+            GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
+        extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
     }
 
     return extensions;
@@ -250,9 +359,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
 }
 
 void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
-    const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
+    const auto props = physical.getProperties(dldi);
     device_type = props.deviceType;
     uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
+    storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
     max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
 }
 
@@ -273,42 +383,53 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
     return queue_cis;
 }
 
-std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
+std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
     const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
-    static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
-                                        vk::Format::eB5G6R5UnormPack16,
-                                        vk::Format::eA2B10G10R10UnormPack32,
-                                        vk::Format::eR32G32B32A32Sfloat,
-                                        vk::Format::eR16G16Unorm,
-                                        vk::Format::eR16G16Snorm,
-                                        vk::Format::eR8G8B8A8Srgb,
-                                        vk::Format::eR8Unorm,
-                                        vk::Format::eB10G11R11UfloatPack32,
-                                        vk::Format::eR32Sfloat,
-                                        vk::Format::eR16Sfloat,
-                                        vk::Format::eR16G16B16A16Sfloat,
-                                        vk::Format::eD32Sfloat,
-                                        vk::Format::eD16Unorm,
-                                        vk::Format::eD16UnormS8Uint,
-                                        vk::Format::eD24UnormS8Uint,
-                                        vk::Format::eD32SfloatS8Uint,
-                                        vk::Format::eBc1RgbaUnormBlock,
-                                        vk::Format::eBc2UnormBlock,
-                                        vk::Format::eBc3UnormBlock,
-                                        vk::Format::eBc4UnormBlock,
-                                        vk::Format::eBc5UnormBlock,
-                                        vk::Format::eBc5SnormBlock,
-                                        vk::Format::eBc7UnormBlock,
-                                        vk::Format::eAstc4x4UnormBlock,
-                                        vk::Format::eAstc4x4SrgbBlock,
-                                        vk::Format::eAstc8x8SrgbBlock,
-                                        vk::Format::eAstc8x6SrgbBlock,
-                                        vk::Format::eAstc5x4SrgbBlock,
-                                        vk::Format::eAstc5x5UnormBlock,
-                                        vk::Format::eAstc5x5SrgbBlock,
-                                        vk::Format::eAstc10x8UnormBlock,
-                                        vk::Format::eAstc10x8SrgbBlock};
-    std::map<vk::Format, vk::FormatProperties> format_properties;
+    constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+                                 vk::Format::eA8B8G8R8SnormPack32,
+                                 vk::Format::eA8B8G8R8SrgbPack32,
+                                 vk::Format::eB5G6R5UnormPack16,
+                                 vk::Format::eA2B10G10R10UnormPack32,
+                                 vk::Format::eR32G32B32A32Sfloat,
+                                 vk::Format::eR16G16B16A16Uint,
+                                 vk::Format::eR16G16Unorm,
+                                 vk::Format::eR16G16Snorm,
+                                 vk::Format::eR16G16Sfloat,
+                                 vk::Format::eR16Unorm,
+                                 vk::Format::eR8G8B8A8Srgb,
+                                 vk::Format::eR8G8Unorm,
+                                 vk::Format::eR8G8Snorm,
+                                 vk::Format::eR8Unorm,
+                                 vk::Format::eB10G11R11UfloatPack32,
+                                 vk::Format::eR32Sfloat,
+                                 vk::Format::eR16Sfloat,
+                                 vk::Format::eR16G16B16A16Sfloat,
+                                 vk::Format::eB8G8R8A8Unorm,
+                                 vk::Format::eD32Sfloat,
+                                 vk::Format::eD16Unorm,
+                                 vk::Format::eD16UnormS8Uint,
+                                 vk::Format::eD24UnormS8Uint,
+                                 vk::Format::eD32SfloatS8Uint,
+                                 vk::Format::eBc1RgbaUnormBlock,
+                                 vk::Format::eBc2UnormBlock,
+                                 vk::Format::eBc3UnormBlock,
+                                 vk::Format::eBc4UnormBlock,
+                                 vk::Format::eBc5UnormBlock,
+                                 vk::Format::eBc5SnormBlock,
+                                 vk::Format::eBc7UnormBlock,
+                                 vk::Format::eBc1RgbaSrgbBlock,
+                                 vk::Format::eBc3SrgbBlock,
+                                 vk::Format::eBc7SrgbBlock,
+                                 vk::Format::eAstc4x4UnormBlock,
+                                 vk::Format::eAstc4x4SrgbBlock,
+                                 vk::Format::eAstc8x8SrgbBlock,
+                                 vk::Format::eAstc8x6SrgbBlock,
+                                 vk::Format::eAstc5x4SrgbBlock,
+                                 vk::Format::eAstc5x5UnormBlock,
+                                 vk::Format::eAstc5x5SrgbBlock,
+                                 vk::Format::eAstc10x8UnormBlock,
+                                 vk::Format::eAstc10x8SrgbBlock};
+    std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
     for (const auto format : formats) {
         format_properties.emplace(format, physical.getFormatProperties(format, dldi));
     }
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 537825d8b..010d4c3d6 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/declarations.h"
@@ -69,16 +69,26 @@ public:
         return present_family;
     }
 
-    /// Returns if the device is integrated with the host CPU.
+    /// Returns true if the device is integrated with the host CPU.
     bool IsIntegrated() const {
         return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
     }
 
+    /// Returns the driver ID.
+    vk::DriverIdKHR GetDriverID() const {
+        return driver_id;
+    }
+
     /// Returns uniform buffer alignment requeriment.
     u64 GetUniformBufferAlignment() const {
         return uniform_buffer_alignment;
    }
 
+    /// Returns storage alignment requeriment.
+    u64 GetStorageBufferAlignment() const {
+        return storage_buffer_alignment;
+    }
+
     /// Returns the maximum range for storage buffers.
     u64 GetMaxStorageBufferRange() const {
         return max_storage_buffer_range;
@@ -89,9 +99,19 @@ public:
         return is_optimal_astc_supported;
     }
 
+    /// Returns true if the device supports float16 natively
+    bool IsFloat16Supported() const {
+        return is_float16_supported;
+    }
+
     /// Returns true if the device supports VK_EXT_scalar_block_layout.
-    bool IsExtScalarBlockLayoutSupported() const {
-        return ext_scalar_block_layout;
+    bool IsKhrUniformBufferStandardLayoutSupported() const {
+        return khr_uniform_buffer_standard_layout;
+    }
+
+    /// Returns true if the device supports VK_EXT_index_type_uint8.
+    bool IsExtIndexTypeUint8Supported() const {
+        return ext_index_type_uint8;
     }
 
     /// Checks if the physical device is suitable.
@@ -123,22 +143,28 @@ private:
                            FormatType format_type) const;
 
     /// Returns the device properties for Vulkan formats.
-    static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
+    static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
         const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
 
     const vk::PhysicalDevice physical;  ///< Physical device.
     vk::DispatchLoaderDynamic dld;      ///< Device function pointers.
     UniqueDevice logical;               ///< Logical device.
     vk::Queue graphics_queue;           ///< Main graphics queue.
     vk::Queue present_queue;            ///< Main present queue.
     u32 graphics_family{};              ///< Main graphics queue family index.
     u32 present_family{};               ///< Main present queue family index.
     vk::PhysicalDeviceType device_type; ///< Physical device type.
-    u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requeriment.
-    u64 max_storage_buffer_range{};     ///< Max storage buffer size.
-    bool is_optimal_astc_supported{};   ///< Support for native ASTC.
-    bool ext_scalar_block_layout{};     ///< Support for VK_EXT_scalar_block_layout.
-    std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
+    vk::DriverIdKHR driver_id{};        ///< Driver ID.
+    u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requeriment.
+    u64 storage_buffer_alignment{};     ///< Storage buffer alignment requeriment.
+    u64 max_storage_buffer_range{};     ///< Max storage buffer size.
+    bool is_optimal_astc_supported{};   ///< Support for native ASTC.
+    bool is_float16_supported{};        ///< Support for float16 arithmetics.
+    bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
+    bool ext_index_type_uint8{};        ///< Support for VK_EXT_index_type_uint8.
+    bool khr_driver_properties{};       ///< Support for VK_KHR_driver_properties.
+    std::unordered_map<vk::Format, vk::FormatProperties>
+        format_properties; ///< Format properties dictionary.
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a35b45c9c..b9153934e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -370,8 +370,8 @@ private:
370 u32 binding = const_buffers_base_binding; 370 u32 binding = const_buffers_base_binding;
371 for (const auto& entry : ir.GetConstantBuffers()) { 371 for (const auto& entry : ir.GetConstantBuffers()) {
372 const auto [index, size] = entry; 372 const auto [index, size] = entry;
373 const Id type = 373 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
374 device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; 374 : t_cbuf_std140_ubo;
375 const Id id = OpVariable(type, spv::StorageClass::Uniform); 375 const Id id = OpVariable(type, spv::StorageClass::Uniform);
376 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); 376 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
377 377
@@ -565,7 +565,7 @@ private:
565 const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); 565 const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
566 566
567 Id pointer{}; 567 Id pointer{};
568 if (device.IsExtScalarBlockLayoutSupported()) { 568 if (device.IsKhrUniformBufferStandardLayoutSupported()) {
569 const Id buffer_offset = Emit(OpShiftRightLogical( 569 const Id buffer_offset = Emit(OpShiftRightLogical(
570 t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); 570 t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
571 pointer = Emit( 571 pointer = Emit(
@@ -944,6 +944,41 @@ private:
944 return {}; 944 return {};
945 } 945 }
946 946
947 Id AtomicImageAdd(Operation operation) {
948 UNIMPLEMENTED();
949 return {};
950 }
951
952 Id AtomicImageMin(Operation operation) {
953 UNIMPLEMENTED();
954 return {};
955 }
956
957 Id AtomicImageMax(Operation operation) {
958 UNIMPLEMENTED();
959 return {};
960 }
961
962 Id AtomicImageAnd(Operation operation) {
963 UNIMPLEMENTED();
964 return {};
965 }
966
967 Id AtomicImageOr(Operation operation) {
968 UNIMPLEMENTED();
969 return {};
970 }
971
972 Id AtomicImageXor(Operation operation) {
973 UNIMPLEMENTED();
974 return {};
975 }
976
977 Id AtomicImageExchange(Operation operation) {
978 UNIMPLEMENTED();
979 return {};
980 }
981
947 Id Branch(Operation operation) { 982 Id Branch(Operation operation) {
948 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 983 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
949 UNIMPLEMENTED_IF(!target); 984 UNIMPLEMENTED_IF(!target);
@@ -1366,6 +1401,13 @@ private:
1366 &SPIRVDecompiler::TexelFetch, 1401 &SPIRVDecompiler::TexelFetch,
1367 1402
1368 &SPIRVDecompiler::ImageStore, 1403 &SPIRVDecompiler::ImageStore,
1404 &SPIRVDecompiler::AtomicImageAdd,
1405 &SPIRVDecompiler::AtomicImageMin,
1406 &SPIRVDecompiler::AtomicImageMax,
1407 &SPIRVDecompiler::AtomicImageAnd,
1408 &SPIRVDecompiler::AtomicImageOr,
1409 &SPIRVDecompiler::AtomicImageXor,
1410 &SPIRVDecompiler::AtomicImageExchange,
1369 1411
1370 &SPIRVDecompiler::Branch, 1412 &SPIRVDecompiler::Branch,
1371 &SPIRVDecompiler::BranchIndirect, 1413 &SPIRVDecompiler::BranchIndirect,
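The seven AtomicImage* additions are placeholders wired into the decompiler's dispatch table; they sit right after ImageStore because the table is indexed by the OperationCode's underlying value and must keep the same relative order as the enum in node.h. A standalone sketch of that pattern, with toy names rather than yuzu code:

#include <array>
#include <cstddef>

// Toy enum standing in for OperationCode; Amount marks the table size.
enum class Op : std::size_t { ImageStore, AtomicImageAdd, Amount };

class MiniDecompiler {
public:
    // Dispatch by indexing the member-function-pointer table with the enum value,
    // which is why table entries must stay in the same order as the enum.
    int Visit(Op op) {
        return (this->*operation_table[static_cast<std::size_t>(op)])();
    }

private:
    int ImageStore() { return 0; }
    int AtomicImageAdd() { return 1; }

    using OperationPtr = int (MiniDecompiler::*)();
    static constexpr std::array<OperationPtr, static_cast<std::size_t>(Op::Amount)>
        operation_table{&MiniDecompiler::ImageStore, &MiniDecompiler::AtomicImageAdd};
};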
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 008109a99..d54fb88c9 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
44 switch (opcode->get().GetId()) { 44 switch (opcode->get().GetId()) {
45 case OpCode::Id::SUST: { 45 case OpCode::Id::SUST: {
46 UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); 46 UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
47 UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
48 UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); 47 UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
49 UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store 48 UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
50 49
@@ -66,8 +65,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
66 image.MarkWrite(); 65 image.MarkWrite();
67 66
68 MetaImage meta{image, values}; 67 MetaImage meta{image, values};
69 const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; 68 bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords)));
70 bb.push_back(store); 69 break;
70 }
71 case OpCode::Id::SUATOM: {
72 UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
73
74 Node value = GetRegister(instr.gpr0);
75
76 std::vector<Node> coords;
77 const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
78 for (std::size_t i = 0; i < num_coords; ++i) {
79 coords.push_back(GetRegister(instr.gpr8.Value() + i));
80 }
81
82 const OperationCode operation_code = [instr] {
83 switch (instr.suatom_d.operation) {
84 case Tegra::Shader::ImageAtomicOperation::Add:
85 return OperationCode::AtomicImageAdd;
86 case Tegra::Shader::ImageAtomicOperation::Min:
87 return OperationCode::AtomicImageMin;
88 case Tegra::Shader::ImageAtomicOperation::Max:
89 return OperationCode::AtomicImageMax;
90 case Tegra::Shader::ImageAtomicOperation::And:
91 return OperationCode::AtomicImageAnd;
92 case Tegra::Shader::ImageAtomicOperation::Or:
93 return OperationCode::AtomicImageOr;
94 case Tegra::Shader::ImageAtomicOperation::Xor:
95 return OperationCode::AtomicImageXor;
96 case Tegra::Shader::ImageAtomicOperation::Exch:
97 return OperationCode::AtomicImageExchange;
98 default:
99 UNIMPLEMENTED_MSG("Unimplemented operation={}",
100 static_cast<u32>(instr.suatom_d.operation.Value()));
101 return OperationCode::AtomicImageAdd;
102 }
103 }();
104
105 const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)};
106 MetaImage meta{image, {std::move(value)}};
107 SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords)));
71 break; 108 break;
72 } 109 }
73 default: 110 default:
@@ -77,38 +114,51 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
77 return pc; 114 return pc;
78} 115}
79 116
80Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { 117Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
81 const auto offset{static_cast<u64>(image.index.Value())}; 118 std::optional<Tegra::Shader::ImageAtomicSize> size) {
82 119 const auto offset{static_cast<std::size_t>(image.index.Value())};
83 // If this image has already been used, return the existing mapping. 120 if (const auto image = TryUseExistingImage(offset, type, size)) {
84 const auto it = used_images.find(offset); 121 return *image;
85 if (it != used_images.end()) {
86 ASSERT(it->second.GetType() == type);
87 return it->second;
88 } 122 }
89 123
90 // Otherwise create a new mapping for this image.
91 const std::size_t next_index{used_images.size()}; 124 const std::size_t next_index{used_images.size()};
92 return used_images.emplace(offset, Image{offset, next_index, type}).first->second; 125 return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second;
93} 126}
94 127
95Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { 128Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
129 std::optional<Tegra::Shader::ImageAtomicSize> size) {
96 const Node image_register{GetRegister(reg)}; 130 const Node image_register{GetRegister(reg)};
97 const auto [base_image, cbuf_index, cbuf_offset]{ 131 const auto [base_image, cbuf_index, cbuf_offset]{
98 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; 132 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
99 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; 133 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
100 134
101 // If this image has already been used, return the existing mapping. 135 if (const auto image = TryUseExistingImage(cbuf_key, type, size)) {
102 const auto it = used_images.find(cbuf_key); 136 return *image;
103 if (it != used_images.end()) {
104 ASSERT(it->second.GetType() == type);
105 return it->second;
106 } 137 }
107 138
108 // Otherwise create a new mapping for this image.
109 const std::size_t next_index{used_images.size()}; 139 const std::size_t next_index{used_images.size()};
110 return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) 140 return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size})
111 .first->second; 141 .first->second;
112} 142}
113 143
144Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
145 std::optional<Tegra::Shader::ImageAtomicSize> size) {
146 auto it = used_images.find(offset);
147 if (it == used_images.end()) {
148 return nullptr;
149 }
150 auto& image = it->second;
151 ASSERT(image.GetType() == type);
152
153 if (size) {
154 // We know the size now: if the image already has one recorded it must match, otherwise record it.
155 if (image.IsSizeKnown()) {
156 ASSERT(image.GetSize() == size);
157 } else {
158 image.SetSize(*size);
159 }
160 }
161 return &image;
162}
163
114} // namespace VideoCommon::Shader 164} // namespace VideoCommon::Shader
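For context on GetImage/GetBindlessImage above: images are cached by key and receive a dense binding index the first time they are seen, and TryUseExistingImage reuses the entry (reconciling the atomic size) on later hits. A standalone sketch of that lookup-or-create scheme, using toy types rather than the real Image class:

#include <cstddef>
#include <map>

// Toy stand-in for VideoCommon::Shader::Image.
struct ToyImage {
    std::size_t offset;
    std::size_t index;
};

// Reuse an existing entry when the key was seen before; otherwise assign the next
// dense index (the current map size) and insert a new entry.
ToyImage& LookupOrCreate(std::map<std::size_t, ToyImage>& used_images, std::size_t offset) {
    if (const auto it = used_images.find(offset); it != used_images.end()) {
        return it->second;
    }
    const std::size_t next_index = used_images.size();
    return used_images.emplace(offset, ToyImage{offset, next_index}).first->second;
}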
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index b29aedce8..b47b201cf 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <memory> 9#include <memory>
10#include <optional>
10#include <string> 11#include <string>
11#include <tuple> 12#include <tuple>
12#include <utility> 13#include <utility>
@@ -148,7 +149,14 @@ enum class OperationCode {
148 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 149 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
149 TexelFetch, /// (MetaTexture, int[N], int) -> float4 150 TexelFetch, /// (MetaTexture, int[N], int) -> float4
150 151
151 ImageStore, /// (MetaImage, float[N] coords) -> void 152 ImageStore, /// (MetaImage, int[N] values) -> void
153 AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
154 AtomicImageMin, /// (MetaImage, int[N] coords) -> void
155 AtomicImageMax, /// (MetaImage, int[N] coords) -> void
156 AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
157 AtomicImageOr, /// (MetaImage, int[N] coords) -> void
158 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
159 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
152 160
153 Branch, /// (uint branch_target) -> void 161 Branch, /// (uint branch_target) -> void
154 BranchIndirect, /// (uint branch_target) -> void 162 BranchIndirect, /// (uint branch_target) -> void
@@ -275,25 +283,32 @@ private:
275 283
276class Image final { 284class Image final {
277public: 285public:
278 constexpr explicit Image(u64 offset, std::size_t index, Tegra::Shader::ImageType type) 286 constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
279 : offset{offset}, index{index}, type{type}, is_bindless{false} {} 287 std::optional<Tegra::Shader::ImageAtomicSize> size)
288 : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {}
280 289
281 constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, 290 constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
282 Tegra::Shader::ImageType type) 291 Tegra::Shader::ImageType type,
292 std::optional<Tegra::Shader::ImageAtomicSize> size)
283 : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, 293 : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
284 is_bindless{true} {} 294 is_bindless{true}, size{size} {}
285 295
286 constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, 296 constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
287 bool is_bindless, bool is_written, bool is_read) 297 bool is_bindless, bool is_written, bool is_read,
298 std::optional<Tegra::Shader::ImageAtomicSize> size)
288 : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, 299 : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
289 is_written{is_written}, is_read{is_read} {} 300 is_written{is_written}, is_read{is_read}, size{size} {}
301
302 void MarkWrite() {
303 is_written = true;
304 }
290 305
291 void MarkRead() { 306 void MarkRead() {
292 is_read = true; 307 is_read = true;
293 } 308 }
294 309
295 void MarkWrite() { 310 void SetSize(Tegra::Shader::ImageAtomicSize size_) {
296 is_written = true; 311 size = size_;
297 } 312 }
298 313
299 constexpr std::size_t GetOffset() const { 314 constexpr std::size_t GetOffset() const {
@@ -312,25 +327,39 @@ public:
312 return is_bindless; 327 return is_bindless;
313 } 328 }
314 329
315 constexpr bool IsRead() const {
316 return is_read;
317 }
318
319 constexpr bool IsWritten() const { 330 constexpr bool IsWritten() const {
320 return is_written; 331 return is_written;
321 } 332 }
322 333
334 constexpr bool IsRead() const {
335 return is_read;
336 }
337
323 constexpr std::pair<u32, u32> GetBindlessCBuf() const { 338 constexpr std::pair<u32, u32> GetBindlessCBuf() const {
324 return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; 339 return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
325 } 340 }
326 341
342 constexpr bool IsSizeKnown() const {
343 return size.has_value();
344 }
345
346 constexpr Tegra::Shader::ImageAtomicSize GetSize() const {
347 return size.value();
348 }
349
350 constexpr bool operator<(const Image& rhs) const {
351 return std::tie(offset, index, type, size, is_bindless) <
352 std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless);
353 }
354
327private: 355private:
328 u64 offset{}; 356 u64 offset{};
329 std::size_t index{}; 357 std::size_t index{};
330 Tegra::Shader::ImageType type{}; 358 Tegra::Shader::ImageType type{};
331 bool is_bindless{}; 359 bool is_bindless{};
332 bool is_read{};
333 bool is_written{}; 360 bool is_written{};
361 bool is_read{};
362 std::optional<Tegra::Shader::ImageAtomicSize> size{};
334}; 363};
335 364
336struct GlobalMemoryBase { 365struct GlobalMemoryBase {
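The bindless Image constructor above packs the constant buffer index into the upper 32 bits of the key and the byte offset into the lower 32 bits, which is exactly what GetBindlessCBuf unpacks. A standalone sanity check of that layout (helper names are illustrative, not part of the codebase):

#include <cstdint>

constexpr std::uint64_t PackBindlessKey(std::uint32_t cbuf_index, std::uint32_t cbuf_offset) {
    return (static_cast<std::uint64_t>(cbuf_index) << 32) | cbuf_offset;
}

constexpr std::uint32_t BindlessIndex(std::uint64_t key) {
    return static_cast<std::uint32_t>(key >> 32);
}

constexpr std::uint32_t BindlessOffset(std::uint64_t key) {
    return static_cast<std::uint32_t>(key);
}

static_assert(BindlessIndex(PackBindlessKey(8, 0x140)) == 8);
static_assert(BindlessOffset(PackBindlessKey(8, 0x140)) == 0x140);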
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0f891eace..62816bd56 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -272,10 +272,16 @@ private:
272 bool is_shadow); 272 bool is_shadow);
273 273
274 /// Accesses an image. 274 /// Accesses an image.
275 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); 275 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
276 std::optional<Tegra::Shader::ImageAtomicSize> size = {});
276 277
277 /// Accesses a bindless image sampler. 278 /// Accesses a bindless image sampler.
278 Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); 279 Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
280 std::optional<Tegra::Shader::ImageAtomicSize> size = {});
281
282 /// Tries to access an existing image, updating its state as needed
283 Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
284 std::optional<Tegra::Shader::ImageAtomicSize> size);
279 285
280 /// Extracts a sequence of bits from a node 286 /// Extracts a sequence of bits from a node
281 Node BitfieldExtract(Node value, u32 offset, u32 bits); 287 Node BitfieldExtract(Node value, u32 offset, u32 bits);
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 4ceb219be..53d0142cb 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -513,6 +513,26 @@ bool IsPixelFormatASTC(PixelFormat format) {
513 } 513 }
514} 514}
515 515
516bool IsPixelFormatSRGB(PixelFormat format) {
517 switch (format) {
518 case PixelFormat::RGBA8_SRGB:
519 case PixelFormat::BGRA8_SRGB:
520 case PixelFormat::DXT1_SRGB:
521 case PixelFormat::DXT23_SRGB:
522 case PixelFormat::DXT45_SRGB:
523 case PixelFormat::BC7U_SRGB:
524 case PixelFormat::ASTC_2D_4X4_SRGB:
525 case PixelFormat::ASTC_2D_8X8_SRGB:
526 case PixelFormat::ASTC_2D_8X5_SRGB:
527 case PixelFormat::ASTC_2D_5X4_SRGB:
528 case PixelFormat::ASTC_2D_5X5_SRGB:
529 case PixelFormat::ASTC_2D_10X8_SRGB:
530 return true;
531 default:
532 return false;
533 }
534}
535
516std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { 536std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
517 return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; 537 return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
518} 538}
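Hedged usage sketch for the new IsPixelFormatSRGB query: one plausible consumer is an OpenGL backend toggling GL_FRAMEBUFFER_SRGB for the bound color buffer; ApplyFramebufferSrgb and its call site are hypothetical and not part of this change:

#include <glad/glad.h>

#include "video_core/surface.h"

// Illustrative only: enable gamma-correct framebuffer writes when the attached
// color buffer uses one of the sRGB formats enumerated above.
void ApplyFramebufferSrgb(VideoCore::Surface::PixelFormat color_format) {
    if (VideoCore::Surface::IsPixelFormatSRGB(color_format)) {
        glEnable(GL_FRAMEBUFFER_SRGB);
    } else {
        glDisable(GL_FRAMEBUFFER_SRGB);
    }
}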
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 83f31c12c..19268b7cd 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -547,6 +547,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format);
547 547
548bool IsPixelFormatASTC(PixelFormat format); 548bool IsPixelFormatASTC(PixelFormat format);
549 549
550bool IsPixelFormatSRGB(PixelFormat format);
551
550std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); 552std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
551 553
552/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN 554/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN