diff options
20 files changed, 759 insertions, 194 deletions
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers | |||
| Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895 | Subproject fd568d51ed3d9bc6132e1639d7492453a08fe1b | ||
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index c01ee3eda..a7c55e116 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp | |||
| @@ -31,6 +31,9 @@ | |||
| 31 | 31 | ||
| 32 | namespace Service::Account { | 32 | namespace Service::Account { |
| 33 | 33 | ||
| 34 | constexpr ResultCode ERR_INVALID_BUFFER_SIZE{ErrorModule::Account, 30}; | ||
| 35 | constexpr ResultCode ERR_FAILED_SAVE_DATA{ErrorModule::Account, 100}; | ||
| 36 | |||
| 34 | static std::string GetImagePath(Common::UUID uuid) { | 37 | static std::string GetImagePath(Common::UUID uuid) { |
| 35 | return FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + | 38 | return FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + |
| 36 | "/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg"; | 39 | "/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg"; |
| @@ -41,20 +44,31 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) { | |||
| 41 | return static_cast<u32>(std::min(size, max_jpeg_image_size)); | 44 | return static_cast<u32>(std::min(size, max_jpeg_image_size)); |
| 42 | } | 45 | } |
| 43 | 46 | ||
| 44 | class IProfile final : public ServiceFramework<IProfile> { | 47 | class IProfileCommon : public ServiceFramework<IProfileCommon> { |
| 45 | public: | 48 | public: |
| 46 | explicit IProfile(Common::UUID user_id, ProfileManager& profile_manager) | 49 | explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id, |
| 47 | : ServiceFramework("IProfile"), profile_manager(profile_manager), user_id(user_id) { | 50 | ProfileManager& profile_manager) |
| 51 | : ServiceFramework(name), profile_manager(profile_manager), user_id(user_id) { | ||
| 48 | static const FunctionInfo functions[] = { | 52 | static const FunctionInfo functions[] = { |
| 49 | {0, &IProfile::Get, "Get"}, | 53 | {0, &IProfileCommon::Get, "Get"}, |
| 50 | {1, &IProfile::GetBase, "GetBase"}, | 54 | {1, &IProfileCommon::GetBase, "GetBase"}, |
| 51 | {10, &IProfile::GetImageSize, "GetImageSize"}, | 55 | {10, &IProfileCommon::GetImageSize, "GetImageSize"}, |
| 52 | {11, &IProfile::LoadImage, "LoadImage"}, | 56 | {11, &IProfileCommon::LoadImage, "LoadImage"}, |
| 53 | }; | 57 | }; |
| 58 | |||
| 54 | RegisterHandlers(functions); | 59 | RegisterHandlers(functions); |
| 60 | |||
| 61 | if (editor_commands) { | ||
| 62 | static const FunctionInfo editor_functions[] = { | ||
| 63 | {100, &IProfileCommon::Store, "Store"}, | ||
| 64 | {101, &IProfileCommon::StoreWithImage, "StoreWithImage"}, | ||
| 65 | }; | ||
| 66 | |||
| 67 | RegisterHandlers(editor_functions); | ||
| 68 | } | ||
| 55 | } | 69 | } |
| 56 | 70 | ||
| 57 | private: | 71 | protected: |
| 58 | void Get(Kernel::HLERequestContext& ctx) { | 72 | void Get(Kernel::HLERequestContext& ctx) { |
| 59 | LOG_INFO(Service_ACC, "called user_id={}", user_id.Format()); | 73 | LOG_INFO(Service_ACC, "called user_id={}", user_id.Format()); |
| 60 | ProfileBase profile_base{}; | 74 | ProfileBase profile_base{}; |
| @@ -127,10 +141,91 @@ private: | |||
| 127 | } | 141 | } |
| 128 | } | 142 | } |
| 129 | 143 | ||
| 130 | const ProfileManager& profile_manager; | 144 | void Store(Kernel::HLERequestContext& ctx) { |
| 145 | IPC::RequestParser rp{ctx}; | ||
| 146 | const auto base = rp.PopRaw<ProfileBase>(); | ||
| 147 | |||
| 148 | const auto user_data = ctx.ReadBuffer(); | ||
| 149 | |||
| 150 | LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}", | ||
| 151 | Common::StringFromFixedZeroTerminatedBuffer( | ||
| 152 | reinterpret_cast<const char*>(base.username.data()), base.username.size()), | ||
| 153 | base.timestamp, base.user_uuid.Format()); | ||
| 154 | |||
| 155 | if (user_data.size() < sizeof(ProfileData)) { | ||
| 156 | LOG_ERROR(Service_ACC, "ProfileData buffer too small!"); | ||
| 157 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 158 | rb.Push(ERR_INVALID_BUFFER_SIZE); | ||
| 159 | return; | ||
| 160 | } | ||
| 161 | |||
| 162 | ProfileData data; | ||
| 163 | std::memcpy(&data, user_data.data(), sizeof(ProfileData)); | ||
| 164 | |||
| 165 | if (!profile_manager.SetProfileBaseAndData(user_id, base, data)) { | ||
| 166 | LOG_ERROR(Service_ACC, "Failed to update profile data and base!"); | ||
| 167 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 168 | rb.Push(ERR_FAILED_SAVE_DATA); | ||
| 169 | return; | ||
| 170 | } | ||
| 171 | |||
| 172 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 173 | rb.Push(RESULT_SUCCESS); | ||
| 174 | } | ||
| 175 | |||
| 176 | void StoreWithImage(Kernel::HLERequestContext& ctx) { | ||
| 177 | IPC::RequestParser rp{ctx}; | ||
| 178 | const auto base = rp.PopRaw<ProfileBase>(); | ||
| 179 | |||
| 180 | const auto user_data = ctx.ReadBuffer(); | ||
| 181 | const auto image_data = ctx.ReadBuffer(1); | ||
| 182 | |||
| 183 | LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}", | ||
| 184 | Common::StringFromFixedZeroTerminatedBuffer( | ||
| 185 | reinterpret_cast<const char*>(base.username.data()), base.username.size()), | ||
| 186 | base.timestamp, base.user_uuid.Format()); | ||
| 187 | |||
| 188 | if (user_data.size() < sizeof(ProfileData)) { | ||
| 189 | LOG_ERROR(Service_ACC, "ProfileData buffer too small!"); | ||
| 190 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 191 | rb.Push(ERR_INVALID_BUFFER_SIZE); | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | |||
| 195 | ProfileData data; | ||
| 196 | std::memcpy(&data, user_data.data(), sizeof(ProfileData)); | ||
| 197 | |||
| 198 | FileUtil::IOFile image(GetImagePath(user_id), "wb"); | ||
| 199 | |||
| 200 | if (!image.IsOpen() || !image.Resize(image_data.size()) || | ||
| 201 | image.WriteBytes(image_data.data(), image_data.size()) != image_data.size() || | ||
| 202 | !profile_manager.SetProfileBaseAndData(user_id, base, data)) { | ||
| 203 | LOG_ERROR(Service_ACC, "Failed to update profile data, base, and image!"); | ||
| 204 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 205 | rb.Push(ERR_FAILED_SAVE_DATA); | ||
| 206 | return; | ||
| 207 | } | ||
| 208 | |||
| 209 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 210 | rb.Push(RESULT_SUCCESS); | ||
| 211 | } | ||
| 212 | |||
| 213 | ProfileManager& profile_manager; | ||
| 131 | Common::UUID user_id; ///< The user id this profile refers to. | 214 | Common::UUID user_id; ///< The user id this profile refers to. |
| 132 | }; | 215 | }; |
| 133 | 216 | ||
| 217 | class IProfile final : public IProfileCommon { | ||
| 218 | public: | ||
| 219 | IProfile(Common::UUID user_id, ProfileManager& profile_manager) | ||
| 220 | : IProfileCommon("IProfile", false, user_id, profile_manager) {} | ||
| 221 | }; | ||
| 222 | |||
| 223 | class IProfileEditor final : public IProfileCommon { | ||
| 224 | public: | ||
| 225 | IProfileEditor(Common::UUID user_id, ProfileManager& profile_manager) | ||
| 226 | : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {} | ||
| 227 | }; | ||
| 228 | |||
| 134 | class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { | 229 | class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { |
| 135 | public: | 230 | public: |
| 136 | IManagerForApplication() : ServiceFramework("IManagerForApplication") { | 231 | IManagerForApplication() : ServiceFramework("IManagerForApplication") { |
| @@ -322,6 +417,17 @@ void Module::Interface::IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx | |||
| 322 | rb.Push(is_locked); | 417 | rb.Push(is_locked); |
| 323 | } | 418 | } |
| 324 | 419 | ||
| 420 | void Module::Interface::GetProfileEditor(Kernel::HLERequestContext& ctx) { | ||
| 421 | IPC::RequestParser rp{ctx}; | ||
| 422 | Common::UUID user_id = rp.PopRaw<Common::UUID>(); | ||
| 423 | |||
| 424 | LOG_DEBUG(Service_ACC, "called, user_id={}", user_id.Format()); | ||
| 425 | |||
| 426 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | ||
| 427 | rb.Push(RESULT_SUCCESS); | ||
| 428 | rb.PushIpcInterface<IProfileEditor>(user_id, *profile_manager); | ||
| 429 | } | ||
| 430 | |||
| 325 | void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) { | 431 | void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) { |
| 326 | LOG_DEBUG(Service_ACC, "called"); | 432 | LOG_DEBUG(Service_ACC, "called"); |
| 327 | // A u8 is passed into this function which we can safely ignore. It's to determine if we have | 433 | // A u8 is passed into this function which we can safely ignore. It's to determine if we have |
diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index f651773b7..7a7dc9ec6 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h | |||
| @@ -32,6 +32,7 @@ public: | |||
| 32 | void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); | 32 | void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); |
| 33 | void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); | 33 | void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); |
| 34 | void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); | 34 | void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); |
| 35 | void GetProfileEditor(Kernel::HLERequestContext& ctx); | ||
| 35 | 36 | ||
| 36 | private: | 37 | private: |
| 37 | ResultCode InitializeApplicationInfoBase(u64 process_id); | 38 | ResultCode InitializeApplicationInfoBase(u64 process_id); |
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index 1b7ec3ed0..0d1663657 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp | |||
| @@ -41,7 +41,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p | |||
| 41 | {202, nullptr, "CancelUserRegistration"}, | 41 | {202, nullptr, "CancelUserRegistration"}, |
| 42 | {203, nullptr, "DeleteUser"}, | 42 | {203, nullptr, "DeleteUser"}, |
| 43 | {204, nullptr, "SetUserPosition"}, | 43 | {204, nullptr, "SetUserPosition"}, |
| 44 | {205, nullptr, "GetProfileEditor"}, | 44 | {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"}, |
| 45 | {206, nullptr, "CompleteUserRegistrationForcibly"}, | 45 | {206, nullptr, "CompleteUserRegistrationForcibly"}, |
| 46 | {210, nullptr, "CreateFloatingRegistrationRequest"}, | 46 | {210, nullptr, "CreateFloatingRegistrationRequest"}, |
| 47 | {230, nullptr, "AuthenticateServiceAsync"}, | 47 | {230, nullptr, "AuthenticateServiceAsync"}, |
diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp index 49aa5908b..8f9986326 100644 --- a/src/core/hle/service/acc/profile_manager.cpp +++ b/src/core/hle/service/acc/profile_manager.cpp | |||
| @@ -305,6 +305,17 @@ bool ProfileManager::SetProfileBase(UUID uuid, const ProfileBase& profile_new) { | |||
| 305 | return true; | 305 | return true; |
| 306 | } | 306 | } |
| 307 | 307 | ||
| 308 | bool ProfileManager::SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new, | ||
| 309 | const ProfileData& data_new) { | ||
| 310 | const auto index = GetUserIndex(uuid); | ||
| 311 | if (index.has_value() && SetProfileBase(uuid, profile_new)) { | ||
| 312 | profiles[*index].data = data_new; | ||
| 313 | return true; | ||
| 314 | } | ||
| 315 | |||
| 316 | return false; | ||
| 317 | } | ||
| 318 | |||
| 308 | void ProfileManager::ParseUserSaveFile() { | 319 | void ProfileManager::ParseUserSaveFile() { |
| 309 | FileUtil::IOFile save(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + | 320 | FileUtil::IOFile save(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + |
| 310 | ACC_SAVE_AVATORS_BASE_PATH + "profiles.dat", | 321 | ACC_SAVE_AVATORS_BASE_PATH + "profiles.dat", |
diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h index fd7abb541..5a6d28925 100644 --- a/src/core/hle/service/acc/profile_manager.h +++ b/src/core/hle/service/acc/profile_manager.h | |||
| @@ -91,6 +91,8 @@ public: | |||
| 91 | 91 | ||
| 92 | bool RemoveUser(Common::UUID uuid); | 92 | bool RemoveUser(Common::UUID uuid); |
| 93 | bool SetProfileBase(Common::UUID uuid, const ProfileBase& profile_new); | 93 | bool SetProfileBase(Common::UUID uuid, const ProfileBase& profile_new); |
| 94 | bool SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new, | ||
| 95 | const ProfileData& data_new); | ||
| 94 | 96 | ||
| 95 | private: | 97 | private: |
| 96 | void ParseUserSaveFile(); | 98 | void ParseUserSaveFile(); |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 965c4c06b..c7a3c85a0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -332,6 +332,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 332 | ProcessMacroBind(method_call.argument); | 332 | ProcessMacroBind(method_call.argument); |
| 333 | break; | 333 | break; |
| 334 | } | 334 | } |
| 335 | case MAXWELL3D_REG_INDEX(firmware[4]): { | ||
| 336 | ProcessFirmwareCall4(); | ||
| 337 | break; | ||
| 338 | } | ||
| 335 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): | 339 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): |
| 336 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): | 340 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): |
| 337 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): | 341 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): |
| @@ -422,6 +426,14 @@ void Maxwell3D::ProcessMacroBind(u32 data) { | |||
| 422 | macro_positions[regs.macros.entry++] = data; | 426 | macro_positions[regs.macros.entry++] = data; |
| 423 | } | 427 | } |
| 424 | 428 | ||
| 429 | void Maxwell3D::ProcessFirmwareCall4() { | ||
| 430 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 431 | |||
| 432 | // Firmware call 4 is a blob that changes some registers depending on its parameters. | ||
| 433 | // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1. | ||
| 434 | regs.reg_array[0xd00] = 1; | ||
| 435 | } | ||
| 436 | |||
| 425 | void Maxwell3D::ProcessQueryGet() { | 437 | void Maxwell3D::ProcessQueryGet() { |
| 426 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; | 438 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 427 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application | 439 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f67a5389f..e5ec90717 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1089,7 +1089,9 @@ public: | |||
| 1089 | INSERT_PADDING_WORDS(14); | 1089 | INSERT_PADDING_WORDS(14); |
| 1090 | } shader_config[MaxShaderProgram]; | 1090 | } shader_config[MaxShaderProgram]; |
| 1091 | 1091 | ||
| 1092 | INSERT_PADDING_WORDS(0x80); | 1092 | INSERT_PADDING_WORDS(0x60); |
| 1093 | |||
| 1094 | u32 firmware[0x20]; | ||
| 1093 | 1095 | ||
| 1094 | struct { | 1096 | struct { |
| 1095 | u32 cb_size; | 1097 | u32 cb_size; |
| @@ -1319,6 +1321,9 @@ private: | |||
| 1319 | /// Handles writes to the macro bind register. | 1321 | /// Handles writes to the macro bind register. |
| 1320 | void ProcessMacroBind(u32 data); | 1322 | void ProcessMacroBind(u32 data); |
| 1321 | 1323 | ||
| 1324 | /// Handles firmware blob 4 | ||
| 1325 | void ProcessFirmwareCall4(); | ||
| 1326 | |||
| 1322 | /// Handles a write to the CLEAR_BUFFERS register. | 1327 | /// Handles a write to the CLEAR_BUFFERS register. |
| 1323 | void ProcessClearBuffers(); | 1328 | void ProcessClearBuffers(); |
| 1324 | 1329 | ||
| @@ -1431,6 +1436,7 @@ ASSERT_REG_POSITION(vertex_array[0], 0x700); | |||
| 1431 | ASSERT_REG_POSITION(independent_blend, 0x780); | 1436 | ASSERT_REG_POSITION(independent_blend, 0x780); |
| 1432 | ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0); | 1437 | ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0); |
| 1433 | ASSERT_REG_POSITION(shader_config[0], 0x800); | 1438 | ASSERT_REG_POSITION(shader_config[0], 0x800); |
| 1439 | ASSERT_REG_POSITION(firmware, 0x8C0); | ||
| 1434 | ASSERT_REG_POSITION(const_buffer, 0x8E0); | 1440 | ASSERT_REG_POSITION(const_buffer, 0x8E0); |
| 1435 | ASSERT_REG_POSITION(cb_bind[0], 0x904); | 1441 | ASSERT_REG_POSITION(cb_bind[0], 0x904); |
| 1436 | ASSERT_REG_POSITION(tex_cb_index, 0x982); | 1442 | ASSERT_REG_POSITION(tex_cb_index, 0x982); |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index bd8c1ada0..052e6d24e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -544,6 +544,28 @@ enum class VoteOperation : u64 { | |||
| 544 | Eq = 2, // allThreadsEqualNV | 544 | Eq = 2, // allThreadsEqualNV |
| 545 | }; | 545 | }; |
| 546 | 546 | ||
/// Values of the 3-bit `size` field of the SUATOM_D instruction encoding
/// (see Instruction::suatom_d). Note that the value 4 has no enumerator here.
enum class ImageAtomicSize : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
    F32 = 3,
    S64 = 5,
    SD32 = 6,
    SD64 = 7,
};
| 556 | |||
/// Values of the 4-bit `operation` field of the SUATOM_D instruction encoding
/// (see Instruction::suatom_d).
enum class ImageAtomicOperation : u64 {
    Add = 0,
    Min = 1,
    Max = 2,
    Inc = 3,
    Dec = 4,
    And = 5,
    Or = 6,
    Xor = 7,
    Exch = 8,
};
| 568 | |||
| 547 | union Instruction { | 569 | union Instruction { |
| 548 | Instruction& operator=(const Instruction& instr) { | 570 | Instruction& operator=(const Instruction& instr) { |
| 549 | value = instr.value; | 571 | value = instr.value; |
| @@ -1392,6 +1414,14 @@ union Instruction { | |||
| 1392 | } sust; | 1414 | } sust; |
| 1393 | 1415 | ||
| 1394 | union { | 1416 | union { |
| 1417 | BitField<28, 1, u64> is_ba; | ||
| 1418 | BitField<51, 3, ImageAtomicSize> size; | ||
| 1419 | BitField<33, 3, ImageType> image_type; | ||
| 1420 | BitField<29, 4, ImageAtomicOperation> operation; | ||
| 1421 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; | ||
| 1422 | } suatom_d; | ||
| 1423 | |||
| 1424 | union { | ||
| 1395 | BitField<20, 24, u64> target; | 1425 | BitField<20, 24, u64> target; |
| 1396 | BitField<5, 1, u64> constant_buffer; | 1426 | BitField<5, 1, u64> constant_buffer; |
| 1397 | 1427 | ||
| @@ -1543,6 +1573,7 @@ public: | |||
| 1543 | TMML_B, // Texture Mip Map Level | 1573 | TMML_B, // Texture Mip Map Level |
| 1544 | TMML, // Texture Mip Map Level | 1574 | TMML, // Texture Mip Map Level |
| 1545 | SUST, // Surface Store | 1575 | SUST, // Surface Store |
| 1576 | SUATOM, // Surface Atomic Operation | ||
| 1546 | EXIT, | 1577 | EXIT, |
| 1547 | NOP, | 1578 | NOP, |
| 1548 | IPA, | 1579 | IPA, |
| @@ -1826,6 +1857,7 @@ private: | |||
| 1826 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1857 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1827 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1858 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1828 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), | 1859 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), |
| 1860 | INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), | ||
| 1829 | INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), | 1861 | INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), |
| 1830 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1862 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1831 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1863 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 4e1cb98db..62afc0d11 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -131,9 +131,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | |||
| 131 | 131 | ||
| 132 | // An instruction with the Exit flag will not actually | 132 | // An instruction with the Exit flag will not actually |
| 133 | // cause an exit if it's executed inside a delay slot. | 133 | // cause an exit if it's executed inside a delay slot. |
| 134 | // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further | 134 | if (opcode.is_exit && !is_delay_slot) { |
| 135 | // testing on the MME code. | ||
| 136 | if (opcode.is_exit) { | ||
| 137 | // Exit has a delay slot, execute the next instruction | 135 | // Exit has a delay slot, execute the next instruction |
| 138 | Step(offset, true); | 136 | Step(offset, true); |
| 139 | return false; | 137 | return false; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6edb2ca38..137b23740 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -706,7 +706,7 @@ private: | |||
| 706 | void DeclareImages() { | 706 | void DeclareImages() { |
| 707 | const auto& images{ir.GetImages()}; | 707 | const auto& images{ir.GetImages()}; |
| 708 | for (const auto& [offset, image] : images) { | 708 | for (const auto& [offset, image] : images) { |
| 709 | const std::string image_type = [&]() { | 709 | const char* image_type = [&] { |
| 710 | switch (image.GetType()) { | 710 | switch (image.GetType()) { |
| 711 | case Tegra::Shader::ImageType::Texture1D: | 711 | case Tegra::Shader::ImageType::Texture1D: |
| 712 | return "image1D"; | 712 | return "image1D"; |
| @@ -725,6 +725,23 @@ private: | |||
| 725 | return "image1D"; | 725 | return "image1D"; |
| 726 | } | 726 | } |
| 727 | }(); | 727 | }(); |
| 728 | |||
| 729 | const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> { | ||
| 730 | if (!image.IsSizeKnown()) { | ||
| 731 | return {"", ""}; | ||
| 732 | } | ||
| 733 | switch (image.GetSize()) { | ||
| 734 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 735 | return {"u", "r32ui, "}; | ||
| 736 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 737 | return {"i", "r32i, "}; | ||
| 738 | default: | ||
| 739 | UNIMPLEMENTED_MSG("Unimplemented atomic size={}", | ||
| 740 | static_cast<u32>(image.GetSize())); | ||
| 741 | return {"", ""}; | ||
| 742 | } | ||
| 743 | }(); | ||
| 744 | |||
| 728 | std::string qualifier = "coherent volatile"; | 745 | std::string qualifier = "coherent volatile"; |
| 729 | if (image.IsRead() && !image.IsWritten()) { | 746 | if (image.IsRead() && !image.IsWritten()) { |
| 730 | qualifier += " readonly"; | 747 | qualifier += " readonly"; |
| @@ -1180,6 +1197,74 @@ private: | |||
| 1180 | return expr; | 1197 | return expr; |
| 1181 | } | 1198 | } |
| 1182 | 1199 | ||
| 1200 | std::string BuildIntegerCoordinates(Operation operation) { | ||
| 1201 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1202 | const std::size_t coords_count{operation.GetOperandsCount()}; | ||
| 1203 | std::string expr = constructors.at(coords_count - 1); | ||
| 1204 | for (std::size_t i = 0; i < coords_count; ++i) { | ||
| 1205 | expr += VisitOperand(operation, i).AsInt(); | ||
| 1206 | if (i + 1 < coords_count) { | ||
| 1207 | expr += ", "; | ||
| 1208 | } | ||
| 1209 | } | ||
| 1210 | expr += ')'; | ||
| 1211 | return expr; | ||
| 1212 | } | ||
| 1213 | |||
| 1214 | std::string BuildImageValues(Operation operation) { | ||
| 1215 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1216 | const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> { | ||
| 1217 | constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"}; | ||
| 1218 | if (!meta.image.IsSizeKnown()) { | ||
| 1219 | return {float_constructors, Type::Float}; | ||
| 1220 | } | ||
| 1221 | switch (meta.image.GetSize()) { | ||
| 1222 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 1223 | return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint}; | ||
| 1224 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 1225 | return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint}; | ||
| 1226 | default: | ||
| 1227 | UNIMPLEMENTED_MSG("Unimplemented image size={}", | ||
| 1228 | static_cast<u32>(meta.image.GetSize())); | ||
| 1229 | return {float_constructors, Type::Float}; | ||
| 1230 | } | ||
| 1231 | }(); | ||
| 1232 | |||
| 1233 | const std::size_t values_count{meta.values.size()}; | ||
| 1234 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); | ||
| 1235 | for (std::size_t i = 0; i < values_count; ++i) { | ||
| 1236 | expr += Visit(meta.values.at(i)).As(type); | ||
| 1237 | if (i + 1 < values_count) { | ||
| 1238 | expr += ", "; | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | expr += ')'; | ||
| 1242 | return expr; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | Expression AtomicImage(Operation operation, const char* opname) { | ||
| 1246 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1247 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1248 | ASSERT(meta.values.size() == 1); | ||
| 1249 | ASSERT(meta.image.IsSizeKnown()); | ||
| 1250 | |||
| 1251 | const auto type = [&]() { | ||
| 1252 | switch (const auto size = meta.image.GetSize()) { | ||
| 1253 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 1254 | return Type::Uint; | ||
| 1255 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 1256 | return Type::Int; | ||
| 1257 | default: | ||
| 1258 | UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size)); | ||
| 1259 | return Type::Uint; | ||
| 1260 | } | ||
| 1261 | }(); | ||
| 1262 | |||
| 1263 | return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image), | ||
| 1264 | BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)), | ||
| 1265 | type}; | ||
| 1266 | } | ||
| 1267 | |||
| 1183 | Expression Assign(Operation operation) { | 1268 | Expression Assign(Operation operation) { |
| 1184 | const Node& dest = operation[0]; | 1269 | const Node& dest = operation[0]; |
| 1185 | const Node& src = operation[1]; | 1270 | const Node& src = operation[1]; |
| @@ -1694,36 +1779,37 @@ private: | |||
| 1694 | } | 1779 | } |
| 1695 | 1780 | ||
| 1696 | Expression ImageStore(Operation operation) { | 1781 | Expression ImageStore(Operation operation) { |
| 1697 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1698 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1782 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1783 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), | ||
| 1784 | BuildIntegerCoordinates(operation), BuildImageValues(operation)); | ||
| 1785 | return {}; | ||
| 1786 | } | ||
| 1699 | 1787 | ||
| 1700 | std::string expr = "imageStore("; | 1788 | Expression AtomicImageAdd(Operation operation) { |
| 1701 | expr += GetImage(meta.image); | 1789 | return AtomicImage(operation, "imageAtomicAdd"); |
| 1702 | expr += ", "; | 1790 | } |
| 1703 | 1791 | ||
| 1704 | const std::size_t coords_count{operation.GetOperandsCount()}; | 1792 | Expression AtomicImageMin(Operation operation) { |
| 1705 | expr += constructors.at(coords_count - 1); | 1793 | return AtomicImage(operation, "imageAtomicMin"); |
| 1706 | for (std::size_t i = 0; i < coords_count; ++i) { | 1794 | } |
| 1707 | expr += VisitOperand(operation, i).AsInt(); | ||
| 1708 | if (i + 1 < coords_count) { | ||
| 1709 | expr += ", "; | ||
| 1710 | } | ||
| 1711 | } | ||
| 1712 | expr += "), "; | ||
| 1713 | 1795 | ||
| 1714 | const std::size_t values_count{meta.values.size()}; | 1796 | Expression AtomicImageMax(Operation operation) { |
| 1715 | UNIMPLEMENTED_IF(values_count != 4); | 1797 | return AtomicImage(operation, "imageAtomicMax"); |
| 1716 | expr += "vec4("; | 1798 | } |
| 1717 | for (std::size_t i = 0; i < values_count; ++i) { | 1799 | Expression AtomicImageAnd(Operation operation) { |
| 1718 | expr += Visit(meta.values.at(i)).AsFloat(); | 1800 | return AtomicImage(operation, "imageAtomicAnd"); |
| 1719 | if (i + 1 < values_count) { | 1801 | } |
| 1720 | expr += ", "; | ||
| 1721 | } | ||
| 1722 | } | ||
| 1723 | expr += "));"; | ||
| 1724 | 1802 | ||
| 1725 | code.AddLine(expr); | 1803 | Expression AtomicImageOr(Operation operation) { |
| 1726 | return {}; | 1804 | return AtomicImage(operation, "imageAtomicOr"); |
| 1805 | } | ||
| 1806 | |||
| 1807 | Expression AtomicImageXor(Operation operation) { | ||
| 1808 | return AtomicImage(operation, "imageAtomicXor"); | ||
| 1809 | } | ||
| 1810 | |||
| 1811 | Expression AtomicImageExchange(Operation operation) { | ||
| 1812 | return AtomicImage(operation, "imageAtomicExchange"); | ||
| 1727 | } | 1813 | } |
| 1728 | 1814 | ||
| 1729 | Expression Branch(Operation operation) { | 1815 | Expression Branch(Operation operation) { |
| @@ -2019,6 +2105,13 @@ private: | |||
| 2019 | &GLSLDecompiler::TexelFetch, | 2105 | &GLSLDecompiler::TexelFetch, |
| 2020 | 2106 | ||
| 2021 | &GLSLDecompiler::ImageStore, | 2107 | &GLSLDecompiler::ImageStore, |
| 2108 | &GLSLDecompiler::AtomicImageAdd, | ||
| 2109 | &GLSLDecompiler::AtomicImageMin, | ||
| 2110 | &GLSLDecompiler::AtomicImageMax, | ||
| 2111 | &GLSLDecompiler::AtomicImageAnd, | ||
| 2112 | &GLSLDecompiler::AtomicImageOr, | ||
| 2113 | &GLSLDecompiler::AtomicImageXor, | ||
| 2114 | &GLSLDecompiler::AtomicImageExchange, | ||
| 2022 | 2115 | ||
| 2023 | &GLSLDecompiler::Branch, | 2116 | &GLSLDecompiler::Branch, |
| 2024 | &GLSLDecompiler::BranchIndirect, | 2117 | &GLSLDecompiler::BranchIndirect, |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 5450feedf..f141c4e3b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -341,16 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 341 | u64 index{}; | 341 | u64 index{}; |
| 342 | u32 type{}; | 342 | u32 type{}; |
| 343 | u8 is_bindless{}; | 343 | u8 is_bindless{}; |
| 344 | u8 is_read{}; | ||
| 345 | u8 is_written{}; | 344 | u8 is_written{}; |
| 345 | u8 is_read{}; | ||
| 346 | u8 is_size_known{}; | ||
| 347 | u32 size{}; | ||
| 346 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | 348 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || |
| 347 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || | 349 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || |
| 348 | !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { | 350 | !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || |
| 351 | !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) { | ||
| 349 | return {}; | 352 | return {}; |
| 350 | } | 353 | } |
| 351 | entry.entries.images.emplace_back(static_cast<u64>(offset), static_cast<std::size_t>(index), | 354 | entry.entries.images.emplace_back( |
| 352 | static_cast<Tegra::Shader::ImageType>(type), | 355 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), |
| 353 | is_bindless != 0, is_written != 0, is_read != 0); | 356 | static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, |
| 357 | is_read != 0, | ||
| 358 | is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size)) | ||
| 359 | : std::nullopt); | ||
| 354 | } | 360 | } |
| 355 | 361 | ||
| 356 | u32 global_memory_count{}; | 362 | u32 global_memory_count{}; |
| @@ -429,12 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 429 | return false; | 435 | return false; |
| 430 | } | 436 | } |
| 431 | for (const auto& image : entries.images) { | 437 | for (const auto& image : entries.images) { |
| 438 | const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U; | ||
| 432 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || | 439 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || |
| 433 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || | 440 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || |
| 434 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || | 441 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || |
| 435 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || | 442 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || |
| 443 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || | ||
| 436 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || | 444 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || |
| 437 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0))) { | 445 | !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) { |
| 438 | return false; | 446 | return false; |
| 439 | } | 447 | } |
| 440 | } | 448 | } |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 3b966ddc3..897cbb4e8 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -2,9 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <map> | 5 | #include <bitset> |
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include <set> | 7 | #include <set> |
| 8 | #include <string_view> | ||
| 8 | #include <vector> | 9 | #include <vector> |
| 9 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 11 | #include "video_core/renderer_vulkan/declarations.h" |
| @@ -12,13 +13,32 @@ | |||
| 12 | 13 | ||
| 13 | namespace Vulkan { | 14 | namespace Vulkan { |
| 14 | 15 | ||
| 16 | namespace { | ||
| 17 | |||
| 18 | template <typename T> | ||
| 19 | void SetNext(void**& next, T& data) { | ||
| 20 | *next = &data; | ||
| 21 | next = &data.pNext; | ||
| 22 | } | ||
| 23 | |||
| 24 | template <typename T> | ||
| 25 | T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) { | ||
| 26 | vk::PhysicalDeviceFeatures2 features; | ||
| 27 | T extension_features; | ||
| 28 | features.pNext = &extension_features; | ||
| 29 | physical.getFeatures2(&features, dldi); | ||
| 30 | return extension_features; | ||
| 31 | } | ||
| 32 | |||
| 33 | } // Anonymous namespace | ||
| 34 | |||
| 15 | namespace Alternatives { | 35 | namespace Alternatives { |
| 16 | 36 | ||
| 17 | constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { | 37 | constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint, |
| 18 | vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; | 38 | vk::Format::eD16UnormS8Uint, vk::Format{}}; |
| 19 | constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { | 39 | constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint, |
| 20 | vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; | 40 | vk::Format::eD32SfloatS8Uint, vk::Format{}}; |
| 21 | constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; | 41 | constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}}; |
| 22 | 42 | ||
| 23 | } // namespace Alternatives | 43 | } // namespace Alternatives |
| 24 | 44 | ||
| @@ -58,16 +78,53 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy | |||
| 58 | VKDevice::~VKDevice() = default; | 78 | VKDevice::~VKDevice() = default; |
| 59 | 79 | ||
| 60 | bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { | 80 | bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { |
| 61 | vk::PhysicalDeviceFeatures device_features; | ||
| 62 | device_features.vertexPipelineStoresAndAtomics = true; | ||
| 63 | device_features.independentBlend = true; | ||
| 64 | device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; | ||
| 65 | |||
| 66 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 81 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 67 | const std::vector<const char*> extensions = LoadExtensions(dldi); | 82 | const std::vector extensions = LoadExtensions(dldi); |
| 68 | const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), | 83 | |
| 69 | 0, nullptr, static_cast<u32>(extensions.size()), | 84 | vk::PhysicalDeviceFeatures2 features2; |
| 70 | extensions.data(), &device_features); | 85 | void** next = &features2.pNext; |
| 86 | auto& features = features2.features; | ||
| 87 | features.vertexPipelineStoresAndAtomics = true; | ||
| 88 | features.independentBlend = true; | ||
| 89 | features.depthClamp = true; | ||
| 90 | features.samplerAnisotropy = true; | ||
| 91 | features.largePoints = true; | ||
| 92 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; | ||
| 93 | |||
| 94 | vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor; | ||
| 95 | vertex_divisor.vertexAttributeInstanceRateDivisor = true; | ||
| 96 | vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true; | ||
| 97 | SetNext(next, vertex_divisor); | ||
| 98 | |||
| 99 | vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; | ||
| 100 | if (is_float16_supported) { | ||
| 101 | float16_int8.shaderFloat16 = true; | ||
| 102 | SetNext(next, float16_int8); | ||
| 103 | } else { | ||
| 104 | LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); | ||
| 105 | } | ||
| 106 | |||
| 107 | vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; | ||
| 108 | if (khr_uniform_buffer_standard_layout) { | ||
| 109 | std430_layout.uniformBufferStandardLayout = true; | ||
| 110 | SetNext(next, std430_layout); | ||
| 111 | } else { | ||
| 112 | LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); | ||
| 113 | } | ||
| 114 | |||
| 115 | vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; | ||
| 116 | if (ext_index_type_uint8) { | ||
| 117 | index_type_uint8.indexTypeUint8 = true; | ||
| 118 | SetNext(next, index_type_uint8); | ||
| 119 | } else { | ||
| 120 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); | ||
| 121 | } | ||
| 122 | |||
| 123 | vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, | ||
| 124 | nullptr, static_cast<u32>(extensions.size()), extensions.data(), | ||
| 125 | nullptr); | ||
| 126 | device_ci.pNext = &features2; | ||
| 127 | |||
| 71 | vk::Device dummy_logical; | 128 | vk::Device dummy_logical; |
| 72 | if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { | 129 | if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { |
| 73 | LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); | 130 | LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); |
| @@ -78,6 +135,17 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 78 | logical = UniqueDevice( | 135 | logical = UniqueDevice( |
| 79 | dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); | 136 | dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); |
| 80 | 137 | ||
| 138 | if (khr_driver_properties) { | ||
| 139 | vk::PhysicalDeviceDriverPropertiesKHR driver; | ||
| 140 | vk::PhysicalDeviceProperties2 properties; | ||
| 141 | properties.pNext = &driver; | ||
| 142 | physical.getProperties2(&properties, dld); | ||
| 143 | driver_id = driver.driverID; | ||
| 144 | LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo); | ||
| 145 | } else { | ||
| 146 | LOG_INFO(Render_Vulkan, "Driver: Unknown"); | ||
| 147 | } | ||
| 148 | |||
| 81 | graphics_queue = logical->getQueue(graphics_family, 0, dld); | 149 | graphics_queue = logical->getQueue(graphics_family, 0, dld); |
| 82 | present_queue = logical->getQueue(present_family, 0, dld); | 150 | present_queue = logical->getQueue(present_family, 0, dld); |
| 83 | return true; | 151 | return true; |
| @@ -92,20 +160,19 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | |||
| 92 | // The wanted format is not supported by hardware, search for alternatives | 160 | // The wanted format is not supported by hardware, search for alternatives |
| 93 | const vk::Format* alternatives = GetFormatAlternatives(wanted_format); | 161 | const vk::Format* alternatives = GetFormatAlternatives(wanted_format); |
| 94 | if (alternatives == nullptr) { | 162 | if (alternatives == nullptr) { |
| 95 | LOG_CRITICAL(Render_Vulkan, | 163 | UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " |
| 96 | "Format={} with usage={} and type={} has no defined alternatives and host " | 164 | "hardware does not support it", |
| 97 | "hardware does not support it", | 165 | vk::to_string(wanted_format), vk::to_string(wanted_usage), |
| 98 | vk::to_string(wanted_format), vk::to_string(wanted_usage), | 166 | static_cast<u32>(format_type)); |
| 99 | static_cast<u32>(format_type)); | ||
| 100 | UNREACHABLE(); | ||
| 101 | return wanted_format; | 167 | return wanted_format; |
| 102 | } | 168 | } |
| 103 | 169 | ||
| 104 | std::size_t i = 0; | 170 | std::size_t i = 0; |
| 105 | for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; | 171 | for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; |
| 106 | alternative = alternatives[++i]) { | 172 | alternative = alternatives[++i]) { |
| 107 | if (!IsFormatSupported(alternative, wanted_usage, format_type)) | 173 | if (!IsFormatSupported(alternative, wanted_usage, format_type)) { |
| 108 | continue; | 174 | continue; |
| 175 | } | ||
| 109 | LOG_WARNING(Render_Vulkan, | 176 | LOG_WARNING(Render_Vulkan, |
| 110 | "Emulating format={} with alternative format={} with usage={} and type={}", | 177 | "Emulating format={} with alternative format={} with usage={} and type={}", |
| 111 | static_cast<u32>(wanted_format), static_cast<u32>(alternative), | 178 | static_cast<u32>(wanted_format), static_cast<u32>(alternative), |
| @@ -114,12 +181,10 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | |||
| 114 | } | 181 | } |
| 115 | 182 | ||
| 116 | // No alternatives found, panic | 183 | // No alternatives found, panic |
| 117 | LOG_CRITICAL(Render_Vulkan, | 184 | UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " |
| 118 | "Format={} with usage={} and type={} is not supported by the host hardware and " | 185 | "doesn't support any of the alternatives", |
| 119 | "doesn't support any of the alternatives", | 186 | static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), |
| 120 | static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), | 187 | static_cast<u32>(format_type)); |
| 121 | static_cast<u32>(format_type)); | ||
| 122 | UNREACHABLE(); | ||
| 123 | return wanted_format; | 188 | return wanted_format; |
| 124 | } | 189 | } |
| 125 | 190 | ||
| @@ -132,7 +197,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features | |||
| 132 | vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | | 197 | vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | |
| 133 | vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | | 198 | vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | |
| 134 | vk::FormatFeatureFlagBits::eTransferDst}; | 199 | vk::FormatFeatureFlagBits::eTransferDst}; |
| 135 | constexpr std::array<vk::Format, 9> astc_formats = { | 200 | constexpr std::array astc_formats = { |
| 136 | vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, | 201 | vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, |
| 137 | vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, | 202 | vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, |
| 138 | vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, | 203 | vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, |
| @@ -151,76 +216,120 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag | |||
| 151 | FormatType format_type) const { | 216 | FormatType format_type) const { |
| 152 | const auto it = format_properties.find(wanted_format); | 217 | const auto it = format_properties.find(wanted_format); |
| 153 | if (it == format_properties.end()) { | 218 | if (it == format_properties.end()) { |
| 154 | LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format)); | 219 | UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format)); |
| 155 | UNREACHABLE(); | ||
| 156 | return true; | 220 | return true; |
| 157 | } | 221 | } |
| 158 | const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type); | 222 | const auto supported_usage = GetFormatFeatures(it->second, format_type); |
| 159 | return (supported_usage & wanted_usage) == wanted_usage; | 223 | return (supported_usage & wanted_usage) == wanted_usage; |
| 160 | } | 224 | } |
| 161 | 225 | ||
| 162 | bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | 226 | bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, |
| 163 | vk::SurfaceKHR surface) { | 227 | vk::SurfaceKHR surface) { |
| 164 | bool has_swapchain{}; | 228 | LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName); |
| 229 | bool is_suitable = true; | ||
| 230 | |||
| 231 | constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME, | ||
| 232 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME}; | ||
| 233 | std::bitset<required_extensions.size()> available_extensions{}; | ||
| 234 | |||
| 165 | for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 235 | for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 166 | has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 236 | for (std::size_t i = 0; i < required_extensions.size(); ++i) { |
| 237 | if (available_extensions[i]) { | ||
| 238 | continue; | ||
| 239 | } | ||
| 240 | available_extensions[i] = | ||
| 241 | required_extensions[i] == std::string_view{prop.extensionName}; | ||
| 242 | } | ||
| 167 | } | 243 | } |
| 168 | if (!has_swapchain) { | 244 | if (!available_extensions.all()) { |
| 169 | // The device doesn't support creating swapchains. | 245 | for (std::size_t i = 0; i < required_extensions.size(); ++i) { |
| 170 | return false; | 246 | if (available_extensions[i]) { |
| 247 | continue; | ||
| 248 | } | ||
| 249 | LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]); | ||
| 250 | is_suitable = false; | ||
| 251 | } | ||
| 171 | } | 252 | } |
| 172 | 253 | ||
| 173 | bool has_graphics{}, has_present{}; | 254 | bool has_graphics{}, has_present{}; |
| 174 | const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); | 255 | const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); |
| 175 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { | 256 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { |
| 176 | const auto& family = queue_family_properties[i]; | 257 | const auto& family = queue_family_properties[i]; |
| 177 | if (family.queueCount == 0) | 258 | if (family.queueCount == 0) { |
| 178 | continue; | 259 | continue; |
| 179 | 260 | } | |
| 180 | has_graphics |= | 261 | has_graphics |= |
| 181 | (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); | 262 | (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); |
| 182 | has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; | 263 | has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; |
| 183 | } | 264 | } |
| 184 | if (!has_graphics || !has_present) { | 265 | if (!has_graphics || !has_present) { |
| 185 | // The device doesn't have a graphics and present queue. | 266 | LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue"); |
| 186 | return false; | 267 | is_suitable = false; |
| 187 | } | 268 | } |
| 188 | 269 | ||
| 189 | // TODO(Rodrigo): Check if the device matches all requeriments. | 270 | // TODO(Rodrigo): Check if the device matches all requirements. |
| 190 | const auto properties{physical.getProperties(dldi)}; | 271 | const auto properties{physical.getProperties(dldi)}; |
| 191 | const auto limits{properties.limits}; | 272 | const auto& limits{properties.limits}; |
| 192 | if (limits.maxUniformBufferRange < 65536) { | 273 | |
| 193 | return false; | 274 | constexpr u32 required_ubo_size = 65536; |
| 275 | if (limits.maxUniformBufferRange < required_ubo_size) { | ||
| 276 | LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)", | ||
| 277 | limits.maxUniformBufferRange, required_ubo_size); | ||
| 278 | is_suitable = false; | ||
| 194 | } | 279 | } |
| 195 | 280 | ||
| 196 | const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; | 281 | const auto features{physical.getFeatures(dldi)}; |
| 197 | if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { | 282 | const std::array feature_report = { |
| 198 | return false; | 283 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), |
| 284 | std::make_pair(features.independentBlend, "independentBlend"), | ||
| 285 | std::make_pair(features.depthClamp, "depthClamp"), | ||
| 286 | std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), | ||
| 287 | std::make_pair(features.largePoints, "largePoints"), | ||
| 288 | }; | ||
| 289 | for (const auto& [supported, name] : feature_report) { | ||
| 290 | if (supported) { | ||
| 291 | continue; | ||
| 292 | } | ||
| 293 | LOG_INFO(Render_Vulkan, "Missing required feature: {}", name); | ||
| 294 | is_suitable = false; | ||
| 199 | } | 295 | } |
| 200 | 296 | ||
| 201 | // Device is suitable. | 297 | return is_suitable; |
| 202 | return true; | ||
| 203 | } | 298 | } |
| 204 | 299 | ||
| 205 | std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { | 300 | std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { |
| 206 | std::vector<const char*> extensions; | 301 | std::vector<const char*> extensions; |
| 207 | extensions.reserve(2); | 302 | extensions.reserve(7); |
| 208 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 303 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 304 | extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); | ||
| 209 | 305 | ||
| 210 | const auto Test = [&](const vk::ExtensionProperties& extension, | 306 | const auto Test = [&](const vk::ExtensionProperties& extension, |
| 211 | std::optional<std::reference_wrapper<bool>> status, const char* name, | 307 | std::optional<std::reference_wrapper<bool>> status, const char* name, |
| 212 | u32 revision) { | 308 | bool push) { |
| 213 | if (extension.extensionName != std::string(name)) { | 309 | if (extension.extensionName != std::string_view(name)) { |
| 214 | return; | 310 | return; |
| 215 | } | 311 | } |
| 216 | extensions.push_back(name); | 312 | if (push) { |
| 313 | extensions.push_back(name); | ||
| 314 | } | ||
| 217 | if (status) { | 315 | if (status) { |
| 218 | status->get() = true; | 316 | status->get() = true; |
| 219 | } | 317 | } |
| 220 | }; | 318 | }; |
| 221 | 319 | ||
| 320 | bool khr_shader_float16_int8{}; | ||
| 222 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 321 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 223 | Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); | 322 | Test(extension, khr_uniform_buffer_standard_layout, |
| 323 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | ||
| 324 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | ||
| 325 | Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true); | ||
| 326 | Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | ||
| 327 | } | ||
| 328 | |||
| 329 | if (khr_shader_float16_int8) { | ||
| 330 | is_float16_supported = | ||
| 331 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; | ||
| 332 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | ||
| 224 | } | 333 | } |
| 225 | 334 | ||
| 226 | return extensions; | 335 | return extensions; |
| @@ -250,9 +359,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 250 | } | 359 | } |
| 251 | 360 | ||
| 252 | void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { | 361 | void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { |
| 253 | const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); | 362 | const auto props = physical.getProperties(dldi); |
| 254 | device_type = props.deviceType; | 363 | device_type = props.deviceType; |
| 255 | uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); | 364 | uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); |
| 365 | storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment); | ||
| 256 | max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); | 366 | max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); |
| 257 | } | 367 | } |
| 258 | 368 | ||
| @@ -273,42 +383,53 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con | |||
| 273 | return queue_cis; | 383 | return queue_cis; |
| 274 | } | 384 | } |
| 275 | 385 | ||
| 276 | std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | 386 | std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( |
| 277 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { | 387 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { |
| 278 | static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, | 388 | constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, |
| 279 | vk::Format::eB5G6R5UnormPack16, | 389 | vk::Format::eA8B8G8R8SnormPack32, |
| 280 | vk::Format::eA2B10G10R10UnormPack32, | 390 | vk::Format::eA8B8G8R8SrgbPack32, |
| 281 | vk::Format::eR32G32B32A32Sfloat, | 391 | vk::Format::eB5G6R5UnormPack16, |
| 282 | vk::Format::eR16G16Unorm, | 392 | vk::Format::eA2B10G10R10UnormPack32, |
| 283 | vk::Format::eR16G16Snorm, | 393 | vk::Format::eR32G32B32A32Sfloat, |
| 284 | vk::Format::eR8G8B8A8Srgb, | 394 | vk::Format::eR16G16B16A16Uint, |
| 285 | vk::Format::eR8Unorm, | 395 | vk::Format::eR16G16Unorm, |
| 286 | vk::Format::eB10G11R11UfloatPack32, | 396 | vk::Format::eR16G16Snorm, |
| 287 | vk::Format::eR32Sfloat, | 397 | vk::Format::eR16G16Sfloat, |
| 288 | vk::Format::eR16Sfloat, | 398 | vk::Format::eR16Unorm, |
| 289 | vk::Format::eR16G16B16A16Sfloat, | 399 | vk::Format::eR8G8B8A8Srgb, |
| 290 | vk::Format::eD32Sfloat, | 400 | vk::Format::eR8G8Unorm, |
| 291 | vk::Format::eD16Unorm, | 401 | vk::Format::eR8G8Snorm, |
| 292 | vk::Format::eD16UnormS8Uint, | 402 | vk::Format::eR8Unorm, |
| 293 | vk::Format::eD24UnormS8Uint, | 403 | vk::Format::eB10G11R11UfloatPack32, |
| 294 | vk::Format::eD32SfloatS8Uint, | 404 | vk::Format::eR32Sfloat, |
| 295 | vk::Format::eBc1RgbaUnormBlock, | 405 | vk::Format::eR16Sfloat, |
| 296 | vk::Format::eBc2UnormBlock, | 406 | vk::Format::eR16G16B16A16Sfloat, |
| 297 | vk::Format::eBc3UnormBlock, | 407 | vk::Format::eB8G8R8A8Unorm, |
| 298 | vk::Format::eBc4UnormBlock, | 408 | vk::Format::eD32Sfloat, |
| 299 | vk::Format::eBc5UnormBlock, | 409 | vk::Format::eD16Unorm, |
| 300 | vk::Format::eBc5SnormBlock, | 410 | vk::Format::eD16UnormS8Uint, |
| 301 | vk::Format::eBc7UnormBlock, | 411 | vk::Format::eD24UnormS8Uint, |
| 302 | vk::Format::eAstc4x4UnormBlock, | 412 | vk::Format::eD32SfloatS8Uint, |
| 303 | vk::Format::eAstc4x4SrgbBlock, | 413 | vk::Format::eBc1RgbaUnormBlock, |
| 304 | vk::Format::eAstc8x8SrgbBlock, | 414 | vk::Format::eBc2UnormBlock, |
| 305 | vk::Format::eAstc8x6SrgbBlock, | 415 | vk::Format::eBc3UnormBlock, |
| 306 | vk::Format::eAstc5x4SrgbBlock, | 416 | vk::Format::eBc4UnormBlock, |
| 307 | vk::Format::eAstc5x5UnormBlock, | 417 | vk::Format::eBc5UnormBlock, |
| 308 | vk::Format::eAstc5x5SrgbBlock, | 418 | vk::Format::eBc5SnormBlock, |
| 309 | vk::Format::eAstc10x8UnormBlock, | 419 | vk::Format::eBc7UnormBlock, |
| 310 | vk::Format::eAstc10x8SrgbBlock}; | 420 | vk::Format::eBc1RgbaSrgbBlock, |
| 311 | std::map<vk::Format, vk::FormatProperties> format_properties; | 421 | vk::Format::eBc3SrgbBlock, |
| 422 | vk::Format::eBc7SrgbBlock, | ||
| 423 | vk::Format::eAstc4x4UnormBlock, | ||
| 424 | vk::Format::eAstc4x4SrgbBlock, | ||
| 425 | vk::Format::eAstc8x8SrgbBlock, | ||
| 426 | vk::Format::eAstc8x6SrgbBlock, | ||
| 427 | vk::Format::eAstc5x4SrgbBlock, | ||
| 428 | vk::Format::eAstc5x5UnormBlock, | ||
| 429 | vk::Format::eAstc5x5SrgbBlock, | ||
| 430 | vk::Format::eAstc10x8UnormBlock, | ||
| 431 | vk::Format::eAstc10x8SrgbBlock}; | ||
| 432 | std::unordered_map<vk::Format, vk::FormatProperties> format_properties; | ||
| 312 | for (const auto format : formats) { | 433 | for (const auto format : formats) { |
| 313 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); | 434 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); |
| 314 | } | 435 | } |
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 537825d8b..010d4c3d6 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <map> | 7 | #include <unordered_map> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 10 | #include "video_core/renderer_vulkan/declarations.h" |
| @@ -69,16 +69,26 @@ public: | |||
| 69 | return present_family; | 69 | return present_family; |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | /// Returns if the device is integrated with the host CPU. | 72 | /// Returns true if the device is integrated with the host CPU. |
| 73 | bool IsIntegrated() const { | 73 | bool IsIntegrated() const { |
| 74 | return device_type == vk::PhysicalDeviceType::eIntegratedGpu; | 74 | return device_type == vk::PhysicalDeviceType::eIntegratedGpu; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | /// Returns the driver ID. | ||
| 78 | vk::DriverIdKHR GetDriverID() const { | ||
| 79 | return driver_id; | ||
| 80 | } | ||
| 81 | |||
| 77 | /// Returns uniform buffer alignment requeriment. | 82 | /// Returns uniform buffer alignment requirement. |
| 78 | u64 GetUniformBufferAlignment() const { | 83 | u64 GetUniformBufferAlignment() const { |
| 79 | return uniform_buffer_alignment; | 84 | return uniform_buffer_alignment; |
| 80 | } | 85 | } |
| 81 | 86 | ||
| 87 | /// Returns storage buffer alignment requirement. | ||
| 88 | u64 GetStorageBufferAlignment() const { | ||
| 89 | return storage_buffer_alignment; | ||
| 90 | } | ||
| 91 | |||
| 82 | /// Returns the maximum range for storage buffers. | 92 | /// Returns the maximum range for storage buffers. |
| 83 | u64 GetMaxStorageBufferRange() const { | 93 | u64 GetMaxStorageBufferRange() const { |
| 84 | return max_storage_buffer_range; | 94 | return max_storage_buffer_range; |
| @@ -89,9 +99,19 @@ public: | |||
| 89 | return is_optimal_astc_supported; | 99 | return is_optimal_astc_supported; |
| 90 | } | 100 | } |
| 91 | 101 | ||
| 102 | /// Returns true if the device supports float16 natively | ||
| 103 | bool IsFloat16Supported() const { | ||
| 104 | return is_float16_supported; | ||
| 105 | } | ||
| 106 | |||
| 92 | /// Returns true if the device supports VK_EXT_scalar_block_layout. | 107 | /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. |
| 93 | bool IsExtScalarBlockLayoutSupported() const { | 108 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 94 | return ext_scalar_block_layout; | 109 | return khr_uniform_buffer_standard_layout; |
| 110 | } | ||
| 111 | |||
| 112 | /// Returns true if the device supports VK_EXT_index_type_uint8. | ||
| 113 | bool IsExtIndexTypeUint8Supported() const { | ||
| 114 | return ext_index_type_uint8; | ||
| 95 | } | 115 | } |
| 96 | 116 | ||
| 97 | /// Checks if the physical device is suitable. | 117 | /// Checks if the physical device is suitable. |
| @@ -123,22 +143,28 @@ private: | |||
| 123 | FormatType format_type) const; | 143 | FormatType format_type) const; |
| 124 | 144 | ||
| 125 | /// Returns the device properties for Vulkan formats. | 145 | /// Returns the device properties for Vulkan formats. |
| 126 | static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( | 146 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( |
| 127 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 147 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); |
| 128 | 148 | ||
| 129 | const vk::PhysicalDevice physical; ///< Physical device. | 149 | const vk::PhysicalDevice physical; ///< Physical device. |
| 130 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. | 150 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. |
| 131 | UniqueDevice logical; ///< Logical device. | 151 | UniqueDevice logical; ///< Logical device. |
| 132 | vk::Queue graphics_queue; ///< Main graphics queue. | 152 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 133 | vk::Queue present_queue; ///< Main present queue. | 153 | vk::Queue present_queue; ///< Main present queue. |
| 134 | u32 graphics_family{}; ///< Main graphics queue family index. | 154 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 135 | u32 present_family{}; ///< Main present queue family index. | 155 | u32 present_family{}; ///< Main present queue family index. |
| 136 | vk::PhysicalDeviceType device_type; ///< Physical device type. | 156 | vk::PhysicalDeviceType device_type; ///< Physical device type. |
| 137 | u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. | 157 | vk::DriverIdKHR driver_id{}; ///< Driver ID. |
| 138 | u64 max_storage_buffer_range{}; ///< Max storage buffer size. | 158 | u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requirement. |
| 139 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | 159 | u64 storage_buffer_alignment{}; ///< Storage buffer alignment requirement. |
| 140 | bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. | 160 | u64 max_storage_buffer_range{}; ///< Max storage buffer size. |
| 141 | std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary. | 161 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 162 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | ||
| 163 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | ||
| 164 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | ||
| 165 | bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties. | ||
| 166 | std::unordered_map<vk::Format, vk::FormatProperties> | ||
| 167 | format_properties; ///< Format properties dictionary. | ||
| 142 | }; | 168 | }; |
| 143 | 169 | ||
| 144 | } // namespace Vulkan | 170 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a35b45c9c..b9153934e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -370,8 +370,8 @@ private: | |||
| 370 | u32 binding = const_buffers_base_binding; | 370 | u32 binding = const_buffers_base_binding; |
| 371 | for (const auto& entry : ir.GetConstantBuffers()) { | 371 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 372 | const auto [index, size] = entry; | 372 | const auto [index, size] = entry; |
| 373 | const Id type = | 373 | const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo |
| 374 | device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; | 374 | : t_cbuf_std140_ubo; |
| 375 | const Id id = OpVariable(type, spv::StorageClass::Uniform); | 375 | const Id id = OpVariable(type, spv::StorageClass::Uniform); |
| 376 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); | 376 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); |
| 377 | 377 | ||
| @@ -565,7 +565,7 @@ private: | |||
| 565 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); | 565 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); |
| 566 | 566 | ||
| 567 | Id pointer{}; | 567 | Id pointer{}; |
| 568 | if (device.IsExtScalarBlockLayoutSupported()) { | 568 | if (device.IsKhrUniformBufferStandardLayoutSupported()) { |
| 569 | const Id buffer_offset = Emit(OpShiftRightLogical( | 569 | const Id buffer_offset = Emit(OpShiftRightLogical( |
| 570 | t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); | 570 | t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); |
| 571 | pointer = Emit( | 571 | pointer = Emit( |
| @@ -944,6 +944,41 @@ private: | |||
| 944 | return {}; | 944 | return {}; |
| 945 | } | 945 | } |
| 946 | 946 | ||
| 947 | Id AtomicImageAdd(Operation operation) { | ||
| 948 | UNIMPLEMENTED(); | ||
| 949 | return {}; | ||
| 950 | } | ||
| 951 | |||
| 952 | Id AtomicImageMin(Operation operation) { | ||
| 953 | UNIMPLEMENTED(); | ||
| 954 | return {}; | ||
| 955 | } | ||
| 956 | |||
| 957 | Id AtomicImageMax(Operation operation) { | ||
| 958 | UNIMPLEMENTED(); | ||
| 959 | return {}; | ||
| 960 | } | ||
| 961 | |||
| 962 | Id AtomicImageAnd(Operation operation) { | ||
| 963 | UNIMPLEMENTED(); | ||
| 964 | return {}; | ||
| 965 | } | ||
| 966 | |||
| 967 | Id AtomicImageOr(Operation operation) { | ||
| 968 | UNIMPLEMENTED(); | ||
| 969 | return {}; | ||
| 970 | } | ||
| 971 | |||
| 972 | Id AtomicImageXor(Operation operation) { | ||
| 973 | UNIMPLEMENTED(); | ||
| 974 | return {}; | ||
| 975 | } | ||
| 976 | |||
| 977 | Id AtomicImageExchange(Operation operation) { | ||
| 978 | UNIMPLEMENTED(); | ||
| 979 | return {}; | ||
| 980 | } | ||
| 981 | |||
| 947 | Id Branch(Operation operation) { | 982 | Id Branch(Operation operation) { |
| 948 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 983 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 949 | UNIMPLEMENTED_IF(!target); | 984 | UNIMPLEMENTED_IF(!target); |
| @@ -1366,6 +1401,13 @@ private: | |||
| 1366 | &SPIRVDecompiler::TexelFetch, | 1401 | &SPIRVDecompiler::TexelFetch, |
| 1367 | 1402 | ||
| 1368 | &SPIRVDecompiler::ImageStore, | 1403 | &SPIRVDecompiler::ImageStore, |
| 1404 | &SPIRVDecompiler::AtomicImageAdd, | ||
| 1405 | &SPIRVDecompiler::AtomicImageMin, | ||
| 1406 | &SPIRVDecompiler::AtomicImageMax, | ||
| 1407 | &SPIRVDecompiler::AtomicImageAnd, | ||
| 1408 | &SPIRVDecompiler::AtomicImageOr, | ||
| 1409 | &SPIRVDecompiler::AtomicImageXor, | ||
| 1410 | &SPIRVDecompiler::AtomicImageExchange, | ||
| 1369 | 1411 | ||
| 1370 | &SPIRVDecompiler::Branch, | 1412 | &SPIRVDecompiler::Branch, |
| 1371 | &SPIRVDecompiler::BranchIndirect, | 1413 | &SPIRVDecompiler::BranchIndirect, |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 008109a99..d54fb88c9 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 44 | switch (opcode->get().GetId()) { | 44 | switch (opcode->get().GetId()) { |
| 45 | case OpCode::Id::SUST: { | 45 | case OpCode::Id::SUST: { |
| 46 | UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); | 46 | UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); |
| 47 | UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); | ||
| 48 | UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); | 47 | UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); |
| 49 | UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store | 48 | UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store |
| 50 | 49 | ||
| @@ -66,8 +65,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 66 | image.MarkWrite(); | 65 | image.MarkWrite(); |
| 67 | 66 | ||
| 68 | MetaImage meta{image, values}; | 67 | MetaImage meta{image, values}; |
| 69 | const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; | 68 | bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); |
| 70 | bb.push_back(store); | 69 | break; |
| 70 | } | ||
| 71 | case OpCode::Id::SUATOM: { | ||
| 72 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | ||
| 73 | |||
| 74 | Node value = GetRegister(instr.gpr0); | ||
| 75 | |||
| 76 | std::vector<Node> coords; | ||
| 77 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | ||
| 78 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 79 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 80 | } | ||
| 81 | |||
| 82 | const OperationCode operation_code = [instr] { | ||
| 83 | switch (instr.suatom_d.operation) { | ||
| 84 | case Tegra::Shader::ImageAtomicOperation::Add: | ||
| 85 | return OperationCode::AtomicImageAdd; | ||
| 86 | case Tegra::Shader::ImageAtomicOperation::Min: | ||
| 87 | return OperationCode::AtomicImageMin; | ||
| 88 | case Tegra::Shader::ImageAtomicOperation::Max: | ||
| 89 | return OperationCode::AtomicImageMax; | ||
| 90 | case Tegra::Shader::ImageAtomicOperation::And: | ||
| 91 | return OperationCode::AtomicImageAnd; | ||
| 92 | case Tegra::Shader::ImageAtomicOperation::Or: | ||
| 93 | return OperationCode::AtomicImageOr; | ||
| 94 | case Tegra::Shader::ImageAtomicOperation::Xor: | ||
| 95 | return OperationCode::AtomicImageXor; | ||
| 96 | case Tegra::Shader::ImageAtomicOperation::Exch: | ||
| 97 | return OperationCode::AtomicImageExchange; | ||
| 98 | default: | ||
| 99 | UNIMPLEMENTED_MSG("Unimplemented operation={}", | ||
| 100 | static_cast<u32>(instr.suatom_d.operation.Value())); | ||
| 101 | return OperationCode::AtomicImageAdd; | ||
| 102 | } | ||
| 103 | }(); | ||
| 104 | |||
| 105 | const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; | ||
| 106 | MetaImage meta{image, {std::move(value)}}; | ||
| 107 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); | ||
| 71 | break; | 108 | break; |
| 72 | } | 109 | } |
| 73 | default: | 110 | default: |
| @@ -77,38 +114,51 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 77 | return pc; | 114 | return pc; |
| 78 | } | 115 | } |
| 79 | 116 | ||
| 80 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | 117 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, |
| 81 | const auto offset{static_cast<u64>(image.index.Value())}; | 118 | std::optional<Tegra::Shader::ImageAtomicSize> size) { |
| 82 | 119 | const auto offset{static_cast<std::size_t>(image.index.Value())}; | |
| 83 | // If this image has already been used, return the existing mapping. | 120 | if (const auto image = TryUseExistingImage(offset, type, size)) { |
| 84 | const auto it = used_images.find(offset); | 121 | return *image; |
| 85 | if (it != used_images.end()) { | ||
| 86 | ASSERT(it->second.GetType() == type); | ||
| 87 | return it->second; | ||
| 88 | } | 122 | } |
| 89 | 123 | ||
| 90 | // Otherwise create a new mapping for this image. | ||
| 91 | const std::size_t next_index{used_images.size()}; | 124 | const std::size_t next_index{used_images.size()}; |
| 92 | return used_images.emplace(offset, Image{offset, next_index, type}).first->second; | 125 | return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; |
| 93 | } | 126 | } |
| 94 | 127 | ||
| 95 | Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { | 128 | Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, |
| 129 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 96 | const Node image_register{GetRegister(reg)}; | 130 | const Node image_register{GetRegister(reg)}; |
| 97 | const auto [base_image, cbuf_index, cbuf_offset]{ | 131 | const auto [base_image, cbuf_index, cbuf_offset]{ |
| 98 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | 132 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; |
| 99 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | 133 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; |
| 100 | 134 | ||
| 101 | // If this image has already been used, return the existing mapping. | 135 | if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { |
| 102 | const auto it = used_images.find(cbuf_key); | 136 | return *image; |
| 103 | if (it != used_images.end()) { | ||
| 104 | ASSERT(it->second.GetType() == type); | ||
| 105 | return it->second; | ||
| 106 | } | 137 | } |
| 107 | 138 | ||
| 108 | // Otherwise create a new mapping for this image. | ||
| 109 | const std::size_t next_index{used_images.size()}; | 139 | const std::size_t next_index{used_images.size()}; |
| 110 | return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) | 140 | return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) |
| 111 | .first->second; | 141 | .first->second; |
| 112 | } | 142 | } |
| 113 | 143 | ||
| 144 | Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, | ||
| 145 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 146 | auto it = used_images.find(offset); | ||
| 147 | if (it == used_images.end()) { | ||
| 148 | return nullptr; | ||
| 149 | } | ||
| 150 | auto& image = it->second; | ||
| 151 | ASSERT(image.GetType() == type); | ||
| 152 | |||
| 153 | if (size) { | ||
| 154 | // We know the size, if it's known it has to be the same as before, otherwise we can set it. | ||
| 155 | if (image.IsSizeKnown()) { | ||
| 156 | ASSERT(image.GetSize() == size); | ||
| 157 | } else { | ||
| 158 | image.SetSize(*size); | ||
| 159 | } | ||
| 160 | } | ||
| 161 | return &image; | ||
| 162 | } | ||
| 163 | |||
| 114 | } // namespace VideoCommon::Shader | 164 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index b29aedce8..b47b201cf 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <optional> | ||
| 10 | #include <string> | 11 | #include <string> |
| 11 | #include <tuple> | 12 | #include <tuple> |
| 12 | #include <utility> | 13 | #include <utility> |
| @@ -148,7 +149,14 @@ enum class OperationCode { | |||
| 148 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 149 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 149 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 150 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 150 | 151 | ||
| 151 | ImageStore, /// (MetaImage, float[N] coords) -> void | 152 | ImageStore, /// (MetaImage, int[N] values) -> void |
| 153 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void | ||
| 154 | AtomicImageMin, /// (MetaImage, int[N] coords) -> void | ||
| 155 | AtomicImageMax, /// (MetaImage, int[N] coords) -> void | ||
| 156 | AtomicImageAnd, /// (MetaImage, int[N] coords) -> void | ||
| 157 | AtomicImageOr, /// (MetaImage, int[N] coords) -> void | ||
| 158 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | ||
| 159 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | ||
| 152 | 160 | ||
| 153 | Branch, /// (uint branch_target) -> void | 161 | Branch, /// (uint branch_target) -> void |
| 154 | BranchIndirect, /// (uint branch_target) -> void | 162 | BranchIndirect, /// (uint branch_target) -> void |
| @@ -275,25 +283,32 @@ private: | |||
| 275 | 283 | ||
| 276 | class Image final { | 284 | class Image final { |
| 277 | public: | 285 | public: |
| 278 | constexpr explicit Image(u64 offset, std::size_t index, Tegra::Shader::ImageType type) | 286 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, |
| 279 | : offset{offset}, index{index}, type{type}, is_bindless{false} {} | 287 | std::optional<Tegra::Shader::ImageAtomicSize> size) |
| 288 | : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {} | ||
| 280 | 289 | ||
| 281 | constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, | 290 | constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, |
| 282 | Tegra::Shader::ImageType type) | 291 | Tegra::Shader::ImageType type, |
| 292 | std::optional<Tegra::Shader::ImageAtomicSize> size) | ||
| 283 | : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, | 293 | : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, |
| 284 | is_bindless{true} {} | 294 | is_bindless{true}, size{size} {} |
| 285 | 295 | ||
| 286 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, | 296 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, |
| 287 | bool is_bindless, bool is_written, bool is_read) | 297 | bool is_bindless, bool is_written, bool is_read, |
| 298 | std::optional<Tegra::Shader::ImageAtomicSize> size) | ||
| 288 | : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, | 299 | : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, |
| 289 | is_written{is_written}, is_read{is_read} {} | 300 | is_written{is_written}, is_read{is_read}, size{size} {} |
| 301 | |||
| 302 | void MarkWrite() { | ||
| 303 | is_written = true; | ||
| 304 | } | ||
| 290 | 305 | ||
| 291 | void MarkRead() { | 306 | void MarkRead() { |
| 292 | is_read = true; | 307 | is_read = true; |
| 293 | } | 308 | } |
| 294 | 309 | ||
| 295 | void MarkWrite() { | 310 | void SetSize(Tegra::Shader::ImageAtomicSize size_) { |
| 296 | is_written = true; | 311 | size = size_; |
| 297 | } | 312 | } |
| 298 | 313 | ||
| 299 | constexpr std::size_t GetOffset() const { | 314 | constexpr std::size_t GetOffset() const { |
| @@ -312,25 +327,39 @@ public: | |||
| 312 | return is_bindless; | 327 | return is_bindless; |
| 313 | } | 328 | } |
| 314 | 329 | ||
| 315 | constexpr bool IsRead() const { | ||
| 316 | return is_read; | ||
| 317 | } | ||
| 318 | |||
| 319 | constexpr bool IsWritten() const { | 330 | constexpr bool IsWritten() const { |
| 320 | return is_written; | 331 | return is_written; |
| 321 | } | 332 | } |
| 322 | 333 | ||
| 334 | constexpr bool IsRead() const { | ||
| 335 | return is_read; | ||
| 336 | } | ||
| 337 | |||
| 323 | constexpr std::pair<u32, u32> GetBindlessCBuf() const { | 338 | constexpr std::pair<u32, u32> GetBindlessCBuf() const { |
| 324 | return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; | 339 | return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; |
| 325 | } | 340 | } |
| 326 | 341 | ||
| 342 | constexpr bool IsSizeKnown() const { | ||
| 343 | return size.has_value(); | ||
| 344 | } | ||
| 345 | |||
| 346 | constexpr Tegra::Shader::ImageAtomicSize GetSize() const { | ||
| 347 | return size.value(); | ||
| 348 | } | ||
| 349 | |||
| 350 | constexpr bool operator<(const Image& rhs) const { | ||
| 351 | return std::tie(offset, index, type, size, is_bindless) < | ||
| 352 | std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless); | ||
| 353 | } | ||
| 354 | |||
| 327 | private: | 355 | private: |
| 328 | u64 offset{}; | 356 | u64 offset{}; |
| 329 | std::size_t index{}; | 357 | std::size_t index{}; |
| 330 | Tegra::Shader::ImageType type{}; | 358 | Tegra::Shader::ImageType type{}; |
| 331 | bool is_bindless{}; | 359 | bool is_bindless{}; |
| 332 | bool is_read{}; | ||
| 333 | bool is_written{}; | 360 | bool is_written{}; |
| 361 | bool is_read{}; | ||
| 362 | std::optional<Tegra::Shader::ImageAtomicSize> size{}; | ||
| 334 | }; | 363 | }; |
| 335 | 364 | ||
| 336 | struct GlobalMemoryBase { | 365 | struct GlobalMemoryBase { |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0f891eace..62816bd56 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -272,10 +272,16 @@ private: | |||
| 272 | bool is_shadow); | 272 | bool is_shadow); |
| 273 | 273 | ||
| 274 | /// Accesses an image. | 274 | /// Accesses an image. |
| 275 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | 275 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, |
| 276 | std::optional<Tegra::Shader::ImageAtomicSize> size = {}); | ||
| 276 | 277 | ||
| 277 | /// Access a bindless image sampler. | 278 | /// Access a bindless image sampler. |
| 278 | Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); | 279 | Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, |
| 280 | std::optional<Tegra::Shader::ImageAtomicSize> size = {}); | ||
| 281 | |||
| 282 | /// Tries to access an existing image, updating its state as needed | ||
| 283 | Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, | ||
| 284 | std::optional<Tegra::Shader::ImageAtomicSize> size); | ||
| 279 | 285 | ||
| 280 | /// Extracts a sequence of bits from a node | 286 | /// Extracts a sequence of bits from a node |
| 281 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 287 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 4ceb219be..53d0142cb 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -513,6 +513,26 @@ bool IsPixelFormatASTC(PixelFormat format) { | |||
| 513 | } | 513 | } |
| 514 | } | 514 | } |
| 515 | 515 | ||
| 516 | bool IsPixelFormatSRGB(PixelFormat format) { | ||
| 517 | switch (format) { | ||
| 518 | case PixelFormat::RGBA8_SRGB: | ||
| 519 | case PixelFormat::BGRA8_SRGB: | ||
| 520 | case PixelFormat::DXT1_SRGB: | ||
| 521 | case PixelFormat::DXT23_SRGB: | ||
| 522 | case PixelFormat::DXT45_SRGB: | ||
| 523 | case PixelFormat::BC7U_SRGB: | ||
| 524 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 525 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 526 | case PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 527 | case PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 528 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 529 | case PixelFormat::ASTC_2D_10X8_SRGB: | ||
| 530 | return true; | ||
| 531 | default: | ||
| 532 | return false; | ||
| 533 | } | ||
| 534 | } | ||
| 535 | |||
| 516 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { | 536 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { |
| 517 | return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; | 537 | return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; |
| 518 | } | 538 | } |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 83f31c12c..19268b7cd 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -547,6 +547,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format); | |||
| 547 | 547 | ||
| 548 | bool IsPixelFormatASTC(PixelFormat format); | 548 | bool IsPixelFormatASTC(PixelFormat format); |
| 549 | 549 | ||
| 550 | bool IsPixelFormatSRGB(PixelFormat format); | ||
| 551 | |||
| 550 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); | 552 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); |
| 551 | 553 | ||
| 552 | /// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN | 554 | /// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN |