diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 147 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/shader/decode/image.cpp | 137 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 46 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 9 |
11 files changed, 199 insertions, 229 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 28272ef6f..7a6355ce2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -544,7 +544,7 @@ enum class VoteOperation : u64 { | |||
| 544 | Eq = 2, // allThreadsEqualNV | 544 | Eq = 2, // allThreadsEqualNV |
| 545 | }; | 545 | }; |
| 546 | 546 | ||
| 547 | enum class ImageAtomicSize : u64 { | 547 | enum class ImageAtomicOperationType : u64 { |
| 548 | U32 = 0, | 548 | U32 = 0, |
| 549 | S32 = 1, | 549 | S32 = 1, |
| 550 | U64 = 2, | 550 | U64 = 2, |
| @@ -1432,11 +1432,11 @@ union Instruction { | |||
| 1432 | ASSERT(mode == SurfaceDataMode::D_BA); | 1432 | ASSERT(mode == SurfaceDataMode::D_BA); |
| 1433 | return store_data_layout; | 1433 | return store_data_layout; |
| 1434 | } | 1434 | } |
| 1435 | } sust; | 1435 | } suldst; |
| 1436 | 1436 | ||
| 1437 | union { | 1437 | union { |
| 1438 | BitField<28, 1, u64> is_ba; | 1438 | BitField<28, 1, u64> is_ba; |
| 1439 | BitField<51, 3, ImageAtomicSize> size; | 1439 | BitField<51, 3, ImageAtomicOperationType> operation_type; |
| 1440 | BitField<33, 3, ImageType> image_type; | 1440 | BitField<33, 3, ImageType> image_type; |
| 1441 | BitField<29, 4, ImageAtomicOperation> operation; | 1441 | BitField<29, 4, ImageAtomicOperation> operation; |
| 1442 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; | 1442 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; |
| @@ -1595,6 +1595,7 @@ public: | |||
| 1595 | TMML_B, // Texture Mip Map Level | 1595 | TMML_B, // Texture Mip Map Level |
| 1596 | TMML, // Texture Mip Map Level | 1596 | TMML, // Texture Mip Map Level |
| 1597 | SUST, // Surface Store | 1597 | SUST, // Surface Store |
| 1598 | SULD, // Surface Load | ||
| 1598 | SUATOM, // Surface Atomic Operation | 1599 | SUATOM, // Surface Atomic Operation |
| 1599 | EXIT, | 1600 | EXIT, |
| 1600 | NOP, | 1601 | NOP, |
| @@ -1884,6 +1885,7 @@ private: | |||
| 1884 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1885 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1885 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1886 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1886 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), | 1887 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), |
| 1888 | INST("11101011000-----", Id::SULD, Type::Image, "SULD"), | ||
| 1887 | INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), | 1889 | INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), |
| 1888 | INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), | 1890 | INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), |
| 1889 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1891 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 4f59a87b4..64de7e425 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -2,8 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <array> | 6 | #include <array> |
| 6 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <vector> | ||
| 7 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 8 | 10 | ||
| 9 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| @@ -30,9 +32,27 @@ bool TestProgram(const GLchar* glsl) { | |||
| 30 | return link_status == GL_TRUE; | 32 | return link_status == GL_TRUE; |
| 31 | } | 33 | } |
| 32 | 34 | ||
| 35 | std::vector<std::string_view> GetExtensions() { | ||
| 36 | GLint num_extensions; | ||
| 37 | glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); | ||
| 38 | std::vector<std::string_view> extensions; | ||
| 39 | extensions.reserve(num_extensions); | ||
| 40 | for (GLint index = 0; index < num_extensions; ++index) { | ||
| 41 | extensions.push_back( | ||
| 42 | reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, static_cast<GLuint>(index)))); | ||
| 43 | } | ||
| 44 | return extensions; | ||
| 45 | } | ||
| 46 | |||
| 47 | bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { | ||
| 48 | return std::find(images.begin(), images.end(), extension) != images.end(); | ||
| 49 | } | ||
| 50 | |||
| 33 | } // Anonymous namespace | 51 | } // Anonymous namespace |
| 34 | 52 | ||
| 35 | Device::Device() { | 53 | Device::Device() { |
| 54 | const std::vector extensions = GetExtensions(); | ||
| 55 | |||
| 36 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 56 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 37 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 57 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 38 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 58 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| @@ -40,6 +60,7 @@ Device::Device() { | |||
| 40 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | 60 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
| 41 | GLAD_GL_NV_shader_thread_shuffle; | 61 | GLAD_GL_NV_shader_thread_shuffle; |
| 42 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | 62 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; |
| 63 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); | ||
| 43 | has_variable_aoffi = TestVariableAoffi(); | 64 | has_variable_aoffi = TestVariableAoffi(); |
| 44 | has_component_indexing_bug = TestComponentIndexingBug(); | 65 | has_component_indexing_bug = TestComponentIndexingBug(); |
| 45 | has_precise_bug = TestPreciseBug(); | 66 | has_precise_bug = TestPreciseBug(); |
| @@ -55,6 +76,7 @@ Device::Device(std::nullptr_t) { | |||
| 55 | max_varyings = 15; | 76 | max_varyings = 15; |
| 56 | has_warp_intrinsics = true; | 77 | has_warp_intrinsics = true; |
| 57 | has_vertex_viewport_layer = true; | 78 | has_vertex_viewport_layer = true; |
| 79 | has_image_load_formatted = true; | ||
| 58 | has_variable_aoffi = true; | 80 | has_variable_aoffi = true; |
| 59 | has_component_indexing_bug = false; | 81 | has_component_indexing_bug = false; |
| 60 | has_precise_bug = false; | 82 | has_precise_bug = false; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ba6dcd3be..bb273c3d6 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -38,6 +38,10 @@ public: | |||
| 38 | return has_vertex_viewport_layer; | 38 | return has_vertex_viewport_layer; |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | bool HasImageLoadFormatted() const { | ||
| 42 | return has_image_load_formatted; | ||
| 43 | } | ||
| 44 | |||
| 41 | bool HasVariableAoffi() const { | 45 | bool HasVariableAoffi() const { |
| 42 | return has_variable_aoffi; | 46 | return has_variable_aoffi; |
| 43 | } | 47 | } |
| @@ -61,6 +65,7 @@ private: | |||
| 61 | u32 max_varyings{}; | 65 | u32 max_varyings{}; |
| 62 | bool has_warp_intrinsics{}; | 66 | bool has_warp_intrinsics{}; |
| 63 | bool has_vertex_viewport_layer{}; | 67 | bool has_vertex_viewport_layer{}; |
| 68 | bool has_image_load_formatted{}; | ||
| 64 | bool has_variable_aoffi{}; | 69 | bool has_variable_aoffi{}; |
| 65 | bool has_component_indexing_bug{}; | 70 | bool has_component_indexing_bug{}; |
| 66 | bool has_precise_bug{}; | 71 | bool has_precise_bug{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0dbc4c02f..42ca3b1bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -211,14 +211,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 211 | const auto primitive_mode{variant.primitive_mode}; | 211 | const auto primitive_mode{variant.primitive_mode}; |
| 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 213 | 213 | ||
| 214 | std::string source = "#version 430 core\n" | 214 | std::string source = R"(#version 430 core |
| 215 | "#extension GL_ARB_separate_shader_objects : enable\n" | 215 | #extension GL_ARB_separate_shader_objects : enable |
| 216 | "#extension GL_NV_gpu_shader5 : enable\n" | 216 | #extension GL_ARB_shader_viewport_layer_array : enable |
| 217 | "#extension GL_NV_shader_thread_group : enable\n" | 217 | #extension GL_EXT_shader_image_load_formatted : enable |
| 218 | "#extension GL_NV_shader_thread_shuffle : enable\n"; | 218 | #extension GL_NV_gpu_shader5 : enable |
| 219 | if (entries.shader_viewport_layer_array) { | 219 | #extension GL_NV_shader_thread_group : enable |
| 220 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | 220 | #extension GL_NV_shader_thread_shuffle : enable |
| 221 | } | 221 | )"; |
| 222 | if (program_type == ProgramType::Compute) { | 222 | if (program_type == ProgramType::Compute) { |
| 223 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | 223 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; |
| 224 | } | 224 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 74cb59bc1..e6b36a0f2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | 19 | #include "video_core/renderer_opengl/gl_device.h" |
| 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 21 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 22 | #include "video_core/shader/node.h" | ||
| 22 | #include "video_core/shader/shader_ir.h" | 23 | #include "video_core/shader/shader_ir.h" |
| 23 | 24 | ||
| 24 | namespace OpenGL::GLShader { | 25 | namespace OpenGL::GLShader { |
| @@ -398,8 +399,6 @@ public: | |||
| 398 | usage.is_read, usage.is_written); | 399 | usage.is_read, usage.is_written); |
| 399 | } | 400 | } |
| 400 | entries.clip_distances = ir.GetClipDistances(); | 401 | entries.clip_distances = ir.GetClipDistances(); |
| 401 | entries.shader_viewport_layer_array = | ||
| 402 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||
| 403 | entries.shader_length = ir.GetLength(); | 402 | entries.shader_length = ir.GetLength(); |
| 404 | return entries; | 403 | return entries; |
| 405 | } | 404 | } |
| @@ -725,36 +724,20 @@ private: | |||
| 725 | const char* image_type = [&] { | 724 | const char* image_type = [&] { |
| 726 | switch (image.GetType()) { | 725 | switch (image.GetType()) { |
| 727 | case Tegra::Shader::ImageType::Texture1D: | 726 | case Tegra::Shader::ImageType::Texture1D: |
| 728 | return "image1D"; | 727 | return "1D"; |
| 729 | case Tegra::Shader::ImageType::TextureBuffer: | 728 | case Tegra::Shader::ImageType::TextureBuffer: |
| 730 | return "imageBuffer"; | 729 | return "Buffer"; |
| 731 | case Tegra::Shader::ImageType::Texture1DArray: | 730 | case Tegra::Shader::ImageType::Texture1DArray: |
| 732 | return "image1DArray"; | 731 | return "1DArray"; |
| 733 | case Tegra::Shader::ImageType::Texture2D: | 732 | case Tegra::Shader::ImageType::Texture2D: |
| 734 | return "image2D"; | 733 | return "2D"; |
| 735 | case Tegra::Shader::ImageType::Texture2DArray: | 734 | case Tegra::Shader::ImageType::Texture2DArray: |
| 736 | return "image2DArray"; | 735 | return "2DArray"; |
| 737 | case Tegra::Shader::ImageType::Texture3D: | 736 | case Tegra::Shader::ImageType::Texture3D: |
| 738 | return "image3D"; | 737 | return "3D"; |
| 739 | default: | 738 | default: |
| 740 | UNREACHABLE(); | 739 | UNREACHABLE(); |
| 741 | return "image1D"; | 740 | return "1D"; |
| 742 | } | ||
| 743 | }(); | ||
| 744 | |||
| 745 | const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> { | ||
| 746 | if (!image.IsSizeKnown()) { | ||
| 747 | return {"", ""}; | ||
| 748 | } | ||
| 749 | switch (image.GetSize()) { | ||
| 750 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 751 | return {"u", "r32ui, "}; | ||
| 752 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 753 | return {"i", "r32i, "}; | ||
| 754 | default: | ||
| 755 | UNIMPLEMENTED_MSG("Unimplemented atomic size={}", | ||
| 756 | static_cast<u32>(image.GetSize())); | ||
| 757 | return {"", ""}; | ||
| 758 | } | 741 | } |
| 759 | }(); | 742 | }(); |
| 760 | 743 | ||
| @@ -765,8 +748,12 @@ private: | |||
| 765 | qualifier += " writeonly"; | 748 | qualifier += " writeonly"; |
| 766 | } | 749 | } |
| 767 | 750 | ||
| 768 | code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform " | 751 | std::string format; |
| 769 | "{} {};", | 752 | if (image.IsAtomic()) { |
| 753 | format = "r32ui, "; | ||
| 754 | } | ||
| 755 | |||
| 756 | code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, | ||
| 770 | image.GetIndex(), qualifier, image_type, GetImage(image)); | 757 | image.GetIndex(), qualifier, image_type, GetImage(image)); |
| 771 | } | 758 | } |
| 772 | if (!images.empty()) { | 759 | if (!images.empty()) { |
| @@ -1234,28 +1221,13 @@ private: | |||
| 1234 | } | 1221 | } |
| 1235 | 1222 | ||
| 1236 | std::string BuildImageValues(Operation operation) { | 1223 | std::string BuildImageValues(Operation operation) { |
| 1224 | constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; | ||
| 1237 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1225 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1238 | const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> { | ||
| 1239 | constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"}; | ||
| 1240 | if (!meta.image.IsSizeKnown()) { | ||
| 1241 | return {float_constructors, Type::Float}; | ||
| 1242 | } | ||
| 1243 | switch (meta.image.GetSize()) { | ||
| 1244 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 1245 | return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint}; | ||
| 1246 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 1247 | return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint}; | ||
| 1248 | default: | ||
| 1249 | UNIMPLEMENTED_MSG("Unimplemented image size={}", | ||
| 1250 | static_cast<u32>(meta.image.GetSize())); | ||
| 1251 | return {float_constructors, Type::Float}; | ||
| 1252 | } | ||
| 1253 | }(); | ||
| 1254 | 1226 | ||
| 1255 | const std::size_t values_count{meta.values.size()}; | 1227 | const std::size_t values_count{meta.values.size()}; |
| 1256 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); | 1228 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); |
| 1257 | for (std::size_t i = 0; i < values_count; ++i) { | 1229 | for (std::size_t i = 0; i < values_count; ++i) { |
| 1258 | expr += Visit(meta.values.at(i)).As(type); | 1230 | expr += Visit(meta.values.at(i)).AsUint(); |
| 1259 | if (i + 1 < values_count) { | 1231 | if (i + 1 < values_count) { |
| 1260 | expr += ", "; | 1232 | expr += ", "; |
| 1261 | } | 1233 | } |
| @@ -1264,29 +1236,6 @@ private: | |||
| 1264 | return expr; | 1236 | return expr; |
| 1265 | } | 1237 | } |
| 1266 | 1238 | ||
| 1267 | Expression AtomicImage(Operation operation, const char* opname) { | ||
| 1268 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1269 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1270 | ASSERT(meta.values.size() == 1); | ||
| 1271 | ASSERT(meta.image.IsSizeKnown()); | ||
| 1272 | |||
| 1273 | const auto type = [&]() { | ||
| 1274 | switch (const auto size = meta.image.GetSize()) { | ||
| 1275 | case Tegra::Shader::ImageAtomicSize::U32: | ||
| 1276 | return Type::Uint; | ||
| 1277 | case Tegra::Shader::ImageAtomicSize::S32: | ||
| 1278 | return Type::Int; | ||
| 1279 | default: | ||
| 1280 | UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size)); | ||
| 1281 | return Type::Uint; | ||
| 1282 | } | ||
| 1283 | }(); | ||
| 1284 | |||
| 1285 | return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image), | ||
| 1286 | BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)), | ||
| 1287 | type}; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | Expression Assign(Operation operation) { | 1239 | Expression Assign(Operation operation) { |
| 1291 | const Node& dest = operation[0]; | 1240 | const Node& dest = operation[0]; |
| 1292 | const Node& src = operation[1]; | 1241 | const Node& src = operation[1]; |
| @@ -1809,6 +1758,19 @@ private: | |||
| 1809 | return {tmp, Type::Float}; | 1758 | return {tmp, Type::Float}; |
| 1810 | } | 1759 | } |
| 1811 | 1760 | ||
| 1761 | Expression ImageLoad(Operation operation) { | ||
| 1762 | if (!device.HasImageLoadFormatted()) { | ||
| 1763 | LOG_ERROR(Render_OpenGL, | ||
| 1764 | "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); | ||
| 1765 | return {"0", Type::Int}; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1769 | return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), | ||
| 1770 | BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), | ||
| 1771 | Type::Uint}; | ||
| 1772 | } | ||
| 1773 | |||
| 1812 | Expression ImageStore(Operation operation) { | 1774 | Expression ImageStore(Operation operation) { |
| 1813 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1775 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1814 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), | 1776 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), |
| @@ -1816,31 +1778,14 @@ private: | |||
| 1816 | return {}; | 1778 | return {}; |
| 1817 | } | 1779 | } |
| 1818 | 1780 | ||
| 1819 | Expression AtomicImageAdd(Operation operation) { | 1781 | template <const std::string_view& opname> |
| 1820 | return AtomicImage(operation, "imageAtomicAdd"); | 1782 | Expression AtomicImage(Operation operation) { |
| 1821 | } | 1783 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1822 | 1784 | ASSERT(meta.values.size() == 1); | |
| 1823 | Expression AtomicImageMin(Operation operation) { | ||
| 1824 | return AtomicImage(operation, "imageAtomicMin"); | ||
| 1825 | } | ||
| 1826 | |||
| 1827 | Expression AtomicImageMax(Operation operation) { | ||
| 1828 | return AtomicImage(operation, "imageAtomicMax"); | ||
| 1829 | } | ||
| 1830 | Expression AtomicImageAnd(Operation operation) { | ||
| 1831 | return AtomicImage(operation, "imageAtomicAnd"); | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | Expression AtomicImageOr(Operation operation) { | ||
| 1835 | return AtomicImage(operation, "imageAtomicOr"); | ||
| 1836 | } | ||
| 1837 | |||
| 1838 | Expression AtomicImageXor(Operation operation) { | ||
| 1839 | return AtomicImage(operation, "imageAtomicXor"); | ||
| 1840 | } | ||
| 1841 | 1785 | ||
| 1842 | Expression AtomicImageExchange(Operation operation) { | 1786 | return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), |
| 1843 | return AtomicImage(operation, "imageAtomicExchange"); | 1787 | BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), |
| 1788 | Type::Uint}; | ||
| 1844 | } | 1789 | } |
| 1845 | 1790 | ||
| 1846 | Expression Branch(Operation operation) { | 1791 | Expression Branch(Operation operation) { |
| @@ -2035,6 +1980,12 @@ private: | |||
| 2035 | Func() = delete; | 1980 | Func() = delete; |
| 2036 | ~Func() = delete; | 1981 | ~Func() = delete; |
| 2037 | 1982 | ||
| 1983 | static constexpr std::string_view Add = "Add"; | ||
| 1984 | static constexpr std::string_view And = "And"; | ||
| 1985 | static constexpr std::string_view Or = "Or"; | ||
| 1986 | static constexpr std::string_view Xor = "Xor"; | ||
| 1987 | static constexpr std::string_view Exchange = "Exchange"; | ||
| 1988 | |||
| 2038 | static constexpr std::string_view ShuffleIndexed = "shuffleNV"; | 1989 | static constexpr std::string_view ShuffleIndexed = "shuffleNV"; |
| 2039 | static constexpr std::string_view ShuffleUp = "shuffleUpNV"; | 1990 | static constexpr std::string_view ShuffleUp = "shuffleUpNV"; |
| 2040 | static constexpr std::string_view ShuffleDown = "shuffleDownNV"; | 1991 | static constexpr std::string_view ShuffleDown = "shuffleDownNV"; |
| @@ -2172,14 +2123,14 @@ private: | |||
| 2172 | &GLSLDecompiler::TextureQueryLod, | 2123 | &GLSLDecompiler::TextureQueryLod, |
| 2173 | &GLSLDecompiler::TexelFetch, | 2124 | &GLSLDecompiler::TexelFetch, |
| 2174 | 2125 | ||
| 2126 | &GLSLDecompiler::ImageLoad, | ||
| 2175 | &GLSLDecompiler::ImageStore, | 2127 | &GLSLDecompiler::ImageStore, |
| 2176 | &GLSLDecompiler::AtomicImageAdd, | 2128 | |
| 2177 | &GLSLDecompiler::AtomicImageMin, | 2129 | &GLSLDecompiler::AtomicImage<Func::Add>, |
| 2178 | &GLSLDecompiler::AtomicImageMax, | 2130 | &GLSLDecompiler::AtomicImage<Func::And>, |
| 2179 | &GLSLDecompiler::AtomicImageAnd, | 2131 | &GLSLDecompiler::AtomicImage<Func::Or>, |
| 2180 | &GLSLDecompiler::AtomicImageOr, | 2132 | &GLSLDecompiler::AtomicImage<Func::Xor>, |
| 2181 | &GLSLDecompiler::AtomicImageXor, | 2133 | &GLSLDecompiler::AtomicImage<Func::Exchange>, |
| 2182 | &GLSLDecompiler::AtomicImageExchange, | ||
| 2183 | 2134 | ||
| 2184 | &GLSLDecompiler::Branch, | 2135 | &GLSLDecompiler::Branch, |
| 2185 | &GLSLDecompiler::BranchIndirect, | 2136 | &GLSLDecompiler::BranchIndirect, |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 2ea02f5bf..e538dc001 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -90,7 +90,6 @@ struct ShaderEntries { | |||
| 90 | std::vector<ImageEntry> images; | 90 | std::vector<ImageEntry> images; |
| 91 | std::vector<GlobalMemoryEntry> global_memory_entries; | 91 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 92 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 92 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 93 | bool shader_viewport_layer_array{}; | ||
| 94 | std::size_t shader_length{}; | 93 | std::size_t shader_length{}; |
| 95 | }; | 94 | }; |
| 96 | 95 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index f141c4e3b..6a7012b54 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -343,20 +343,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 343 | u8 is_bindless{}; | 343 | u8 is_bindless{}; |
| 344 | u8 is_written{}; | 344 | u8 is_written{}; |
| 345 | u8 is_read{}; | 345 | u8 is_read{}; |
| 346 | u8 is_size_known{}; | 346 | u8 is_atomic{}; |
| 347 | u32 size{}; | ||
| 348 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | 347 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || |
| 349 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || | 348 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || |
| 350 | !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || | 349 | !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || |
| 351 | !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) { | 350 | !LoadObjectFromPrecompiled(is_atomic)) { |
| 352 | return {}; | 351 | return {}; |
| 353 | } | 352 | } |
| 354 | entry.entries.images.emplace_back( | 353 | entry.entries.images.emplace_back( |
| 355 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | 354 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), |
| 356 | static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, | 355 | static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, |
| 357 | is_read != 0, | 356 | is_read != 0, is_atomic != 0); |
| 358 | is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size)) | ||
| 359 | : std::nullopt); | ||
| 360 | } | 357 | } |
| 361 | 358 | ||
| 362 | u32 global_memory_count{}; | 359 | u32 global_memory_count{}; |
| @@ -382,12 +379,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 382 | } | 379 | } |
| 383 | } | 380 | } |
| 384 | 381 | ||
| 385 | bool shader_viewport_layer_array{}; | ||
| 386 | if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { | ||
| 387 | return {}; | ||
| 388 | } | ||
| 389 | entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; | ||
| 390 | |||
| 391 | u64 shader_length{}; | 382 | u64 shader_length{}; |
| 392 | if (!LoadObjectFromPrecompiled(shader_length)) { | 383 | if (!LoadObjectFromPrecompiled(shader_length)) { |
| 393 | return {}; | 384 | return {}; |
| @@ -435,14 +426,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 435 | return false; | 426 | return false; |
| 436 | } | 427 | } |
| 437 | for (const auto& image : entries.images) { | 428 | for (const auto& image : entries.images) { |
| 438 | const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U; | ||
| 439 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || | 429 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || |
| 440 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || | 430 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || |
| 441 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || | 431 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || |
| 442 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || | 432 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || |
| 443 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || | 433 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || |
| 444 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || | 434 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || |
| 445 | !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) { | 435 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) { |
| 446 | return false; | 436 | return false; |
| 447 | } | 437 | } |
| 448 | } | 438 | } |
| @@ -464,10 +454,6 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 464 | } | 454 | } |
| 465 | } | 455 | } |
| 466 | 456 | ||
| 467 | if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { | ||
| 468 | return false; | ||
| 469 | } | ||
| 470 | |||
| 471 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | 457 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { |
| 472 | return false; | 458 | return false; |
| 473 | } | 459 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index f7fbbb6e4..77fc58f25 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "video_core/engines/shader_header.h" | 19 | #include "video_core/engines/shader_header.h" |
| 20 | #include "video_core/renderer_vulkan/vk_device.h" | 20 | #include "video_core/renderer_vulkan/vk_device.h" |
| 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 22 | #include "video_core/shader/node.h" | ||
| 22 | #include "video_core/shader/shader_ir.h" | 23 | #include "video_core/shader/shader_ir.h" |
| 23 | 24 | ||
| 24 | namespace Vulkan::VKShader { | 25 | namespace Vulkan::VKShader { |
| @@ -939,22 +940,17 @@ private: | |||
| 939 | return {}; | 940 | return {}; |
| 940 | } | 941 | } |
| 941 | 942 | ||
| 942 | Id ImageStore(Operation operation) { | 943 | Id ImageLoad(Operation operation) { |
| 943 | UNIMPLEMENTED(); | ||
| 944 | return {}; | ||
| 945 | } | ||
| 946 | |||
| 947 | Id AtomicImageAdd(Operation operation) { | ||
| 948 | UNIMPLEMENTED(); | 944 | UNIMPLEMENTED(); |
| 949 | return {}; | 945 | return {}; |
| 950 | } | 946 | } |
| 951 | 947 | ||
| 952 | Id AtomicImageMin(Operation operation) { | 948 | Id ImageStore(Operation operation) { |
| 953 | UNIMPLEMENTED(); | 949 | UNIMPLEMENTED(); |
| 954 | return {}; | 950 | return {}; |
| 955 | } | 951 | } |
| 956 | 952 | ||
| 957 | Id AtomicImageMax(Operation operation) { | 953 | Id AtomicImageAdd(Operation operation) { |
| 958 | UNIMPLEMENTED(); | 954 | UNIMPLEMENTED(); |
| 959 | return {}; | 955 | return {}; |
| 960 | } | 956 | } |
| @@ -1440,10 +1436,9 @@ private: | |||
| 1440 | &SPIRVDecompiler::TextureQueryLod, | 1436 | &SPIRVDecompiler::TextureQueryLod, |
| 1441 | &SPIRVDecompiler::TexelFetch, | 1437 | &SPIRVDecompiler::TexelFetch, |
| 1442 | 1438 | ||
| 1439 | &SPIRVDecompiler::ImageLoad, | ||
| 1443 | &SPIRVDecompiler::ImageStore, | 1440 | &SPIRVDecompiler::ImageStore, |
| 1444 | &SPIRVDecompiler::AtomicImageAdd, | 1441 | &SPIRVDecompiler::AtomicImageAdd, |
| 1445 | &SPIRVDecompiler::AtomicImageMin, | ||
| 1446 | &SPIRVDecompiler::AtomicImageMax, | ||
| 1447 | &SPIRVDecompiler::AtomicImageAnd, | 1442 | &SPIRVDecompiler::AtomicImageAnd, |
| 1448 | &SPIRVDecompiler::AtomicImageOr, | 1443 | &SPIRVDecompiler::AtomicImageOr, |
| 1449 | &SPIRVDecompiler::AtomicImageXor, | 1444 | &SPIRVDecompiler::AtomicImageXor, |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d54fb88c9..95ec1cdd9 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -41,11 +41,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 41 | const Instruction instr = {program_code[pc]}; | 41 | const Instruction instr = {program_code[pc]}; |
| 42 | const auto opcode = OpCode::Decode(instr); | 42 | const auto opcode = OpCode::Decode(instr); |
| 43 | 43 | ||
| 44 | const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { | ||
| 45 | std::vector<Node> coords; | ||
| 46 | const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; | ||
| 47 | coords.reserve(num_coords); | ||
| 48 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 49 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 50 | } | ||
| 51 | return coords; | ||
| 52 | }; | ||
| 53 | |||
| 44 | switch (opcode->get().GetId()) { | 54 | switch (opcode->get().GetId()) { |
| 55 | case OpCode::Id::SULD: { | ||
| 56 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 57 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 58 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 59 | |||
| 60 | const auto type{instr.suldst.image_type}; | ||
| 61 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 62 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 63 | image.MarkRead(); | ||
| 64 | |||
| 65 | u32 indexer = 0; | ||
| 66 | for (u32 element = 0; element < 4; ++element) { | ||
| 67 | if (!instr.suldst.IsComponentEnabled(element)) { | ||
| 68 | continue; | ||
| 69 | } | ||
| 70 | MetaImage meta{image, {}, element}; | ||
| 71 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 72 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 73 | } | ||
| 74 | for (u32 i = 0; i < indexer; ++i) { | ||
| 75 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 76 | } | ||
| 77 | break; | ||
| 78 | } | ||
| 45 | case OpCode::Id::SUST: { | 79 | case OpCode::Id::SUST: { |
| 46 | UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); | 80 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); |
| 47 | UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); | 81 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != |
| 48 | UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store | 82 | Tegra::Shader::OutOfBoundsStore::Ignore); |
| 83 | UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA | ||
| 49 | 84 | ||
| 50 | std::vector<Node> values; | 85 | std::vector<Node> values; |
| 51 | constexpr std::size_t hardcoded_size{4}; | 86 | constexpr std::size_t hardcoded_size{4}; |
| @@ -53,58 +88,51 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 53 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | 88 | values.push_back(GetRegister(instr.gpr0.Value() + i)); |
| 54 | } | 89 | } |
| 55 | 90 | ||
| 56 | std::vector<Node> coords; | 91 | const auto type{instr.suldst.image_type}; |
| 57 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | 92 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) |
| 58 | for (std::size_t i = 0; i < num_coords; ++i) { | 93 | : GetBindlessImage(instr.gpr39, type)}; |
| 59 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 60 | } | ||
| 61 | |||
| 62 | const auto type{instr.sust.image_type}; | ||
| 63 | auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) | ||
| 64 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 65 | image.MarkWrite(); | 94 | image.MarkWrite(); |
| 66 | 95 | ||
| 67 | MetaImage meta{image, values}; | 96 | MetaImage meta{image, std::move(values)}; |
| 68 | bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); | 97 | bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); |
| 69 | break; | 98 | break; |
| 70 | } | 99 | } |
| 71 | case OpCode::Id::SUATOM: { | 100 | case OpCode::Id::SUATOM: { |
| 72 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | 101 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); |
| 73 | 102 | ||
| 74 | Node value = GetRegister(instr.gpr0); | ||
| 75 | |||
| 76 | std::vector<Node> coords; | ||
| 77 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | ||
| 78 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 79 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 80 | } | ||
| 81 | |||
| 82 | const OperationCode operation_code = [instr] { | 103 | const OperationCode operation_code = [instr] { |
| 83 | switch (instr.suatom_d.operation) { | 104 | switch (instr.suatom_d.operation_type) { |
| 84 | case Tegra::Shader::ImageAtomicOperation::Add: | 105 | case Tegra::Shader::ImageAtomicOperationType::S32: |
| 85 | return OperationCode::AtomicImageAdd; | 106 | case Tegra::Shader::ImageAtomicOperationType::U32: |
| 86 | case Tegra::Shader::ImageAtomicOperation::Min: | 107 | switch (instr.suatom_d.operation) { |
| 87 | return OperationCode::AtomicImageMin; | 108 | case Tegra::Shader::ImageAtomicOperation::Add: |
| 88 | case Tegra::Shader::ImageAtomicOperation::Max: | 109 | return OperationCode::AtomicImageAdd; |
| 89 | return OperationCode::AtomicImageMax; | 110 | case Tegra::Shader::ImageAtomicOperation::And: |
| 90 | case Tegra::Shader::ImageAtomicOperation::And: | 111 | return OperationCode::AtomicImageAnd; |
| 91 | return OperationCode::AtomicImageAnd; | 112 | case Tegra::Shader::ImageAtomicOperation::Or: |
| 92 | case Tegra::Shader::ImageAtomicOperation::Or: | 113 | return OperationCode::AtomicImageOr; |
| 93 | return OperationCode::AtomicImageOr; | 114 | case Tegra::Shader::ImageAtomicOperation::Xor: |
| 94 | case Tegra::Shader::ImageAtomicOperation::Xor: | 115 | return OperationCode::AtomicImageXor; |
| 95 | return OperationCode::AtomicImageXor; | 116 | case Tegra::Shader::ImageAtomicOperation::Exch: |
| 96 | case Tegra::Shader::ImageAtomicOperation::Exch: | 117 | return OperationCode::AtomicImageExchange; |
| 97 | return OperationCode::AtomicImageExchange; | 118 | } |
| 98 | default: | 119 | default: |
| 99 | UNIMPLEMENTED_MSG("Unimplemented operation={}", | 120 | break; |
| 100 | static_cast<u32>(instr.suatom_d.operation.Value())); | ||
| 101 | return OperationCode::AtomicImageAdd; | ||
| 102 | } | 121 | } |
| 122 | UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", | ||
| 123 | static_cast<u64>(instr.suatom_d.operation.Value()), | ||
| 124 | static_cast<u64>(instr.suatom_d.operation_type.Value())); | ||
| 125 | return OperationCode::AtomicImageAdd; | ||
| 103 | }(); | 126 | }(); |
| 104 | 127 | ||
| 105 | const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; | 128 | Node value = GetRegister(instr.gpr0); |
| 129 | |||
| 130 | const auto type = instr.suatom_d.image_type; | ||
| 131 | auto& image = GetImage(instr.image, type); | ||
| 132 | image.MarkAtomic(); | ||
| 133 | |||
| 106 | MetaImage meta{image, {std::move(value)}}; | 134 | MetaImage meta{image, {std::move(value)}}; |
| 107 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); | 135 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); |
| 108 | break; | 136 | break; |
| 109 | } | 137 | } |
| 110 | default: | 138 | default: |
| @@ -114,35 +142,32 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 114 | return pc; | 142 | return pc; |
| 115 | } | 143 | } |
| 116 | 144 | ||
| 117 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, | 145 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { |
| 118 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 119 | const auto offset{static_cast<std::size_t>(image.index.Value())}; | 146 | const auto offset{static_cast<std::size_t>(image.index.Value())}; |
| 120 | if (const auto image = TryUseExistingImage(offset, type, size)) { | 147 | if (const auto image = TryUseExistingImage(offset, type)) { |
| 121 | return *image; | 148 | return *image; |
| 122 | } | 149 | } |
| 123 | 150 | ||
| 124 | const std::size_t next_index{used_images.size()}; | 151 | const std::size_t next_index{used_images.size()}; |
| 125 | return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; | 152 | return used_images.emplace(offset, Image{offset, next_index, type}).first->second; |
| 126 | } | 153 | } |
| 127 | 154 | ||
| 128 | Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, | 155 | Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { |
| 129 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 130 | const Node image_register{GetRegister(reg)}; | 156 | const Node image_register{GetRegister(reg)}; |
| 131 | const auto [base_image, cbuf_index, cbuf_offset]{ | 157 | const auto [base_image, cbuf_index, cbuf_offset]{ |
| 132 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | 158 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; |
| 133 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | 159 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; |
| 134 | 160 | ||
| 135 | if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { | 161 | if (const auto image = TryUseExistingImage(cbuf_key, type)) { |
| 136 | return *image; | 162 | return *image; |
| 137 | } | 163 | } |
| 138 | 164 | ||
| 139 | const std::size_t next_index{used_images.size()}; | 165 | const std::size_t next_index{used_images.size()}; |
| 140 | return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) | 166 | return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) |
| 141 | .first->second; | 167 | .first->second; |
| 142 | } | 168 | } |
| 143 | 169 | ||
| 144 | Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, | 170 | Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) { |
| 145 | std::optional<Tegra::Shader::ImageAtomicSize> size) { | ||
| 146 | auto it = used_images.find(offset); | 171 | auto it = used_images.find(offset); |
| 147 | if (it == used_images.end()) { | 172 | if (it == used_images.end()) { |
| 148 | return nullptr; | 173 | return nullptr; |
| @@ -150,14 +175,6 @@ Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, | |||
| 150 | auto& image = it->second; | 175 | auto& image = it->second; |
| 151 | ASSERT(image.GetType() == type); | 176 | ASSERT(image.GetType() == type); |
| 152 | 177 | ||
| 153 | if (size) { | ||
| 154 | // We know the size, if it's known it has to be the same as before, otherwise we can set it. | ||
| 155 | if (image.IsSizeKnown()) { | ||
| 156 | ASSERT(image.GetSize() == size); | ||
| 157 | } else { | ||
| 158 | image.SetSize(*size); | ||
| 159 | } | ||
| 160 | } | ||
| 161 | return &image; | 178 | return &image; |
| 162 | } | 179 | } |
| 163 | 180 | ||
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index abf2cb1ab..338bab17c 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -149,10 +149,10 @@ enum class OperationCode { | |||
| 149 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 149 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 150 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 150 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 151 | 151 | ||
| 152 | ImageStore, /// (MetaImage, int[N] values) -> void | 152 | ImageLoad, /// (MetaImage, int[N] coords) -> void |
| 153 | ImageStore, /// (MetaImage, int[N] coords) -> void | ||
| 154 | |||
| 153 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void | 155 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void |
| 154 | AtomicImageMin, /// (MetaImage, int[N] coords) -> void | ||
| 155 | AtomicImageMax, /// (MetaImage, int[N] coords) -> void | ||
| 156 | AtomicImageAnd, /// (MetaImage, int[N] coords) -> void | 156 | AtomicImageAnd, /// (MetaImage, int[N] coords) -> void |
| 157 | AtomicImageOr, /// (MetaImage, int[N] coords) -> void | 157 | AtomicImageOr, /// (MetaImage, int[N] coords) -> void |
| 158 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | 158 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void |
| @@ -294,21 +294,18 @@ private: | |||
| 294 | 294 | ||
| 295 | class Image final { | 295 | class Image final { |
| 296 | public: | 296 | public: |
| 297 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, | 297 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) |
| 298 | std::optional<Tegra::Shader::ImageAtomicSize> size) | 298 | : offset{offset}, index{index}, type{type}, is_bindless{false} {} |
| 299 | : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {} | ||
| 300 | 299 | ||
| 301 | constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, | 300 | constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, |
| 302 | Tegra::Shader::ImageType type, | 301 | Tegra::Shader::ImageType type) |
| 303 | std::optional<Tegra::Shader::ImageAtomicSize> size) | ||
| 304 | : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, | 302 | : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, |
| 305 | is_bindless{true}, size{size} {} | 303 | is_bindless{true} {} |
| 306 | 304 | ||
| 307 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, | 305 | constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, |
| 308 | bool is_bindless, bool is_written, bool is_read, | 306 | bool is_bindless, bool is_written, bool is_read, bool is_atomic) |
| 309 | std::optional<Tegra::Shader::ImageAtomicSize> size) | ||
| 310 | : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, | 307 | : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, |
| 311 | is_written{is_written}, is_read{is_read}, size{size} {} | 308 | is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {} |
| 312 | 309 | ||
| 313 | void MarkWrite() { | 310 | void MarkWrite() { |
| 314 | is_written = true; | 311 | is_written = true; |
| @@ -318,8 +315,10 @@ public: | |||
| 318 | is_read = true; | 315 | is_read = true; |
| 319 | } | 316 | } |
| 320 | 317 | ||
| 321 | void SetSize(Tegra::Shader::ImageAtomicSize size_) { | 318 | void MarkAtomic() { |
| 322 | size = size_; | 319 | MarkWrite(); |
| 320 | MarkRead(); | ||
| 321 | is_atomic = true; | ||
| 323 | } | 322 | } |
| 324 | 323 | ||
| 325 | constexpr std::size_t GetOffset() const { | 324 | constexpr std::size_t GetOffset() const { |
| @@ -346,21 +345,17 @@ public: | |||
| 346 | return is_read; | 345 | return is_read; |
| 347 | } | 346 | } |
| 348 | 347 | ||
| 349 | constexpr std::pair<u32, u32> GetBindlessCBuf() const { | 348 | constexpr bool IsAtomic() const { |
| 350 | return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; | 349 | return is_atomic; |
| 351 | } | 350 | } |
| 352 | 351 | ||
| 353 | constexpr bool IsSizeKnown() const { | 352 | constexpr std::pair<u32, u32> GetBindlessCBuf() const { |
| 354 | return size.has_value(); | 353 | return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; |
| 355 | } | ||
| 356 | |||
| 357 | constexpr Tegra::Shader::ImageAtomicSize GetSize() const { | ||
| 358 | return size.value(); | ||
| 359 | } | 354 | } |
| 360 | 355 | ||
| 361 | constexpr bool operator<(const Image& rhs) const { | 356 | constexpr bool operator<(const Image& rhs) const { |
| 362 | return std::tie(offset, index, type, size, is_bindless) < | 357 | return std::tie(offset, index, type, is_bindless) < |
| 363 | std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless); | 358 | std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); |
| 364 | } | 359 | } |
| 365 | 360 | ||
| 366 | private: | 361 | private: |
| @@ -370,7 +365,7 @@ private: | |||
| 370 | bool is_bindless{}; | 365 | bool is_bindless{}; |
| 371 | bool is_written{}; | 366 | bool is_written{}; |
| 372 | bool is_read{}; | 367 | bool is_read{}; |
| 373 | std::optional<Tegra::Shader::ImageAtomicSize> size{}; | 368 | bool is_atomic{}; |
| 374 | }; | 369 | }; |
| 375 | 370 | ||
| 376 | struct GlobalMemoryBase { | 371 | struct GlobalMemoryBase { |
| @@ -402,6 +397,7 @@ struct MetaTexture { | |||
| 402 | struct MetaImage { | 397 | struct MetaImage { |
| 403 | const Image& image; | 398 | const Image& image; |
| 404 | std::vector<Node> values; | 399 | std::vector<Node> values; |
| 400 | u32 element{}; | ||
| 405 | }; | 401 | }; |
| 406 | 402 | ||
| 407 | /// Parameters that modify an operation but are not part of any particular operand | 403 | /// Parameters that modify an operation but are not part of any particular operand |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 2f03d83ba..6f666ee30 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -284,16 +284,13 @@ private: | |||
| 284 | bool is_shadow); | 284 | bool is_shadow); |
| 285 | 285 | ||
| 286 | /// Accesses an image. | 286 | /// Accesses an image. |
| 287 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, | 287 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |
| 288 | std::optional<Tegra::Shader::ImageAtomicSize> size = {}); | ||
| 289 | 288 | ||
| 290 | /// Access a bindless image sampler. | 289 | /// Access a bindless image sampler. |
| 291 | Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, | 290 | Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); |
| 292 | std::optional<Tegra::Shader::ImageAtomicSize> size = {}); | ||
| 293 | 291 | ||
| 294 | /// Tries to access an existing image, updating it's state as needed | 292 | /// Tries to access an existing image, updating it's state as needed |
| 295 | Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, | 293 | Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type); |
| 296 | std::optional<Tegra::Shader::ImageAtomicSize> size); | ||
| 297 | 294 | ||
| 298 | /// Extracts a sequence of bits from a node | 295 | /// Extracts a sequence of bits from a node |
| 299 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 296 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |