diff options
| field | value | |
|---|---|---|
| author | 2019-09-18 01:07:01 -0300 | |
| committer | 2019-09-21 17:32:48 -0300 | |
| commit | 675f23aedc9a3a99925068e952cbcb3faf88296a (patch) | |
| tree | 01b3101c3837428903bb91f29d02fa2c099468a6 | |
| parent | shader_bytecode: Add SULD encoding (diff) | |
| download | yuzu-675f23aedc9a3a99925068e952cbcb3faf88296a.tar.gz yuzu-675f23aedc9a3a99925068e952cbcb3faf88296a.tar.xz yuzu-675f23aedc9a3a99925068e952cbcb3faf88296a.zip | |
shader/image: Implement SULD and remove irrelevant code
* Implement SULD as float.
* Remove conditional declaration of GL_ARB_shader_viewport_layer_array.
Diffstat
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 22 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 16 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 17 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 1 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 10 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 7 |
| -rw-r--r-- | src/video_core/shader/decode/image.cpp | 73 |
| -rw-r--r-- | src/video_core/shader/node.h | 4 |
10 files changed, 110 insertions, 47 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 12fb8abb7..81dfe33a5 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -1427,7 +1427,7 @@ union Instruction { | |||
| 1427 | ASSERT(mode == SurfaceDataMode::D_BA); | 1427 | ASSERT(mode == SurfaceDataMode::D_BA); |
| 1428 | return store_data_layout; | 1428 | return store_data_layout; |
| 1429 | } | 1429 | } |
| 1430 | } sust; | 1430 | } suldst; |
| 1431 | 1431 | ||
| 1432 | union { | 1432 | union { |
| 1433 | BitField<28, 1, u64> is_ba; | 1433 | BitField<28, 1, u64> is_ba; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 4f59a87b4..64de7e425 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -2,8 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <array> | 6 | #include <array> |
| 6 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <vector> | ||
| 7 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 8 | 10 | ||
| 9 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| @@ -30,9 +32,27 @@ bool TestProgram(const GLchar* glsl) { | |||
| 30 | return link_status == GL_TRUE; | 32 | return link_status == GL_TRUE; |
| 31 | } | 33 | } |
| 32 | 34 | ||
| 35 | std::vector<std::string_view> GetExtensions() { | ||
| 36 | GLint num_extensions; | ||
| 37 | glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); | ||
| 38 | std::vector<std::string_view> extensions; | ||
| 39 | extensions.reserve(num_extensions); | ||
| 40 | for (GLint index = 0; index < num_extensions; ++index) { | ||
| 41 | extensions.push_back( | ||
| 42 | reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, static_cast<GLuint>(index)))); | ||
| 43 | } | ||
| 44 | return extensions; | ||
| 45 | } | ||
| 46 | |||
| 47 | bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { | ||
| 48 | return std::find(images.begin(), images.end(), extension) != images.end(); | ||
| 49 | } | ||
| 50 | |||
| 33 | } // Anonymous namespace | 51 | } // Anonymous namespace |
| 34 | 52 | ||
| 35 | Device::Device() { | 53 | Device::Device() { |
| 54 | const std::vector extensions = GetExtensions(); | ||
| 55 | |||
| 36 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 56 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 37 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 57 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 38 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 58 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| @@ -40,6 +60,7 @@ Device::Device() { | |||
| 40 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | 60 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
| 41 | GLAD_GL_NV_shader_thread_shuffle; | 61 | GLAD_GL_NV_shader_thread_shuffle; |
| 42 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | 62 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; |
| 63 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); | ||
| 43 | has_variable_aoffi = TestVariableAoffi(); | 64 | has_variable_aoffi = TestVariableAoffi(); |
| 44 | has_component_indexing_bug = TestComponentIndexingBug(); | 65 | has_component_indexing_bug = TestComponentIndexingBug(); |
| 45 | has_precise_bug = TestPreciseBug(); | 66 | has_precise_bug = TestPreciseBug(); |
| @@ -55,6 +76,7 @@ Device::Device(std::nullptr_t) { | |||
| 55 | max_varyings = 15; | 76 | max_varyings = 15; |
| 56 | has_warp_intrinsics = true; | 77 | has_warp_intrinsics = true; |
| 57 | has_vertex_viewport_layer = true; | 78 | has_vertex_viewport_layer = true; |
| 79 | has_image_load_formatted = true; | ||
| 58 | has_variable_aoffi = true; | 80 | has_variable_aoffi = true; |
| 59 | has_component_indexing_bug = false; | 81 | has_component_indexing_bug = false; |
| 60 | has_precise_bug = false; | 82 | has_precise_bug = false; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ba6dcd3be..bb273c3d6 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -38,6 +38,10 @@ public: | |||
| 38 | return has_vertex_viewport_layer; | 38 | return has_vertex_viewport_layer; |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | bool HasImageLoadFormatted() const { | ||
| 42 | return has_image_load_formatted; | ||
| 43 | } | ||
| 44 | |||
| 41 | bool HasVariableAoffi() const { | 45 | bool HasVariableAoffi() const { |
| 42 | return has_variable_aoffi; | 46 | return has_variable_aoffi; |
| 43 | } | 47 | } |
| @@ -61,6 +65,7 @@ private: | |||
| 61 | u32 max_varyings{}; | 65 | u32 max_varyings{}; |
| 62 | bool has_warp_intrinsics{}; | 66 | bool has_warp_intrinsics{}; |
| 63 | bool has_vertex_viewport_layer{}; | 67 | bool has_vertex_viewport_layer{}; |
| 68 | bool has_image_load_formatted{}; | ||
| 64 | bool has_variable_aoffi{}; | 69 | bool has_variable_aoffi{}; |
| 65 | bool has_component_indexing_bug{}; | 70 | bool has_component_indexing_bug{}; |
| 66 | bool has_precise_bug{}; | 71 | bool has_precise_bug{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0dbc4c02f..42ca3b1bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -211,14 +211,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 211 | const auto primitive_mode{variant.primitive_mode}; | 211 | const auto primitive_mode{variant.primitive_mode}; |
| 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 213 | 213 | ||
| 214 | std::string source = "#version 430 core\n" | 214 | std::string source = R"(#version 430 core |
| 215 | "#extension GL_ARB_separate_shader_objects : enable\n" | 215 | #extension GL_ARB_separate_shader_objects : enable |
| 216 | "#extension GL_NV_gpu_shader5 : enable\n" | 216 | #extension GL_ARB_shader_viewport_layer_array : enable |
| 217 | "#extension GL_NV_shader_thread_group : enable\n" | 217 | #extension GL_EXT_shader_image_load_formatted : enable |
| 218 | "#extension GL_NV_shader_thread_shuffle : enable\n"; | 218 | #extension GL_NV_gpu_shader5 : enable |
| 219 | if (entries.shader_viewport_layer_array) { | 219 | #extension GL_NV_shader_thread_group : enable |
| 220 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | 220 | #extension GL_NV_shader_thread_shuffle : enable |
| 221 | } | 221 | )"; |
| 222 | if (program_type == ProgramType::Compute) { | 222 | if (program_type == ProgramType::Compute) { |
| 223 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | 223 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; |
| 224 | } | 224 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 76439e7ab..70ce6572b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | 19 | #include "video_core/renderer_opengl/gl_device.h" |
| 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 21 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 22 | #include "video_core/shader/node.h" | ||
| 22 | #include "video_core/shader/shader_ir.h" | 23 | #include "video_core/shader/shader_ir.h" |
| 23 | 24 | ||
| 24 | namespace OpenGL::GLShader { | 25 | namespace OpenGL::GLShader { |
| @@ -398,8 +399,6 @@ public: | |||
| 398 | usage.is_read, usage.is_written); | 399 | usage.is_read, usage.is_written); |
| 399 | } | 400 | } |
| 400 | entries.clip_distances = ir.GetClipDistances(); | 401 | entries.clip_distances = ir.GetClipDistances(); |
| 401 | entries.shader_viewport_layer_array = | ||
| 402 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||
| 403 | entries.shader_length = ir.GetLength(); | 402 | entries.shader_length = ir.GetLength(); |
| 404 | return entries; | 403 | return entries; |
| 405 | } | 404 | } |
| @@ -1801,6 +1800,19 @@ private: | |||
| 1801 | return {tmp, Type::Float}; | 1800 | return {tmp, Type::Float}; |
| 1802 | } | 1801 | } |
| 1803 | 1802 | ||
| 1803 | Expression ImageLoad(Operation operation) { | ||
| 1804 | if (!device.HasImageLoadFormatted()) { | ||
| 1805 | LOG_ERROR(Render_OpenGL, | ||
| 1806 | "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); | ||
| 1807 | return {"0", Type::Int}; | ||
| 1808 | } | ||
| 1809 | |||
| 1810 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1811 | return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), | ||
| 1812 | BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), | ||
| 1813 | Type::Float}; | ||
| 1814 | } | ||
| 1815 | |||
| 1804 | Expression ImageStore(Operation operation) { | 1816 | Expression ImageStore(Operation operation) { |
| 1805 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1817 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1806 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), | 1818 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), |
| @@ -2164,6 +2176,7 @@ private: | |||
| 2164 | &GLSLDecompiler::TextureQueryLod, | 2176 | &GLSLDecompiler::TextureQueryLod, |
| 2165 | &GLSLDecompiler::TexelFetch, | 2177 | &GLSLDecompiler::TexelFetch, |
| 2166 | 2178 | ||
| 2179 | &GLSLDecompiler::ImageLoad, | ||
| 2167 | &GLSLDecompiler::ImageStore, | 2180 | &GLSLDecompiler::ImageStore, |
| 2168 | &GLSLDecompiler::AtomicImageAdd, | 2181 | &GLSLDecompiler::AtomicImageAdd, |
| 2169 | &GLSLDecompiler::AtomicImageMin, | 2182 | &GLSLDecompiler::AtomicImageMin, |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 2ea02f5bf..e538dc001 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -90,7 +90,6 @@ struct ShaderEntries { | |||
| 90 | std::vector<ImageEntry> images; | 90 | std::vector<ImageEntry> images; |
| 91 | std::vector<GlobalMemoryEntry> global_memory_entries; | 91 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 92 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 92 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 93 | bool shader_viewport_layer_array{}; | ||
| 94 | std::size_t shader_length{}; | 93 | std::size_t shader_length{}; |
| 95 | }; | 94 | }; |
| 96 | 95 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index f141c4e3b..02b4dd234 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -382,12 +382,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 382 | } | 382 | } |
| 383 | } | 383 | } |
| 384 | 384 | ||
| 385 | bool shader_viewport_layer_array{}; | ||
| 386 | if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { | ||
| 387 | return {}; | ||
| 388 | } | ||
| 389 | entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; | ||
| 390 | |||
| 391 | u64 shader_length{}; | 385 | u64 shader_length{}; |
| 392 | if (!LoadObjectFromPrecompiled(shader_length)) { | 386 | if (!LoadObjectFromPrecompiled(shader_length)) { |
| 393 | return {}; | 387 | return {}; |
| @@ -464,10 +458,6 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 464 | } | 458 | } |
| 465 | } | 459 | } |
| 466 | 460 | ||
| 467 | if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { | ||
| 468 | return false; | ||
| 469 | } | ||
| 470 | |||
| 471 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | 461 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { |
| 472 | return false; | 462 | return false; |
| 473 | } | 463 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index f7fbbb6e4..9d31bff43 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "video_core/engines/shader_header.h" | 19 | #include "video_core/engines/shader_header.h" |
| 20 | #include "video_core/renderer_vulkan/vk_device.h" | 20 | #include "video_core/renderer_vulkan/vk_device.h" |
| 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 22 | #include "video_core/shader/node.h" | ||
| 22 | #include "video_core/shader/shader_ir.h" | 23 | #include "video_core/shader/shader_ir.h" |
| 23 | 24 | ||
| 24 | namespace Vulkan::VKShader { | 25 | namespace Vulkan::VKShader { |
| @@ -939,6 +940,11 @@ private: | |||
| 939 | return {}; | 940 | return {}; |
| 940 | } | 941 | } |
| 941 | 942 | ||
| 943 | Id ImageLoad(Operation operation) { | ||
| 944 | UNIMPLEMENTED(); | ||
| 945 | return {}; | ||
| 946 | } | ||
| 947 | |||
| 942 | Id ImageStore(Operation operation) { | 948 | Id ImageStore(Operation operation) { |
| 943 | UNIMPLEMENTED(); | 949 | UNIMPLEMENTED(); |
| 944 | return {}; | 950 | return {}; |
| @@ -1440,6 +1446,7 @@ private: | |||
| 1440 | &SPIRVDecompiler::TextureQueryLod, | 1446 | &SPIRVDecompiler::TextureQueryLod, |
| 1441 | &SPIRVDecompiler::TexelFetch, | 1447 | &SPIRVDecompiler::TexelFetch, |
| 1442 | 1448 | ||
| 1449 | &SPIRVDecompiler::ImageLoad, | ||
| 1443 | &SPIRVDecompiler::ImageStore, | 1450 | &SPIRVDecompiler::ImageStore, |
| 1444 | &SPIRVDecompiler::AtomicImageAdd, | 1451 | &SPIRVDecompiler::AtomicImageAdd, |
| 1445 | &SPIRVDecompiler::AtomicImageMin, | 1452 | &SPIRVDecompiler::AtomicImageMin, |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d54fb88c9..e611f9f3b 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -41,11 +41,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 41 | const Instruction instr = {program_code[pc]}; | 41 | const Instruction instr = {program_code[pc]}; |
| 42 | const auto opcode = OpCode::Decode(instr); | 42 | const auto opcode = OpCode::Decode(instr); |
| 43 | 43 | ||
| 44 | const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { | ||
| 45 | std::vector<Node> coords; | ||
| 46 | const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; | ||
| 47 | coords.reserve(num_coords); | ||
| 48 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 49 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 50 | } | ||
| 51 | return coords; | ||
| 52 | }; | ||
| 53 | |||
| 44 | switch (opcode->get().GetId()) { | 54 | switch (opcode->get().GetId()) { |
| 55 | case OpCode::Id::SULD: { | ||
| 56 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 57 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 58 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 59 | |||
| 60 | const auto type{instr.suldst.image_type}; | ||
| 61 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 62 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 63 | image.MarkRead(); | ||
| 64 | |||
| 65 | u32 indexer = 0; | ||
| 66 | for (u32 element = 0; element < 4; ++element) { | ||
| 67 | if (!instr.suldst.IsComponentEnabled(element)) { | ||
| 68 | continue; | ||
| 69 | } | ||
| 70 | MetaImage meta{image, {}, element}; | ||
| 71 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 72 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 73 | } | ||
| 74 | for (u32 i = 0; i < indexer; ++i) { | ||
| 75 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 76 | } | ||
| 77 | break; | ||
| 78 | } | ||
| 45 | case OpCode::Id::SUST: { | 79 | case OpCode::Id::SUST: { |
| 46 | UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); | 80 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); |
| 47 | UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); | 81 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != |
| 48 | UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store | 82 | Tegra::Shader::OutOfBoundsStore::Ignore); |
| 83 | UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA | ||
| 49 | 84 | ||
| 50 | std::vector<Node> values; | 85 | std::vector<Node> values; |
| 51 | constexpr std::size_t hardcoded_size{4}; | 86 | constexpr std::size_t hardcoded_size{4}; |
| @@ -53,32 +88,18 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 53 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | 88 | values.push_back(GetRegister(instr.gpr0.Value() + i)); |
| 54 | } | 89 | } |
| 55 | 90 | ||
| 56 | std::vector<Node> coords; | 91 | const auto type{instr.suldst.image_type}; |
| 57 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | 92 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) |
| 58 | for (std::size_t i = 0; i < num_coords; ++i) { | 93 | : GetBindlessImage(instr.gpr39, type)}; |
| 59 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 60 | } | ||
| 61 | |||
| 62 | const auto type{instr.sust.image_type}; | ||
| 63 | auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) | ||
| 64 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 65 | image.MarkWrite(); | 94 | image.MarkWrite(); |
| 66 | 95 | ||
| 67 | MetaImage meta{image, values}; | 96 | MetaImage meta{image, std::move(values)}; |
| 68 | bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); | 97 | bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); |
| 69 | break; | 98 | break; |
| 70 | } | 99 | } |
| 71 | case OpCode::Id::SUATOM: { | 100 | case OpCode::Id::SUATOM: { |
| 72 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | 101 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); |
| 73 | 102 | ||
| 74 | Node value = GetRegister(instr.gpr0); | ||
| 75 | |||
| 76 | std::vector<Node> coords; | ||
| 77 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | ||
| 78 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 79 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 80 | } | ||
| 81 | |||
| 82 | const OperationCode operation_code = [instr] { | 103 | const OperationCode operation_code = [instr] { |
| 83 | switch (instr.suatom_d.operation) { | 104 | switch (instr.suatom_d.operation) { |
| 84 | case Tegra::Shader::ImageAtomicOperation::Add: | 105 | case Tegra::Shader::ImageAtomicOperation::Add: |
| @@ -102,9 +123,13 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 102 | } | 123 | } |
| 103 | }(); | 124 | }(); |
| 104 | 125 | ||
| 105 | const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; | 126 | Node value = GetRegister(instr.gpr0); |
| 127 | |||
| 128 | const auto type = instr.suatom_d.image_type; | ||
| 129 | const auto& image{GetImage(instr.image, type, instr.suatom_d.size)}; | ||
| 130 | |||
| 106 | MetaImage meta{image, {std::move(value)}}; | 131 | MetaImage meta{image, {std::move(value)}}; |
| 107 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); | 132 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); |
| 108 | break; | 133 | break; |
| 109 | } | 134 | } |
| 110 | default: | 135 | default: |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index abf2cb1ab..e5b75783d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -149,7 +149,8 @@ enum class OperationCode { | |||
| 149 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 149 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 150 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 150 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 151 | 151 | ||
| 152 | ImageStore, /// (MetaImage, int[N] values) -> void | 152 | ImageLoad, /// (MetaImage, int[N] coords) -> void |
| 153 | ImageStore, /// (MetaImage, int[N] coords) -> void | ||
| 153 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void | 154 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void |
| 154 | AtomicImageMin, /// (MetaImage, int[N] coords) -> void | 155 | AtomicImageMin, /// (MetaImage, int[N] coords) -> void |
| 155 | AtomicImageMax, /// (MetaImage, int[N] coords) -> void | 156 | AtomicImageMax, /// (MetaImage, int[N] coords) -> void |
| @@ -402,6 +403,7 @@ struct MetaTexture { | |||
| 402 | struct MetaImage { | 403 | struct MetaImage { |
| 403 | const Image& image; | 404 | const Image& image; |
| 404 | std::vector<Node> values; | 405 | std::vector<Node> values; |
| 406 | u32 element{}; | ||
| 405 | }; | 407 | }; |
| 406 | 408 | ||
| 407 | /// Parameters that modify an operation but are not part of any particular operand | 409 | /// Parameters that modify an operation but are not part of any particular operand |