diff options
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/blit.frag | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/blit.vert | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/quad_array.comp | 37 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/shaders/uint8.comp | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 38 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 13 |
10 files changed, 179 insertions, 36 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d6a2cc8b8..dfb12cd2d 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -1973,7 +1973,7 @@ private: | |||
| 1973 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), | 1973 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1974 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1974 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1975 | INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), | 1975 | INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), |
| 1976 | INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"), | 1976 | INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1977 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1977 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1978 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1978 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1979 | INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), | 1979 | INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d1ae4be6d..0389c2143 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -399,6 +399,7 @@ public: | |||
| 399 | DeclareConstantBuffers(); | 399 | DeclareConstantBuffers(); |
| 400 | DeclareGlobalMemory(); | 400 | DeclareGlobalMemory(); |
| 401 | DeclareSamplers(); | 401 | DeclareSamplers(); |
| 402 | DeclareImages(); | ||
| 402 | DeclarePhysicalAttributeReader(); | 403 | DeclarePhysicalAttributeReader(); |
| 403 | 404 | ||
| 404 | code.AddLine("void execute_{}() {{", suffix); | 405 | code.AddLine("void execute_{}() {{", suffix); |
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/renderer_vulkan/shaders/blit.frag new file mode 100644 index 000000000..a06ecd24a --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/blit.frag | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (location = 0) in vec2 frag_tex_coord; | ||
| 17 | |||
| 18 | layout (location = 0) out vec4 color; | ||
| 19 | |||
| 20 | layout (binding = 1) uniform sampler2D color_texture; | ||
| 21 | |||
| 22 | void main() { | ||
| 23 | color = texture(color_texture, frag_tex_coord); | ||
| 24 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/renderer_vulkan/shaders/blit.vert new file mode 100644 index 000000000..c64d9235a --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/blit.vert | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (location = 0) in vec2 vert_position; | ||
| 17 | layout (location = 1) in vec2 vert_tex_coord; | ||
| 18 | |||
| 19 | layout (location = 0) out vec2 frag_tex_coord; | ||
| 20 | |||
| 21 | layout (set = 0, binding = 0) uniform MatrixBlock { | ||
| 22 | mat4 modelview_matrix; | ||
| 23 | }; | ||
| 24 | |||
| 25 | void main() { | ||
| 26 | gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); | ||
| 27 | frag_tex_coord = vert_tex_coord; | ||
| 28 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/renderer_vulkan/shaders/quad_array.comp new file mode 100644 index 000000000..5a5703308 --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/quad_array.comp | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | |||
| 16 | layout (local_size_x = 1024) in; | ||
| 17 | |||
| 18 | layout (std430, set = 0, binding = 0) buffer OutputBuffer { | ||
| 19 | uint output_indexes[]; | ||
| 20 | }; | ||
| 21 | |||
| 22 | layout (push_constant) uniform PushConstants { | ||
| 23 | uint first; | ||
| 24 | }; | ||
| 25 | |||
| 26 | void main() { | ||
| 27 | uint primitive = gl_GlobalInvocationID.x; | ||
| 28 | if (primitive * 6 >= output_indexes.length()) { | ||
| 29 | return; | ||
| 30 | } | ||
| 31 | |||
| 32 | const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); | ||
| 33 | for (uint vertex = 0; vertex < 6; ++vertex) { | ||
| 34 | uint index = first + primitive * 4 + quad_map[vertex]; | ||
| 35 | output_indexes[primitive * 6 + vertex] = index; | ||
| 36 | } | ||
| 37 | } | ||
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/renderer_vulkan/shaders/uint8.comp new file mode 100644 index 000000000..a320f3ae0 --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/uint8.comp | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | /* | ||
| 6 | * Build instructions: | ||
| 7 | * $ glslangValidator -V $THIS_FILE -o output.spv | ||
| 8 | * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||
| 9 | * $ xxd -i optimized.spv | ||
| 10 | * | ||
| 11 | * Then copy that bytecode to the C++ file | ||
| 12 | */ | ||
| 13 | |||
| 14 | #version 460 core | ||
| 15 | #extension GL_EXT_shader_16bit_storage : require | ||
| 16 | #extension GL_EXT_shader_8bit_storage : require | ||
| 17 | |||
| 18 | layout (local_size_x = 1024) in; | ||
| 19 | |||
| 20 | layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { | ||
| 21 | uint8_t input_indexes[]; | ||
| 22 | }; | ||
| 23 | |||
| 24 | layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | ||
| 25 | uint16_t output_indexes[]; | ||
| 26 | }; | ||
| 27 | |||
| 28 | void main() { | ||
| 29 | uint id = gl_GlobalInvocationID.x; | ||
| 30 | if (id < input_indexes.length()) { | ||
| 31 | output_indexes[id] = uint16_t(input_indexes[id]); | ||
| 32 | } | ||
| 33 | } | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 833145971..a8baf91de 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -2557,29 +2557,7 @@ public: | |||
| 2557 | } | 2557 | } |
| 2558 | 2558 | ||
| 2559 | Id operator()(const ExprCondCode& expr) { | 2559 | Id operator()(const ExprCondCode& expr) { |
| 2560 | const Node cc = decomp.ir.GetConditionCode(expr.cc); | 2560 | return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); |
| 2561 | Id target; | ||
| 2562 | |||
| 2563 | if (const auto pred = std::get_if<PredicateNode>(&*cc)) { | ||
| 2564 | const auto index = pred->GetIndex(); | ||
| 2565 | switch (index) { | ||
| 2566 | case Tegra::Shader::Pred::NeverExecute: | ||
| 2567 | target = decomp.v_false; | ||
| 2568 | break; | ||
| 2569 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 2570 | target = decomp.v_true; | ||
| 2571 | break; | ||
| 2572 | default: | ||
| 2573 | target = decomp.predicates.at(index); | ||
| 2574 | break; | ||
| 2575 | } | ||
| 2576 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { | ||
| 2577 | target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); | ||
| 2578 | } else { | ||
| 2579 | UNREACHABLE(); | ||
| 2580 | } | ||
| 2581 | |||
| 2582 | return decomp.OpLoad(decomp.t_bool, target); | ||
| 2583 | } | 2561 | } |
| 2584 | 2562 | ||
| 2585 | Id operator()(const ExprVar& expr) { | 2563 | Id operator()(const ExprVar& expr) { |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 32facd6ba..0eeb75559 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 63 | case OpCode::Id::I2F_R: | 63 | case OpCode::Id::I2F_R: |
| 64 | case OpCode::Id::I2F_C: | 64 | case OpCode::Id::I2F_C: |
| 65 | case OpCode::Id::I2F_IMM: { | 65 | case OpCode::Id::I2F_IMM: { |
| 66 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | ||
| 67 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | 66 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 67 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 69 | "Condition codes generation in I2F is not implemented"); | 68 | "Condition codes generation in I2F is not implemented"); |
| 70 | 69 | ||
| 71 | Node value = [&]() { | 70 | Node value = [&] { |
| 72 | switch (opcode->get().GetId()) { | 71 | switch (opcode->get().GetId()) { |
| 73 | case OpCode::Id::I2F_R: | 72 | case OpCode::Id::I2F_R: |
| 74 | return GetRegister(instr.gpr20); | 73 | return GetRegister(instr.gpr20); |
| @@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 81 | return Immediate(0); | 80 | return Immediate(0); |
| 82 | } | 81 | } |
| 83 | }(); | 82 | }(); |
| 83 | |||
| 84 | const bool input_signed = instr.conversion.is_input_signed; | 84 | const bool input_signed = instr.conversion.is_input_signed; |
| 85 | |||
| 86 | if (instr.conversion.src_size == Register::Size::Byte) { | ||
| 87 | const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; | ||
| 88 | if (offset > 0) { | ||
| 89 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | ||
| 90 | std::move(value), Immediate(offset)); | ||
| 91 | } | ||
| 92 | } else { | ||
| 93 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | ||
| 94 | } | ||
| 95 | |||
| 85 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | 96 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); |
| 86 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | 97 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); |
| 87 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | 98 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 78e92f52e..c934d0719 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -22,7 +22,23 @@ using Tegra::Shader::Register; | |||
| 22 | 22 | ||
| 23 | namespace { | 23 | namespace { |
| 24 | 24 | ||
| 25 | u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { | 25 | u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { |
| 26 | switch (uniform_type) { | ||
| 27 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 28 | case Tegra::Shader::UniformType::Single: | ||
| 29 | return 1; | ||
| 30 | case Tegra::Shader::UniformType::Double: | ||
| 31 | return 2; | ||
| 32 | case Tegra::Shader::UniformType::Quad: | ||
| 33 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 34 | return 4; | ||
| 35 | default: | ||
| 36 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | ||
| 37 | return 1; | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { | ||
| 26 | switch (uniform_type) { | 42 | switch (uniform_type) { |
| 27 | case Tegra::Shader::UniformType::Single: | 43 | case Tegra::Shader::UniformType::Single: |
| 28 | return 1; | 44 | return 1; |
| @@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 170 | const auto [real_address_base, base_address, descriptor] = | 186 | const auto [real_address_base, base_address, descriptor] = |
| 171 | TrackGlobalMemory(bb, instr, false); | 187 | TrackGlobalMemory(bb, instr, false); |
| 172 | 188 | ||
| 173 | const u32 count = GetUniformTypeElementsCount(type); | 189 | const u32 count = GetLdgMemorySize(type); |
| 174 | if (!real_address_base || !base_address) { | 190 | if (!real_address_base || !base_address) { |
| 175 | // Tracking failed, load zeroes. | 191 | // Tracking failed, load zeroes. |
| 176 | for (u32 i = 0; i < count; ++i) { | 192 | for (u32 i = 0; i < count; ++i) { |
| @@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 181 | 197 | ||
| 182 | for (u32 i = 0; i < count; ++i) { | 198 | for (u32 i = 0; i < count; ++i) { |
| 183 | const Node it_offset = Immediate(i * 4); | 199 | const Node it_offset = Immediate(i * 4); |
| 184 | const Node real_address = | 200 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 185 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | 201 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 186 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 202 | |
| 203 | if (type == Tegra::Shader::UniformType::UnsignedByte) { | ||
| 204 | // To handle unaligned loads, get the byte used to dereference global memory | ||
| 205 | // and extract that byte from the loaded uint32. | ||
| 206 | Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); | ||
| 207 | byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); | ||
| 208 | |||
| 209 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), | ||
| 210 | Immediate(8)); | ||
| 211 | } | ||
| 187 | 212 | ||
| 188 | SetTemporary(bb, i, gmem); | 213 | SetTemporary(bb, i, gmem); |
| 189 | } | 214 | } |
| 215 | |||
| 190 | for (u32 i = 0; i < count; ++i) { | 216 | for (u32 i = 0; i < count; ++i) { |
| 191 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | 217 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
| 192 | } | 218 | } |
| @@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 276 | break; | 302 | break; |
| 277 | } | 303 | } |
| 278 | 304 | ||
| 279 | const u32 count = GetUniformTypeElementsCount(type); | 305 | const u32 count = GetStgMemorySize(type); |
| 280 | for (u32 i = 0; i < count; ++i) { | 306 | for (u32 i = 0; i < count; ++i) { |
| 281 | const Node it_offset = Immediate(i * 4); | 307 | const Node it_offset = Immediate(i * 4); |
| 282 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | 308 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 994c05611..dff01a541 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -743,13 +743,18 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 743 | // When lod is used, it is always in gpr20 | 743 | // When lod is used, it is always in gpr20 |
| 744 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | 744 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 745 | 745 | ||
| 746 | // Fill empty entries from the guest sampler. | 746 | // Fill empty entries from the guest sampler |
| 747 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); | 747 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); |
| 748 | if (type_coord_count != entry_coord_count) { | 748 | if (type_coord_count != entry_coord_count) { |
| 749 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); | 749 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); |
| 750 | } | 750 | |
| 751 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { | 751 | // When the size is higher we insert zeroes |
| 752 | coords.push_back(GetRegister(Register::ZeroIndex)); | 752 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { |
| 753 | coords.push_back(GetRegister(Register::ZeroIndex)); | ||
| 754 | } | ||
| 755 | |||
| 756 | // Then we ensure the size matches the number of entries (dropping unused values) | ||
| 757 | coords.resize(entry_coord_count); | ||
| 753 | } | 758 | } |
| 754 | 759 | ||
| 755 | Node4 values; | 760 | Node4 values; |