diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/engines/shader_header.h | 103 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 31 |
3 files changed, 111 insertions, 24 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4a79ce39c..f5ae57039 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -14,6 +14,7 @@ add_library(video_core STATIC | |||
| 14 | engines/maxwell_dma.cpp | 14 | engines/maxwell_dma.cpp |
| 15 | engines/maxwell_dma.h | 15 | engines/maxwell_dma.h |
| 16 | engines/shader_bytecode.h | 16 | engines/shader_bytecode.h |
| 17 | engines/shader_header.h | ||
| 17 | gpu.cpp | 18 | gpu.cpp |
| 18 | gpu.h | 19 | gpu.h |
| 19 | macro_interpreter.cpp | 20 | macro_interpreter.cpp |
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h new file mode 100644 index 000000000..a885ee3cf --- /dev/null +++ b/src/video_core/engines/shader_header.h | |||
| @@ -0,0 +1,103 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_funcs.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Tegra::Shader { | ||
| 12 | |||
| 13 | enum class OutputTopology : u32 { | ||
| 14 | PointList = 1, | ||
| 15 | LineStrip = 6, | ||
| 16 | TriangleStrip = 7, | ||
| 17 | }; | ||
| 18 | |||
| 19 | // Documentation in: | ||
| 20 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture | ||
| 21 | struct Header { | ||
| 22 | union { | ||
| 23 | BitField<0, 5, u32> sph_type; | ||
| 24 | BitField<5, 5, u32> version; | ||
| 25 | BitField<10, 4, u32> shader_type; | ||
| 26 | BitField<14, 1, u32> mrt_enable; | ||
| 27 | BitField<15, 1, u32> kills_pixels; | ||
| 28 | BitField<16, 1, u32> does_global_store; | ||
| 29 | BitField<17, 4, u32> sass_version; | ||
| 30 | BitField<21, 5, u32> reserved; | ||
| 31 | BitField<26, 1, u32> does_load_or_store; | ||
| 32 | BitField<27, 1, u32> does_fp64; | ||
| 33 | BitField<28, 4, u32> stream_out_mask; | ||
| 34 | } common0; | ||
| 35 | |||
| 36 | union { | ||
| 37 | BitField<0, 24, u32> shader_local_memory_low_size; | ||
| 38 | BitField<24, 8, u32> per_patch_attribute_count; | ||
| 39 | } common1; | ||
| 40 | |||
| 41 | union { | ||
| 42 | BitField<0, 24, u32> shader_local_memory_high_size; | ||
| 43 | BitField<24, 8, u32> threads_per_input_primitive; | ||
| 44 | } common2; | ||
| 45 | |||
| 46 | union { | ||
| 47 | BitField<0, 24, u32> shader_local_memory_crs_size; | ||
| 48 | BitField<24, 4, OutputTopology> output_topology; | ||
| 49 | BitField<28, 4, u32> reserved; | ||
| 50 | } common3; | ||
| 51 | |||
| 52 | union { | ||
| 53 | BitField<0, 12, u32> max_output_vertices; | ||
| 54 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | ||
| 55 | BitField<24, 4, u32> reserved; | ||
| 56 | BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | ||
| 57 | } common4; | ||
| 58 | |||
| 59 | union { | ||
| 60 | struct { | ||
| 61 | INSERT_PADDING_BYTES(3); // ImapSystemValuesA | ||
| 62 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB | ||
| 63 | INSERT_PADDING_BYTES(16); // ImapGenericVector[32] | ||
| 64 | INSERT_PADDING_BYTES(2); // ImapColor | ||
| 65 | INSERT_PADDING_BYTES(2); // ImapSystemValuesC | ||
| 66 | INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] | ||
| 67 | INSERT_PADDING_BYTES(1); // ImapReserved | ||
| 68 | INSERT_PADDING_BYTES(3); // OmapSystemValuesA | ||
| 69 | INSERT_PADDING_BYTES(1); // OmapSystemValuesB | ||
| 70 | INSERT_PADDING_BYTES(16); // OmapGenericVector[32] | ||
| 71 | INSERT_PADDING_BYTES(2); // OmapColor | ||
| 72 | INSERT_PADDING_BYTES(2); // OmapSystemValuesC | ||
| 73 | INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10] | ||
| 74 | INSERT_PADDING_BYTES(1); // OmapReserved | ||
| 75 | } vtg; | ||
| 76 | |||
| 77 | struct { | ||
| 78 | INSERT_PADDING_BYTES(3); // ImapSystemValuesA | ||
| 79 | INSERT_PADDING_BYTES(1); // ImapSystemValuesB | ||
| 80 | INSERT_PADDING_BYTES(32); // ImapGenericVector[32] | ||
| 81 | INSERT_PADDING_BYTES(2); // ImapColor | ||
| 82 | INSERT_PADDING_BYTES(2); // ImapSystemValuesC | ||
| 83 | INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] | ||
| 84 | INSERT_PADDING_BYTES(2); // ImapReserved | ||
| 85 | struct { | ||
| 86 | u32 target; | ||
| 87 | union { | ||
| 88 | BitField<0, 1, u32> sample_mask; | ||
| 89 | BitField<1, 1, u32> depth; | ||
| 90 | BitField<2, 30, u32> reserved; | ||
| 91 | }; | ||
| 92 | } omap; | ||
| 93 | bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | ||
| 94 | const u32 bit = render_target * 4 + component; | ||
| 95 | return omap.target & (1 << bit); | ||
| 96 | } | ||
| 97 | } ps; | ||
| 98 | }; | ||
| 99 | }; | ||
| 100 | |||
| 101 | static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); | ||
| 102 | |||
| 103 | } // namespace Tegra::Shader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 252ff18fc..a1638c12e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/engines/shader_bytecode.h" | 14 | #include "video_core/engines/shader_bytecode.h" |
| 15 | #include "video_core/engines/shader_header.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 16 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 16 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 17 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 17 | 18 | ||
| @@ -26,7 +27,7 @@ using Tegra::Shader::Sampler; | |||
| 26 | using Tegra::Shader::SubOp; | 27 | using Tegra::Shader::SubOp; |
| 27 | 28 | ||
| 28 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | 29 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; |
| 29 | constexpr u32 PROGRAM_HEADER_SIZE = 0x50; | 30 | constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); |
| 30 | 31 | ||
| 31 | class DecompileFail : public std::runtime_error { | 32 | class DecompileFail : public std::runtime_error { |
| 32 | public: | 33 | public: |
| @@ -674,7 +675,7 @@ public: | |||
| 674 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) | 675 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) |
| 675 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | 676 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), |
| 676 | stage(stage), suffix(suffix) { | 677 | stage(stage), suffix(suffix) { |
| 677 | 678 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | |
| 678 | Generate(suffix); | 679 | Generate(suffix); |
| 679 | } | 680 | } |
| 680 | 681 | ||
| @@ -688,23 +689,6 @@ public: | |||
| 688 | } | 689 | } |
| 689 | 690 | ||
| 690 | private: | 691 | private: |
| 691 | // Shader program header for a Fragment Shader. | ||
| 692 | struct FragmentHeader { | ||
| 693 | INSERT_PADDING_WORDS(5); | ||
| 694 | INSERT_PADDING_WORDS(13); | ||
| 695 | u32 enabled_color_outputs; | ||
| 696 | union { | ||
| 697 | BitField<0, 1, u32> writes_samplemask; | ||
| 698 | BitField<1, 1, u32> writes_depth; | ||
| 699 | }; | ||
| 700 | |||
| 701 | bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | ||
| 702 | const u32 bit = render_target * 4 + component; | ||
| 703 | return enabled_color_outputs & (1 << bit); | ||
| 704 | } | ||
| 705 | }; | ||
| 706 | static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); | ||
| 707 | |||
| 708 | /// Gets the Subroutine object corresponding to the specified address. | 692 | /// Gets the Subroutine object corresponding to the specified address. |
| 709 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { | 693 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { |
| 710 | const auto iter = subroutines.find(Subroutine{begin, end, suffix}); | 694 | const auto iter = subroutines.find(Subroutine{begin, end, suffix}); |
| @@ -1010,10 +994,8 @@ private: | |||
| 1010 | /// Writes the output values from a fragment shader to the corresponding GLSL output variables. | 994 | /// Writes the output values from a fragment shader to the corresponding GLSL output variables. |
| 1011 | void EmitFragmentOutputsWrite() { | 995 | void EmitFragmentOutputsWrite() { |
| 1012 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | 996 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); |
| 1013 | FragmentHeader header; | ||
| 1014 | std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); | ||
| 1015 | 997 | ||
| 1016 | ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); | 998 | ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented"); |
| 1017 | 999 | ||
| 1018 | // Write the color outputs using the data in the shader registers, disabled | 1000 | // Write the color outputs using the data in the shader registers, disabled |
| 1019 | // rendertargets/components are skipped in the register assignment. | 1001 | // rendertargets/components are skipped in the register assignment. |
| @@ -1022,7 +1004,7 @@ private: | |||
| 1022 | ++render_target) { | 1004 | ++render_target) { |
| 1023 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 1005 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1024 | for (u32 component = 0; component < 4; ++component) { | 1006 | for (u32 component = 0; component < 4; ++component) { |
| 1025 | if (header.IsColorComponentOutputEnabled(render_target, component)) { | 1007 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { |
| 1026 | shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | 1008 | shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, |
| 1027 | regs.GetRegisterAsFloat(current_reg))); | 1009 | regs.GetRegisterAsFloat(current_reg))); |
| 1028 | ++current_reg; | 1010 | ++current_reg; |
| @@ -1030,7 +1012,7 @@ private: | |||
| 1030 | } | 1012 | } |
| 1031 | } | 1013 | } |
| 1032 | 1014 | ||
| 1033 | if (header.writes_depth) { | 1015 | if (header.ps.omap.depth) { |
| 1034 | // The depth output is always 2 registers after the last color output, and current_reg | 1016 | // The depth output is always 2 registers after the last color output, and current_reg |
| 1035 | // already contains one past the last color register. | 1017 | // already contains one past the last color register. |
| 1036 | 1018 | ||
| @@ -2666,6 +2648,7 @@ private: | |||
| 2666 | private: | 2648 | private: |
| 2667 | const std::set<Subroutine>& subroutines; | 2649 | const std::set<Subroutine>& subroutines; |
| 2668 | const ProgramCode& program_code; | 2650 | const ProgramCode& program_code; |
| 2651 | Tegra::Shader::Header header; | ||
| 2669 | const u32 main_offset; | 2652 | const u32 main_offset; |
| 2670 | Maxwell3D::Regs::ShaderStage stage; | 2653 | Maxwell3D::Regs::ShaderStage stage; |
| 2671 | const std::string& suffix; | 2654 | const std::string& suffix; |