summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/CMakeLists.txt 1
-rw-r--r-- src/video_core/engines/shader_header.h 103
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 31
3 files changed, 111 insertions, 24 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4a79ce39c..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(video_core STATIC
14 engines/maxwell_dma.cpp 14 engines/maxwell_dma.cpp
15 engines/maxwell_dma.h 15 engines/maxwell_dma.h
16 engines/shader_bytecode.h 16 engines/shader_bytecode.h
17 engines/shader_header.h
17 gpu.cpp 18 gpu.cpp
18 gpu.h 19 gpu.h
19 macro_interpreter.cpp 20 macro_interpreter.cpp
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
new file mode 100644
index 000000000..a885ee3cf
--- /dev/null
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra::Shader {
12
13enum class OutputTopology : u32 {
14 PointList = 1,
15 LineStrip = 6,
16 TriangleStrip = 7,
17};
18
19// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header {
22 union {
23 BitField<0, 5, u32> sph_type;
24 BitField<5, 5, u32> version;
25 BitField<10, 4, u32> shader_type;
26 BitField<14, 1, u32> mrt_enable;
27 BitField<15, 1, u32> kills_pixels;
28 BitField<16, 1, u32> does_global_store;
29 BitField<17, 4, u32> sass_version;
30 BitField<21, 5, u32> reserved;
31 BitField<26, 1, u32> does_load_or_store;
32 BitField<27, 1, u32> does_fp64;
33 BitField<28, 4, u32> stream_out_mask;
34 } common0;
35
36 union {
37 BitField<0, 24, u32> shader_local_memory_low_size;
38 BitField<24, 8, u32> per_patch_attribute_count;
39 } common1;
40
41 union {
42 BitField<0, 24, u32> shader_local_memory_high_size;
43 BitField<24, 8, u32> threads_per_input_primitive;
44 } common2;
45
46 union {
47 BitField<0, 24, u32> shader_local_memory_crs_size;
48 BitField<24, 4, OutputTopology> output_topology;
49 BitField<28, 4, u32> reserved;
50 } common3;
51
52 union {
53 BitField<0, 12, u32> max_output_vertices;
54 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
55 BitField<24, 4, u32> reserved;
56 BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
57 } common4;
58
59 union {
60 struct {
61 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
64 INSERT_PADDING_BYTES(2); // ImapColor
65 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
66 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
67 INSERT_PADDING_BYTES(1); // ImapReserved
68 INSERT_PADDING_BYTES(3); // OmapSystemValuesA
69 INSERT_PADDING_BYTES(1); // OmapSystemValuesB
70 INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
71 INSERT_PADDING_BYTES(2); // OmapColor
72 INSERT_PADDING_BYTES(2); // OmapSystemValuesC
73 INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
74 INSERT_PADDING_BYTES(1); // OmapReserved
75 } vtg;
76
77 struct {
78 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
79 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
80 INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
81 INSERT_PADDING_BYTES(2); // ImapColor
82 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
83 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
84 INSERT_PADDING_BYTES(2); // ImapReserved
85 struct {
86 u32 target;
87 union {
88 BitField<0, 1, u32> sample_mask;
89 BitField<1, 1, u32> depth;
90 BitField<2, 30, u32> reserved;
91 };
92 } omap;
93 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
94 const u32 bit = render_target * 4 + component;
95 return omap.target & (1 << bit);
96 }
97 } ps;
98 };
99};
100
101static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
102
103} // namespace Tegra::Shader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 252ff18fc..a1638c12e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
12#include "common/assert.h" 12#include "common/assert.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/shader_bytecode.h" 14#include "video_core/engines/shader_bytecode.h"
15#include "video_core/engines/shader_header.h"
15#include "video_core/renderer_opengl/gl_rasterizer.h" 16#include "video_core/renderer_opengl/gl_rasterizer.h"
16#include "video_core/renderer_opengl/gl_shader_decompiler.h" 17#include "video_core/renderer_opengl/gl_shader_decompiler.h"
17 18
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
26using Tegra::Shader::SubOp; 27using Tegra::Shader::SubOp;
27 28
28constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; 29constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
29constexpr u32 PROGRAM_HEADER_SIZE = 0x50; 30constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
30 31
31class DecompileFail : public std::runtime_error { 32class DecompileFail : public std::runtime_error {
32public: 33public:
@@ -674,7 +675,7 @@ public:
674 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) 675 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
675 : subroutines(subroutines), program_code(program_code), main_offset(main_offset), 676 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
676 stage(stage), suffix(suffix) { 677 stage(stage), suffix(suffix) {
677 678 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
678 Generate(suffix); 679 Generate(suffix);
679 } 680 }
680 681
@@ -688,23 +689,6 @@ public:
688 } 689 }
689 690
690private: 691private:
691 // Shader program header for a Fragment Shader.
692 struct FragmentHeader {
693 INSERT_PADDING_WORDS(5);
694 INSERT_PADDING_WORDS(13);
695 u32 enabled_color_outputs;
696 union {
697 BitField<0, 1, u32> writes_samplemask;
698 BitField<1, 1, u32> writes_depth;
699 };
700
701 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
702 const u32 bit = render_target * 4 + component;
703 return enabled_color_outputs & (1 << bit);
704 }
705 };
706 static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
707
708 /// Gets the Subroutine object corresponding to the specified address. 692 /// Gets the Subroutine object corresponding to the specified address.
709 const Subroutine& GetSubroutine(u32 begin, u32 end) const { 693 const Subroutine& GetSubroutine(u32 begin, u32 end) const {
710 const auto iter = subroutines.find(Subroutine{begin, end, suffix}); 694 const auto iter = subroutines.find(Subroutine{begin, end, suffix});
@@ -1010,10 +994,8 @@ private:
1010 /// Writes the output values from a fragment shader to the corresponding GLSL output variables. 994 /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
1011 void EmitFragmentOutputsWrite() { 995 void EmitFragmentOutputsWrite() {
1012 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 996 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
1013 FragmentHeader header;
1014 std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
1015 997
1016 ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); 998 ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
1017 999
1018 // Write the color outputs using the data in the shader registers, disabled 1000 // Write the color outputs using the data in the shader registers, disabled
1019 // rendertargets/components are skipped in the register assignment. 1001 // rendertargets/components are skipped in the register assignment.
@@ -1022,7 +1004,7 @@ private:
1022 ++render_target) { 1004 ++render_target) {
1023 // TODO(Subv): Figure out how dual-source blending is configured in the Switch. 1005 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1024 for (u32 component = 0; component < 4; ++component) { 1006 for (u32 component = 0; component < 4; ++component) {
1025 if (header.IsColorComponentOutputEnabled(render_target, component)) { 1007 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1026 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, 1008 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
1027 regs.GetRegisterAsFloat(current_reg))); 1009 regs.GetRegisterAsFloat(current_reg)));
1028 ++current_reg; 1010 ++current_reg;
@@ -1030,7 +1012,7 @@ private:
1030 } 1012 }
1031 } 1013 }
1032 1014
1033 if (header.writes_depth) { 1015 if (header.ps.omap.depth) {
1034 // The depth output is always 2 registers after the last color output, and current_reg 1016 // The depth output is always 2 registers after the last color output, and current_reg
1035 // already contains one past the last color register. 1017 // already contains one past the last color register.
1036 1018
@@ -2666,6 +2648,7 @@ private:
2666private: 2648private:
2667 const std::set<Subroutine>& subroutines; 2649 const std::set<Subroutine>& subroutines;
2668 const ProgramCode& program_code; 2650 const ProgramCode& program_code;
2651 Tegra::Shader::Header header;
2669 const u32 main_offset; 2652 const u32 main_offset;
2670 Maxwell3D::Regs::ShaderStage stage; 2653 Maxwell3D::Regs::ShaderStage stage;
2671 const std::string& suffix; 2654 const std::string& suffix;