diff options
| author | 2016-12-16 20:45:57 -0800 | |
|---|---|---|
| committer | 2016-12-16 20:45:57 -0800 | |
| commit | 2ca548bbe52bb72cdd269893571b92ad185107a5 (patch) | |
| tree | 3200dcfb8b0f3afe4010a141f33f7693b9f7c938 /src | |
| parent | Merge pull request #2303 from freiro/citra-qt_missing_sdl2_dll (diff) | |
| parent | VideoCore/Shader: Extract DebugData out from UnitState (diff) | |
| download | yuzu-2ca548bbe52bb72cdd269893571b92ad185107a5.tar.gz yuzu-2ca548bbe52bb72cdd269893571b92ad185107a5.tar.xz yuzu-2ca548bbe52bb72cdd269893571b92ad185107a5.zip | |
Merge pull request #2335 from yuriks/shader-refactor
Misc. Shader refactors
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/debug_data.h | 186 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 199 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 235 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 7 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 2 |
9 files changed, 336 insertions, 338 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9aa446a8f..6ca319b59 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -39,6 +39,7 @@ set(HEADERS | |||
| 39 | rasterizer.h | 39 | rasterizer.h |
| 40 | rasterizer_interface.h | 40 | rasterizer_interface.h |
| 41 | renderer_base.h | 41 | renderer_base.h |
| 42 | shader/debug_data.h | ||
| 42 | shader/shader.h | 43 | shader/shader.h |
| 43 | shader/shader_interpreter.h | 44 | shader/shader_interpreter.h |
| 44 | swrasterizer.h | 45 | swrasterizer.h |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 8a5d8533c..04de3e6b1 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -138,7 +138,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 138 | if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { | 138 | if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { |
| 139 | immediate_attribute_id = 0; | 139 | immediate_attribute_id = 0; |
| 140 | 140 | ||
| 141 | Shader::UnitState<false> shader_unit; | 141 | Shader::UnitState shader_unit; |
| 142 | g_state.vs.Setup(); | 142 | g_state.vs.Setup(); |
| 143 | 143 | ||
| 144 | // Send to vertex shader | 144 | // Send to vertex shader |
| @@ -237,7 +237,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 237 | unsigned int vertex_cache_pos = 0; | 237 | unsigned int vertex_cache_pos = 0; |
| 238 | vertex_cache_ids.fill(-1); | 238 | vertex_cache_ids.fill(-1); |
| 239 | 239 | ||
| 240 | Shader::UnitState<false> shader_unit; | 240 | Shader::UnitState shader_unit; |
| 241 | g_state.vs.Setup(); | 241 | g_state.vs.Setup(); |
| 242 | 242 | ||
| 243 | for (unsigned int index = 0; index < regs.num_vertices; ++index) { | 243 | for (unsigned int index = 0; index < regs.num_vertices; ++index) { |
diff --git a/src/video_core/shader/debug_data.h b/src/video_core/shader/debug_data.h new file mode 100644 index 000000000..9e82122e1 --- /dev/null +++ b/src/video_core/shader/debug_data.h | |||
| @@ -0,0 +1,186 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/vector_math.h" | ||
| 10 | #include "video_core/pica_types.h" | ||
| 11 | |||
| 12 | namespace Pica { | ||
| 13 | namespace Shader { | ||
| 14 | |||
| 15 | /// Helper structure used to keep track of data useful for inspection of shader emulation | ||
| 16 | template <bool full_debugging> | ||
| 17 | struct DebugData; | ||
| 18 | |||
| 19 | template <> | ||
| 20 | struct DebugData<false> { | ||
| 21 | // TODO: Hide these behind an interface and move them to DebugData<true> | ||
| 22 | u32 max_offset = 0; ///< maximum program counter ever reached | ||
| 23 | u32 max_opdesc_id = 0; ///< maximum swizzle pattern index ever used | ||
| 24 | }; | ||
| 25 | |||
| 26 | template <> | ||
| 27 | struct DebugData<true> { | ||
| 28 | /// Records store the input and output operands of a particular instruction. | ||
| 29 | struct Record { | ||
| 30 | enum Type { | ||
| 31 | // Floating point arithmetic operands | ||
| 32 | SRC1 = 0x1, | ||
| 33 | SRC2 = 0x2, | ||
| 34 | SRC3 = 0x4, | ||
| 35 | |||
| 36 | // Initial and final output operand value | ||
| 37 | DEST_IN = 0x8, | ||
| 38 | DEST_OUT = 0x10, | ||
| 39 | |||
| 40 | // Current and next instruction offset (in words) | ||
| 41 | CUR_INSTR = 0x20, | ||
| 42 | NEXT_INSTR = 0x40, | ||
| 43 | |||
| 44 | // Output address register value | ||
| 45 | ADDR_REG_OUT = 0x80, | ||
| 46 | |||
| 47 | // Result of a comparison instruction | ||
| 48 | CMP_RESULT = 0x100, | ||
| 49 | |||
| 50 | // Input values for conditional flow control instructions | ||
| 51 | COND_BOOL_IN = 0x200, | ||
| 52 | COND_CMP_IN = 0x400, | ||
| 53 | |||
| 54 | // Input values for a loop | ||
| 55 | LOOP_INT_IN = 0x800, | ||
| 56 | }; | ||
| 57 | |||
| 58 | Math::Vec4<float24> src1; | ||
| 59 | Math::Vec4<float24> src2; | ||
| 60 | Math::Vec4<float24> src3; | ||
| 61 | |||
| 62 | Math::Vec4<float24> dest_in; | ||
| 63 | Math::Vec4<float24> dest_out; | ||
| 64 | |||
| 65 | s32 address_registers[2]; | ||
| 66 | bool conditional_code[2]; | ||
| 67 | bool cond_bool; | ||
| 68 | bool cond_cmp[2]; | ||
| 69 | Math::Vec4<u8> loop_int; | ||
| 70 | |||
| 71 | u32 instruction_offset; | ||
| 72 | u32 next_instruction; | ||
| 73 | |||
| 74 | /// set of enabled fields (as a combination of Type flags) | ||
| 75 | unsigned mask = 0; | ||
| 76 | }; | ||
| 77 | |||
| 78 | u32 max_offset = 0; ///< maximum program counter ever reached | ||
| 79 | u32 max_opdesc_id = 0; ///< maximum swizzle pattern index ever used | ||
| 80 | |||
| 81 | /// List of records for each executed shader instruction | ||
| 82 | std::vector<DebugData<true>::Record> records; | ||
| 83 | }; | ||
| 84 | |||
| 85 | /// Type alias for better readability | ||
| 86 | using DebugDataRecord = DebugData<true>::Record; | ||
| 87 | |||
| 88 | /// Helper function to set a DebugData<true>::Record field based on the template enum parameter. | ||
| 89 | template <DebugDataRecord::Type type, typename ValueType> | ||
| 90 | inline void SetField(DebugDataRecord& record, ValueType value); | ||
| 91 | |||
| 92 | template <> | ||
| 93 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | ||
| 94 | record.src1.x = value[0]; | ||
| 95 | record.src1.y = value[1]; | ||
| 96 | record.src1.z = value[2]; | ||
| 97 | record.src1.w = value[3]; | ||
| 98 | } | ||
| 99 | |||
| 100 | template <> | ||
| 101 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | ||
| 102 | record.src2.x = value[0]; | ||
| 103 | record.src2.y = value[1]; | ||
| 104 | record.src2.z = value[2]; | ||
| 105 | record.src2.w = value[3]; | ||
| 106 | } | ||
| 107 | |||
| 108 | template <> | ||
| 109 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | ||
| 110 | record.src3.x = value[0]; | ||
| 111 | record.src3.y = value[1]; | ||
| 112 | record.src3.z = value[2]; | ||
| 113 | record.src3.w = value[3]; | ||
| 114 | } | ||
| 115 | |||
| 116 | template <> | ||
| 117 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | ||
| 118 | record.dest_in.x = value[0]; | ||
| 119 | record.dest_in.y = value[1]; | ||
| 120 | record.dest_in.z = value[2]; | ||
| 121 | record.dest_in.w = value[3]; | ||
| 122 | } | ||
| 123 | |||
| 124 | template <> | ||
| 125 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | ||
| 126 | record.dest_out.x = value[0]; | ||
| 127 | record.dest_out.y = value[1]; | ||
| 128 | record.dest_out.z = value[2]; | ||
| 129 | record.dest_out.w = value[3]; | ||
| 130 | } | ||
| 131 | |||
| 132 | template <> | ||
| 133 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | ||
| 134 | record.address_registers[0] = value[0]; | ||
| 135 | record.address_registers[1] = value[1]; | ||
| 136 | } | ||
| 137 | |||
| 138 | template <> | ||
| 139 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | ||
| 140 | record.conditional_code[0] = value[0]; | ||
| 141 | record.conditional_code[1] = value[1]; | ||
| 142 | } | ||
| 143 | |||
| 144 | template <> | ||
| 145 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | ||
| 146 | record.cond_bool = value; | ||
| 147 | } | ||
| 148 | |||
| 149 | template <> | ||
| 150 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | ||
| 151 | record.cond_cmp[0] = value[0]; | ||
| 152 | record.cond_cmp[1] = value[1]; | ||
| 153 | } | ||
| 154 | |||
| 155 | template <> | ||
| 156 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | ||
| 157 | record.loop_int = value; | ||
| 158 | } | ||
| 159 | |||
| 160 | template <> | ||
| 161 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 162 | record.instruction_offset = value; | ||
| 163 | } | ||
| 164 | |||
| 165 | template <> | ||
| 166 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 167 | record.next_instruction = value; | ||
| 168 | } | ||
| 169 | |||
| 170 | /// Helper function to set debug information on the current shader iteration. | ||
| 171 | template <DebugDataRecord::Type type, typename ValueType> | ||
| 172 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | ||
| 173 | // Debugging disabled => nothing to do | ||
| 174 | } | ||
| 175 | |||
| 176 | template <DebugDataRecord::Type type, typename ValueType> | ||
| 177 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | ||
| 178 | if (offset >= debug_data.records.size()) | ||
| 179 | debug_data.records.resize(offset + 1); | ||
| 180 | |||
| 181 | SetField<type, ValueType>(debug_data.records[offset], value); | ||
| 182 | debug_data.records[offset].mask |= type; | ||
| 183 | } | ||
| 184 | |||
| 185 | } // namespace Shader | ||
| 186 | } // namespace Pica | ||
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index c7f23dab9..a4aa3c9e0 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -109,15 +109,12 @@ void ShaderSetup::Setup() { | |||
| 109 | 109 | ||
| 110 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | 110 | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); |
| 111 | 111 | ||
| 112 | void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | 112 | void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { |
| 113 | auto& config = g_state.regs.vs; | 113 | auto& config = g_state.regs.vs; |
| 114 | auto& setup = g_state.vs; | 114 | auto& setup = g_state.vs; |
| 115 | 115 | ||
| 116 | MICROPROFILE_SCOPE(GPU_Shader); | 116 | MICROPROFILE_SCOPE(GPU_Shader); |
| 117 | 117 | ||
| 118 | state.debug.max_offset = 0; | ||
| 119 | state.debug.max_opdesc_id = 0; | ||
| 120 | |||
| 121 | // Setup input register table | 118 | // Setup input register table |
| 122 | const auto& attribute_register_map = config.input_register_map; | 119 | const auto& attribute_register_map = config.input_register_map; |
| 123 | 120 | ||
| @@ -128,22 +125,23 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num | |||
| 128 | state.conditional_code[1] = false; | 125 | state.conditional_code[1] = false; |
| 129 | 126 | ||
| 130 | #ifdef ARCHITECTURE_x86_64 | 127 | #ifdef ARCHITECTURE_x86_64 |
| 131 | if (VideoCore::g_shader_jit_enabled) | 128 | if (VideoCore::g_shader_jit_enabled) { |
| 132 | jit_shader->Run(setup, state, config.main_offset); | 129 | jit_shader->Run(setup, state, config.main_offset); |
| 133 | else | 130 | } else { |
| 134 | RunInterpreter(setup, state, config.main_offset); | 131 | DebugData<false> dummy_debug_data; |
| 132 | RunInterpreter(setup, state, dummy_debug_data, config.main_offset); | ||
| 133 | } | ||
| 135 | #else | 134 | #else |
| 136 | RunInterpreter(setup, state, config.main_offset); | 135 | DebugData<false> dummy_debug_data; |
| 136 | RunInterpreter(setup, state, dummy_debug_data, config.main_offset); | ||
| 137 | #endif // ARCHITECTURE_x86_64 | 137 | #endif // ARCHITECTURE_x86_64 |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, | 140 | DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, |
| 141 | const Regs::ShaderConfig& config, | 141 | const Regs::ShaderConfig& config, |
| 142 | const ShaderSetup& setup) { | 142 | const ShaderSetup& setup) { |
| 143 | UnitState<true> state; | 143 | UnitState state; |
| 144 | 144 | DebugData<true> debug_data; | |
| 145 | state.debug.max_offset = 0; | ||
| 146 | state.debug.max_opdesc_id = 0; | ||
| 147 | 145 | ||
| 148 | // Setup input register table | 146 | // Setup input register table |
| 149 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); | 147 | boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero())); |
| @@ -154,8 +152,8 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ | |||
| 154 | state.conditional_code[0] = false; | 152 | state.conditional_code[0] = false; |
| 155 | state.conditional_code[1] = false; | 153 | state.conditional_code[1] = false; |
| 156 | 154 | ||
| 157 | RunInterpreter(setup, state, config.main_offset); | 155 | RunInterpreter(setup, state, debug_data, config.main_offset); |
| 158 | return state.debug; | 156 | return debug_data; |
| 159 | } | 157 | } |
| 160 | 158 | ||
| 161 | } // namespace Shader | 159 | } // namespace Shader |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 0111d8c0f..2b07759b9 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -8,8 +8,6 @@ | |||
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <type_traits> | 10 | #include <type_traits> |
| 11 | #include <vector> | ||
| 12 | #include <boost/container/static_vector.hpp> | ||
| 13 | #include <nihstro/shader_bytecode.h> | 11 | #include <nihstro/shader_bytecode.h> |
| 14 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 15 | #include "common/common_funcs.h" | 13 | #include "common/common_funcs.h" |
| @@ -17,6 +15,7 @@ | |||
| 17 | #include "common/vector_math.h" | 15 | #include "common/vector_math.h" |
| 18 | #include "video_core/pica.h" | 16 | #include "video_core/pica.h" |
| 19 | #include "video_core/pica_types.h" | 17 | #include "video_core/pica_types.h" |
| 18 | #include "video_core/shader/debug_data.h" | ||
| 20 | 19 | ||
| 21 | using nihstro::RegisterType; | 20 | using nihstro::RegisterType; |
| 22 | using nihstro::SourceRegister; | 21 | using nihstro::SourceRegister; |
| @@ -89,183 +88,12 @@ struct OutputRegisters { | |||
| 89 | }; | 88 | }; |
| 90 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); | 89 | static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); |
| 91 | 90 | ||
| 92 | // Helper structure used to keep track of data useful for inspection of shader emulation | ||
| 93 | template <bool full_debugging> | ||
| 94 | struct DebugData; | ||
| 95 | |||
| 96 | template <> | ||
| 97 | struct DebugData<false> { | ||
| 98 | // TODO: Hide these behind and interface and move them to DebugData<true> | ||
| 99 | u32 max_offset; // maximum program counter ever reached | ||
| 100 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 101 | }; | ||
| 102 | |||
| 103 | template <> | ||
| 104 | struct DebugData<true> { | ||
| 105 | // Records store the input and output operands of a particular instruction. | ||
| 106 | struct Record { | ||
| 107 | enum Type { | ||
| 108 | // Floating point arithmetic operands | ||
| 109 | SRC1 = 0x1, | ||
| 110 | SRC2 = 0x2, | ||
| 111 | SRC3 = 0x4, | ||
| 112 | |||
| 113 | // Initial and final output operand value | ||
| 114 | DEST_IN = 0x8, | ||
| 115 | DEST_OUT = 0x10, | ||
| 116 | |||
| 117 | // Current and next instruction offset (in words) | ||
| 118 | CUR_INSTR = 0x20, | ||
| 119 | NEXT_INSTR = 0x40, | ||
| 120 | |||
| 121 | // Output address register value | ||
| 122 | ADDR_REG_OUT = 0x80, | ||
| 123 | |||
| 124 | // Result of a comparison instruction | ||
| 125 | CMP_RESULT = 0x100, | ||
| 126 | |||
| 127 | // Input values for conditional flow control instructions | ||
| 128 | COND_BOOL_IN = 0x200, | ||
| 129 | COND_CMP_IN = 0x400, | ||
| 130 | |||
| 131 | // Input values for a loop | ||
| 132 | LOOP_INT_IN = 0x800, | ||
| 133 | }; | ||
| 134 | |||
| 135 | Math::Vec4<float24> src1; | ||
| 136 | Math::Vec4<float24> src2; | ||
| 137 | Math::Vec4<float24> src3; | ||
| 138 | |||
| 139 | Math::Vec4<float24> dest_in; | ||
| 140 | Math::Vec4<float24> dest_out; | ||
| 141 | |||
| 142 | s32 address_registers[2]; | ||
| 143 | bool conditional_code[2]; | ||
| 144 | bool cond_bool; | ||
| 145 | bool cond_cmp[2]; | ||
| 146 | Math::Vec4<u8> loop_int; | ||
| 147 | |||
| 148 | u32 instruction_offset; | ||
| 149 | u32 next_instruction; | ||
| 150 | |||
| 151 | // set of enabled fields (as a combination of Type flags) | ||
| 152 | unsigned mask = 0; | ||
| 153 | }; | ||
| 154 | |||
| 155 | u32 max_offset; // maximum program counter ever reached | ||
| 156 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 157 | |||
| 158 | // List of records for each executed shader instruction | ||
| 159 | std::vector<DebugData<true>::Record> records; | ||
| 160 | }; | ||
| 161 | |||
| 162 | // Type alias for better readability | ||
| 163 | using DebugDataRecord = DebugData<true>::Record; | ||
| 164 | |||
| 165 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. | ||
| 166 | template <DebugDataRecord::Type type, typename ValueType> | ||
| 167 | inline void SetField(DebugDataRecord& record, ValueType value); | ||
| 168 | |||
| 169 | template <> | ||
| 170 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | ||
| 171 | record.src1.x = value[0]; | ||
| 172 | record.src1.y = value[1]; | ||
| 173 | record.src1.z = value[2]; | ||
| 174 | record.src1.w = value[3]; | ||
| 175 | } | ||
| 176 | |||
| 177 | template <> | ||
| 178 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | ||
| 179 | record.src2.x = value[0]; | ||
| 180 | record.src2.y = value[1]; | ||
| 181 | record.src2.z = value[2]; | ||
| 182 | record.src2.w = value[3]; | ||
| 183 | } | ||
| 184 | |||
| 185 | template <> | ||
| 186 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | ||
| 187 | record.src3.x = value[0]; | ||
| 188 | record.src3.y = value[1]; | ||
| 189 | record.src3.z = value[2]; | ||
| 190 | record.src3.w = value[3]; | ||
| 191 | } | ||
| 192 | |||
| 193 | template <> | ||
| 194 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | ||
| 195 | record.dest_in.x = value[0]; | ||
| 196 | record.dest_in.y = value[1]; | ||
| 197 | record.dest_in.z = value[2]; | ||
| 198 | record.dest_in.w = value[3]; | ||
| 199 | } | ||
| 200 | |||
| 201 | template <> | ||
| 202 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | ||
| 203 | record.dest_out.x = value[0]; | ||
| 204 | record.dest_out.y = value[1]; | ||
| 205 | record.dest_out.z = value[2]; | ||
| 206 | record.dest_out.w = value[3]; | ||
| 207 | } | ||
| 208 | |||
| 209 | template <> | ||
| 210 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | ||
| 211 | record.address_registers[0] = value[0]; | ||
| 212 | record.address_registers[1] = value[1]; | ||
| 213 | } | ||
| 214 | |||
| 215 | template <> | ||
| 216 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | ||
| 217 | record.conditional_code[0] = value[0]; | ||
| 218 | record.conditional_code[1] = value[1]; | ||
| 219 | } | ||
| 220 | |||
| 221 | template <> | ||
| 222 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | ||
| 223 | record.cond_bool = value; | ||
| 224 | } | ||
| 225 | |||
| 226 | template <> | ||
| 227 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | ||
| 228 | record.cond_cmp[0] = value[0]; | ||
| 229 | record.cond_cmp[1] = value[1]; | ||
| 230 | } | ||
| 231 | |||
| 232 | template <> | ||
| 233 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | ||
| 234 | record.loop_int = value; | ||
| 235 | } | ||
| 236 | |||
| 237 | template <> | ||
| 238 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 239 | record.instruction_offset = value; | ||
| 240 | } | ||
| 241 | |||
| 242 | template <> | ||
| 243 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 244 | record.next_instruction = value; | ||
| 245 | } | ||
| 246 | |||
| 247 | // Helper function to set debug information on the current shader iteration. | ||
| 248 | template <DebugDataRecord::Type type, typename ValueType> | ||
| 249 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | ||
| 250 | // Debugging disabled => nothing to do | ||
| 251 | } | ||
| 252 | |||
| 253 | template <DebugDataRecord::Type type, typename ValueType> | ||
| 254 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | ||
| 255 | if (offset >= debug_data.records.size()) | ||
| 256 | debug_data.records.resize(offset + 1); | ||
| 257 | |||
| 258 | SetField<type, ValueType>(debug_data.records[offset], value); | ||
| 259 | debug_data.records[offset].mask |= type; | ||
| 260 | } | ||
| 261 | |||
| 262 | /** | 91 | /** |
| 263 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 92 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 264 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 93 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 265 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 94 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 266 | * here will make it easier for us to parallelize the shader processing later. | 95 | * here will make it easier for us to parallelize the shader processing later. |
| 267 | */ | 96 | */ |
| 268 | template <bool Debug> | ||
| 269 | struct UnitState { | 97 | struct UnitState { |
| 270 | struct Registers { | 98 | struct Registers { |
| 271 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 99 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| @@ -283,8 +111,6 @@ struct UnitState { | |||
| 283 | // TODO: How many bits do these actually have? | 111 | // TODO: How many bits do these actually have? |
| 284 | s32 address_registers[3]; | 112 | s32 address_registers[3]; |
| 285 | 113 | ||
| 286 | DebugData<Debug> debug; | ||
| 287 | |||
| 288 | static size_t InputOffset(const SourceRegister& reg) { | 114 | static size_t InputOffset(const SourceRegister& reg) { |
| 289 | switch (reg.GetRegisterType()) { | 115 | switch (reg.GetRegisterType()) { |
| 290 | case RegisterType::Input: | 116 | case RegisterType::Input: |
| @@ -332,21 +158,16 @@ struct ShaderSetup { | |||
| 332 | std::array<Math::Vec4<u8>, 4> i; | 158 | std::array<Math::Vec4<u8>, 4> i; |
| 333 | } uniforms; | 159 | } uniforms; |
| 334 | 160 | ||
| 335 | static size_t UniformOffset(RegisterType type, unsigned index) { | 161 | static size_t GetFloatUniformOffset(unsigned index) { |
| 336 | switch (type) { | 162 | return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); |
| 337 | case RegisterType::FloatUniform: | 163 | } |
| 338 | return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>); | ||
| 339 | |||
| 340 | case RegisterType::BoolUniform: | ||
| 341 | return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); | ||
| 342 | 164 | ||
| 343 | case RegisterType::IntUniform: | 165 | static size_t GetBoolUniformOffset(unsigned index) { |
| 344 | return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); | 166 | return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); |
| 167 | } | ||
| 345 | 168 | ||
| 346 | default: | 169 | static size_t GetIntUniformOffset(unsigned index) { |
| 347 | UNREACHABLE(); | 170 | return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); |
| 348 | return 0; | ||
| 349 | } | ||
| 350 | } | 171 | } |
| 351 | 172 | ||
| 352 | std::array<u32, 1024> program_code; | 173 | std::array<u32, 1024> program_code; |
| @@ -364,7 +185,7 @@ struct ShaderSetup { | |||
| 364 | * @param input Input vertex into the shader | 185 | * @param input Input vertex into the shader |
| 365 | * @param num_attributes The number of vertex shader attributes | 186 | * @param num_attributes The number of vertex shader attributes |
| 366 | */ | 187 | */ |
| 367 | void Run(UnitState<false>& state, const InputVertex& input, int num_attributes); | 188 | void Run(UnitState& state, const InputVertex& input, int num_attributes); |
| 368 | 189 | ||
| 369 | /** | 190 | /** |
| 370 | * Produce debug information based on the given shader and input vertex | 191 | * Produce debug information based on the given shader and input vertex |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 6abb6761f..70db4167e 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cmath> | 7 | #include <cmath> |
| 8 | #include <numeric> | 8 | #include <numeric> |
| 9 | #include <boost/container/static_vector.hpp> | ||
| 9 | #include <nihstro/shader_bytecode.h> | 10 | #include <nihstro/shader_bytecode.h> |
| 10 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -38,12 +39,42 @@ struct CallStackElement { | |||
| 38 | }; | 39 | }; |
| 39 | 40 | ||
| 40 | template <bool Debug> | 41 | template <bool Debug> |
| 41 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { | 42 | void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, |
| 43 | unsigned offset) { | ||
| 42 | // TODO: Is there a maximal size for this? | 44 | // TODO: Is there a maximal size for this? |
| 43 | boost::container::static_vector<CallStackElement, 16> call_stack; | 45 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| 44 | |||
| 45 | u32 program_counter = offset; | 46 | u32 program_counter = offset; |
| 46 | 47 | ||
| 48 | auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset, | ||
| 49 | u8 repeat_count, u8 loop_increment) { | ||
| 50 | // -1 to make sure when incrementing the PC we end up at the correct offset | ||
| 51 | program_counter = offset - 1; | ||
| 52 | ASSERT(call_stack.size() < call_stack.capacity()); | ||
| 53 | call_stack.push_back( | ||
| 54 | {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); | ||
| 55 | }; | ||
| 56 | |||
| 57 | auto evaluate_condition = [&state](Instruction::FlowControlType flow_control) { | ||
| 58 | using Op = Instruction::FlowControlType::Op; | ||
| 59 | |||
| 60 | bool result_x = flow_control.refx.Value() == state.conditional_code[0]; | ||
| 61 | bool result_y = flow_control.refy.Value() == state.conditional_code[1]; | ||
| 62 | |||
| 63 | switch (flow_control.op) { | ||
| 64 | case Op::Or: | ||
| 65 | return result_x || result_y; | ||
| 66 | case Op::And: | ||
| 67 | return result_x && result_y; | ||
| 68 | case Op::JustX: | ||
| 69 | return result_x; | ||
| 70 | case Op::JustY: | ||
| 71 | return result_y; | ||
| 72 | default: | ||
| 73 | UNREACHABLE(); | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | }; | ||
| 77 | |||
| 47 | const auto& uniforms = g_state.vs.uniforms; | 78 | const auto& uniforms = g_state.vs.uniforms; |
| 48 | const auto& swizzle_data = g_state.vs.swizzle_data; | 79 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| 49 | const auto& program_code = g_state.vs.program_code; | 80 | const auto& program_code = g_state.vs.program_code; |
| @@ -74,20 +105,11 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 74 | const Instruction instr = {program_code[program_counter]}; | 105 | const Instruction instr = {program_code[program_counter]}; |
| 75 | const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; | 106 | const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; |
| 76 | 107 | ||
| 77 | auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, | 108 | Record<DebugDataRecord::CUR_INSTR>(debug_data, iteration, program_counter); |
| 78 | u32 num_instructions, u32 return_offset, | ||
| 79 | u8 repeat_count, u8 loop_increment) { | ||
| 80 | // -1 to make sure when incrementing the PC we end up at the correct offset | ||
| 81 | program_counter = offset - 1; | ||
| 82 | ASSERT(call_stack.size() < call_stack.capacity()); | ||
| 83 | call_stack.push_back( | ||
| 84 | {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); | ||
| 85 | }; | ||
| 86 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); | ||
| 87 | if (iteration > 0) | 109 | if (iteration > 0) |
| 88 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter); | 110 | Record<DebugDataRecord::NEXT_INSTR>(debug_data, iteration - 1, program_counter); |
| 89 | 111 | ||
| 90 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter); | 112 | debug_data.max_offset = std::max<u32>(debug_data.max_offset, 1 + program_counter); |
| 91 | 113 | ||
| 92 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 114 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 93 | switch (source_reg.GetRegisterType()) { | 115 | switch (source_reg.GetRegisterType()) { |
| @@ -155,54 +177,54 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 155 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] | 177 | ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] |
| 156 | : dummy_vec4_float24; | 178 | : dummy_vec4_float24; |
| 157 | 179 | ||
| 158 | state.debug.max_opdesc_id = | 180 | debug_data.max_opdesc_id = |
| 159 | std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id); | 181 | std::max<u32>(debug_data.max_opdesc_id, 1 + instr.common.operand_desc_id); |
| 160 | 182 | ||
| 161 | switch (instr.opcode.Value().EffectiveOpCode()) { | 183 | switch (instr.opcode.Value().EffectiveOpCode()) { |
| 162 | case OpCode::Id::ADD: { | 184 | case OpCode::Id::ADD: { |
| 163 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 185 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 164 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 186 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 165 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 187 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 166 | for (int i = 0; i < 4; ++i) { | 188 | for (int i = 0; i < 4; ++i) { |
| 167 | if (!swizzle.DestComponentEnabled(i)) | 189 | if (!swizzle.DestComponentEnabled(i)) |
| 168 | continue; | 190 | continue; |
| 169 | 191 | ||
| 170 | dest[i] = src1[i] + src2[i]; | 192 | dest[i] = src1[i] + src2[i]; |
| 171 | } | 193 | } |
| 172 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 194 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 173 | break; | 195 | break; |
| 174 | } | 196 | } |
| 175 | 197 | ||
| 176 | case OpCode::Id::MUL: { | 198 | case OpCode::Id::MUL: { |
| 177 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 199 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 178 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 200 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 179 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 201 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 180 | for (int i = 0; i < 4; ++i) { | 202 | for (int i = 0; i < 4; ++i) { |
| 181 | if (!swizzle.DestComponentEnabled(i)) | 203 | if (!swizzle.DestComponentEnabled(i)) |
| 182 | continue; | 204 | continue; |
| 183 | 205 | ||
| 184 | dest[i] = src1[i] * src2[i]; | 206 | dest[i] = src1[i] * src2[i]; |
| 185 | } | 207 | } |
| 186 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 208 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 187 | break; | 209 | break; |
| 188 | } | 210 | } |
| 189 | 211 | ||
| 190 | case OpCode::Id::FLR: | 212 | case OpCode::Id::FLR: |
| 191 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 213 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 192 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 214 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 193 | for (int i = 0; i < 4; ++i) { | 215 | for (int i = 0; i < 4; ++i) { |
| 194 | if (!swizzle.DestComponentEnabled(i)) | 216 | if (!swizzle.DestComponentEnabled(i)) |
| 195 | continue; | 217 | continue; |
| 196 | 218 | ||
| 197 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | 219 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); |
| 198 | } | 220 | } |
| 199 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 221 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 200 | break; | 222 | break; |
| 201 | 223 | ||
| 202 | case OpCode::Id::MAX: | 224 | case OpCode::Id::MAX: |
| 203 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 225 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 204 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 226 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 205 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 227 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 206 | for (int i = 0; i < 4; ++i) { | 228 | for (int i = 0; i < 4; ++i) { |
| 207 | if (!swizzle.DestComponentEnabled(i)) | 229 | if (!swizzle.DestComponentEnabled(i)) |
| 208 | continue; | 230 | continue; |
| @@ -212,13 +234,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 212 | // max(NaN, 0) -> 0 | 234 | // max(NaN, 0) -> 0 |
| 213 | dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i]; | 235 | dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i]; |
| 214 | } | 236 | } |
| 215 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 237 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 216 | break; | 238 | break; |
| 217 | 239 | ||
| 218 | case OpCode::Id::MIN: | 240 | case OpCode::Id::MIN: |
| 219 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 241 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 220 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 242 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 221 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 243 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 222 | for (int i = 0; i < 4; ++i) { | 244 | for (int i = 0; i < 4; ++i) { |
| 223 | if (!swizzle.DestComponentEnabled(i)) | 245 | if (!swizzle.DestComponentEnabled(i)) |
| 224 | continue; | 246 | continue; |
| @@ -228,16 +250,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 228 | // min(NaN, 0) -> 0 | 250 | // min(NaN, 0) -> 0 |
| 229 | dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i]; | 251 | dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i]; |
| 230 | } | 252 | } |
| 231 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 253 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 232 | break; | 254 | break; |
| 233 | 255 | ||
| 234 | case OpCode::Id::DP3: | 256 | case OpCode::Id::DP3: |
| 235 | case OpCode::Id::DP4: | 257 | case OpCode::Id::DP4: |
| 236 | case OpCode::Id::DPH: | 258 | case OpCode::Id::DPH: |
| 237 | case OpCode::Id::DPHI: { | 259 | case OpCode::Id::DPHI: { |
| 238 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 260 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 239 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 261 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 240 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 262 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 241 | 263 | ||
| 242 | OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); | 264 | OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); |
| 243 | if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) | 265 | if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) |
| @@ -253,14 +275,14 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 253 | 275 | ||
| 254 | dest[i] = dot; | 276 | dest[i] = dot; |
| 255 | } | 277 | } |
| 256 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 278 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 257 | break; | 279 | break; |
| 258 | } | 280 | } |
| 259 | 281 | ||
| 260 | // Reciprocal | 282 | // Reciprocal |
| 261 | case OpCode::Id::RCP: { | 283 | case OpCode::Id::RCP: { |
| 262 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 284 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 263 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 285 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 264 | float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); | 286 | float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); |
| 265 | for (int i = 0; i < 4; ++i) { | 287 | for (int i = 0; i < 4; ++i) { |
| 266 | if (!swizzle.DestComponentEnabled(i)) | 288 | if (!swizzle.DestComponentEnabled(i)) |
| @@ -268,14 +290,14 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 268 | 290 | ||
| 269 | dest[i] = rcp_res; | 291 | dest[i] = rcp_res; |
| 270 | } | 292 | } |
| 271 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 293 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 272 | break; | 294 | break; |
| 273 | } | 295 | } |
| 274 | 296 | ||
| 275 | // Reciprocal Square Root | 297 | // Reciprocal Square Root |
| 276 | case OpCode::Id::RSQ: { | 298 | case OpCode::Id::RSQ: { |
| 277 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 299 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 278 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 300 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 279 | float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); | 301 | float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); |
| 280 | for (int i = 0; i < 4; ++i) { | 302 | for (int i = 0; i < 4; ++i) { |
| 281 | if (!swizzle.DestComponentEnabled(i)) | 303 | if (!swizzle.DestComponentEnabled(i)) |
| @@ -283,12 +305,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 283 | 305 | ||
| 284 | dest[i] = rsq_res; | 306 | dest[i] = rsq_res; |
| 285 | } | 307 | } |
| 286 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 308 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 287 | break; | 309 | break; |
| 288 | } | 310 | } |
| 289 | 311 | ||
| 290 | case OpCode::Id::MOVA: { | 312 | case OpCode::Id::MOVA: { |
| 291 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 313 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 292 | for (int i = 0; i < 2; ++i) { | 314 | for (int i = 0; i < 2; ++i) { |
| 293 | if (!swizzle.DestComponentEnabled(i)) | 315 | if (!swizzle.DestComponentEnabled(i)) |
| 294 | continue; | 316 | continue; |
| @@ -296,29 +318,29 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 296 | // TODO: Figure out how the rounding is done on hardware | 318 | // TODO: Figure out how the rounding is done on hardware |
| 297 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | 319 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |
| 298 | } | 320 | } |
| 299 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, | 321 | Record<DebugDataRecord::ADDR_REG_OUT>(debug_data, iteration, |
| 300 | state.address_registers); | 322 | state.address_registers); |
| 301 | break; | 323 | break; |
| 302 | } | 324 | } |
| 303 | 325 | ||
| 304 | case OpCode::Id::MOV: { | 326 | case OpCode::Id::MOV: { |
| 305 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 327 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 306 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 328 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 307 | for (int i = 0; i < 4; ++i) { | 329 | for (int i = 0; i < 4; ++i) { |
| 308 | if (!swizzle.DestComponentEnabled(i)) | 330 | if (!swizzle.DestComponentEnabled(i)) |
| 309 | continue; | 331 | continue; |
| 310 | 332 | ||
| 311 | dest[i] = src1[i]; | 333 | dest[i] = src1[i]; |
| 312 | } | 334 | } |
| 313 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 335 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 314 | break; | 336 | break; |
| 315 | } | 337 | } |
| 316 | 338 | ||
| 317 | case OpCode::Id::SGE: | 339 | case OpCode::Id::SGE: |
| 318 | case OpCode::Id::SGEI: | 340 | case OpCode::Id::SGEI: |
| 319 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 341 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 320 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 342 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 321 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 343 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 322 | for (int i = 0; i < 4; ++i) { | 344 | for (int i = 0; i < 4; ++i) { |
| 323 | if (!swizzle.DestComponentEnabled(i)) | 345 | if (!swizzle.DestComponentEnabled(i)) |
| 324 | continue; | 346 | continue; |
| @@ -326,14 +348,14 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 326 | dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) | 348 | dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) |
| 327 | : float24::FromFloat32(0.0f); | 349 | : float24::FromFloat32(0.0f); |
| 328 | } | 350 | } |
| 329 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 351 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 330 | break; | 352 | break; |
| 331 | 353 | ||
| 332 | case OpCode::Id::SLT: | 354 | case OpCode::Id::SLT: |
| 333 | case OpCode::Id::SLTI: | 355 | case OpCode::Id::SLTI: |
| 334 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 356 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 335 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 357 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 336 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 358 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 337 | for (int i = 0; i < 4; ++i) { | 359 | for (int i = 0; i < 4; ++i) { |
| 338 | if (!swizzle.DestComponentEnabled(i)) | 360 | if (!swizzle.DestComponentEnabled(i)) |
| 339 | continue; | 361 | continue; |
| @@ -341,12 +363,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 341 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) | 363 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) |
| 342 | : float24::FromFloat32(0.0f); | 364 | : float24::FromFloat32(0.0f); |
| 343 | } | 365 | } |
| 344 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 366 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 345 | break; | 367 | break; |
| 346 | 368 | ||
| 347 | case OpCode::Id::CMP: | 369 | case OpCode::Id::CMP: |
| 348 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 370 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 349 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 371 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 350 | for (int i = 0; i < 2; ++i) { | 372 | for (int i = 0; i < 2; ++i) { |
| 351 | // TODO: Can you restrict to one compare via dest masking? | 373 | // TODO: Can you restrict to one compare via dest masking? |
| 352 | 374 | ||
| @@ -383,12 +405,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 383 | break; | 405 | break; |
| 384 | } | 406 | } |
| 385 | } | 407 | } |
| 386 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | 408 | Record<DebugDataRecord::CMP_RESULT>(debug_data, iteration, state.conditional_code); |
| 387 | break; | 409 | break; |
| 388 | 410 | ||
| 389 | case OpCode::Id::EX2: { | 411 | case OpCode::Id::EX2: { |
| 390 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 412 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 391 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 413 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 392 | 414 | ||
| 393 | // EX2 only takes first component exp2 and writes it to all dest components | 415 | // EX2 only takes first component exp2 and writes it to all dest components |
| 394 | float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); | 416 | float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); |
| @@ -399,13 +421,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 399 | dest[i] = ex2_res; | 421 | dest[i] = ex2_res; |
| 400 | } | 422 | } |
| 401 | 423 | ||
| 402 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 424 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 403 | break; | 425 | break; |
| 404 | } | 426 | } |
| 405 | 427 | ||
| 406 | case OpCode::Id::LG2: { | 428 | case OpCode::Id::LG2: { |
| 407 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 429 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 408 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 430 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 409 | 431 | ||
| 410 | // LG2 only takes the first component log2 and writes it to all dest components | 432 | // LG2 only takes the first component log2 and writes it to all dest components |
| 411 | float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); | 433 | float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); |
| @@ -416,7 +438,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 416 | dest[i] = lg2_res; | 438 | dest[i] = lg2_res; |
| 417 | } | 439 | } |
| 418 | 440 | ||
| 419 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 441 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 420 | break; | 442 | break; |
| 421 | } | 443 | } |
| 422 | 444 | ||
| @@ -498,17 +520,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 498 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 520 | ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 499 | : dummy_vec4_float24; | 521 | : dummy_vec4_float24; |
| 500 | 522 | ||
| 501 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 523 | Record<DebugDataRecord::SRC1>(debug_data, iteration, src1); |
| 502 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 524 | Record<DebugDataRecord::SRC2>(debug_data, iteration, src2); |
| 503 | Record<DebugDataRecord::SRC3>(state.debug, iteration, src3); | 525 | Record<DebugDataRecord::SRC3>(debug_data, iteration, src3); |
| 504 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 526 | Record<DebugDataRecord::DEST_IN>(debug_data, iteration, dest); |
| 505 | for (int i = 0; i < 4; ++i) { | 527 | for (int i = 0; i < 4; ++i) { |
| 506 | if (!swizzle.DestComponentEnabled(i)) | 528 | if (!swizzle.DestComponentEnabled(i)) |
| 507 | continue; | 529 | continue; |
| 508 | 530 | ||
| 509 | dest[i] = src1[i] * src2[i] + src3[i]; | 531 | dest[i] = src1[i] * src2[i] + src3[i]; |
| 510 | } | 532 | } |
| 511 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 533 | Record<DebugDataRecord::DEST_OUT>(debug_data, iteration, dest); |
| 512 | } else { | 534 | } else { |
| 513 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | 535 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", |
| 514 | (int)instr.opcode.Value().EffectiveOpCode(), | 536 | (int)instr.opcode.Value().EffectiveOpCode(), |
| @@ -518,26 +540,6 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 518 | } | 540 | } |
| 519 | 541 | ||
| 520 | default: { | 542 | default: { |
| 521 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, | ||
| 522 | Instruction::FlowControlType flow_control) { | ||
| 523 | bool results[2] = {refx == state.conditional_code[0], | ||
| 524 | refy == state.conditional_code[1]}; | ||
| 525 | |||
| 526 | switch (flow_control.op) { | ||
| 527 | case flow_control.Or: | ||
| 528 | return results[0] || results[1]; | ||
| 529 | |||
| 530 | case flow_control.And: | ||
| 531 | return results[0] && results[1]; | ||
| 532 | |||
| 533 | case flow_control.JustX: | ||
| 534 | return results[0]; | ||
| 535 | |||
| 536 | case flow_control.JustY: | ||
| 537 | return results[1]; | ||
| 538 | } | ||
| 539 | }; | ||
| 540 | |||
| 541 | // Handle each instruction on its own | 543 | // Handle each instruction on its own |
| 542 | switch (instr.opcode.Value()) { | 544 | switch (instr.opcode.Value()) { |
| 543 | case OpCode::Id::END: | 545 | case OpCode::Id::END: |
| @@ -545,17 +547,15 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 545 | break; | 547 | break; |
| 546 | 548 | ||
| 547 | case OpCode::Id::JMPC: | 549 | case OpCode::Id::JMPC: |
| 548 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, | 550 | Record<DebugDataRecord::COND_CMP_IN>(debug_data, iteration, state.conditional_code); |
| 549 | state.conditional_code); | 551 | if (evaluate_condition(instr.flow_control)) { |
| 550 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, | ||
| 551 | instr.flow_control)) { | ||
| 552 | program_counter = instr.flow_control.dest_offset - 1; | 552 | program_counter = instr.flow_control.dest_offset - 1; |
| 553 | } | 553 | } |
| 554 | break; | 554 | break; |
| 555 | 555 | ||
| 556 | case OpCode::Id::JMPU: | 556 | case OpCode::Id::JMPU: |
| 557 | Record<DebugDataRecord::COND_BOOL_IN>( | 557 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 558 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 558 | debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 559 | 559 | ||
| 560 | if (uniforms.b[instr.flow_control.bool_uniform_id] == | 560 | if (uniforms.b[instr.flow_control.bool_uniform_id] == |
| 561 | !(instr.flow_control.num_instructions & 1)) { | 561 | !(instr.flow_control.num_instructions & 1)) { |
| @@ -564,25 +564,23 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 564 | break; | 564 | break; |
| 565 | 565 | ||
| 566 | case OpCode::Id::CALL: | 566 | case OpCode::Id::CALL: |
| 567 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, | 567 | call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 568 | program_counter + 1, 0, 0); | 568 | program_counter + 1, 0, 0); |
| 569 | break; | 569 | break; |
| 570 | 570 | ||
| 571 | case OpCode::Id::CALLU: | 571 | case OpCode::Id::CALLU: |
| 572 | Record<DebugDataRecord::COND_BOOL_IN>( | 572 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 573 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 573 | debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 574 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 574 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 575 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, | 575 | call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 576 | program_counter + 1, 0, 0); | 576 | program_counter + 1, 0, 0); |
| 577 | } | 577 | } |
| 578 | break; | 578 | break; |
| 579 | 579 | ||
| 580 | case OpCode::Id::CALLC: | 580 | case OpCode::Id::CALLC: |
| 581 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, | 581 | Record<DebugDataRecord::COND_CMP_IN>(debug_data, iteration, state.conditional_code); |
| 582 | state.conditional_code); | 582 | if (evaluate_condition(instr.flow_control)) { |
| 583 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, | 583 | call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 584 | instr.flow_control)) { | ||
| 585 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, | ||
| 586 | program_counter + 1, 0, 0); | 584 | program_counter + 1, 0, 0); |
| 587 | } | 585 | } |
| 588 | break; | 586 | break; |
| @@ -592,14 +590,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 592 | 590 | ||
| 593 | case OpCode::Id::IFU: | 591 | case OpCode::Id::IFU: |
| 594 | Record<DebugDataRecord::COND_BOOL_IN>( | 592 | Record<DebugDataRecord::COND_BOOL_IN>( |
| 595 | state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | 593 | debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); |
| 596 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 594 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 597 | call(state, program_counter + 1, | 595 | call(program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, |
| 598 | instr.flow_control.dest_offset - program_counter - 1, | ||
| 599 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, | 596 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 600 | 0); | 597 | 0); |
| 601 | } else { | 598 | } else { |
| 602 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, | 599 | call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 603 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, | 600 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 604 | 0); | 601 | 0); |
| 605 | } | 602 | } |
| @@ -609,16 +606,13 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 609 | case OpCode::Id::IFC: { | 606 | case OpCode::Id::IFC: { |
| 610 | // TODO: Do we need to consider swizzlers here? | 607 | // TODO: Do we need to consider swizzlers here? |
| 611 | 608 | ||
| 612 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, | 609 | Record<DebugDataRecord::COND_CMP_IN>(debug_data, iteration, state.conditional_code); |
| 613 | state.conditional_code); | 610 | if (evaluate_condition(instr.flow_control)) { |
| 614 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, | 611 | call(program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, |
| 615 | instr.flow_control)) { | ||
| 616 | call(state, program_counter + 1, | ||
| 617 | instr.flow_control.dest_offset - program_counter - 1, | ||
| 618 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, | 612 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 619 | 0); | 613 | 0); |
| 620 | } else { | 614 | } else { |
| 621 | call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, | 615 | call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, |
| 622 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, | 616 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, |
| 623 | 0); | 617 | 0); |
| 624 | } | 618 | } |
| @@ -633,9 +627,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 633 | uniforms.i[instr.flow_control.int_uniform_id].w); | 627 | uniforms.i[instr.flow_control.int_uniform_id].w); |
| 634 | state.address_registers[2] = loop_param.y; | 628 | state.address_registers[2] = loop_param.y; |
| 635 | 629 | ||
| 636 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | 630 | Record<DebugDataRecord::LOOP_INT_IN>(debug_data, iteration, loop_param); |
| 637 | call(state, program_counter + 1, | 631 | call(program_counter + 1, instr.flow_control.dest_offset - program_counter + 1, |
| 638 | instr.flow_control.dest_offset - program_counter + 1, | ||
| 639 | instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); | 632 | instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); |
| 640 | break; | 633 | break; |
| 641 | } | 634 | } |
| @@ -657,8 +650,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned | |||
| 657 | } | 650 | } |
| 658 | 651 | ||
| 659 | // Explicit instantiation | 652 | // Explicit instantiation |
| 660 | template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset); | 653 | template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset); |
| 661 | template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset); | 654 | template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset); |
| 662 | 655 | ||
| 663 | } // namespace | 656 | } // namespace |
| 664 | 657 | ||
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 48ede0a2e..d31dcd7a6 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -8,11 +8,14 @@ namespace Pica { | |||
| 8 | 8 | ||
| 9 | namespace Shader { | 9 | namespace Shader { |
| 10 | 10 | ||
| 11 | template <bool Debug> | ||
| 12 | struct UnitState; | 11 | struct UnitState; |
| 13 | 12 | ||
| 14 | template <bool Debug> | 13 | template <bool Debug> |
| 15 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); | 14 | struct DebugData; |
| 15 | |||
| 16 | template <bool Debug> | ||
| 17 | void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, | ||
| 18 | unsigned offset); | ||
| 16 | 19 | ||
| 17 | } // namespace | 20 | } // namespace |
| 18 | 21 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 3ba31d474..c588b778b 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -185,10 +185,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 185 | 185 | ||
| 186 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | 186 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { |
| 187 | src_ptr = SETUP; | 187 | src_ptr = SETUP; |
| 188 | src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); | 188 | src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); |
| 189 | } else { | 189 | } else { |
| 190 | src_ptr = STATE; | 190 | src_ptr = STATE; |
| 191 | src_offset = UnitState<false>::InputOffset(src_reg); | 191 | src_offset = UnitState::InputOffset(src_reg); |
| 192 | } | 192 | } |
| 193 | 193 | ||
| 194 | int src_offset_disp = (int)src_offset; | 194 | int src_offset_disp = (int)src_offset; |
| @@ -266,9 +266,7 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { | |||
| 266 | 266 | ||
| 267 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; | 267 | SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; |
| 268 | 268 | ||
| 269 | int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); | 269 | size_t dest_offset_disp = UnitState::OutputOffset(dest); |
| 270 | ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), | ||
| 271 | "Destinaton offset too large for int type"); | ||
| 272 | 270 | ||
| 273 | // If all components are enabled, write the result to the destination register | 271 | // If all components are enabled, write the result to the destination register |
| 274 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 272 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| @@ -348,8 +346,7 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | |||
| 348 | } | 346 | } |
| 349 | 347 | ||
| 350 | void JitShader::Compile_UniformCondition(Instruction instr) { | 348 | void JitShader::Compile_UniformCondition(Instruction instr) { |
| 351 | size_t offset = | 349 | size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); |
| 352 | ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); | ||
| 353 | cmp(byte[SETUP + offset], 0); | 350 | cmp(byte[SETUP + offset], 0); |
| 354 | } | 351 | } |
| 355 | 352 | ||
| @@ -732,8 +729,7 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
| 732 | // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. | 729 | // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. |
| 733 | // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by | 730 | // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by |
| 734 | // 4 bits) to be used as an offset into the 16-byte vector registers later | 731 | // 4 bits) to be used as an offset into the 16-byte vector registers later |
| 735 | size_t offset = | 732 | size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); |
| 736 | ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | ||
| 737 | mov(LOOPCOUNT, dword[SETUP + offset]); | 733 | mov(LOOPCOUNT, dword[SETUP + offset]); |
| 738 | mov(LOOPCOUNT_REG, LOOPCOUNT); | 734 | mov(LOOPCOUNT_REG, LOOPCOUNT); |
| 739 | shr(LOOPCOUNT_REG, 4); | 735 | shr(LOOPCOUNT_REG, 4); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index e0ecde3f2..f37548306 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -34,7 +34,7 @@ class JitShader : public Xbyak::CodeGenerator { | |||
| 34 | public: | 34 | public: |
| 35 | JitShader(); | 35 | JitShader(); |
| 36 | 36 | ||
| 37 | void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { | 37 | void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { |
| 38 | program(&setup, &state, instruction_labels[offset].getAddress()); | 38 | program(&setup, &state, instruction_labels[offset].getAddress()); |
| 39 | } | 39 | } |
| 40 | 40 | ||