diff options
| author | 2015-07-12 01:57:59 +0200 | |
|---|---|---|
| committer | 2015-08-16 14:12:11 +0200 | |
| commit | 33ba604fd903d9511a414a54b91ebe818df338ef (patch) | |
| tree | 578ee82515c8e819e6468af4ef5552ee028e472b /src/video_core | |
| parent | Pica/DebugUtils: Include uniform information into shader dumps. (diff) | |
| download | yuzu-33ba604fd903d9511a414a54b91ebe818df338ef.tar.gz yuzu-33ba604fd903d9511a414a54b91ebe818df338ef.tar.xz yuzu-33ba604fd903d9511a414a54b91ebe818df338ef.zip | |
Introduce a shader tracer to allow inspection of input/output values for each processed instruction.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_processor.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 195 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 8 |
8 files changed, 326 insertions, 41 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 374c4748d..8c741f31f 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -215,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 215 | unsigned int vertex_cache_pos = 0; | 215 | unsigned int vertex_cache_pos = 0; |
| 216 | vertex_cache_ids.fill(-1); | 216 | vertex_cache_ids.fill(-1); |
| 217 | 217 | ||
| 218 | Shader::UnitState shader_unit; | 218 | Shader::UnitState<false> shader_unit; |
| 219 | Shader::Setup(shader_unit); | 219 | Shader::Setup(shader_unit); |
| 220 | 220 | ||
| 221 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 221 | for (unsigned int index = 0; index < regs.num_vertices; ++index) |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index a79d90ef1..ac071790a 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -111,7 +111,7 @@ void GeometryDumper::Dump() { | |||
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | 113 | ||
| 114 | void DumpShader(const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) | 114 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) |
| 115 | { | 115 | { |
| 116 | struct StuffToWrite { | 116 | struct StuffToWrite { |
| 117 | u8* pointer; | 117 | u8* pointer; |
| @@ -294,7 +294,6 @@ void DumpShader(const Regs::ShaderConfig& config, const State::ShaderSetup& setu | |||
| 294 | 294 | ||
| 295 | // Write data to file | 295 | // Write data to file |
| 296 | static int dump_index = 0; | 296 | static int dump_index = 0; |
| 297 | std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); | ||
| 298 | std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); | 297 | std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); |
| 299 | 298 | ||
| 300 | for (auto& chunk : writing_queue) { | 299 | for (auto& chunk : writing_queue) { |
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 1450e5bf3..0b30d7ffa 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h | |||
| @@ -181,7 +181,8 @@ private: | |||
| 181 | std::vector<Face> faces; | 181 | std::vector<Face> faces; |
| 182 | }; | 182 | }; |
| 183 | 183 | ||
| 184 | void DumpShader(const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); | 184 | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, |
| 185 | const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); | ||
| 185 | 186 | ||
| 186 | 187 | ||
| 187 | // Utility class to log Pica commands. | 188 | // Utility class to log Pica commands. |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2692b91e4..4e9836c80 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | #include <memory> | 5 | #include <memory> |
| 6 | #include <unordered_map> | 6 | #include <unordered_map> |
| 7 | 7 | ||
| 8 | #include <boost/range/algorithm/fill.hpp> | ||
| 9 | |||
| 8 | #include "common/hash.h" | 10 | #include "common/hash.h" |
| 9 | #include "common/make_unique.h" | 11 | #include "common/make_unique.h" |
| 10 | #include "common/profiler.h" | 12 | #include "common/profiler.h" |
| @@ -30,7 +32,7 @@ static JitCompiler jit; | |||
| 30 | static CompiledShader* jit_shader; | 32 | static CompiledShader* jit_shader; |
| 31 | #endif // ARCHITECTURE_x86_64 | 33 | #endif // ARCHITECTURE_x86_64 |
| 32 | 34 | ||
| 33 | void Setup(UnitState& state) { | 35 | void Setup(UnitState<false>& state) { |
| 34 | #ifdef ARCHITECTURE_x86_64 | 36 | #ifdef ARCHITECTURE_x86_64 |
| 35 | if (VideoCore::g_shader_jit_enabled) { | 37 | if (VideoCore::g_shader_jit_enabled) { |
| 36 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 38 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| @@ -54,9 +56,8 @@ void Shutdown() { | |||
| 54 | 56 | ||
| 55 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | 57 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); |
| 56 | 58 | ||
| 57 | OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { | 59 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 58 | auto& config = g_state.regs.vs; | 60 | auto& config = g_state.regs.vs; |
| 59 | auto& setup = g_state.vs; | ||
| 60 | 61 | ||
| 61 | Common::Profiling::ScopeTimer timer(shader_category); | 62 | Common::Profiling::ScopeTimer timer(shader_category); |
| 62 | 63 | ||
| @@ -67,6 +68,8 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 67 | // Setup input register table | 68 | // Setup input register table |
| 68 | const auto& attribute_register_map = config.input_register_map; | 69 | const auto& attribute_register_map = config.input_register_map; |
| 69 | 70 | ||
| 71 | // TODO: Instead of this cumbersome logic, just load the input data directly like | ||
| 72 | // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } | ||
| 70 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; | 73 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; |
| 71 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; | 74 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; |
| 72 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; | 75 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; |
| @@ -126,14 +129,52 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 126 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | 129 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
| 127 | } | 130 | } |
| 128 | 131 | ||
| 129 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | 132 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
| 130 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 133 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
| 134 | ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||
| 131 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | 135 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
| 132 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | 136 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); |
| 133 | 137 | ||
| 134 | return ret; | 138 | return ret; |
| 135 | } | 139 | } |
| 136 | 140 | ||
| 141 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { | ||
| 142 | UnitState<true> state; | ||
| 143 | |||
| 144 | const auto& shader_memory = setup.program_code; | ||
| 145 | state.program_counter = config.main_offset; | ||
| 146 | state.debug.max_offset = 0; | ||
| 147 | state.debug.max_opdesc_id = 0; | ||
| 148 | |||
| 149 | // Setup input register table | ||
| 150 | const auto& attribute_register_map = config.input_register_map; | ||
| 151 | float24 dummy_register; | ||
| 152 | boost::fill(state.registers.input, &dummy_register); | ||
| 153 | |||
| 154 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||
| 155 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||
| 156 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||
| 157 | if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||
| 158 | if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||
| 159 | if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||
| 160 | if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||
| 161 | if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||
| 162 | if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||
| 163 | if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||
| 164 | if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||
| 165 | if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||
| 166 | if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||
| 167 | if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||
| 168 | if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||
| 169 | if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||
| 170 | |||
| 171 | state.conditional_code[0] = false; | ||
| 172 | state.conditional_code[1] = false; | ||
| 173 | |||
| 174 | RunInterpreter(state); | ||
| 175 | return state.debug; | ||
| 176 | } | ||
| 177 | |||
| 137 | } // namespace Shader | 178 | } // namespace Shader |
| 138 | 179 | ||
| 139 | } // namespace Pica | 180 | } // namespace Pica |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 2007a2844..58d21f7cd 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -4,7 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | ||
| 8 | |||
| 7 | #include <boost/container/static_vector.hpp> | 9 | #include <boost/container/static_vector.hpp> |
| 10 | |||
| 8 | #include <nihstro/shader_binary.h> | 11 | #include <nihstro/shader_binary.h> |
| 9 | 12 | ||
| 10 | #include "common/common_funcs.h" | 13 | #include "common/common_funcs.h" |
| @@ -72,12 +75,185 @@ struct OutputVertex { | |||
| 72 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | 75 | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
| 73 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | 76 | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
| 74 | 77 | ||
| 78 | |||
| 79 | // Helper structure used to keep track of data useful for inspection of shader emulation | ||
| 80 | template<bool full_debugging> | ||
| 81 | struct DebugData; | ||
| 82 | |||
| 83 | template<> | ||
| 84 | struct DebugData<false> { | ||
| 85 | // TODO: Hide these behind an interface and move them to DebugData<true> | ||
| 86 | u32 max_offset; // maximum program counter ever reached | ||
| 87 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 88 | }; | ||
| 89 | |||
| 90 | template<> | ||
| 91 | struct DebugData<true> { | ||
| 92 | // Records store the input and output operands of a particular instruction. | ||
| 93 | struct Record { | ||
| 94 | enum Type { | ||
| 95 | // Floating point arithmetic operands | ||
| 96 | SRC1 = 0x1, | ||
| 97 | SRC2 = 0x2, | ||
| 98 | SRC3 = 0x4, | ||
| 99 | |||
| 100 | // Initial and final output operand value | ||
| 101 | DEST_IN = 0x8, | ||
| 102 | DEST_OUT = 0x10, | ||
| 103 | |||
| 104 | // Current and next instruction offset (in words) | ||
| 105 | CUR_INSTR = 0x20, | ||
| 106 | NEXT_INSTR = 0x40, | ||
| 107 | |||
| 108 | // Output address register value | ||
| 109 | ADDR_REG_OUT = 0x80, | ||
| 110 | |||
| 111 | // Result of a comparison instruction | ||
| 112 | CMP_RESULT = 0x100, | ||
| 113 | |||
| 114 | // Input values for conditional flow control instructions | ||
| 115 | COND_BOOL_IN = 0x200, | ||
| 116 | COND_CMP_IN = 0x400, | ||
| 117 | |||
| 118 | // Input values for a loop | ||
| 119 | LOOP_INT_IN = 0x800, | ||
| 120 | }; | ||
| 121 | |||
| 122 | Math::Vec4<float24> src1; | ||
| 123 | Math::Vec4<float24> src2; | ||
| 124 | Math::Vec4<float24> src3; | ||
| 125 | |||
| 126 | Math::Vec4<float24> dest_in; | ||
| 127 | Math::Vec4<float24> dest_out; | ||
| 128 | |||
| 129 | s32 address_registers[2]; | ||
| 130 | bool conditional_code[2]; | ||
| 131 | bool cond_bool; | ||
| 132 | bool cond_cmp[2]; | ||
| 133 | Math::Vec4<u8> loop_int; | ||
| 134 | |||
| 135 | u32 instruction_offset; | ||
| 136 | u32 next_instruction; | ||
| 137 | |||
| 138 | // set of enabled fields (as a combination of Type flags) | ||
| 139 | unsigned mask = 0; | ||
| 140 | }; | ||
| 141 | |||
| 142 | u32 max_offset; // maximum program counter ever reached | ||
| 143 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 144 | |||
| 145 | // List of records for each executed shader instruction | ||
| 146 | std::vector<DebugData<true>::Record> records; | ||
| 147 | }; | ||
| 148 | |||
| 149 | // Type alias for better readability | ||
| 150 | using DebugDataRecord = DebugData<true>::Record; | ||
| 151 | |||
| 152 | // Helper function to set a DebugData<true>::Record field based on the template enum parameter. | ||
| 153 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 154 | inline void SetField(DebugDataRecord& record, ValueType value); | ||
| 155 | |||
| 156 | template<> | ||
| 157 | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | ||
| 158 | record.src1.x = value[0]; | ||
| 159 | record.src1.y = value[1]; | ||
| 160 | record.src1.z = value[2]; | ||
| 161 | record.src1.w = value[3]; | ||
| 162 | } | ||
| 163 | |||
| 164 | template<> | ||
| 165 | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | ||
| 166 | record.src2.x = value[0]; | ||
| 167 | record.src2.y = value[1]; | ||
| 168 | record.src2.z = value[2]; | ||
| 169 | record.src2.w = value[3]; | ||
| 170 | } | ||
| 171 | |||
| 172 | template<> | ||
| 173 | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | ||
| 174 | record.src3.x = value[0]; | ||
| 175 | record.src3.y = value[1]; | ||
| 176 | record.src3.z = value[2]; | ||
| 177 | record.src3.w = value[3]; | ||
| 178 | } | ||
| 179 | |||
| 180 | template<> | ||
| 181 | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | ||
| 182 | record.dest_in.x = value[0]; | ||
| 183 | record.dest_in.y = value[1]; | ||
| 184 | record.dest_in.z = value[2]; | ||
| 185 | record.dest_in.w = value[3]; | ||
| 186 | } | ||
| 187 | |||
| 188 | template<> | ||
| 189 | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | ||
| 190 | record.dest_out.x = value[0]; | ||
| 191 | record.dest_out.y = value[1]; | ||
| 192 | record.dest_out.z = value[2]; | ||
| 193 | record.dest_out.w = value[3]; | ||
| 194 | } | ||
| 195 | |||
| 196 | template<> | ||
| 197 | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | ||
| 198 | record.address_registers[0] = value[0]; | ||
| 199 | record.address_registers[1] = value[1]; | ||
| 200 | } | ||
| 201 | |||
| 202 | template<> | ||
| 203 | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | ||
| 204 | record.conditional_code[0] = value[0]; | ||
| 205 | record.conditional_code[1] = value[1]; | ||
| 206 | } | ||
| 207 | |||
| 208 | template<> | ||
| 209 | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | ||
| 210 | record.cond_bool = value; | ||
| 211 | } | ||
| 212 | |||
| 213 | template<> | ||
| 214 | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | ||
| 215 | record.cond_cmp[0] = value[0]; | ||
| 216 | record.cond_cmp[1] = value[1]; | ||
| 217 | } | ||
| 218 | |||
| 219 | template<> | ||
| 220 | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | ||
| 221 | record.loop_int = value; | ||
| 222 | } | ||
| 223 | |||
| 224 | template<> | ||
| 225 | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 226 | record.instruction_offset = value; | ||
| 227 | } | ||
| 228 | |||
| 229 | template<> | ||
| 230 | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | ||
| 231 | record.next_instruction = value; | ||
| 232 | } | ||
| 233 | |||
| 234 | // Helper function to set debug information on the current shader iteration. | ||
| 235 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 236 | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | ||
| 237 | // Debugging disabled => nothing to do | ||
| 238 | } | ||
| 239 | |||
| 240 | template<DebugDataRecord::Type type, typename ValueType> | ||
| 241 | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | ||
| 242 | if (offset >= debug_data.records.size()) | ||
| 243 | debug_data.records.resize(offset + 1); | ||
| 244 | |||
| 245 | SetField<type, ValueType>(debug_data.records[offset], value); | ||
| 246 | debug_data.records[offset].mask |= type; | ||
| 247 | } | ||
| 248 | |||
| 249 | |||
| 75 | /** | 250 | /** |
| 76 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS | 251 | * This structure contains the state information that needs to be unique for a shader unit. The 3DS |
| 77 | * has four shader units that process shaders in parallel. At the present, Citra only implements a | 252 | * has four shader units that process shaders in parallel. At the present, Citra only implements a |
| 78 | * single shader unit that processes all shaders serially. Putting the state information in a struct | 253 | * single shader unit that processes all shaders serially. Putting the state information in a struct |
| 79 | * here will make it easier for us to parallelize the shader processing later. | 254 | * here will make it easier for us to parallelize the shader processing later. |
| 80 | */ | 255 | */ |
| 256 | template<bool Debug> | ||
| 81 | struct UnitState { | 257 | struct UnitState { |
| 82 | struct Registers { | 258 | struct Registers { |
| 83 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 259 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| @@ -111,10 +287,7 @@ struct UnitState { | |||
| 111 | // TODO: Is there a maximal size for this? | 287 | // TODO: Is there a maximal size for this? |
| 112 | boost::container::static_vector<CallStackElement, 16> call_stack; | 288 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| 113 | 289 | ||
| 114 | struct { | 290 | DebugData<Debug> debug; |
| 115 | u32 max_offset; // maximum program counter ever reached | ||
| 116 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 117 | } debug; | ||
| 118 | 291 | ||
| 119 | static int InputOffset(const SourceRegister& reg) { | 292 | static int InputOffset(const SourceRegister& reg) { |
| 120 | switch (reg.GetRegisterType()) { | 293 | switch (reg.GetRegisterType()) { |
| @@ -150,7 +323,7 @@ struct UnitState { | |||
| 150 | * vertex, which would happen within the `Run` function). | 323 | * vertex, which would happen within the `Run` function). |
| 151 | * @param state Shader unit state, must be setup per shader and per shader unit | 324 | * @param state Shader unit state, must be setup per shader and per shader unit |
| 152 | */ | 325 | */ |
| 153 | void Setup(UnitState& state); | 326 | void Setup(UnitState<false>& state); |
| 154 | 327 | ||
| 155 | /// Performs any cleanup when the emulator is shutdown | 328 | /// Performs any cleanup when the emulator is shutdown |
| 156 | void Shutdown(); | 329 | void Shutdown(); |
| @@ -162,7 +335,17 @@ void Shutdown(); | |||
| 162 | * @param num_attributes The number of vertex shader attributes | 335 | * @param num_attributes The number of vertex shader attributes |
| 163 | * @return The output vertex, after having been processed by the vertex shader | 336 | * @return The output vertex, after having been processed by the vertex shader |
| 164 | */ | 337 | */ |
| 165 | OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); | 338 | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); |
| 339 | |||
| 340 | /** | ||
| 341 | * Produce debug information based on the given shader and input vertex | ||
| 342 | * @param input Input vertex into the shader | ||
| 343 | * @param num_attributes The number of vertex shader attributes | ||
| 344 | * @param config Configuration object for the shader pipeline | ||
| 345 | * @param setup Setup object for the shader pipeline | ||
| 346 | * @return Debug information for this shader with regards to the given vertex | ||
| 347 | */ | ||
| 348 | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); | ||
| 166 | 349 | ||
| 167 | } // namespace Shader | 350 | } // namespace Shader |
| 168 | 351 | ||
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index c8489f920..e14de0768 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -21,7 +21,8 @@ namespace Pica { | |||
| 21 | 21 | ||
| 22 | namespace Shader { | 22 | namespace Shader { |
| 23 | 23 | ||
| 24 | void RunInterpreter(UnitState& state) { | 24 | template<bool Debug> |
| 25 | void RunInterpreter(UnitState<Debug>& state) { | ||
| 25 | const auto& uniforms = g_state.vs.uniforms; | 26 | const auto& uniforms = g_state.vs.uniforms; |
| 26 | const auto& swizzle_data = g_state.vs.swizzle_data; | 27 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| 27 | const auto& program_code = g_state.vs.program_code; | 28 | const auto& program_code = g_state.vs.program_code; |
| @@ -29,7 +30,9 @@ void RunInterpreter(UnitState& state) { | |||
| 29 | // Placeholder for invalid inputs | 30 | // Placeholder for invalid inputs |
| 30 | static float24 dummy_vec4_float24[4]; | 31 | static float24 dummy_vec4_float24[4]; |
| 31 | 32 | ||
| 32 | while (true) { | 33 | unsigned iteration = 0; |
| 34 | bool exit_loop = false; | ||
| 35 | while (!exit_loop) { | ||
| 33 | if (!state.call_stack.empty()) { | 36 | if (!state.call_stack.empty()) { |
| 34 | auto& top = state.call_stack.back(); | 37 | auto& top = state.call_stack.back(); |
| 35 | if (state.program_counter == top.final_address) { | 38 | if (state.program_counter == top.final_address) { |
| @@ -47,16 +50,19 @@ void RunInterpreter(UnitState& state) { | |||
| 47 | } | 50 | } |
| 48 | } | 51 | } |
| 49 | 52 | ||
| 50 | bool exit_loop = false; | ||
| 51 | const Instruction instr = { program_code[state.program_counter] }; | 53 | const Instruction instr = { program_code[state.program_counter] }; |
| 52 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | 54 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |
| 53 | 55 | ||
| 54 | static auto call = [](UnitState& state, u32 offset, u32 num_instructions, | 56 | static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, |
| 55 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 57 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 56 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 58 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 57 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); | 59 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); |
| 58 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 60 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |
| 59 | }; | 61 | }; |
| 62 | Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | ||
| 63 | if (iteration > 0) | ||
| 64 | Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | ||
| 65 | |||
| 60 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | 66 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); |
| 61 | 67 | ||
| 62 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 68 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| @@ -123,58 +129,78 @@ void RunInterpreter(UnitState& state) { | |||
| 123 | switch (instr.opcode.Value().EffectiveOpCode()) { | 129 | switch (instr.opcode.Value().EffectiveOpCode()) { |
| 124 | case OpCode::Id::ADD: | 130 | case OpCode::Id::ADD: |
| 125 | { | 131 | { |
| 132 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 133 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 134 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 126 | for (int i = 0; i < 4; ++i) { | 135 | for (int i = 0; i < 4; ++i) { |
| 127 | if (!swizzle.DestComponentEnabled(i)) | 136 | if (!swizzle.DestComponentEnabled(i)) |
| 128 | continue; | 137 | continue; |
| 129 | 138 | ||
| 130 | dest[i] = src1[i] + src2[i]; | 139 | dest[i] = src1[i] + src2[i]; |
| 131 | } | 140 | } |
| 132 | 141 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 133 | break; | 142 | break; |
| 134 | } | 143 | } |
| 135 | 144 | ||
| 136 | case OpCode::Id::MUL: | 145 | case OpCode::Id::MUL: |
| 137 | { | 146 | { |
| 147 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 148 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 149 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 138 | for (int i = 0; i < 4; ++i) { | 150 | for (int i = 0; i < 4; ++i) { |
| 139 | if (!swizzle.DestComponentEnabled(i)) | 151 | if (!swizzle.DestComponentEnabled(i)) |
| 140 | continue; | 152 | continue; |
| 141 | 153 | ||
| 142 | dest[i] = src1[i] * src2[i]; | 154 | dest[i] = src1[i] * src2[i]; |
| 143 | } | 155 | } |
| 144 | 156 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 145 | break; | 157 | break; |
| 146 | } | 158 | } |
| 147 | 159 | ||
| 148 | case OpCode::Id::FLR: | 160 | case OpCode::Id::FLR: |
| 161 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 162 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 149 | for (int i = 0; i < 4; ++i) { | 163 | for (int i = 0; i < 4; ++i) { |
| 150 | if (!swizzle.DestComponentEnabled(i)) | 164 | if (!swizzle.DestComponentEnabled(i)) |
| 151 | continue; | 165 | continue; |
| 152 | 166 | ||
| 153 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | 167 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); |
| 154 | } | 168 | } |
| 169 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 155 | break; | 170 | break; |
| 156 | 171 | ||
| 157 | case OpCode::Id::MAX: | 172 | case OpCode::Id::MAX: |
| 173 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 174 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 175 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 158 | for (int i = 0; i < 4; ++i) { | 176 | for (int i = 0; i < 4; ++i) { |
| 159 | if (!swizzle.DestComponentEnabled(i)) | 177 | if (!swizzle.DestComponentEnabled(i)) |
| 160 | continue; | 178 | continue; |
| 161 | 179 | ||
| 162 | dest[i] = std::max(src1[i], src2[i]); | 180 | dest[i] = std::max(src1[i], src2[i]); |
| 163 | } | 181 | } |
| 182 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 164 | break; | 183 | break; |
| 165 | 184 | ||
| 166 | case OpCode::Id::MIN: | 185 | case OpCode::Id::MIN: |
| 186 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 187 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 188 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 167 | for (int i = 0; i < 4; ++i) { | 189 | for (int i = 0; i < 4; ++i) { |
| 168 | if (!swizzle.DestComponentEnabled(i)) | 190 | if (!swizzle.DestComponentEnabled(i)) |
| 169 | continue; | 191 | continue; |
| 170 | 192 | ||
| 171 | dest[i] = std::min(src1[i], src2[i]); | 193 | dest[i] = std::min(src1[i], src2[i]); |
| 172 | } | 194 | } |
| 195 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 173 | break; | 196 | break; |
| 174 | 197 | ||
| 175 | case OpCode::Id::DP3: | 198 | case OpCode::Id::DP3: |
| 176 | case OpCode::Id::DP4: | 199 | case OpCode::Id::DP4: |
| 177 | { | 200 | { |
| 201 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 202 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 203 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 178 | float24 dot = float24::FromFloat32(0.f); | 204 | float24 dot = float24::FromFloat32(0.f); |
| 179 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | 205 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; |
| 180 | for (int i = 0; i < num_components; ++i) | 206 | for (int i = 0; i < num_components; ++i) |
| @@ -186,12 +212,15 @@ void RunInterpreter(UnitState& state) { | |||
| 186 | 212 | ||
| 187 | dest[i] = dot; | 213 | dest[i] = dot; |
| 188 | } | 214 | } |
| 215 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 189 | break; | 216 | break; |
| 190 | } | 217 | } |
| 191 | 218 | ||
| 192 | // Reciprocal | 219 | // Reciprocal |
| 193 | case OpCode::Id::RCP: | 220 | case OpCode::Id::RCP: |
| 194 | { | 221 | { |
| 222 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 223 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 195 | for (int i = 0; i < 4; ++i) { | 224 | for (int i = 0; i < 4; ++i) { |
| 196 | if (!swizzle.DestComponentEnabled(i)) | 225 | if (!swizzle.DestComponentEnabled(i)) |
| 197 | continue; | 226 | continue; |
| @@ -200,13 +229,15 @@ void RunInterpreter(UnitState& state) { | |||
| 200 | // TODO: I think this might be wrong... we should only use one component here | 229 | // TODO: I think this might be wrong... we should only use one component here |
| 201 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | 230 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); |
| 202 | } | 231 | } |
| 203 | 232 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 204 | break; | 233 | break; |
| 205 | } | 234 | } |
| 206 | 235 | ||
| 207 | // Reciprocal Square Root | 236 | // Reciprocal Square Root |
| 208 | case OpCode::Id::RSQ: | 237 | case OpCode::Id::RSQ: |
| 209 | { | 238 | { |
| 239 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 240 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 210 | for (int i = 0; i < 4; ++i) { | 241 | for (int i = 0; i < 4; ++i) { |
| 211 | if (!swizzle.DestComponentEnabled(i)) | 242 | if (!swizzle.DestComponentEnabled(i)) |
| 212 | continue; | 243 | continue; |
| @@ -215,12 +246,13 @@ void RunInterpreter(UnitState& state) { | |||
| 215 | // TODO: I think this might be wrong... we should only use one component here | 246 | // TODO: I think this might be wrong... we should only use one component here |
| 216 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | 247 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); |
| 217 | } | 248 | } |
| 218 | 249 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | |
| 219 | break; | 250 | break; |
| 220 | } | 251 | } |
| 221 | 252 | ||
| 222 | case OpCode::Id::MOVA: | 253 | case OpCode::Id::MOVA: |
| 223 | { | 254 | { |
| 255 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 224 | for (int i = 0; i < 2; ++i) { | 256 | for (int i = 0; i < 2; ++i) { |
| 225 | if (!swizzle.DestComponentEnabled(i)) | 257 | if (!swizzle.DestComponentEnabled(i)) |
| 226 | continue; | 258 | continue; |
| @@ -228,32 +260,41 @@ void RunInterpreter(UnitState& state) { | |||
| 228 | // TODO: Figure out how the rounding is done on hardware | 260 | // TODO: Figure out how the rounding is done on hardware |
| 229 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | 261 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |
| 230 | } | 262 | } |
| 231 | 263 | Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); | |
| 232 | break; | 264 | break; |
| 233 | } | 265 | } |
| 234 | 266 | ||
| 235 | case OpCode::Id::MOV: | 267 | case OpCode::Id::MOV: |
| 236 | { | 268 | { |
| 269 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 270 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 237 | for (int i = 0; i < 4; ++i) { | 271 | for (int i = 0; i < 4; ++i) { |
| 238 | if (!swizzle.DestComponentEnabled(i)) | 272 | if (!swizzle.DestComponentEnabled(i)) |
| 239 | continue; | 273 | continue; |
| 240 | 274 | ||
| 241 | dest[i] = src1[i]; | 275 | dest[i] = src1[i]; |
| 242 | } | 276 | } |
| 277 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 243 | break; | 278 | break; |
| 244 | } | 279 | } |
| 245 | 280 | ||
| 246 | case OpCode::Id::SLT: | 281 | case OpCode::Id::SLT: |
| 247 | case OpCode::Id::SLTI: | 282 | case OpCode::Id::SLTI: |
| 283 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 284 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 285 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 248 | for (int i = 0; i < 4; ++i) { | 286 | for (int i = 0; i < 4; ++i) { |
| 249 | if (!swizzle.DestComponentEnabled(i)) | 287 | if (!swizzle.DestComponentEnabled(i)) |
| 250 | continue; | 288 | continue; |
| 251 | 289 | ||
| 252 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | 290 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); |
| 253 | } | 291 | } |
| 292 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 254 | break; | 293 | break; |
| 255 | 294 | ||
| 256 | case OpCode::Id::CMP: | 295 | case OpCode::Id::CMP: |
| 296 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 297 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 257 | for (int i = 0; i < 2; ++i) { | 298 | for (int i = 0; i < 2; ++i) { |
| 258 | // TODO: Can you restrict to one compare via dest masking? | 299 | // TODO: Can you restrict to one compare via dest masking? |
| 259 | 300 | ||
| @@ -261,27 +302,27 @@ void RunInterpreter(UnitState& state) { | |||
| 261 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | 302 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |
| 262 | 303 | ||
| 263 | switch (op) { | 304 | switch (op) { |
| 264 | case compare_op.Equal: | 305 | case Instruction::Common::CompareOpType::Equal: |
| 265 | state.conditional_code[i] = (src1[i] == src2[i]); | 306 | state.conditional_code[i] = (src1[i] == src2[i]); |
| 266 | break; | 307 | break; |
| 267 | 308 | ||
| 268 | case compare_op.NotEqual: | 309 | case Instruction::Common::CompareOpType::NotEqual: |
| 269 | state.conditional_code[i] = (src1[i] != src2[i]); | 310 | state.conditional_code[i] = (src1[i] != src2[i]); |
| 270 | break; | 311 | break; |
| 271 | 312 | ||
| 272 | case compare_op.LessThan: | 313 | case Instruction::Common::CompareOpType::LessThan: |
| 273 | state.conditional_code[i] = (src1[i] < src2[i]); | 314 | state.conditional_code[i] = (src1[i] < src2[i]); |
| 274 | break; | 315 | break; |
| 275 | 316 | ||
| 276 | case compare_op.LessEqual: | 317 | case Instruction::Common::CompareOpType::LessEqual: |
| 277 | state.conditional_code[i] = (src1[i] <= src2[i]); | 318 | state.conditional_code[i] = (src1[i] <= src2[i]); |
| 278 | break; | 319 | break; |
| 279 | 320 | ||
| 280 | case compare_op.GreaterThan: | 321 | case Instruction::Common::CompareOpType::GreaterThan: |
| 281 | state.conditional_code[i] = (src1[i] > src2[i]); | 322 | state.conditional_code[i] = (src1[i] > src2[i]); |
| 282 | break; | 323 | break; |
| 283 | 324 | ||
| 284 | case compare_op.GreaterEqual: | 325 | case Instruction::Common::CompareOpType::GreaterEqual: |
| 285 | state.conditional_code[i] = (src1[i] >= src2[i]); | 326 | state.conditional_code[i] = (src1[i] >= src2[i]); |
| 286 | break; | 327 | break; |
| 287 | 328 | ||
| @@ -290,6 +331,7 @@ void RunInterpreter(UnitState& state) { | |||
| 290 | break; | 331 | break; |
| 291 | } | 332 | } |
| 292 | } | 333 | } |
| 334 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | ||
| 293 | break; | 335 | break; |
| 294 | 336 | ||
| 295 | default: | 337 | default: |
| @@ -359,12 +401,17 @@ void RunInterpreter(UnitState& state) { | |||
| 359 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | 401 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 360 | : dummy_vec4_float24; | 402 | : dummy_vec4_float24; |
| 361 | 403 | ||
| 404 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 405 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 406 | Record<DebugDataRecord::SRC3>(state.debug, iteration, src3); | ||
| 407 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 362 | for (int i = 0; i < 4; ++i) { | 408 | for (int i = 0; i < 4; ++i) { |
| 363 | if (!swizzle.DestComponentEnabled(i)) | 409 | if (!swizzle.DestComponentEnabled(i)) |
| 364 | continue; | 410 | continue; |
| 365 | 411 | ||
| 366 | dest[i] = src1[i] * src2[i] + src3[i]; | 412 | dest[i] = src1[i] * src2[i] + src3[i]; |
| 367 | } | 413 | } |
| 414 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 368 | } else { | 415 | } else { |
| 369 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | 416 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", |
| 370 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 417 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); |
| @@ -374,7 +421,7 @@ void RunInterpreter(UnitState& state) { | |||
| 374 | 421 | ||
| 375 | default: | 422 | default: |
| 376 | { | 423 | { |
| 377 | static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | 424 | static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { |
| 378 | bool results[2] = { refx == state.conditional_code[0], | 425 | bool results[2] = { refx == state.conditional_code[0], |
| 379 | refy == state.conditional_code[1] }; | 426 | refy == state.conditional_code[1] }; |
| 380 | 427 | ||
| @@ -400,12 +447,14 @@ void RunInterpreter(UnitState& state) { | |||
| 400 | break; | 447 | break; |
| 401 | 448 | ||
| 402 | case OpCode::Id::JMPC: | 449 | case OpCode::Id::JMPC: |
| 450 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 403 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 451 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 404 | state.program_counter = instr.flow_control.dest_offset - 1; | 452 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 405 | } | 453 | } |
| 406 | break; | 454 | break; |
| 407 | 455 | ||
| 408 | case OpCode::Id::JMPU: | 456 | case OpCode::Id::JMPU: |
| 457 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 409 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 458 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 410 | state.program_counter = instr.flow_control.dest_offset - 1; | 459 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 411 | } | 460 | } |
| @@ -419,6 +468,7 @@ void RunInterpreter(UnitState& state) { | |||
| 419 | break; | 468 | break; |
| 420 | 469 | ||
| 421 | case OpCode::Id::CALLU: | 470 | case OpCode::Id::CALLU: |
| 471 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 422 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 472 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 423 | call(state, | 473 | call(state, |
| 424 | instr.flow_control.dest_offset, | 474 | instr.flow_control.dest_offset, |
| @@ -428,6 +478,7 @@ void RunInterpreter(UnitState& state) { | |||
| 428 | break; | 478 | break; |
| 429 | 479 | ||
| 430 | case OpCode::Id::CALLC: | 480 | case OpCode::Id::CALLC: |
| 481 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 431 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 482 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 432 | call(state, | 483 | call(state, |
| 433 | instr.flow_control.dest_offset, | 484 | instr.flow_control.dest_offset, |
| @@ -440,6 +491,7 @@ void RunInterpreter(UnitState& state) { | |||
| 440 | break; | 491 | break; |
| 441 | 492 | ||
| 442 | case OpCode::Id::IFU: | 493 | case OpCode::Id::IFU: |
| 494 | Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||
| 443 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 495 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 444 | call(state, | 496 | call(state, |
| 445 | state.program_counter + 1, | 497 | state.program_counter + 1, |
| @@ -458,6 +510,7 @@ void RunInterpreter(UnitState& state) { | |||
| 458 | { | 510 | { |
| 459 | // TODO: Do we need to consider swizzlers here? | 511 | // TODO: Do we need to consider swizzlers here? |
| 460 | 512 | ||
| 513 | Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||
| 461 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 514 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 462 | call(state, | 515 | call(state, |
| 463 | state.program_counter + 1, | 516 | state.program_counter + 1, |
| @@ -475,14 +528,19 @@ void RunInterpreter(UnitState& state) { | |||
| 475 | 528 | ||
| 476 | case OpCode::Id::LOOP: | 529 | case OpCode::Id::LOOP: |
| 477 | { | 530 | { |
| 478 | state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; | 531 | Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, |
| 532 | uniforms.i[instr.flow_control.int_uniform_id].y, | ||
| 533 | uniforms.i[instr.flow_control.int_uniform_id].z, | ||
| 534 | uniforms.i[instr.flow_control.int_uniform_id].w); | ||
| 535 | state.address_registers[2] = loop_param.y; | ||
| 479 | 536 | ||
| 537 | Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | ||
| 480 | call(state, | 538 | call(state, |
| 481 | state.program_counter + 1, | 539 | state.program_counter + 1, |
| 482 | instr.flow_control.dest_offset - state.program_counter + 1, | 540 | instr.flow_control.dest_offset - state.program_counter + 1, |
| 483 | instr.flow_control.dest_offset + 1, | 541 | instr.flow_control.dest_offset + 1, |
| 484 | uniforms.i[instr.flow_control.int_uniform_id].x, | 542 | loop_param.x, |
| 485 | uniforms.i[instr.flow_control.int_uniform_id].z); | 543 | loop_param.z); |
| 486 | break; | 544 | break; |
| 487 | } | 545 | } |
| 488 | 546 | ||
| @@ -497,12 +555,14 @@ void RunInterpreter(UnitState& state) { | |||
| 497 | } | 555 | } |
| 498 | 556 | ||
| 499 | ++state.program_counter; | 557 | ++state.program_counter; |
| 500 | 558 | ++iteration; | |
| 501 | if (exit_loop) | ||
| 502 | break; | ||
| 503 | } | 559 | } |
| 504 | } | 560 | } |
| 505 | 561 | ||
| 562 | // Explicit instantiation | ||
| 563 | template void RunInterpreter(UnitState<false>& state); | ||
| 564 | template void RunInterpreter(UnitState<true>& state); | ||
| 565 | |||
| 506 | } // namespace | 566 | } // namespace |
| 507 | 567 | ||
| 508 | } // namespace | 568 | } // namespace |
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index ad6e58e39..71bcad5ac 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -12,7 +12,8 @@ namespace Pica { | |||
| 12 | 12 | ||
| 13 | namespace Shader { | 13 | namespace Shader { |
| 14 | 14 | ||
| 15 | void RunInterpreter(UnitState& state); | 15 | template<bool Debug> |
| 16 | void RunInterpreter(UnitState<Debug>& state); | ||
| 16 | 17 | ||
| 17 | } // namespace | 18 | } // namespace |
| 18 | 19 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index ce47774d5..836942c6b 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -141,7 +141,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | |||
| 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; |
| 142 | } else { | 142 | } else { |
| 143 | src_ptr = REGISTERS; | 143 | src_ptr = REGISTERS; |
| 144 | src_offset = UnitState::InputOffset(src_reg); | 144 | src_offset = UnitState<false>::InputOffset(src_reg); |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | unsigned operand_desc_id; | 147 | unsigned operand_desc_id; |
| @@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 217 | // If all components are enabled, write the result to the destination register | 217 | // If all components are enabled, write the result to the destination register |
| 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| 219 | // Store dest back to memory | 219 | // Store dest back to memory |
| 220 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); | 220 | MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), src); |
| 221 | 221 | ||
| 222 | } else { | 222 | } else { |
| 223 | // Not all components are enabled, so mask the result when storing to the destination register... | 223 | // Not all components are enabled, so mask the result when storing to the destination register... |
| 224 | MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); | 224 | MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState<false>::OutputOffset(dest))); |
| 225 | 225 | ||
| 226 | if (Common::GetCPUCaps().sse4_1) { | 226 | if (Common::GetCPUCaps().sse4_1) { |
| 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |
| @@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | // Store dest back to memory | 242 | // Store dest back to memory |
| 243 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); | 243 | MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), SCRATCH); |
| 244 | } | 244 | } |
| 245 | } | 245 | } |
| 246 | 246 | ||