summaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/shader.cpp39
-rw-r--r--src/video_core/shader/shader.h92
-rw-r--r--src/video_core/shader/shader_interpreter.cpp290
-rw-r--r--src/video_core/shader/shader_interpreter.h5
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp212
-rw-r--r--src/video_core/shader/shader_jit_x64.h8
6 files changed, 335 insertions, 311 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index f565e2c91..852c5a9a0 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -46,10 +46,8 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
46 46
47 const auto& output_register_map = g_state.regs.vs_output_attributes[index]; 47 const auto& output_register_map = g_state.regs.vs_output_attributes[index];
48 48
49 u32 semantics[4] = { 49 u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
50 output_register_map.map_x, output_register_map.map_y, 50 output_register_map.map_z, output_register_map.map_w};
51 output_register_map.map_z, output_register_map.map_w
52 };
53 51
54 for (unsigned comp = 0; comp < 4; ++comp) { 52 for (unsigned comp = 0; comp < 4; ++comp) {
55 float24* out = ((float24*)&ret) + semantics[comp]; 53 float24* out = ((float24*)&ret) + semantics[comp];
@@ -65,19 +63,20 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
65 index++; 63 index++;
66 } 64 }
67 65
68 // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation 66 // The hardware takes the absolute and saturates vertex colors like this, *before* doing
67 // interpolation
69 for (unsigned i = 0; i < 4; ++i) { 68 for (unsigned i = 0; i < 4; ++i) {
70 ret.color[i] = float24::FromFloat32( 69 ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
71 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
72 } 70 }
73 71
74 LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " 72 LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
75 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", 73 "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
76 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 74 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(),
77 ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), 75 ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(),
78 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 76 ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(),
79 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), 77 ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
80 ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); 78 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), ret.view.x.ToFloat32(),
79 ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
81 80
82 return ret; 81 return ret;
83} 82}
@@ -96,8 +95,9 @@ void ClearCache() {
96void ShaderSetup::Setup() { 95void ShaderSetup::Setup() {
97#ifdef ARCHITECTURE_x86_64 96#ifdef ARCHITECTURE_x86_64
98 if (VideoCore::g_shader_jit_enabled) { 97 if (VideoCore::g_shader_jit_enabled) {
99 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 98 u64 cache_key =
100 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); 99 (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
100 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
101 101
102 auto iter = shader_map.find(cache_key); 102 auto iter = shader_map.find(cache_key);
103 if (iter != shader_map.end()) { 103 if (iter != shader_map.end()) {
@@ -127,7 +127,7 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
127 const auto& attribute_register_map = config.input_register_map; 127 const auto& attribute_register_map = config.input_register_map;
128 128
129 for (unsigned i = 0; i < num_attributes; i++) 129 for (unsigned i = 0; i < num_attributes; i++)
130 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; 130 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
131 131
132 state.conditional_code[0] = false; 132 state.conditional_code[0] = false;
133 state.conditional_code[1] = false; 133 state.conditional_code[1] = false;
@@ -140,10 +140,11 @@ void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num
140#else 140#else
141 RunInterpreter(setup, state, config.main_offset); 141 RunInterpreter(setup, state, config.main_offset);
142#endif // ARCHITECTURE_x86_64 142#endif // ARCHITECTURE_x86_64
143
144} 143}
145 144
146DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 145DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
146 const Regs::ShaderConfig& config,
147 const ShaderSetup& setup) {
147 UnitState<true> state; 148 UnitState<true> state;
148 149
149 state.debug.max_offset = 0; 150 state.debug.max_offset = 0;
@@ -155,7 +156,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
155 boost::fill(state.registers.input, &dummy_register); 156 boost::fill(state.registers.input, &dummy_register);
156 157
157 for (unsigned i = 0; i < num_attributes; i++) 158 for (unsigned i = 0; i < num_attributes; i++)
158 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; 159 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
159 160
160 state.conditional_code[0] = false; 161 state.conditional_code[0] = false;
161 state.conditional_code[1] = false; 162 state.conditional_code[1] = false;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index fee16df62..830d933a8 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -94,46 +94,46 @@ struct OutputRegisters {
94static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); 94static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
95 95
96// Helper structure used to keep track of data useful for inspection of shader emulation 96// Helper structure used to keep track of data useful for inspection of shader emulation
97template<bool full_debugging> 97template <bool full_debugging>
98struct DebugData; 98struct DebugData;
99 99
100template<> 100template <>
101struct DebugData<false> { 101struct DebugData<false> {
102 // TODO: Hide these behind and interface and move them to DebugData<true> 102 // TODO: Hide these behind and interface and move them to DebugData<true>
103 u32 max_offset; // maximum program counter ever reached 103 u32 max_offset; // maximum program counter ever reached
104 u32 max_opdesc_id; // maximum swizzle pattern index ever used 104 u32 max_opdesc_id; // maximum swizzle pattern index ever used
105}; 105};
106 106
107template<> 107template <>
108struct DebugData<true> { 108struct DebugData<true> {
109 // Records store the input and output operands of a particular instruction. 109 // Records store the input and output operands of a particular instruction.
110 struct Record { 110 struct Record {
111 enum Type { 111 enum Type {
112 // Floating point arithmetic operands 112 // Floating point arithmetic operands
113 SRC1 = 0x1, 113 SRC1 = 0x1,
114 SRC2 = 0x2, 114 SRC2 = 0x2,
115 SRC3 = 0x4, 115 SRC3 = 0x4,
116 116
117 // Initial and final output operand value 117 // Initial and final output operand value
118 DEST_IN = 0x8, 118 DEST_IN = 0x8,
119 DEST_OUT = 0x10, 119 DEST_OUT = 0x10,
120 120
121 // Current and next instruction offset (in words) 121 // Current and next instruction offset (in words)
122 CUR_INSTR = 0x20, 122 CUR_INSTR = 0x20,
123 NEXT_INSTR = 0x40, 123 NEXT_INSTR = 0x40,
124 124
125 // Output address register value 125 // Output address register value
126 ADDR_REG_OUT = 0x80, 126 ADDR_REG_OUT = 0x80,
127 127
128 // Result of a comparison instruction 128 // Result of a comparison instruction
129 CMP_RESULT = 0x100, 129 CMP_RESULT = 0x100,
130 130
131 // Input values for conditional flow control instructions 131 // Input values for conditional flow control instructions
132 COND_BOOL_IN = 0x200, 132 COND_BOOL_IN = 0x200,
133 COND_CMP_IN = 0x400, 133 COND_CMP_IN = 0x400,
134 134
135 // Input values for a loop 135 // Input values for a loop
136 LOOP_INT_IN = 0x800, 136 LOOP_INT_IN = 0x800,
137 }; 137 };
138 138
139 Math::Vec4<float24> src1; 139 Math::Vec4<float24> src1;
@@ -156,7 +156,7 @@ struct DebugData<true> {
156 unsigned mask = 0; 156 unsigned mask = 0;
157 }; 157 };
158 158
159 u32 max_offset; // maximum program counter ever reached 159 u32 max_offset; // maximum program counter ever reached
160 u32 max_opdesc_id; // maximum swizzle pattern index ever used 160 u32 max_opdesc_id; // maximum swizzle pattern index ever used
161 161
162 // List of records for each executed shader instruction 162 // List of records for each executed shader instruction
@@ -167,10 +167,10 @@ struct DebugData<true> {
167using DebugDataRecord = DebugData<true>::Record; 167using DebugDataRecord = DebugData<true>::Record;
168 168
169// Helper function to set a DebugData<true>::Record field based on the template enum parameter. 169// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
170template<DebugDataRecord::Type type, typename ValueType> 170template <DebugDataRecord::Type type, typename ValueType>
171inline void SetField(DebugDataRecord& record, ValueType value); 171inline void SetField(DebugDataRecord& record, ValueType value);
172 172
173template<> 173template <>
174inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { 174inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
175 record.src1.x = value[0]; 175 record.src1.x = value[0];
176 record.src1.y = value[1]; 176 record.src1.y = value[1];
@@ -178,7 +178,7 @@ inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* va
178 record.src1.w = value[3]; 178 record.src1.w = value[3];
179} 179}
180 180
181template<> 181template <>
182inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { 182inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
183 record.src2.x = value[0]; 183 record.src2.x = value[0];
184 record.src2.y = value[1]; 184 record.src2.y = value[1];
@@ -186,7 +186,7 @@ inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* va
186 record.src2.w = value[3]; 186 record.src2.w = value[3];
187} 187}
188 188
189template<> 189template <>
190inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { 190inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
191 record.src3.x = value[0]; 191 record.src3.x = value[0];
192 record.src3.y = value[1]; 192 record.src3.y = value[1];
@@ -194,7 +194,7 @@ inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* va
194 record.src3.w = value[3]; 194 record.src3.w = value[3];
195} 195}
196 196
197template<> 197template <>
198inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { 198inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
199 record.dest_in.x = value[0]; 199 record.dest_in.x = value[0];
200 record.dest_in.y = value[1]; 200 record.dest_in.y = value[1];
@@ -202,7 +202,7 @@ inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24*
202 record.dest_in.w = value[3]; 202 record.dest_in.w = value[3];
203} 203}
204 204
205template<> 205template <>
206inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { 206inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
207 record.dest_out.x = value[0]; 207 record.dest_out.x = value[0];
208 record.dest_out.y = value[1]; 208 record.dest_out.y = value[1];
@@ -210,67 +210,66 @@ inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24
210 record.dest_out.w = value[3]; 210 record.dest_out.w = value[3];
211} 211}
212 212
213template<> 213template <>
214inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { 214inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
215 record.address_registers[0] = value[0]; 215 record.address_registers[0] = value[0];
216 record.address_registers[1] = value[1]; 216 record.address_registers[1] = value[1];
217} 217}
218 218
219template<> 219template <>
220inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { 220inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
221 record.conditional_code[0] = value[0]; 221 record.conditional_code[0] = value[0];
222 record.conditional_code[1] = value[1]; 222 record.conditional_code[1] = value[1];
223} 223}
224 224
225template<> 225template <>
226inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { 226inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
227 record.cond_bool = value; 227 record.cond_bool = value;
228} 228}
229 229
230template<> 230template <>
231inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { 231inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
232 record.cond_cmp[0] = value[0]; 232 record.cond_cmp[0] = value[0];
233 record.cond_cmp[1] = value[1]; 233 record.cond_cmp[1] = value[1];
234} 234}
235 235
236template<> 236template <>
237inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { 237inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
238 record.loop_int = value; 238 record.loop_int = value;
239} 239}
240 240
241template<> 241template <>
242inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { 242inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
243 record.instruction_offset = value; 243 record.instruction_offset = value;
244} 244}
245 245
246template<> 246template <>
247inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { 247inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
248 record.next_instruction = value; 248 record.next_instruction = value;
249} 249}
250 250
251// Helper function to set debug information on the current shader iteration. 251// Helper function to set debug information on the current shader iteration.
252template<DebugDataRecord::Type type, typename ValueType> 252template <DebugDataRecord::Type type, typename ValueType>
253inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { 253inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
254 // Debugging disabled => nothing to do 254 // Debugging disabled => nothing to do
255} 255}
256 256
257template<DebugDataRecord::Type type, typename ValueType> 257template <DebugDataRecord::Type type, typename ValueType>
258inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { 258inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
259 if (offset >= debug_data.records.size()) 259 if (offset >= debug_data.records.size())
260 debug_data.records.resize(offset + 1); 260 debug_data.records.resize(offset + 1);
261 261
262 SetField<type, ValueType>(debug_data.records[offset], value); 262 SetField<type, ValueType>(debug_data.records[offset], value);
263 debug_data.records[offset].mask |= type; 263 debug_data.records[offset].mask |= type;
264} 264}
265 265
266
267/** 266/**
268 * This structure contains the state information that needs to be unique for a shader unit. The 3DS 267 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
269 * has four shader units that process shaders in parallel. At the present, Citra only implements a 268 * has four shader units that process shaders in parallel. At the present, Citra only implements a
270 * single shader unit that processes all shaders serially. Putting the state information in a struct 269 * single shader unit that processes all shaders serially. Putting the state information in a struct
271 * here will make it easier for us to parallelize the shader processing later. 270 * here will make it easier for us to parallelize the shader processing later.
272 */ 271 */
273template<bool Debug> 272template <bool Debug>
274struct UnitState { 273struct UnitState {
275 struct Registers { 274 struct Registers {
276 // The registers are accessed by the shader JIT using SSE instructions, and are therefore 275 // The registers are accessed by the shader JIT using SSE instructions, and are therefore
@@ -293,10 +292,12 @@ struct UnitState {
293 static size_t InputOffset(const SourceRegister& reg) { 292 static size_t InputOffset(const SourceRegister& reg) {
294 switch (reg.GetRegisterType()) { 293 switch (reg.GetRegisterType()) {
295 case RegisterType::Input: 294 case RegisterType::Input:
296 return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 295 return offsetof(UnitState, registers.input) +
296 reg.GetIndex() * sizeof(Math::Vec4<float24>);
297 297
298 case RegisterType::Temporary: 298 case RegisterType::Temporary:
299 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 299 return offsetof(UnitState, registers.temporary) +
300 reg.GetIndex() * sizeof(Math::Vec4<float24>);
300 301
301 default: 302 default:
302 UNREACHABLE(); 303 UNREACHABLE();
@@ -307,10 +308,12 @@ struct UnitState {
307 static size_t OutputOffset(const DestRegister& reg) { 308 static size_t OutputOffset(const DestRegister& reg) {
308 switch (reg.GetRegisterType()) { 309 switch (reg.GetRegisterType()) {
309 case RegisterType::Output: 310 case RegisterType::Output:
310 return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 311 return offsetof(UnitState, output_registers.value) +
312 reg.GetIndex() * sizeof(Math::Vec4<float24>);
311 313
312 case RegisterType::Temporary: 314 case RegisterType::Temporary:
313 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 315 return offsetof(UnitState, registers.temporary) +
316 reg.GetIndex() * sizeof(Math::Vec4<float24>);
314 317
315 default: 318 default:
316 UNREACHABLE(); 319 UNREACHABLE();
@@ -336,13 +339,13 @@ struct ShaderSetup {
336 static size_t UniformOffset(RegisterType type, unsigned index) { 339 static size_t UniformOffset(RegisterType type, unsigned index) {
337 switch (type) { 340 switch (type) {
338 case RegisterType::FloatUniform: 341 case RegisterType::FloatUniform:
339 return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); 342 return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
340 343
341 case RegisterType::BoolUniform: 344 case RegisterType::BoolUniform:
342 return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); 345 return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
343 346
344 case RegisterType::IntUniform: 347 case RegisterType::IntUniform:
345 return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); 348 return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
346 349
347 default: 350 default:
348 UNREACHABLE(); 351 UNREACHABLE();
@@ -354,7 +357,8 @@ struct ShaderSetup {
354 std::array<u32, 1024> swizzle_data; 357 std::array<u32, 1024> swizzle_data;
355 358
356 /** 359 /**
357 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 360 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once
361 * per
358 * vertex, which would happen within the `Run` function). 362 * vertex, which would happen within the `Run` function).
359 */ 363 */
360 void Setup(); 364 void Setup();
@@ -375,8 +379,8 @@ struct ShaderSetup {
375 * @param setup Setup object for the shader pipeline 379 * @param setup Setup object for the shader pipeline
376 * @return Debug information for this shader with regards to the given vertex 380 * @return Debug information for this shader with regards to the given vertex
377 */ 381 */
378 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); 382 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
379 383 const Regs::ShaderConfig& config, const ShaderSetup& setup);
380}; 384};
381 385
382} // namespace Shader 386} // namespace Shader
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index f6c86a759..681ff9728 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -40,7 +40,7 @@ struct CallStackElement {
40 u32 loop_address; // The address where we'll return to after each loop iteration 40 u32 loop_address; // The address where we'll return to after each loop iteration
41}; 41};
42 42
43template<bool Debug> 43template <bool Debug>
44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { 44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
45 // TODO: Is there a maximal size for this? 45 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack; 46 boost::container::static_vector<CallStackElement, 16> call_stack;
@@ -74,14 +74,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
74 } 74 }
75 } 75 }
76 76
77 const Instruction instr = { program_code[program_counter] }; 77 const Instruction instr = {program_code[program_counter]};
78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 78 const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]};
79 79
80 auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, 80 auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset,
81 u32 return_offset, u8 repeat_count, u8 loop_increment) { 81 u32 num_instructions, u32 return_offset,
82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 82 u8 repeat_count, u8 loop_increment) {
83 program_counter =
84 offset -
85 1; // -1 to make sure when incrementing the PC we end up at the correct offset
83 ASSERT(call_stack.size() < call_stack.capacity()); 86 ASSERT(call_stack.size() < call_stack.capacity());
84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 87 call_stack.push_back(
88 {offset + num_instructions, return_offset, repeat_count, loop_increment, offset});
85 }; 89 };
86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); 90 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
87 if (iteration > 0) 91 if (iteration > 0)
@@ -106,24 +110,26 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
106 }; 110 };
107 111
108 switch (instr.opcode.Value().GetInfo().type) { 112 switch (instr.opcode.Value().GetInfo().type) {
109 case OpCode::Type::Arithmetic: 113 case OpCode::Type::Arithmetic: {
110 { 114 const bool is_inverted =
111 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); 115 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
112 116
113 const int address_offset = (instr.common.address_register_index == 0) 117 const int address_offset =
114 ? 0 : state.address_registers[instr.common.address_register_index - 1]; 118 (instr.common.address_register_index == 0)
119 ? 0
120 : state.address_registers[instr.common.address_register_index - 1];
115 121
116 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); 122 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) +
117 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); 123 (!is_inverted * address_offset));
124 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) +
125 (is_inverted * address_offset));
118 126
119 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); 127 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
120 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); 128 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
121 129
122 float24 src1[4] = { 130 float24 src1[4] = {
123 src1_[(int)swizzle.GetSelectorSrc1(0)], 131 src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
124 src1_[(int)swizzle.GetSelectorSrc1(1)], 132 src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
125 src1_[(int)swizzle.GetSelectorSrc1(2)],
126 src1_[(int)swizzle.GetSelectorSrc1(3)],
127 }; 133 };
128 if (negate_src1) { 134 if (negate_src1) {
129 src1[0] = src1[0] * float24::FromFloat32(-1); 135 src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -132,10 +138,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
132 src1[3] = src1[3] * float24::FromFloat32(-1); 138 src1[3] = src1[3] * float24::FromFloat32(-1);
133 } 139 }
134 float24 src2[4] = { 140 float24 src2[4] = {
135 src2_[(int)swizzle.GetSelectorSrc2(0)], 141 src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
136 src2_[(int)swizzle.GetSelectorSrc2(1)], 142 src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
137 src2_[(int)swizzle.GetSelectorSrc2(2)],
138 src2_[(int)swizzle.GetSelectorSrc2(3)],
139 }; 143 };
140 if (negate_src2) { 144 if (negate_src2) {
141 src2[0] = src2[0] * float24::FromFloat32(-1); 145 src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -144,15 +148,18 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
144 src2[3] = src2[3] * float24::FromFloat32(-1); 148 src2[3] = src2[3] * float24::FromFloat32(-1);
145 } 149 }
146 150
147 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] 151 float24* dest =
148 : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] 152 (instr.common.dest.Value() < 0x10)
149 : dummy_vec4_float24; 153 ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
154 : (instr.common.dest.Value() < 0x20)
155 ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
156 : dummy_vec4_float24;
150 157
151 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); 158 state.debug.max_opdesc_id =
159 std::max<u32>(state.debug.max_opdesc_id, 1 + instr.common.operand_desc_id);
152 160
153 switch (instr.opcode.Value().EffectiveOpCode()) { 161 switch (instr.opcode.Value().EffectiveOpCode()) {
154 case OpCode::Id::ADD: 162 case OpCode::Id::ADD: {
155 {
156 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 163 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
157 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 164 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
158 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 165 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -166,8 +173,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
166 break; 173 break;
167 } 174 }
168 175
169 case OpCode::Id::MUL: 176 case OpCode::Id::MUL: {
170 {
171 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 177 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
172 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 178 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
173 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 179 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -228,8 +234,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
228 case OpCode::Id::DP3: 234 case OpCode::Id::DP3:
229 case OpCode::Id::DP4: 235 case OpCode::Id::DP4:
230 case OpCode::Id::DPH: 236 case OpCode::Id::DPH:
231 case OpCode::Id::DPHI: 237 case OpCode::Id::DPHI: {
232 {
233 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 238 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
234 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 239 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
235 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 240 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
@@ -239,7 +244,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
239 src1[3] = float24::FromFloat32(1.0f); 244 src1[3] = float24::FromFloat32(1.0f);
240 245
241 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; 246 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
242 float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f)); 247 float24 dot = std::inner_product(src1, src1 + num_components, src2,
248 float24::FromFloat32(0.f));
243 249
244 for (int i = 0; i < 4; ++i) { 250 for (int i = 0; i < 4; ++i) {
245 if (!swizzle.DestComponentEnabled(i)) 251 if (!swizzle.DestComponentEnabled(i))
@@ -252,8 +258,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
252 } 258 }
253 259
254 // Reciprocal 260 // Reciprocal
255 case OpCode::Id::RCP: 261 case OpCode::Id::RCP: {
256 {
257 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 262 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
258 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 263 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
259 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); 264 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
@@ -268,8 +273,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
268 } 273 }
269 274
270 // Reciprocal Square Root 275 // Reciprocal Square Root
271 case OpCode::Id::RSQ: 276 case OpCode::Id::RSQ: {
272 {
273 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 277 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
274 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 278 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
275 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); 279 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
@@ -283,8 +287,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
283 break; 287 break;
284 } 288 }
285 289
286 case OpCode::Id::MOVA: 290 case OpCode::Id::MOVA: {
287 {
288 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 291 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
289 for (int i = 0; i < 2; ++i) { 292 for (int i = 0; i < 2; ++i) {
290 if (!swizzle.DestComponentEnabled(i)) 293 if (!swizzle.DestComponentEnabled(i))
@@ -293,12 +296,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
293 // TODO: Figure out how the rounding is done on hardware 296 // TODO: Figure out how the rounding is done on hardware
294 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); 297 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
295 } 298 }
296 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); 299 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration,
300 state.address_registers);
297 break; 301 break;
298 } 302 }
299 303
300 case OpCode::Id::MOV: 304 case OpCode::Id::MOV: {
301 {
302 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 305 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
303 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 306 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
304 for (int i = 0; i < 4; ++i) { 307 for (int i = 0; i < 4; ++i) {
@@ -320,7 +323,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
320 if (!swizzle.DestComponentEnabled(i)) 323 if (!swizzle.DestComponentEnabled(i))
321 continue; 324 continue;
322 325
323 dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 326 dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f)
327 : float24::FromFloat32(0.0f);
324 } 328 }
325 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 329 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
326 break; 330 break;
@@ -334,7 +338,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
334 if (!swizzle.DestComponentEnabled(i)) 338 if (!swizzle.DestComponentEnabled(i))
335 continue; 339 continue;
336 340
337 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); 341 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f)
342 : float24::FromFloat32(0.0f);
338 } 343 }
339 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 344 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
340 break; 345 break;
@@ -349,40 +354,39 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
349 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); 354 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
350 355
351 switch (op) { 356 switch (op) {
352 case Instruction::Common::CompareOpType::Equal: 357 case Instruction::Common::CompareOpType::Equal:
353 state.conditional_code[i] = (src1[i] == src2[i]); 358 state.conditional_code[i] = (src1[i] == src2[i]);
354 break; 359 break;
355 360
356 case Instruction::Common::CompareOpType::NotEqual: 361 case Instruction::Common::CompareOpType::NotEqual:
357 state.conditional_code[i] = (src1[i] != src2[i]); 362 state.conditional_code[i] = (src1[i] != src2[i]);
358 break; 363 break;
359 364
360 case Instruction::Common::CompareOpType::LessThan: 365 case Instruction::Common::CompareOpType::LessThan:
361 state.conditional_code[i] = (src1[i] < src2[i]); 366 state.conditional_code[i] = (src1[i] < src2[i]);
362 break; 367 break;
363 368
364 case Instruction::Common::CompareOpType::LessEqual: 369 case Instruction::Common::CompareOpType::LessEqual:
365 state.conditional_code[i] = (src1[i] <= src2[i]); 370 state.conditional_code[i] = (src1[i] <= src2[i]);
366 break; 371 break;
367 372
368 case Instruction::Common::CompareOpType::GreaterThan: 373 case Instruction::Common::CompareOpType::GreaterThan:
369 state.conditional_code[i] = (src1[i] > src2[i]); 374 state.conditional_code[i] = (src1[i] > src2[i]);
370 break; 375 break;
371 376
372 case Instruction::Common::CompareOpType::GreaterEqual: 377 case Instruction::Common::CompareOpType::GreaterEqual:
373 state.conditional_code[i] = (src1[i] >= src2[i]); 378 state.conditional_code[i] = (src1[i] >= src2[i]);
374 break; 379 break;
375 380
376 default: 381 default:
377 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); 382 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
378 break; 383 break;
379 } 384 }
380 } 385 }
381 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); 386 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
382 break; 387 break;
383 388
384 case OpCode::Id::EX2: 389 case OpCode::Id::EX2: {
385 {
386 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 390 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
387 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 391 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
388 392
@@ -399,8 +403,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
399 break; 403 break;
400 } 404 }
401 405
402 case OpCode::Id::LG2: 406 case OpCode::Id::LG2: {
403 {
404 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 407 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
405 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 408 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
406 409
@@ -419,7 +422,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
419 422
420 default: 423 default:
421 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", 424 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
422 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 425 (int)instr.opcode.Value().EffectiveOpCode(),
426 instr.opcode.Value().GetInfo().name, instr.hex);
423 DEBUG_ASSERT(false); 427 DEBUG_ASSERT(false);
424 break; 428 break;
425 } 429 }
@@ -427,30 +431,32 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
427 break; 431 break;
428 } 432 }
429 433
430 case OpCode::Type::MultiplyAdd: 434 case OpCode::Type::MultiplyAdd: {
431 {
432 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || 435 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
433 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { 436 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
434 const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]); 437 const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(
438 &swizzle_data[instr.mad.operand_desc_id]);
435 439
436 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); 440 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
437 441
438 const int address_offset = (instr.mad.address_register_index == 0) 442 const int address_offset =
439 ? 0 : state.address_registers[instr.mad.address_register_index - 1]; 443 (instr.mad.address_register_index == 0)
444 ? 0
445 : state.address_registers[instr.mad.address_register_index - 1];
440 446
441 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); 447 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
442 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); 448 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) +
443 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); 449 (!is_inverted * address_offset));
450 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) +
451 (is_inverted * address_offset));
444 452
445 const bool negate_src1 = ((bool)swizzle.negate_src1 != false); 453 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
446 const bool negate_src2 = ((bool)swizzle.negate_src2 != false); 454 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
447 const bool negate_src3 = ((bool)swizzle.negate_src3 != false); 455 const bool negate_src3 = ((bool)swizzle.negate_src3 != false);
448 456
449 float24 src1[4] = { 457 float24 src1[4] = {
450 src1_[(int)swizzle.GetSelectorSrc1(0)], 458 src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)],
451 src1_[(int)swizzle.GetSelectorSrc1(1)], 459 src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)],
452 src1_[(int)swizzle.GetSelectorSrc1(2)],
453 src1_[(int)swizzle.GetSelectorSrc1(3)],
454 }; 460 };
455 if (negate_src1) { 461 if (negate_src1) {
456 src1[0] = src1[0] * float24::FromFloat32(-1); 462 src1[0] = src1[0] * float24::FromFloat32(-1);
@@ -459,10 +465,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
459 src1[3] = src1[3] * float24::FromFloat32(-1); 465 src1[3] = src1[3] * float24::FromFloat32(-1);
460 } 466 }
461 float24 src2[4] = { 467 float24 src2[4] = {
462 src2_[(int)swizzle.GetSelectorSrc2(0)], 468 src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)],
463 src2_[(int)swizzle.GetSelectorSrc2(1)], 469 src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)],
464 src2_[(int)swizzle.GetSelectorSrc2(2)],
465 src2_[(int)swizzle.GetSelectorSrc2(3)],
466 }; 470 };
467 if (negate_src2) { 471 if (negate_src2) {
468 src2[0] = src2[0] * float24::FromFloat32(-1); 472 src2[0] = src2[0] * float24::FromFloat32(-1);
@@ -471,10 +475,8 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
471 src2[3] = src2[3] * float24::FromFloat32(-1); 475 src2[3] = src2[3] * float24::FromFloat32(-1);
472 } 476 }
473 float24 src3[4] = { 477 float24 src3[4] = {
474 src3_[(int)swizzle.GetSelectorSrc3(0)], 478 src3_[(int)swizzle.GetSelectorSrc3(0)], src3_[(int)swizzle.GetSelectorSrc3(1)],
475 src3_[(int)swizzle.GetSelectorSrc3(1)], 479 src3_[(int)swizzle.GetSelectorSrc3(2)], src3_[(int)swizzle.GetSelectorSrc3(3)],
476 src3_[(int)swizzle.GetSelectorSrc3(2)],
477 src3_[(int)swizzle.GetSelectorSrc3(3)],
478 }; 480 };
479 if (negate_src3) { 481 if (negate_src3) {
480 src3[0] = src3[0] * float24::FromFloat32(-1); 482 src3[0] = src3[0] * float24::FromFloat32(-1);
@@ -483,9 +485,12 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
483 src3[3] = src3[3] * float24::FromFloat32(-1); 485 src3[3] = src3[3] * float24::FromFloat32(-1);
484 } 486 }
485 487
486 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] 488 float24* dest =
487 : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] 489 (instr.mad.dest.Value() < 0x10)
488 : dummy_vec4_float24; 490 ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
491 : (instr.mad.dest.Value() < 0x20)
492 ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
493 : dummy_vec4_float24;
489 494
490 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 495 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
491 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 496 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
@@ -500,16 +505,17 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
500 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 505 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
501 } else { 506 } else {
502 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", 507 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
503 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 508 (int)instr.opcode.Value().EffectiveOpCode(),
509 instr.opcode.Value().GetInfo().name, instr.hex);
504 } 510 }
505 break; 511 break;
506 } 512 }
507 513
508 default: 514 default: {
509 { 515 static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy,
510 static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { 516 Instruction::FlowControlType flow_control) {
511 bool results[2] = { refx == state.conditional_code[0], 517 bool results[2] = {refx == state.conditional_code[0],
512 refy == state.conditional_code[1] }; 518 refy == state.conditional_code[1]};
513 519
514 switch (flow_control.op) { 520 switch (flow_control.op) {
515 case flow_control.Or: 521 case flow_control.Or:
@@ -533,44 +539,45 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
533 break; 539 break;
534 540
535 case OpCode::Id::JMPC: 541 case OpCode::Id::JMPC:
536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 542 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 543 state.conditional_code);
544 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
545 instr.flow_control)) {
538 program_counter = instr.flow_control.dest_offset - 1; 546 program_counter = instr.flow_control.dest_offset - 1;
539 } 547 }
540 break; 548 break;
541 549
542 case OpCode::Id::JMPU: 550 case OpCode::Id::JMPU:
543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 551 Record<DebugDataRecord::COND_BOOL_IN>(
552 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
544 553
545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 554 if (uniforms.b[instr.flow_control.bool_uniform_id] ==
555 !(instr.flow_control.num_instructions & 1)) {
546 program_counter = instr.flow_control.dest_offset - 1; 556 program_counter = instr.flow_control.dest_offset - 1;
547 } 557 }
548 break; 558 break;
549 559
550 case OpCode::Id::CALL: 560 case OpCode::Id::CALL:
551 call(state, 561 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
552 instr.flow_control.dest_offset,
553 instr.flow_control.num_instructions,
554 program_counter + 1, 0, 0); 562 program_counter + 1, 0, 0);
555 break; 563 break;
556 564
557 case OpCode::Id::CALLU: 565 case OpCode::Id::CALLU:
558 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 566 Record<DebugDataRecord::COND_BOOL_IN>(
567 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
559 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 568 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
560 call(state, 569 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
561 instr.flow_control.dest_offset, 570 program_counter + 1, 0, 0);
562 instr.flow_control.num_instructions,
563 program_counter + 1, 0, 0);
564 } 571 }
565 break; 572 break;
566 573
567 case OpCode::Id::CALLC: 574 case OpCode::Id::CALLC:
568 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 575 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
569 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 576 state.conditional_code);
570 call(state, 577 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
571 instr.flow_control.dest_offset, 578 instr.flow_control)) {
572 instr.flow_control.num_instructions, 579 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
573 program_counter + 1, 0, 0); 580 program_counter + 1, 0, 0);
574 } 581 }
575 break; 582 break;
576 583
@@ -578,43 +585,42 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
578 break; 585 break;
579 586
580 case OpCode::Id::IFU: 587 case OpCode::Id::IFU:
581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 588 Record<DebugDataRecord::COND_BOOL_IN>(
589 state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
582 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 590 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
583 call(state, 591 call(state, program_counter + 1,
584 program_counter + 1,
585 instr.flow_control.dest_offset - program_counter - 1, 592 instr.flow_control.dest_offset - program_counter - 1,
586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 593 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
594 0);
587 } else { 595 } else {
588 call(state, 596 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
589 instr.flow_control.dest_offset, 597 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
590 instr.flow_control.num_instructions, 598 0);
591 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
592 } 599 }
593 600
594 break; 601 break;
595 602
596 case OpCode::Id::IFC: 603 case OpCode::Id::IFC: {
597 {
598 // TODO: Do we need to consider swizzlers here? 604 // TODO: Do we need to consider swizzlers here?
599 605
600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 606 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration,
601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 607 state.conditional_code);
602 call(state, 608 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy,
603 program_counter + 1, 609 instr.flow_control)) {
610 call(state, program_counter + 1,
604 instr.flow_control.dest_offset - program_counter - 1, 611 instr.flow_control.dest_offset - program_counter - 1,
605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 612 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
613 0);
606 } else { 614 } else {
607 call(state, 615 call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions,
608 instr.flow_control.dest_offset, 616 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0,
609 instr.flow_control.num_instructions, 617 0);
610 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
611 } 618 }
612 619
613 break; 620 break;
614 } 621 }
615 622
616 case OpCode::Id::LOOP: 623 case OpCode::Id::LOOP: {
617 {
618 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, 624 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x,
619 uniforms.i[instr.flow_control.int_uniform_id].y, 625 uniforms.i[instr.flow_control.int_uniform_id].y,
620 uniforms.i[instr.flow_control.int_uniform_id].z, 626 uniforms.i[instr.flow_control.int_uniform_id].z,
@@ -622,18 +628,16 @@ void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned
622 state.address_registers[2] = loop_param.y; 628 state.address_registers[2] = loop_param.y;
623 629
624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 630 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
625 call(state, 631 call(state, program_counter + 1,
626 program_counter + 1,
627 instr.flow_control.dest_offset - program_counter + 1, 632 instr.flow_control.dest_offset - program_counter + 1,
628 instr.flow_control.dest_offset + 1, 633 instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z);
629 loop_param.x,
630 loop_param.z);
631 break; 634 break;
632 } 635 }
633 636
634 default: 637 default:
635 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", 638 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
636 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 639 (int)instr.opcode.Value().EffectiveOpCode(),
640 instr.opcode.Value().GetInfo().name, instr.hex);
637 break; 641 break;
638 } 642 }
639 643
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index bb3ce1c6e..48ede0a2e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -8,9 +8,10 @@ namespace Pica {
8 8
9namespace Shader { 9namespace Shader {
10 10
11template <bool Debug> struct UnitState; 11template <bool Debug>
12struct UnitState;
12 13
13template<bool Debug> 14template <bool Debug>
14void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); 15void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
15 16
16} // namespace 17} // namespace
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 43e7e6b4c..04e04ba1a 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -31,70 +31,70 @@ using namespace Gen;
31typedef void (JitShader::*JitFunction)(Instruction instr); 31typedef void (JitShader::*JitFunction)(Instruction instr);
32 32
33const JitFunction instr_table[64] = { 33const JitFunction instr_table[64] = {
34 &JitShader::Compile_ADD, // add 34 &JitShader::Compile_ADD, // add
35 &JitShader::Compile_DP3, // dp3 35 &JitShader::Compile_DP3, // dp3
36 &JitShader::Compile_DP4, // dp4 36 &JitShader::Compile_DP4, // dp4
37 &JitShader::Compile_DPH, // dph 37 &JitShader::Compile_DPH, // dph
38 nullptr, // unknown 38 nullptr, // unknown
39 &JitShader::Compile_EX2, // ex2 39 &JitShader::Compile_EX2, // ex2
40 &JitShader::Compile_LG2, // lg2 40 &JitShader::Compile_LG2, // lg2
41 nullptr, // unknown 41 nullptr, // unknown
42 &JitShader::Compile_MUL, // mul 42 &JitShader::Compile_MUL, // mul
43 &JitShader::Compile_SGE, // sge 43 &JitShader::Compile_SGE, // sge
44 &JitShader::Compile_SLT, // slt 44 &JitShader::Compile_SLT, // slt
45 &JitShader::Compile_FLR, // flr 45 &JitShader::Compile_FLR, // flr
46 &JitShader::Compile_MAX, // max 46 &JitShader::Compile_MAX, // max
47 &JitShader::Compile_MIN, // min 47 &JitShader::Compile_MIN, // min
48 &JitShader::Compile_RCP, // rcp 48 &JitShader::Compile_RCP, // rcp
49 &JitShader::Compile_RSQ, // rsq 49 &JitShader::Compile_RSQ, // rsq
50 nullptr, // unknown 50 nullptr, // unknown
51 nullptr, // unknown 51 nullptr, // unknown
52 &JitShader::Compile_MOVA, // mova 52 &JitShader::Compile_MOVA, // mova
53 &JitShader::Compile_MOV, // mov 53 &JitShader::Compile_MOV, // mov
54 nullptr, // unknown 54 nullptr, // unknown
55 nullptr, // unknown 55 nullptr, // unknown
56 nullptr, // unknown 56 nullptr, // unknown
57 nullptr, // unknown 57 nullptr, // unknown
58 &JitShader::Compile_DPH, // dphi 58 &JitShader::Compile_DPH, // dphi
59 nullptr, // unknown 59 nullptr, // unknown
60 &JitShader::Compile_SGE, // sgei 60 &JitShader::Compile_SGE, // sgei
61 &JitShader::Compile_SLT, // slti 61 &JitShader::Compile_SLT, // slti
62 nullptr, // unknown 62 nullptr, // unknown
63 nullptr, // unknown 63 nullptr, // unknown
64 nullptr, // unknown 64 nullptr, // unknown
65 nullptr, // unknown 65 nullptr, // unknown
66 nullptr, // unknown 66 nullptr, // unknown
67 &JitShader::Compile_NOP, // nop 67 &JitShader::Compile_NOP, // nop
68 &JitShader::Compile_END, // end 68 &JitShader::Compile_END, // end
69 nullptr, // break 69 nullptr, // break
70 &JitShader::Compile_CALL, // call 70 &JitShader::Compile_CALL, // call
71 &JitShader::Compile_CALLC, // callc 71 &JitShader::Compile_CALLC, // callc
72 &JitShader::Compile_CALLU, // callu 72 &JitShader::Compile_CALLU, // callu
73 &JitShader::Compile_IF, // ifu 73 &JitShader::Compile_IF, // ifu
74 &JitShader::Compile_IF, // ifc 74 &JitShader::Compile_IF, // ifc
75 &JitShader::Compile_LOOP, // loop 75 &JitShader::Compile_LOOP, // loop
76 nullptr, // emit 76 nullptr, // emit
77 nullptr, // sete 77 nullptr, // sete
78 &JitShader::Compile_JMP, // jmpc 78 &JitShader::Compile_JMP, // jmpc
79 &JitShader::Compile_JMP, // jmpu 79 &JitShader::Compile_JMP, // jmpu
80 &JitShader::Compile_CMP, // cmp 80 &JitShader::Compile_CMP, // cmp
81 &JitShader::Compile_CMP, // cmp 81 &JitShader::Compile_CMP, // cmp
82 &JitShader::Compile_MAD, // madi 82 &JitShader::Compile_MAD, // madi
83 &JitShader::Compile_MAD, // madi 83 &JitShader::Compile_MAD, // madi
84 &JitShader::Compile_MAD, // madi 84 &JitShader::Compile_MAD, // madi
85 &JitShader::Compile_MAD, // madi 85 &JitShader::Compile_MAD, // madi
86 &JitShader::Compile_MAD, // madi 86 &JitShader::Compile_MAD, // madi
87 &JitShader::Compile_MAD, // madi 87 &JitShader::Compile_MAD, // madi
88 &JitShader::Compile_MAD, // madi 88 &JitShader::Compile_MAD, // madi
89 &JitShader::Compile_MAD, // madi 89 &JitShader::Compile_MAD, // madi
90 &JitShader::Compile_MAD, // mad 90 &JitShader::Compile_MAD, // mad
91 &JitShader::Compile_MAD, // mad 91 &JitShader::Compile_MAD, // mad
92 &JitShader::Compile_MAD, // mad 92 &JitShader::Compile_MAD, // mad
93 &JitShader::Compile_MAD, // mad 93 &JitShader::Compile_MAD, // mad
94 &JitShader::Compile_MAD, // mad 94 &JitShader::Compile_MAD, // mad
95 &JitShader::Compile_MAD, // mad 95 &JitShader::Compile_MAD, // mad
96 &JitShader::Compile_MAD, // mad 96 &JitShader::Compile_MAD, // mad
97 &JitShader::Compile_MAD, // mad 97 &JitShader::Compile_MAD, // mad
98}; 98};
99 99
100// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can 100// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -136,9 +136,9 @@ static const X64Reg NEGBIT = XMM15;
136// State registers that must not be modified by external functions calls 136// State registers that must not be modified by external functions calls
137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
138static const BitSet32 persistent_regs = { 138static const BitSet32 persistent_regs = {
139 SETUP, STATE, // Pointers to register blocks 139 SETUP, STATE, // Pointers to register blocks
140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers 140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
141 ONE+16, NEGBIT+16, // Constants 141 ONE + 16, NEGBIT + 16, // Constants
142}; 142};
143 143
144/// Raw constant for the source register selector that indicates no swizzling is performed 144/// Raw constant for the source register selector that indicates no swizzling is performed
@@ -152,7 +152,7 @@ static const u8 NO_DEST_REG_MASK = 0xf;
152 * @return Instruction at the specified offset 152 * @return Instruction at the specified offset
153 */ 153 */
154static Instruction GetVertexShaderInstruction(size_t offset) { 154static Instruction GetVertexShaderInstruction(size_t offset) {
155 return { g_state.vs.program_code[offset] }; 155 return {g_state.vs.program_code[offset]};
156} 156}
157 157
158static void LogCritical(const char* msg) { 158static void LogCritical(const char* msg) {
@@ -172,7 +172,8 @@ void JitShader::Compile_Assert(bool condition, const char* msg) {
172 * @param src_reg SourceRegister object corresponding to the source register to load 172 * @param src_reg SourceRegister object corresponding to the source register to load
173 * @param dest Destination XMM register to store the loaded, swizzled source register 173 * @param dest Destination XMM register to store the loaded, swizzled source register
174 */ 174 */
175void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { 175void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
176 X64Reg dest) {
176 X64Reg src_ptr; 177 X64Reg src_ptr;
177 size_t src_offset; 178 size_t src_offset;
178 179
@@ -189,7 +190,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
189 190
190 unsigned operand_desc_id; 191 unsigned operand_desc_id;
191 192
192 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); 193 const bool is_inverted =
194 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
193 195
194 unsigned address_register_index; 196 unsigned address_register_index;
195 unsigned offset_src; 197 unsigned offset_src;
@@ -225,7 +227,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
225 MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); 227 MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
226 } 228 }
227 229
228 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; 230 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
229 231
230 // Generate instructions for source register swizzling as needed 232 // Generate instructions for source register swizzling as needed
231 u8 sel = swiz.GetRawSelector(src_num); 233 u8 sel = swiz.GetRawSelector(src_num);
@@ -238,13 +240,13 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
238 } 240 }
239 241
240 // If the source register should be negated, flip the negative bit using XOR 242 // If the source register should be negated, flip the negative bit using XOR
241 const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 }; 243 const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
242 if (negate[src_num - 1]) { 244 if (negate[src_num - 1]) {
243 XORPS(dest, R(NEGBIT)); 245 XORPS(dest, R(NEGBIT));
244 } 246 }
245} 247}
246 248
247void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { 249void JitShader::Compile_DestEnable(Instruction instr, X64Reg src) {
248 DestRegister dest; 250 DestRegister dest;
249 unsigned operand_desc_id; 251 unsigned operand_desc_id;
250 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || 252 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -256,10 +258,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
256 dest = instr.common.dest.Value(); 258 dest = instr.common.dest.Value();
257 } 259 }
258 260
259 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; 261 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
260 262
261 int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest); 263 int dest_offset_disp = (int)UnitState<false>::OutputOffset(dest);
262 ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest), "Destinaton offset too large for int type"); 264 ASSERT_MSG(dest_offset_disp == UnitState<false>::OutputOffset(dest),
265 "Destinaton offset too large for int type");
263 266
264 // If all components are enabled, write the result to the destination register 267 // If all components are enabled, write the result to the destination register
265 if (swiz.dest_mask == NO_DEST_REG_MASK) { 268 if (swiz.dest_mask == NO_DEST_REG_MASK) {
@@ -267,18 +270,21 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
267 MOVAPS(MDisp(STATE, dest_offset_disp), src); 270 MOVAPS(MDisp(STATE, dest_offset_disp), src);
268 271
269 } else { 272 } else {
270 // Not all components are enabled, so mask the result when storing to the destination register... 273 // Not all components are enabled, so mask the result when storing to the destination
274 // register...
271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); 275 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
272 276
273 if (Common::GetCPUCaps().sse4_1) { 277 if (Common::GetCPUCaps().sse4_1) {
274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 278 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
279 ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
275 BLENDPS(SCRATCH, R(src), mask); 280 BLENDPS(SCRATCH, R(src), mask);
276 } else { 281 } else {
277 MOVAPS(SCRATCH2, R(src)); 282 MOVAPS(SCRATCH2, R(src));
278 UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination 283 UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
279 UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination 284 UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
280 285
281 // Compute selector to selectively copy source components to destination for SHUFPS instruction 286 // Compute selector to selectively copy source components to destination for SHUFPS
287 // instruction
282 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) | 288 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
283 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | 289 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
284 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | 290 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
@@ -336,7 +342,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
336} 342}
337 343
338void JitShader::Compile_UniformCondition(Instruction instr) { 344void JitShader::Compile_UniformCondition(Instruction instr) {
339 int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); 345 int offset =
346 ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
340 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); 347 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
341} 348}
342 349
@@ -512,7 +519,7 @@ void JitShader::Compile_MIN(Instruction instr) {
512} 519}
513 520
514void JitShader::Compile_MOVA(Instruction instr) { 521void JitShader::Compile_MOVA(Instruction instr) {
515 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; 522 SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
516 523
517 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { 524 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
518 return; // NoOp 525 return; // NoOp
@@ -597,7 +604,7 @@ void JitShader::Compile_CALL(Instruction instr) {
597 604
598 // Call the subroutine 605 // Call the subroutine
599 FixupBranch b = CALL(); 606 FixupBranch b = CALL();
600 fixup_branches.push_back({ b, instr.flow_control.dest_offset }); 607 fixup_branches.push_back({b, instr.flow_control.dest_offset});
601 608
602 // Skip over the return offset that's on the stack 609 // Skip over the return offset that's on the stack
603 ADD(64, R(RSP), Imm32(8)); 610 ADD(64, R(RSP), Imm32(8));
@@ -628,7 +635,7 @@ void JitShader::Compile_CMP(Instruction instr) {
628 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to 635 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
629 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here 636 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
630 // because they don't match when used with NaNs. 637 // because they don't match when used with NaNs.
631 static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; 638 static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
632 639
633 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); 640 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
634 Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; 641 Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
@@ -678,7 +685,8 @@ void JitShader::Compile_MAD(Instruction instr) {
678} 685}
679 686
680void JitShader::Compile_IF(Instruction instr) { 687void JitShader::Compile_IF(Instruction instr) {
681 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); 688 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
689 "Backwards if-statements not supported");
682 690
683 // Evaluate the "IF" condition 691 // Evaluate the "IF" condition
684 if (instr.opcode.Value() == OpCode::Id::IFU) { 692 if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -709,29 +717,31 @@ void JitShader::Compile_IF(Instruction instr) {
709} 717}
710 718
711void JitShader::Compile_LOOP(Instruction instr) { 719void JitShader::Compile_LOOP(Instruction instr) {
712 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); 720 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
721 "Backwards loops not supported");
713 Compile_Assert(!looping, "Nested loops not supported"); 722 Compile_Assert(!looping, "Nested loops not supported");
714 723
715 looping = true; 724 looping = true;
716 725
717 int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); 726 int offset =
727 ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
718 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); 728 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 729 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
720 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 730 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 731 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
722 MOV(32, R(LOOPINC), R(LOOPCOUNT)); 732 MOV(32, R(LOOPINC), R(LOOPCOUNT));
723 SHR(32, R(LOOPINC), Imm8(16)); 733 SHR(32, R(LOOPINC), Imm8(16));
724 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer 734 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
725 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count 735 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
726 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 736 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
727 737
728 auto loop_start = GetCodePtr(); 738 auto loop_start = GetCodePtr();
729 739
730 Compile_Block(instr.flow_control.dest_offset + 1); 740 Compile_Block(instr.flow_control.dest_offset + 1);
731 741
732 ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component 742 ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
733 SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 743 SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
734 J_CC(CC_NZ, loop_start); // Loop if not equal 744 J_CC(CC_NZ, loop_start); // Loop if not equal
735 745
736 looping = false; 746 looping = false;
737} 747}
@@ -744,11 +754,11 @@ void JitShader::Compile_JMP(Instruction instr) {
744 else 754 else
745 UNREACHABLE(); 755 UNREACHABLE();
746 756
747 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && 757 bool inverted_condition =
748 (instr.flow_control.num_instructions & 1); 758 (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
749 759
750 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); 760 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
751 fixup_branches.push_back({ b, instr.flow_control.dest_offset }); 761 fixup_branches.push_back({b, instr.flow_control.dest_offset});
752} 762}
753 763
754void JitShader::Compile_Block(unsigned end) { 764void JitShader::Compile_Block(unsigned end) {
@@ -773,7 +783,8 @@ void JitShader::Compile_NextInstr() {
773 Compile_Return(); 783 Compile_Return();
774 } 784 }
775 785
776 ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); 786 ASSERT_MSG(code_ptr[program_counter] == nullptr,
787 "Tried to compile already compiled shader location!");
777 code_ptr[program_counter] = GetCodePtr(); 788 code_ptr[program_counter] = GetCodePtr();
778 789
779 Instruction instr = GetVertexShaderInstruction(program_counter++); 790 Instruction instr = GetVertexShaderInstruction(program_counter++);
@@ -787,7 +798,7 @@ void JitShader::Compile_NextInstr() {
787 } else { 798 } else {
788 // Unhandled instruction 799 // Unhandled instruction
789 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", 800 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
790 instr.opcode.Value().EffectiveOpCode(), instr.hex); 801 instr.opcode.Value().EffectiveOpCode(), instr.hex);
791 } 802 }
792} 803}
793 804
@@ -801,7 +812,8 @@ void JitShader::FindReturnOffsets() {
801 case OpCode::Id::CALL: 812 case OpCode::Id::CALL:
802 case OpCode::Id::CALLC: 813 case OpCode::Id::CALLC:
803 case OpCode::Id::CALLU: 814 case OpCode::Id::CALLU:
804 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); 815 return_offsets.push_back(instr.flow_control.dest_offset +
816 instr.flow_control.num_instructions);
805 break; 817 break;
806 default: 818 default:
807 break; 819 break;
@@ -835,12 +847,12 @@ void JitShader::Compile() {
835 XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG)); 847 XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG));
836 848
837 // Used to set a register to one 849 // Used to set a register to one
838 static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; 850 static const __m128 one = {1.f, 1.f, 1.f, 1.f};
839 MOV(PTRBITS, R(RAX), ImmPtr(&one)); 851 MOV(PTRBITS, R(RAX), ImmPtr(&one));
840 MOVAPS(ONE, MatR(RAX)); 852 MOVAPS(ONE, MatR(RAX));
841 853
842 // Used to negate registers 854 // Used to negate registers
843 static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; 855 static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
844 MOV(PTRBITS, R(RAX), ImmPtr(&neg)); 856 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
845 MOVAPS(NEGBIT, MatR(RAX)); 857 MOVAPS(NEGBIT, MatR(RAX));
846 858
@@ -850,7 +862,8 @@ void JitShader::Compile() {
850 // Compile entire program 862 // Compile entire program
851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); 863 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
852 864
853 // Set the target for any incomplete branches now that the entire shader program has been emitted 865 // Set the target for any incomplete branches now that the entire shader program has been
866 // emitted
854 for (const auto& branch : fixup_branches) { 867 for (const auto& branch : fixup_branches) {
855 SetJumpTarget(branch.first, code_ptr[branch.second]); 868 SetJumpTarget(branch.first, code_ptr[branch.second]);
856 } 869 }
@@ -861,7 +874,8 @@ void JitShader::Compile() {
861 fixup_branches.clear(); 874 fixup_branches.clear();
862 fixup_branches.shrink_to_fit(); 875 fixup_branches.shrink_to_fit();
863 876
864 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); 877 uintptr_t size =
878 reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
865 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); 879 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
866 880
867 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); 881 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5468459d4..2f37ef8bf 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -70,11 +70,11 @@ public:
70 void Compile_MAD(Instruction instr); 70 void Compile_MAD(Instruction instr);
71 71
72private: 72private:
73
74 void Compile_Block(unsigned end); 73 void Compile_Block(unsigned end);
75 void Compile_NextInstr(); 74 void Compile_NextInstr();
76 75
77 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); 76 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
77 Gen::X64Reg dest);
78 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); 78 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
79 79
80 /** 80 /**
@@ -111,8 +111,8 @@ private:
111 /// Offsets in code where a return needs to be inserted 111 /// Offsets in code where a return needs to be inserted
112 std::vector<unsigned> return_offsets; 112 std::vector<unsigned> return_offsets;
113 113
114 unsigned program_counter = 0; ///< Offset of the next instruction to decode 114 unsigned program_counter = 0; ///< Offset of the next instruction to decode
115 bool looping = false; ///< True if compiling a loop, used to check for nested loops 115 bool looping = false; ///< True if compiling a loop, used to check for nested loops
116 116
117 /// Branches that need to be fixed up once the entire shader program is compiled 117 /// Branches that need to be fixed up once the entire shader program is compiled
118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; 118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;