summary | refs | log | tree | commit | diff
path: root/src/video_core/vertex_shader.cpp
diff options
context:
space:
mode:
author: bunnei, 2014-12-20 12:40:06 -0500
committer: bunnei, 2014-12-20 12:40:06 -0500
commit: acabd7be82b4174a3adab0771df6320bdbc5a85b (patch)
tree: c47c2f68f50005bfb5c4003831f3977aec0be3ca /src/video_core/vertex_shader.cpp
parent: Merge pull request #315 from chinhodado/master (diff)
parent: Pica/VertexShader: Promote a log message to critical status. (diff)
download: yuzu-acabd7be82b4174a3adab0771df6320bdbc5a85b.tar.gz
yuzu-acabd7be82b4174a3adab0771df6320bdbc5a85b.tar.xz
yuzu-acabd7be82b4174a3adab0771df6320bdbc5a85b.zip
Merge pull request #284 from neobrain/pica_progress
Pica progress: Texturing, shaders, cleanups & more
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
-rw-r--r-- src/video_core/vertex_shader.cpp 338
1 files changed, 271 insertions, 67 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 477e78cfe..4ba69fa51 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -2,16 +2,25 @@
2// Licensed under GPLv2 2// Licensed under GPLv2
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <stack>
6
5#include <boost/range/algorithm.hpp> 7#include <boost/range/algorithm.hpp>
6 8
7#include <common/file_util.h> 9#include <common/file_util.h>
8 10
9#include <core/mem_map.h> 11#include <core/mem_map.h>
10 12
11#include "debug_utils/debug_utils.h" 13#include <nihstro/shader_bytecode.h>
14
12 15
13#include "pica.h" 16#include "pica.h"
14#include "vertex_shader.h" 17#include "vertex_shader.h"
18#include "debug_utils/debug_utils.h"
19
20using nihstro::Instruction;
21using nihstro::RegisterType;
22using nihstro::SourceRegister;
23using nihstro::SwizzlePattern;
15 24
16namespace Pica { 25namespace Pica {
17 26
@@ -19,13 +28,14 @@ namespace VertexShader {
19 28
20static struct { 29static struct {
21 Math::Vec4<float24> f[96]; 30 Math::Vec4<float24> f[96];
22} shader_uniforms;
23 31
32 std::array<bool,16> b;
33} shader_uniforms;
24 34
25// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! 35// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
26// For now, we just keep these local arrays around. 36// For now, we just keep these local arrays around.
27static u32 shader_memory[1024]; 37static std::array<u32, 1024> shader_memory;
28static u32 swizzle_data[1024]; 38static std::array<u32, 1024> swizzle_data;
29 39
30void SubmitShaderMemoryChange(u32 addr, u32 value) 40void SubmitShaderMemoryChange(u32 addr, u32 value)
31{ 41{
@@ -42,6 +52,21 @@ Math::Vec4<float24>& GetFloatUniform(u32 index)
42 return shader_uniforms.f[index]; 52 return shader_uniforms.f[index];
43} 53}
44 54
55bool& GetBoolUniform(u32 index)
56{
57 return shader_uniforms.b[index];
58}
59
60const std::array<u32, 1024>& GetShaderBinary()
61{
62 return shader_memory;
63}
64
65const std::array<u32, 1024>& GetSwizzlePatterns()
66{
67 return swizzle_data;
68}
69
45struct VertexShaderState { 70struct VertexShaderState {
46 u32* program_counter; 71 u32* program_counter;
47 72
@@ -49,13 +74,23 @@ struct VertexShaderState {
49 float24* output_register_table[7*4]; 74 float24* output_register_table[7*4];
50 75
51 Math::Vec4<float24> temporary_registers[16]; 76 Math::Vec4<float24> temporary_registers[16];
52 bool status_registers[2]; 77 bool conditional_code[2];
78
79 // Two Address registers and one loop counter
80 // TODO: How many bits do these actually have?
81 s32 address_registers[3];
53 82
54 enum { 83 enum {
55 INVALID_ADDRESS = 0xFFFFFFFF 84 INVALID_ADDRESS = 0xFFFFFFFF
56 }; 85 };
57 u32 call_stack[8]; // TODO: What is the maximal call stack depth? 86
58 u32* call_stack_pointer; 87 struct CallStackElement {
88 u32 final_address;
89 u32 return_address;
90 };
91
92 // TODO: Is there a maximal size for this?
93 std::stack<CallStackElement> call_stack;
59 94
60 struct { 95 struct {
61 u32 max_offset; // maximum program counter ever reached 96 u32 max_offset; // maximum program counter ever reached
@@ -64,49 +99,105 @@ struct VertexShaderState {
64}; 99};
65 100
66static void ProcessShaderCode(VertexShaderState& state) { 101static void ProcessShaderCode(VertexShaderState& state) {
102
103 // Placeholder for invalid inputs
104 static float24 dummy_vec4_float24[4];
105
67 while (true) { 106 while (true) {
68 bool increment_pc = true; 107 if (!state.call_stack.empty()) {
108 if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) {
109 state.program_counter = &shader_memory[state.call_stack.top().return_address];
110 state.call_stack.pop();
111
112 // TODO: Is "trying again" accurate to hardware?
113 continue;
114 }
115 }
116
69 bool exit_loop = false; 117 bool exit_loop = false;
70 const Instruction& instr = *(const Instruction*)state.program_counter; 118 const Instruction& instr = *(const Instruction*)state.program_counter;
71 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
72
73 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()]
74 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x
75 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x
76 : nullptr;
77 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()]
78 : &state.temporary_registers[instr.common.src2.GetIndex()].x;
79 float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
80 : (instr.common.dest < 0x10) ? nullptr
81 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
82 : nullptr;
83
84 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; 119 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
85 const bool negate_src1 = (swizzle.negate != 0);
86 120
87 float24 src1[4] = { 121 auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) {
88 src1_[(int)swizzle.GetSelectorSrc1(0)], 122 state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
89 src1_[(int)swizzle.GetSelectorSrc1(1)], 123 state.call_stack.push({ offset + num_instructions, return_offset });
90 src1_[(int)swizzle.GetSelectorSrc1(2)],
91 src1_[(int)swizzle.GetSelectorSrc1(3)],
92 }; 124 };
93 if (negate_src1) { 125 u32 binary_offset = state.program_counter - shader_memory.data();
94 src1[0] = src1[0] * float24::FromFloat32(-1); 126
95 src1[1] = src1[1] * float24::FromFloat32(-1); 127 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset);
96 src1[2] = src1[2] * float24::FromFloat32(-1); 128
97 src1[3] = src1[3] * float24::FromFloat32(-1); 129 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
98 } 130 switch (source_reg.GetRegisterType()) {
99 const float24 src2[4] = { 131 case RegisterType::Input:
100 src2_[(int)swizzle.GetSelectorSrc2(0)], 132 return state.input_register_table[source_reg.GetIndex()];
101 src2_[(int)swizzle.GetSelectorSrc2(1)], 133
102 src2_[(int)swizzle.GetSelectorSrc2(2)], 134 case RegisterType::Temporary:
103 src2_[(int)swizzle.GetSelectorSrc2(3)], 135 return &state.temporary_registers[source_reg.GetIndex()].x;
136
137 case RegisterType::FloatUniform:
138 return &shader_uniforms.f[source_reg.GetIndex()].x;
139
140 default:
141 return dummy_vec4_float24;
142 }
104 }; 143 };
105 144
106 switch (instr.opcode) { 145 switch (instr.opcode.GetInfo().type) {
146 case Instruction::OpCodeType::Arithmetic:
147 {
148 bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
149 if (is_inverted) {
150 // TODO: We don't really support this properly: For instance, the address register
151 // offset needs to be applied to SRC2 instead, etc.
152 // For now, we just abort in this situation.
153 LOG_CRITICAL(HW_GPU, "Bad condition...");
154 exit(0);
155 }
156
157 const int address_offset = (instr.common.address_register_index == 0)
158 ? 0 : state.address_registers[instr.common.address_register_index - 1];
159
160 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset);
161 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted));
162
163 const bool negate_src1 = (swizzle.negate_src1 != false);
164 const bool negate_src2 = (swizzle.negate_src2 != false);
165
166 float24 src1[4] = {
167 src1_[(int)swizzle.GetSelectorSrc1(0)],
168 src1_[(int)swizzle.GetSelectorSrc1(1)],
169 src1_[(int)swizzle.GetSelectorSrc1(2)],
170 src1_[(int)swizzle.GetSelectorSrc1(3)],
171 };
172 if (negate_src1) {
173 src1[0] = src1[0] * float24::FromFloat32(-1);
174 src1[1] = src1[1] * float24::FromFloat32(-1);
175 src1[2] = src1[2] * float24::FromFloat32(-1);
176 src1[3] = src1[3] * float24::FromFloat32(-1);
177 }
178 float24 src2[4] = {
179 src2_[(int)swizzle.GetSelectorSrc2(0)],
180 src2_[(int)swizzle.GetSelectorSrc2(1)],
181 src2_[(int)swizzle.GetSelectorSrc2(2)],
182 src2_[(int)swizzle.GetSelectorSrc2(3)],
183 };
184 if (negate_src2) {
185 src2[0] = src2[0] * float24::FromFloat32(-1);
186 src2[1] = src2[1] * float24::FromFloat32(-1);
187 src2[2] = src2[2] * float24::FromFloat32(-1);
188 src2[3] = src2[3] * float24::FromFloat32(-1);
189 }
190
191 float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
192 : (instr.common.dest < 0x10) ? dummy_vec4_float24
193 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
194 : dummy_vec4_float24;
195
196 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
197
198 switch (instr.opcode.EffectiveOpCode()) {
107 case Instruction::OpCode::ADD: 199 case Instruction::OpCode::ADD:
108 { 200 {
109 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
110 for (int i = 0; i < 4; ++i) { 201 for (int i = 0; i < 4; ++i) {
111 if (!swizzle.DestComponentEnabled(i)) 202 if (!swizzle.DestComponentEnabled(i))
112 continue; 203 continue;
@@ -119,7 +210,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
119 210
120 case Instruction::OpCode::MUL: 211 case Instruction::OpCode::MUL:
121 { 212 {
122 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
123 for (int i = 0; i < 4; ++i) { 213 for (int i = 0; i < 4; ++i) {
124 if (!swizzle.DestComponentEnabled(i)) 214 if (!swizzle.DestComponentEnabled(i))
125 continue; 215 continue;
@@ -130,10 +220,18 @@ static void ProcessShaderCode(VertexShaderState& state) {
130 break; 220 break;
131 } 221 }
132 222
223 case Instruction::OpCode::MAX:
224 for (int i = 0; i < 4; ++i) {
225 if (!swizzle.DestComponentEnabled(i))
226 continue;
227
228 dest[i] = std::max(src1[i], src2[i]);
229 }
230 break;
231
133 case Instruction::OpCode::DP3: 232 case Instruction::OpCode::DP3:
134 case Instruction::OpCode::DP4: 233 case Instruction::OpCode::DP4:
135 { 234 {
136 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
137 float24 dot = float24::FromFloat32(0.f); 235 float24 dot = float24::FromFloat32(0.f);
138 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; 236 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
139 for (int i = 0; i < num_components; ++i) 237 for (int i = 0; i < num_components; ++i)
@@ -151,7 +249,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
151 // Reciprocal 249 // Reciprocal
152 case Instruction::OpCode::RCP: 250 case Instruction::OpCode::RCP:
153 { 251 {
154 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
155 for (int i = 0; i < 4; ++i) { 252 for (int i = 0; i < 4; ++i) {
156 if (!swizzle.DestComponentEnabled(i)) 253 if (!swizzle.DestComponentEnabled(i))
157 continue; 254 continue;
@@ -167,7 +264,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
167 // Reciprocal Square Root 264 // Reciprocal Square Root
168 case Instruction::OpCode::RSQ: 265 case Instruction::OpCode::RSQ:
169 { 266 {
170 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
171 for (int i = 0; i < 4; ++i) { 267 for (int i = 0; i < 4; ++i) {
172 if (!swizzle.DestComponentEnabled(i)) 268 if (!swizzle.DestComponentEnabled(i))
173 continue; 269 continue;
@@ -180,9 +276,21 @@ static void ProcessShaderCode(VertexShaderState& state) {
180 break; 276 break;
181 } 277 }
182 278
279 case Instruction::OpCode::MOVA:
280 {
281 for (int i = 0; i < 2; ++i) {
282 if (!swizzle.DestComponentEnabled(i))
283 continue;
284
285 // TODO: Figure out how the rounding is done on hardware
286 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
287 }
288
289 break;
290 }
291
183 case Instruction::OpCode::MOV: 292 case Instruction::OpCode::MOV:
184 { 293 {
185 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
186 for (int i = 0; i < 4; ++i) { 294 for (int i = 0; i < 4; ++i) {
187 if (!swizzle.DestComponentEnabled(i)) 295 if (!swizzle.DestComponentEnabled(i))
188 continue; 296 continue;
@@ -192,39 +300,137 @@ static void ProcessShaderCode(VertexShaderState& state) {
192 break; 300 break;
193 } 301 }
194 302
195 case Instruction::OpCode::RET: 303 case Instruction::OpCode::CMP:
196 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { 304 for (int i = 0; i < 2; ++i) {
197 exit_loop = true; 305 // TODO: Can you restrict to one compare via dest masking?
198 } else { 306
199 // Jump back to call stack position, invalidate call stack entry, move up call stack pointer 307 auto compare_op = instr.common.compare_op;
200 state.program_counter = &shader_memory[*state.call_stack_pointer]; 308 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
201 *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; 309
310 switch (op) {
311 case compare_op.Equal:
312 state.conditional_code[i] = (src1[i] == src2[i]);
313 break;
314
315 case compare_op.NotEqual:
316 state.conditional_code[i] = (src1[i] != src2[i]);
317 break;
318
319 case compare_op.LessThan:
320 state.conditional_code[i] = (src1[i] < src2[i]);
321 break;
322
323 case compare_op.LessEqual:
324 state.conditional_code[i] = (src1[i] <= src2[i]);
325 break;
326
327 case compare_op.GreaterThan:
328 state.conditional_code[i] = (src1[i] > src2[i]);
329 break;
330
331 case compare_op.GreaterEqual:
332 state.conditional_code[i] = (src1[i] >= src2[i]);
333 break;
334
335 default:
336 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
337 break;
338 }
202 } 339 }
340 break;
203 341
342 default:
343 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
344 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
345 _dbg_assert_(HW_GPU, 0);
346 break;
347 }
348
349 break;
350 }
351 default:
352 // Handle each instruction on its own
353 switch (instr.opcode) {
354 case Instruction::OpCode::END:
355 exit_loop = true;
204 break; 356 break;
205 357
206 case Instruction::OpCode::CALL: 358 case Instruction::OpCode::CALL:
207 increment_pc = false; 359 call(state,
360 instr.flow_control.dest_offset,
361 instr.flow_control.num_instructions,
362 binary_offset + 1);
363 break;
364
365 case Instruction::OpCode::NOP:
366 break;
208 367
209 _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); 368 case Instruction::OpCode::IFU:
369 if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
370 call(state,
371 binary_offset + 1,
372 instr.flow_control.dest_offset - binary_offset - 1,
373 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
374 } else {
375 call(state,
376 instr.flow_control.dest_offset,
377 instr.flow_control.num_instructions,
378 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
379 }
210 380
211 *++state.call_stack_pointer = state.program_counter - shader_memory;
212 // TODO: Does this offset refer to the beginning of shader memory?
213 state.program_counter = &shader_memory[instr.flow_control.offset_words];
214 break; 381 break;
215 382
216 case Instruction::OpCode::FLS: 383 case Instruction::OpCode::IFC:
217 // TODO: Do whatever needs to be done here? 384 {
385 // TODO: Do we need to consider swizzlers here?
386
387 auto flow_control = instr.flow_control;
388 bool results[3] = { flow_control.refx == state.conditional_code[0],
389 flow_control.refy == state.conditional_code[1] };
390
391 switch (flow_control.op) {
392 case flow_control.Or:
393 results[2] = results[0] || results[1];
394 break;
395
396 case flow_control.And:
397 results[2] = results[0] && results[1];
398 break;
399
400 case flow_control.JustX:
401 results[2] = results[0];
402 break;
403
404 case flow_control.JustY:
405 results[2] = results[1];
406 break;
407 }
408
409 if (results[2]) {
410 call(state,
411 binary_offset + 1,
412 instr.flow_control.dest_offset - binary_offset - 1,
413 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
414 } else {
415 call(state,
416 instr.flow_control.dest_offset,
417 instr.flow_control.num_instructions,
418 instr.flow_control.dest_offset + instr.flow_control.num_instructions);
419 }
420
218 break; 421 break;
422 }
219 423
220 default: 424 default:
221 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", 425 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
222 (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); 426 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
223 break; 427 break;
428 }
429
430 break;
224 } 431 }
225 432
226 if (increment_pc) 433 ++state.program_counter;
227 ++state.program_counter;
228 434
229 if (exit_loop) 435 if (exit_loop)
230 break; 436 break;
@@ -275,13 +481,11 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
275 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; 481 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
276 } 482 }
277 483
278 state.status_registers[0] = false; 484 state.conditional_code[0] = false;
279 state.status_registers[1] = false; 485 state.conditional_code[1] = false;
280 boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS);
281 state.call_stack_pointer = &state.call_stack[0];
282 486
283 ProcessShaderCode(state); 487 ProcessShaderCode(state);
284 DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, 488 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
285 state.debug.max_opdesc_id, registers.vs_main_offset, 489 state.debug.max_opdesc_id, registers.vs_main_offset,
286 registers.vs_output_attributes); 490 registers.vs_output_attributes);
287 491