summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/pica.h 8
-rw-r--r-- src/video_core/vertex_shader.cpp 137
2 files changed, 138 insertions, 7 deletions
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 583614328..87a9e7913 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -771,6 +771,14 @@ struct float24 {
771 return ToFloat32() <= flt.ToFloat32(); 771 return ToFloat32() <= flt.ToFloat32();
772 } 772 }
773 773
774 bool operator == (const float24& flt) const {
775 return ToFloat32() == flt.ToFloat32();
776 }
777
778 bool operator != (const float24& flt) const {
779 return ToFloat32() != flt.ToFloat32();
780 }
781
774private: 782private:
775 // Stored as a regular float, merely for convenience 783 // Stored as a regular float, merely for convenience
776 // TODO: Perform proper arithmetic on this! 784 // TODO: Perform proper arithmetic on this!
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 33a862b74..5d9203c86 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -59,6 +59,8 @@ const std::array<u32, 1024>& GetSwizzlePatterns()
59 return swizzle_data; 59 return swizzle_data;
60} 60}
61 61
62// TODO: Is there actually a limit on hardware?
63const int if_stack_size = 8;
62 64
63struct VertexShaderState { 65struct VertexShaderState {
64 u32* program_counter; 66 u32* program_counter;
@@ -67,7 +69,11 @@ struct VertexShaderState {
67 float24* output_register_table[7*4]; 69 float24* output_register_table[7*4];
68 70
69 Math::Vec4<float24> temporary_registers[16]; 71 Math::Vec4<float24> temporary_registers[16];
70 bool status_registers[2]; 72 bool conditional_code[2];
73
74 // Two Address registers and one loop counter
75 // TODO: How many bits do these actually have?
76 s32 address_registers[3];
71 77
72 enum { 78 enum {
73 INVALID_ADDRESS = 0xFFFFFFFF 79 INVALID_ADDRESS = 0xFFFFFFFF
@@ -75,6 +81,12 @@ struct VertexShaderState {
75 u32 call_stack[8]; // TODO: What is the maximal call stack depth? 81 u32 call_stack[8]; // TODO: What is the maximal call stack depth?
76 u32* call_stack_pointer; 82 u32* call_stack_pointer;
77 83
84 struct IfStackElement {
85 u32 else_addr;
86 u32 else_instructions;
87 } if_stack[if_stack_size];
88 IfStackElement* if_stack_pointer;
89
78 struct { 90 struct {
79 u32 max_offset; // maximum program counter ever reached 91 u32 max_offset; // maximum program counter ever reached
80 u32 max_opdesc_id; // maximum swizzle pattern index ever used 92 u32 max_opdesc_id; // maximum swizzle pattern index ever used
@@ -107,11 +119,20 @@ static void ProcessShaderCode(VertexShaderState& state) {
107 case Instruction::OpCodeType::Arithmetic: 119 case Instruction::OpCodeType::Arithmetic:
108 { 120 {
109 bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); 121 bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
110 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); 122 if (is_inverted) {
123 // We don't really support this properly and/or reliably
124 LOG_ERROR(HW_GPU, "Bad condition...");
125 exit(0);
126 }
127
128 const int address_offset = (instr.common.address_register_index == 0)
129 ? 0 : state.address_registers[instr.common.address_register_index - 1];
130
131 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset);
111 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); 132 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted));
112 133
113 const bool negate_src1 = (swizzle.negate_src1 != 0); 134 const bool negate_src1 = (swizzle.negate_src1 != false);
114 const bool negate_src2 = (swizzle.negate_src2 != 0); 135 const bool negate_src2 = (swizzle.negate_src2 != false);
115 136
116 float24 src1[4] = { 137 float24 src1[4] = {
117 src1_[(int)swizzle.GetSelectorSrc1(0)], 138 src1_[(int)swizzle.GetSelectorSrc1(0)],
@@ -217,6 +238,19 @@ static void ProcessShaderCode(VertexShaderState& state) {
217 break; 238 break;
218 } 239 }
219 240
241 case Instruction::OpCode::MOVA:
242 {
243 for (int i = 0; i < 2; ++i) {
244 if (!swizzle.DestComponentEnabled(i))
245 continue;
246
247 // TODO: Figure out how the rounding is done on hardware
248 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
249 }
250
251 break;
252 }
253
220 case Instruction::OpCode::MOV: 254 case Instruction::OpCode::MOV:
221 { 255 {
222 for (int i = 0; i < 4; ++i) { 256 for (int i = 0; i < 4; ++i) {
@@ -228,16 +262,56 @@ static void ProcessShaderCode(VertexShaderState& state) {
228 break; 262 break;
229 } 263 }
230 264
265 case Instruction::OpCode::CMP:
266 for (int i = 0; i < 2; ++i) {
267 // TODO: Can you restrict to one compare via dest masking?
268
269 auto compare_op = instr.common.compare_op;
270 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
271
272 switch (op) {
273 case compare_op.Equal:
274 state.conditional_code[i] = (src1[i] == src2[i]);
275 break;
276
277 case compare_op.NotEqual:
278 state.conditional_code[i] = (src1[i] != src2[i]);
279 break;
280
281 case compare_op.LessThan:
282 state.conditional_code[i] = (src1[i] < src2[i]);
283 break;
284
285 case compare_op.LessEqual:
286 state.conditional_code[i] = (src1[i] <= src2[i]);
287 break;
288
289 case compare_op.GreaterThan:
290 state.conditional_code[i] = (src1[i] > src2[i]);
291 break;
292
293 case compare_op.GreaterEqual:
294 state.conditional_code[i] = (src1[i] >= src2[i]);
295 break;
296
297 default:
298 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
299 break;
300 }
301 }
302 break;
303
231 default: 304 default:
232 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", 305 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
233 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); 306 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
307 _dbg_assert_(HW_GPU, 0);
234 break; 308 break;
235 } 309 }
236 310
237 break; 311 break;
238 } 312 }
239 default: 313 default:
240 // Process instruction explicitly 314 // Handle each instruction on its own
241 switch (instr.opcode) { 315 switch (instr.opcode) {
242 // NOP is currently used as a heuristic for leaving from a function. 316 // NOP is currently used as a heuristic for leaving from a function.
243 // TODO: This is completely incorrect. 317 // TODO: This is completely incorrect.
@@ -265,6 +339,44 @@ static void ProcessShaderCode(VertexShaderState& state) {
265 // TODO 339 // TODO
266 break; 340 break;
267 341
342 case Instruction::OpCode::IFC:
343 {
344 // TODO: Do we need to consider swizzlers here?
345
346 auto flow_control = instr.flow_control;
347 bool results[3] = { flow_control.refx == state.conditional_code[0],
348 flow_control.refy == state.conditional_code[1] };
349
350 switch (flow_control.op) {
351 case flow_control.Or:
352 results[2] = results[0] || results[1];
353 break;
354
355 case flow_control.And:
356 results[2] = results[0] && results[1];
357 break;
358
359 case flow_control.JustX:
360 results[2] = results[0];
361 break;
362
363 case flow_control.JustY:
364 results[2] = results[1];
365 break;
366 }
367
368 if (results[2]) {
369 ++state.if_stack_pointer;
370
371 state.if_stack_pointer->else_addr = instr.flow_control.dest_offset;
372 state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions;
373 } else {
374 state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1;
375 }
376
377 break;
378 }
379
268 default: 380 default:
269 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", 381 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
270 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); 382 (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
@@ -277,6 +389,13 @@ static void ProcessShaderCode(VertexShaderState& state) {
277 if (increment_pc) 389 if (increment_pc)
278 ++state.program_counter; 390 ++state.program_counter;
279 391
392 if (state.if_stack_pointer >= &state.if_stack[0]) {
393 if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) {
394 state.program_counter += state.if_stack_pointer->else_instructions;
395 state.if_stack_pointer--;
396 }
397 }
398
280 if (exit_loop) 399 if (exit_loop)
281 break; 400 break;
282 } 401 }
@@ -326,11 +445,15 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
326 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; 445 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
327 } 446 }
328 447
329 state.status_registers[0] = false; 448 state.conditional_code[0] = false;
330 state.status_registers[1] = false; 449 state.conditional_code[1] = false;
331 boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); 450 boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS);
332 state.call_stack_pointer = &state.call_stack[0]; 451 state.call_stack_pointer = &state.call_stack[0];
333 452
453 std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]),
454 VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS});
455 state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly
456
334 ProcessShaderCode(state); 457 ProcessShaderCode(state);
335 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), 458 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
336 state.debug.max_opdesc_id, registers.vs_main_offset, 459 state.debug.max_opdesc_id, registers.vs_main_offset,