diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/pica.h | 8 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 137 |
2 files changed, 138 insertions, 7 deletions
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 583614328..87a9e7913 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -771,6 +771,14 @@ struct float24 { | |||
| 771 | return ToFloat32() <= flt.ToFloat32(); | 771 | return ToFloat32() <= flt.ToFloat32(); |
| 772 | } | 772 | } |
| 773 | 773 | ||
| 774 | bool operator == (const float24& flt) const { | ||
| 775 | return ToFloat32() == flt.ToFloat32(); | ||
| 776 | } | ||
| 777 | |||
| 778 | bool operator != (const float24& flt) const { | ||
| 779 | return ToFloat32() != flt.ToFloat32(); | ||
| 780 | } | ||
| 781 | |||
| 774 | private: | 782 | private: |
| 775 | // Stored as a regular float, merely for convenience | 783 | // Stored as a regular float, merely for convenience |
| 776 | // TODO: Perform proper arithmetic on this! | 784 | // TODO: Perform proper arithmetic on this! |
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 33a862b74..5d9203c86 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -59,6 +59,8 @@ const std::array<u32, 1024>& GetSwizzlePatterns() | |||
| 59 | return swizzle_data; | 59 | return swizzle_data; |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | // TODO: Is there actually a limit on hardware? | ||
| 63 | const int if_stack_size = 8; | ||
| 62 | 64 | ||
| 63 | struct VertexShaderState { | 65 | struct VertexShaderState { |
| 64 | u32* program_counter; | 66 | u32* program_counter; |
| @@ -67,7 +69,11 @@ struct VertexShaderState { | |||
| 67 | float24* output_register_table[7*4]; | 69 | float24* output_register_table[7*4]; |
| 68 | 70 | ||
| 69 | Math::Vec4<float24> temporary_registers[16]; | 71 | Math::Vec4<float24> temporary_registers[16]; |
| 70 | bool status_registers[2]; | 72 | bool conditional_code[2]; |
| 73 | |||
| 74 | // Two Address registers and one loop counter | ||
| 75 | // TODO: How many bits do these actually have? | ||
| 76 | s32 address_registers[3]; | ||
| 71 | 77 | ||
| 72 | enum { | 78 | enum { |
| 73 | INVALID_ADDRESS = 0xFFFFFFFF | 79 | INVALID_ADDRESS = 0xFFFFFFFF |
| @@ -75,6 +81,12 @@ struct VertexShaderState { | |||
| 75 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? | 81 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? |
| 76 | u32* call_stack_pointer; | 82 | u32* call_stack_pointer; |
| 77 | 83 | ||
| 84 | struct IfStackElement { | ||
| 85 | u32 else_addr; | ||
| 86 | u32 else_instructions; | ||
| 87 | } if_stack[if_stack_size]; | ||
| 88 | IfStackElement* if_stack_pointer; | ||
| 89 | |||
| 78 | struct { | 90 | struct { |
| 79 | u32 max_offset; // maximum program counter ever reached | 91 | u32 max_offset; // maximum program counter ever reached |
| 80 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | 92 | u32 max_opdesc_id; // maximum swizzle pattern index ever used |
| @@ -107,11 +119,20 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 107 | case Instruction::OpCodeType::Arithmetic: | 119 | case Instruction::OpCodeType::Arithmetic: |
| 108 | { | 120 | { |
| 109 | bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); | 121 | bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); |
| 110 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); | 122 | if (is_inverted) { |
| 123 | // We don't really support this properly and/or reliably | ||
| 124 | LOG_ERROR(HW_GPU, "Bad condition..."); | ||
| 125 | exit(0); | ||
| 126 | } | ||
| 127 | |||
| 128 | const int address_offset = (instr.common.address_register_index == 0) | ||
| 129 | ? 0 : state.address_registers[instr.common.address_register_index - 1]; | ||
| 130 | |||
| 131 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); | ||
| 111 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); | 132 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); |
| 112 | 133 | ||
| 113 | const bool negate_src1 = (swizzle.negate_src1 != 0); | 134 | const bool negate_src1 = (swizzle.negate_src1 != false); |
| 114 | const bool negate_src2 = (swizzle.negate_src2 != 0); | 135 | const bool negate_src2 = (swizzle.negate_src2 != false); |
| 115 | 136 | ||
| 116 | float24 src1[4] = { | 137 | float24 src1[4] = { |
| 117 | src1_[(int)swizzle.GetSelectorSrc1(0)], | 138 | src1_[(int)swizzle.GetSelectorSrc1(0)], |
| @@ -217,6 +238,19 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 217 | break; | 238 | break; |
| 218 | } | 239 | } |
| 219 | 240 | ||
| 241 | case Instruction::OpCode::MOVA: | ||
| 242 | { | ||
| 243 | for (int i = 0; i < 2; ++i) { | ||
| 244 | if (!swizzle.DestComponentEnabled(i)) | ||
| 245 | continue; | ||
| 246 | |||
| 247 | // TODO: Figure out how the rounding is done on hardware | ||
| 248 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | ||
| 249 | } | ||
| 250 | |||
| 251 | break; | ||
| 252 | } | ||
| 253 | |||
| 220 | case Instruction::OpCode::MOV: | 254 | case Instruction::OpCode::MOV: |
| 221 | { | 255 | { |
| 222 | for (int i = 0; i < 4; ++i) { | 256 | for (int i = 0; i < 4; ++i) { |
| @@ -228,16 +262,56 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 228 | break; | 262 | break; |
| 229 | } | 263 | } |
| 230 | 264 | ||
| 265 | case Instruction::OpCode::CMP: | ||
| 266 | for (int i = 0; i < 2; ++i) { | ||
| 267 | // TODO: Can you restrict to one compare via dest masking? | ||
| 268 | |||
| 269 | auto compare_op = instr.common.compare_op; | ||
| 270 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | ||
| 271 | |||
| 272 | switch (op) { | ||
| 273 | case compare_op.Equal: | ||
| 274 | state.conditional_code[i] = (src1[i] == src2[i]); | ||
| 275 | break; | ||
| 276 | |||
| 277 | case compare_op.NotEqual: | ||
| 278 | state.conditional_code[i] = (src1[i] != src2[i]); | ||
| 279 | break; | ||
| 280 | |||
| 281 | case compare_op.LessThan: | ||
| 282 | state.conditional_code[i] = (src1[i] < src2[i]); | ||
| 283 | break; | ||
| 284 | |||
| 285 | case compare_op.LessEqual: | ||
| 286 | state.conditional_code[i] = (src1[i] <= src2[i]); | ||
| 287 | break; | ||
| 288 | |||
| 289 | case compare_op.GreaterThan: | ||
| 290 | state.conditional_code[i] = (src1[i] > src2[i]); | ||
| 291 | break; | ||
| 292 | |||
| 293 | case compare_op.GreaterEqual: | ||
| 294 | state.conditional_code[i] = (src1[i] >= src2[i]); | ||
| 295 | break; | ||
| 296 | |||
| 297 | default: | ||
| 298 | LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); | ||
| 299 | break; | ||
| 300 | } | ||
| 301 | } | ||
| 302 | break; | ||
| 303 | |||
| 231 | default: | 304 | default: |
| 232 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | 305 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", |
| 233 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); | 306 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); |
| 307 | _dbg_assert_(HW_GPU, 0); | ||
| 234 | break; | 308 | break; |
| 235 | } | 309 | } |
| 236 | 310 | ||
| 237 | break; | 311 | break; |
| 238 | } | 312 | } |
| 239 | default: | 313 | default: |
| 240 | // Process instruction explicitly | 314 | // Handle each instruction on its own |
| 241 | switch (instr.opcode) { | 315 | switch (instr.opcode) { |
| 242 | // NOP is currently used as a heuristic for leaving from a function. | 316 | // NOP is currently used as a heuristic for leaving from a function. |
| 243 | // TODO: This is completely incorrect. | 317 | // TODO: This is completely incorrect. |
| @@ -265,6 +339,44 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 265 | // TODO | 339 | // TODO |
| 266 | break; | 340 | break; |
| 267 | 341 | ||
| 342 | case Instruction::OpCode::IFC: | ||
| 343 | { | ||
| 344 | // TODO: Do we need to consider swizzlers here? | ||
| 345 | |||
| 346 | auto flow_control = instr.flow_control; | ||
| 347 | bool results[3] = { flow_control.refx == state.conditional_code[0], | ||
| 348 | flow_control.refy == state.conditional_code[1] }; | ||
| 349 | |||
| 350 | switch (flow_control.op) { | ||
| 351 | case flow_control.Or: | ||
| 352 | results[2] = results[0] || results[1]; | ||
| 353 | break; | ||
| 354 | |||
| 355 | case flow_control.And: | ||
| 356 | results[2] = results[0] && results[1]; | ||
| 357 | break; | ||
| 358 | |||
| 359 | case flow_control.JustX: | ||
| 360 | results[2] = results[0]; | ||
| 361 | break; | ||
| 362 | |||
| 363 | case flow_control.JustY: | ||
| 364 | results[2] = results[1]; | ||
| 365 | break; | ||
| 366 | } | ||
| 367 | |||
| 368 | if (results[2]) { | ||
| 369 | ++state.if_stack_pointer; | ||
| 370 | |||
| 371 | state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; | ||
| 372 | state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; | ||
| 373 | } else { | ||
| 374 | state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; | ||
| 375 | } | ||
| 376 | |||
| 377 | break; | ||
| 378 | } | ||
| 379 | |||
| 268 | default: | 380 | default: |
| 269 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | 381 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
| 270 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); | 382 | (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); |
| @@ -277,6 +389,13 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 277 | if (increment_pc) | 389 | if (increment_pc) |
| 278 | ++state.program_counter; | 390 | ++state.program_counter; |
| 279 | 391 | ||
| 392 | if (state.if_stack_pointer >= &state.if_stack[0]) { | ||
| 393 | if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { | ||
| 394 | state.program_counter += state.if_stack_pointer->else_instructions; | ||
| 395 | state.if_stack_pointer--; | ||
| 396 | } | ||
| 397 | } | ||
| 398 | |||
| 280 | if (exit_loop) | 399 | if (exit_loop) |
| 281 | break; | 400 | break; |
| 282 | } | 401 | } |
| @@ -326,11 +445,15 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) | |||
| 326 | state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; | 445 | state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; |
| 327 | } | 446 | } |
| 328 | 447 | ||
| 329 | state.status_registers[0] = false; | 448 | state.conditional_code[0] = false; |
| 330 | state.status_registers[1] = false; | 449 | state.conditional_code[1] = false; |
| 331 | boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); | 450 | boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); |
| 332 | state.call_stack_pointer = &state.call_stack[0]; | 451 | state.call_stack_pointer = &state.call_stack[0]; |
| 333 | 452 | ||
| 453 | std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), | ||
| 454 | VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); | ||
| 455 | state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly | ||
| 456 | |||
| 334 | ProcessShaderCode(state); | 457 | ProcessShaderCode(state); |
| 335 | DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), | 458 | DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), |
| 336 | state.debug.max_opdesc_id, registers.vs_main_offset, | 459 | state.debug.max_opdesc_id, registers.vs_main_offset, |