diff options
| author | 2015-05-22 23:27:41 -0400 | |
|---|---|---|
| committer | 2015-05-22 23:27:41 -0400 | |
| commit | a7946f9027a87da93cd76ec46d54cadf4203b082 (patch) | |
| tree | 01e98e937a3ed9d8c65b270df016658f8ae89a97 /src/video_core/vertex_shader.cpp | |
| parent | Merge pull request #801 from purpasmart96/hid_stubs (diff) | |
| parent | Pica: Create 'State' structure and move state memory there. (diff) | |
| download | yuzu-a7946f9027a87da93cd76ec46d54cadf4203b082.tar.gz yuzu-a7946f9027a87da93cd76ec46d54cadf4203b082.tar.xz yuzu-a7946f9027a87da93cd76ec46d54cadf4203b082.zip | |
Merge pull request #776 from bunnei/pica-state
GPU: Consolidate Pica state
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 96 |
1 files changed, 27 insertions, 69 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 981d1a356..7d68998f1 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -26,55 +26,8 @@ namespace Pica { | |||
| 26 | 26 | ||
| 27 | namespace VertexShader { | 27 | namespace VertexShader { |
| 28 | 28 | ||
| 29 | static struct { | ||
| 30 | Math::Vec4<float24> f[96]; | ||
| 31 | |||
| 32 | std::array<bool,16> b; | ||
| 33 | |||
| 34 | std::array<Math::Vec4<u8>,4> i; | ||
| 35 | } shader_uniforms; | ||
| 36 | |||
| 37 | static Math::Vec4<float24> vs_default_attributes[16]; | ||
| 38 | |||
| 39 | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! | ||
| 40 | // For now, we just keep these local arrays around. | ||
| 41 | static std::array<u32, 1024> shader_memory; | ||
| 42 | static std::array<u32, 1024> swizzle_data; | ||
| 43 | |||
| 44 | void SubmitShaderMemoryChange(u32 addr, u32 value) { | ||
| 45 | shader_memory[addr] = value; | ||
| 46 | } | ||
| 47 | |||
| 48 | void SubmitSwizzleDataChange(u32 addr, u32 value) { | ||
| 49 | swizzle_data[addr] = value; | ||
| 50 | } | ||
| 51 | |||
| 52 | Math::Vec4<float24>& GetFloatUniform(u32 index) { | ||
| 53 | return shader_uniforms.f[index]; | ||
| 54 | } | ||
| 55 | |||
| 56 | bool& GetBoolUniform(u32 index) { | ||
| 57 | return shader_uniforms.b[index]; | ||
| 58 | } | ||
| 59 | |||
| 60 | Math::Vec4<u8>& GetIntUniform(u32 index) { | ||
| 61 | return shader_uniforms.i[index]; | ||
| 62 | } | ||
| 63 | |||
| 64 | Math::Vec4<float24>& GetDefaultAttribute(u32 index) { | ||
| 65 | return vs_default_attributes[index]; | ||
| 66 | } | ||
| 67 | |||
| 68 | const std::array<u32, 1024>& GetShaderBinary() { | ||
| 69 | return shader_memory; | ||
| 70 | } | ||
| 71 | |||
| 72 | const std::array<u32, 1024>& GetSwizzlePatterns() { | ||
| 73 | return swizzle_data; | ||
| 74 | } | ||
| 75 | |||
| 76 | struct VertexShaderState { | 29 | struct VertexShaderState { |
| 77 | u32* program_counter; | 30 | const u32* program_counter; |
| 78 | 31 | ||
| 79 | const float24* input_register_table[16]; | 32 | const float24* input_register_table[16]; |
| 80 | Math::Vec4<float24> output_registers[16]; | 33 | Math::Vec4<float24> output_registers[16]; |
| @@ -109,6 +62,9 @@ struct VertexShaderState { | |||
| 109 | }; | 62 | }; |
| 110 | 63 | ||
| 111 | static void ProcessShaderCode(VertexShaderState& state) { | 64 | static void ProcessShaderCode(VertexShaderState& state) { |
| 65 | const auto& uniforms = g_state.vs.uniforms; | ||
| 66 | const auto& swizzle_data = g_state.vs.swizzle_data; | ||
| 67 | const auto& program_code = g_state.vs.program_code; | ||
| 112 | 68 | ||
| 113 | // Placeholder for invalid inputs | 69 | // Placeholder for invalid inputs |
| 114 | static float24 dummy_vec4_float24[4]; | 70 | static float24 dummy_vec4_float24[4]; |
| @@ -116,14 +72,14 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 116 | while (true) { | 72 | while (true) { |
| 117 | if (!state.call_stack.empty()) { | 73 | if (!state.call_stack.empty()) { |
| 118 | auto& top = state.call_stack.top(); | 74 | auto& top = state.call_stack.top(); |
| 119 | if (state.program_counter - shader_memory.data() == top.final_address) { | 75 | if (state.program_counter - program_code.data() == top.final_address) { |
| 120 | state.address_registers[2] += top.loop_increment; | 76 | state.address_registers[2] += top.loop_increment; |
| 121 | 77 | ||
| 122 | if (top.repeat_counter-- == 0) { | 78 | if (top.repeat_counter-- == 0) { |
| 123 | state.program_counter = &shader_memory[top.return_address]; | 79 | state.program_counter = &program_code[top.return_address]; |
| 124 | state.call_stack.pop(); | 80 | state.call_stack.pop(); |
| 125 | } else { | 81 | } else { |
| 126 | state.program_counter = &shader_memory[top.loop_address]; | 82 | state.program_counter = &program_code[top.loop_address]; |
| 127 | } | 83 | } |
| 128 | 84 | ||
| 129 | // TODO: Is "trying again" accurate to hardware? | 85 | // TODO: Is "trying again" accurate to hardware? |
| @@ -135,12 +91,12 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 135 | const Instruction& instr = *(const Instruction*)state.program_counter; | 91 | const Instruction& instr = *(const Instruction*)state.program_counter; |
| 136 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | 92 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; |
| 137 | 93 | ||
| 138 | static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, | 94 | static auto call = [&program_code](VertexShaderState& state, u32 offset, u32 num_instructions, |
| 139 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 95 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 140 | state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 96 | state.program_counter = &program_code[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 141 | state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 97 | state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |
| 142 | }; | 98 | }; |
| 143 | u32 binary_offset = state.program_counter - shader_memory.data(); | 99 | u32 binary_offset = state.program_counter - program_code.data(); |
| 144 | 100 | ||
| 145 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset); | 101 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset); |
| 146 | 102 | ||
| @@ -153,7 +109,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 153 | return &state.temporary_registers[source_reg.GetIndex()].x; | 109 | return &state.temporary_registers[source_reg.GetIndex()].x; |
| 154 | 110 | ||
| 155 | case RegisterType::FloatUniform: | 111 | case RegisterType::FloatUniform: |
| 156 | return &shader_uniforms.f[source_reg.GetIndex()].x; | 112 | return &uniforms.f[source_reg.GetIndex()].x; |
| 157 | 113 | ||
| 158 | default: | 114 | default: |
| 159 | return dummy_vec4_float24; | 115 | return dummy_vec4_float24; |
| @@ -471,13 +427,13 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 471 | 427 | ||
| 472 | case OpCode::Id::JMPC: | 428 | case OpCode::Id::JMPC: |
| 473 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 429 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 474 | state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; | 430 | state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; |
| 475 | } | 431 | } |
| 476 | break; | 432 | break; |
| 477 | 433 | ||
| 478 | case OpCode::Id::JMPU: | 434 | case OpCode::Id::JMPU: |
| 479 | if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { | 435 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 480 | state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; | 436 | state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; |
| 481 | } | 437 | } |
| 482 | break; | 438 | break; |
| 483 | 439 | ||
| @@ -489,7 +445,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 489 | break; | 445 | break; |
| 490 | 446 | ||
| 491 | case OpCode::Id::CALLU: | 447 | case OpCode::Id::CALLU: |
| 492 | if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { | 448 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 493 | call(state, | 449 | call(state, |
| 494 | instr.flow_control.dest_offset, | 450 | instr.flow_control.dest_offset, |
| 495 | instr.flow_control.num_instructions, | 451 | instr.flow_control.num_instructions, |
| @@ -510,7 +466,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 510 | break; | 466 | break; |
| 511 | 467 | ||
| 512 | case OpCode::Id::IFU: | 468 | case OpCode::Id::IFU: |
| 513 | if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { | 469 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 514 | call(state, | 470 | call(state, |
| 515 | binary_offset + 1, | 471 | binary_offset + 1, |
| 516 | instr.flow_control.dest_offset - binary_offset - 1, | 472 | instr.flow_control.dest_offset - binary_offset - 1, |
| @@ -545,14 +501,14 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 545 | 501 | ||
| 546 | case OpCode::Id::LOOP: | 502 | case OpCode::Id::LOOP: |
| 547 | { | 503 | { |
| 548 | state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y; | 504 | state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; |
| 549 | 505 | ||
| 550 | call(state, | 506 | call(state, |
| 551 | binary_offset + 1, | 507 | binary_offset + 1, |
| 552 | instr.flow_control.dest_offset - binary_offset + 1, | 508 | instr.flow_control.dest_offset - binary_offset + 1, |
| 553 | instr.flow_control.dest_offset + 1, | 509 | instr.flow_control.dest_offset + 1, |
| 554 | shader_uniforms.i[instr.flow_control.int_uniform_id].x, | 510 | uniforms.i[instr.flow_control.int_uniform_id].x, |
| 555 | shader_uniforms.i[instr.flow_control.int_uniform_id].z); | 511 | uniforms.i[instr.flow_control.int_uniform_id].z); |
| 556 | break; | 512 | break; |
| 557 | } | 513 | } |
| 558 | 514 | ||
| @@ -578,15 +534,17 @@ static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | |||
| 578 | OutputVertex RunShader(const InputVertex& input, int num_attributes) { | 534 | OutputVertex RunShader(const InputVertex& input, int num_attributes) { |
| 579 | Common::Profiling::ScopeTimer timer(shader_category); | 535 | Common::Profiling::ScopeTimer timer(shader_category); |
| 580 | 536 | ||
| 537 | const auto& regs = g_state.regs; | ||
| 538 | const auto& vs = g_state.vs; | ||
| 581 | VertexShaderState state; | 539 | VertexShaderState state; |
| 582 | 540 | ||
| 583 | const u32* main = &shader_memory[registers.vs_main_offset]; | 541 | const u32* main = &vs.program_code[regs.vs_main_offset]; |
| 584 | state.program_counter = (u32*)main; | 542 | state.program_counter = (u32*)main; |
| 585 | state.debug.max_offset = 0; | 543 | state.debug.max_offset = 0; |
| 586 | state.debug.max_opdesc_id = 0; | 544 | state.debug.max_opdesc_id = 0; |
| 587 | 545 | ||
| 588 | // Setup input register table | 546 | // Setup input register table |
| 589 | const auto& attribute_register_map = registers.vs_input_register_map; | 547 | const auto& attribute_register_map = regs.vs_input_register_map; |
| 590 | float24 dummy_register; | 548 | float24 dummy_register; |
| 591 | boost::fill(state.input_register_table, &dummy_register); | 549 | boost::fill(state.input_register_table, &dummy_register); |
| 592 | 550 | ||
| @@ -611,16 +569,16 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { | |||
| 611 | state.conditional_code[1] = false; | 569 | state.conditional_code[1] = false; |
| 612 | 570 | ||
| 613 | ProcessShaderCode(state); | 571 | ProcessShaderCode(state); |
| 614 | DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), | 572 | DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(), |
| 615 | state.debug.max_opdesc_id, registers.vs_main_offset, | 573 | state.debug.max_opdesc_id, regs.vs_main_offset, |
| 616 | registers.vs_output_attributes); | 574 | regs.vs_output_attributes); |
| 617 | 575 | ||
| 618 | // Setup output data | 576 | // Setup output data |
| 619 | OutputVertex ret; | 577 | OutputVertex ret; |
| 620 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | 578 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to |
| 621 | // figure out what those circumstances are and enable the remaining outputs then. | 579 | // figure out what those circumstances are and enable the remaining outputs then. |
| 622 | for (int i = 0; i < 7; ++i) { | 580 | for (int i = 0; i < 7; ++i) { |
| 623 | const auto& output_register_map = registers.vs_output_attributes[i]; | 581 | const auto& output_register_map = regs.vs_output_attributes[i]; |
| 624 | 582 | ||
| 625 | u32 semantics[4] = { | 583 | u32 semantics[4] = { |
| 626 | output_register_map.map_x, output_register_map.map_y, | 584 | output_register_map.map_x, output_register_map.map_y, |