diff options
| author | 2015-07-26 04:56:51 -0700 | |
|---|---|---|
| committer | 2015-07-26 04:56:51 -0700 | |
| commit | edc8b11e25faa8414f32089814b984e0573fc479 (patch) | |
| tree | 6a142face25e1049de5b3d1d5e6201a859a7e5f3 /src | |
| parent | Merge pull request #993 from yuriks/update-nihstro (diff) | |
| parent | Videocore: Simplify variables in vertex shader interpreter (diff) | |
| download | yuzu-edc8b11e25faa8414f32089814b984e0573fc479.tar.gz yuzu-edc8b11e25faa8414f32089814b984e0573fc479.tar.xz yuzu-edc8b11e25faa8414f32089814b984e0573fc479.zip | |
Merge pull request #994 from yuriks/vsh-interpreter-opt
Optimisations and cleanups in the shader interpreter
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 57 |
1 file changed, 27 insertions, 30 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index b77503806..e73a1d365 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
| @@ -2,8 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <stack> | 5 | #include <boost/container/static_vector.hpp> |
| 6 | |||
| 7 | #include <boost/range/algorithm.hpp> | 6 | #include <boost/range/algorithm.hpp> |
| 8 | 7 | ||
| 9 | #include <common/file_util.h> | 8 | #include <common/file_util.h> |
| @@ -27,7 +26,7 @@ namespace Pica { | |||
| 27 | namespace VertexShader { | 26 | namespace VertexShader { |
| 28 | 27 | ||
| 29 | struct VertexShaderState { | 28 | struct VertexShaderState { |
| 30 | const u32* program_counter; | 29 | u32 program_counter; |
| 31 | 30 | ||
| 32 | const float24* input_register_table[16]; | 31 | const float24* input_register_table[16]; |
| 33 | Math::Vec4<float24> output_registers[16]; | 32 | Math::Vec4<float24> output_registers[16]; |
| @@ -53,7 +52,7 @@ struct VertexShaderState { | |||
| 53 | }; | 52 | }; |
| 54 | 53 | ||
| 55 | // TODO: Is there a maximal size for this? | 54 | // TODO: Is there a maximal size for this? |
| 56 | std::stack<CallStackElement> call_stack; | 55 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| 57 | 56 | ||
| 58 | struct { | 57 | struct { |
| 59 | u32 max_offset; // maximum program counter ever reached | 58 | u32 max_offset; // maximum program counter ever reached |
| @@ -71,15 +70,15 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 71 | 70 | ||
| 72 | while (true) { | 71 | while (true) { |
| 73 | if (!state.call_stack.empty()) { | 72 | if (!state.call_stack.empty()) { |
| 74 | auto& top = state.call_stack.top(); | 73 | auto& top = state.call_stack.back(); |
| 75 | if (state.program_counter - program_code.data() == top.final_address) { | 74 | if (state.program_counter == top.final_address) { |
| 76 | state.address_registers[2] += top.loop_increment; | 75 | state.address_registers[2] += top.loop_increment; |
| 77 | 76 | ||
| 78 | if (top.repeat_counter-- == 0) { | 77 | if (top.repeat_counter-- == 0) { |
| 79 | state.program_counter = &program_code[top.return_address]; | 78 | state.program_counter = top.return_address; |
| 80 | state.call_stack.pop(); | 79 | state.call_stack.pop_back(); |
| 81 | } else { | 80 | } else { |
| 82 | state.program_counter = &program_code[top.loop_address]; | 81 | state.program_counter = top.loop_address; |
| 83 | } | 82 | } |
| 84 | 83 | ||
| 85 | // TODO: Is "trying again" accurate to hardware? | 84 | // TODO: Is "trying again" accurate to hardware? |
| @@ -88,17 +87,16 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 88 | } | 87 | } |
| 89 | 88 | ||
| 90 | bool exit_loop = false; | 89 | bool exit_loop = false; |
| 91 | const Instruction& instr = *(const Instruction*)state.program_counter; | 90 | const Instruction instr = { program_code[state.program_counter] }; |
| 92 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | 91 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |
| 93 | 92 | ||
| 94 | static auto call = [&program_code](VertexShaderState& state, u32 offset, u32 num_instructions, | 93 | static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, |
| 95 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | 94 | u32 return_offset, u8 repeat_count, u8 loop_increment) { |
| 96 | state.program_counter = &program_code[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | 95 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset |
| 97 | state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | 96 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); |
| 97 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | ||
| 98 | }; | 98 | }; |
| 99 | u32 binary_offset = state.program_counter - program_code.data(); | 99 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); |
| 100 | |||
| 101 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset); | ||
| 102 | 100 | ||
| 103 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 101 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 104 | switch (source_reg.GetRegisterType()) { | 102 | switch (source_reg.GetRegisterType()) { |
| @@ -442,13 +440,13 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 442 | 440 | ||
| 443 | case OpCode::Id::JMPC: | 441 | case OpCode::Id::JMPC: |
| 444 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 442 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 445 | state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; | 443 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 446 | } | 444 | } |
| 447 | break; | 445 | break; |
| 448 | 446 | ||
| 449 | case OpCode::Id::JMPU: | 447 | case OpCode::Id::JMPU: |
| 450 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 448 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 451 | state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; | 449 | state.program_counter = instr.flow_control.dest_offset - 1; |
| 452 | } | 450 | } |
| 453 | break; | 451 | break; |
| 454 | 452 | ||
| @@ -456,7 +454,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 456 | call(state, | 454 | call(state, |
| 457 | instr.flow_control.dest_offset, | 455 | instr.flow_control.dest_offset, |
| 458 | instr.flow_control.num_instructions, | 456 | instr.flow_control.num_instructions, |
| 459 | binary_offset + 1, 0, 0); | 457 | state.program_counter + 1, 0, 0); |
| 460 | break; | 458 | break; |
| 461 | 459 | ||
| 462 | case OpCode::Id::CALLU: | 460 | case OpCode::Id::CALLU: |
| @@ -464,7 +462,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 464 | call(state, | 462 | call(state, |
| 465 | instr.flow_control.dest_offset, | 463 | instr.flow_control.dest_offset, |
| 466 | instr.flow_control.num_instructions, | 464 | instr.flow_control.num_instructions, |
| 467 | binary_offset + 1, 0, 0); | 465 | state.program_counter + 1, 0, 0); |
| 468 | } | 466 | } |
| 469 | break; | 467 | break; |
| 470 | 468 | ||
| @@ -473,7 +471,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 473 | call(state, | 471 | call(state, |
| 474 | instr.flow_control.dest_offset, | 472 | instr.flow_control.dest_offset, |
| 475 | instr.flow_control.num_instructions, | 473 | instr.flow_control.num_instructions, |
| 476 | binary_offset + 1, 0, 0); | 474 | state.program_counter + 1, 0, 0); |
| 477 | } | 475 | } |
| 478 | break; | 476 | break; |
| 479 | 477 | ||
| @@ -483,8 +481,8 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 483 | case OpCode::Id::IFU: | 481 | case OpCode::Id::IFU: |
| 484 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | 482 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { |
| 485 | call(state, | 483 | call(state, |
| 486 | binary_offset + 1, | 484 | state.program_counter + 1, |
| 487 | instr.flow_control.dest_offset - binary_offset - 1, | 485 | instr.flow_control.dest_offset - state.program_counter - 1, |
| 488 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 486 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 489 | } else { | 487 | } else { |
| 490 | call(state, | 488 | call(state, |
| @@ -501,8 +499,8 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 501 | 499 | ||
| 502 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | 500 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |
| 503 | call(state, | 501 | call(state, |
| 504 | binary_offset + 1, | 502 | state.program_counter + 1, |
| 505 | instr.flow_control.dest_offset - binary_offset - 1, | 503 | instr.flow_control.dest_offset - state.program_counter - 1, |
| 506 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | 504 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); |
| 507 | } else { | 505 | } else { |
| 508 | call(state, | 506 | call(state, |
| @@ -519,8 +517,8 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 519 | state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; | 517 | state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; |
| 520 | 518 | ||
| 521 | call(state, | 519 | call(state, |
| 522 | binary_offset + 1, | 520 | state.program_counter + 1, |
| 523 | instr.flow_control.dest_offset - binary_offset + 1, | 521 | instr.flow_control.dest_offset - state.program_counter + 1, |
| 524 | instr.flow_control.dest_offset + 1, | 522 | instr.flow_control.dest_offset + 1, |
| 525 | uniforms.i[instr.flow_control.int_uniform_id].x, | 523 | uniforms.i[instr.flow_control.int_uniform_id].x, |
| 526 | uniforms.i[instr.flow_control.int_uniform_id].z); | 524 | uniforms.i[instr.flow_control.int_uniform_id].z); |
| @@ -551,8 +549,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs: | |||
| 551 | 549 | ||
| 552 | VertexShaderState state; | 550 | VertexShaderState state; |
| 553 | 551 | ||
| 554 | const u32* main = &setup.program_code[config.main_offset]; | 552 | state.program_counter = config.main_offset; |
| 555 | state.program_counter = (u32*)main; | ||
| 556 | state.debug.max_offset = 0; | 553 | state.debug.max_offset = 0; |
| 557 | state.debug.max_opdesc_id = 0; | 554 | state.debug.max_opdesc_id = 0; |
| 558 | 555 | ||