diff options
| author | 2016-03-17 19:45:09 -0400 | |
|---|---|---|
| committer | 2016-04-13 23:04:44 -0400 | |
| commit | 4632791a40f8ec5af7e166ff90fd4f8cd69b2745 (patch) | |
| tree | 92c664c183dece433e59713b49895076552f749a /src | |
| parent | shader_jit_x64: Fix strict memory aliasing issues. (diff) | |
| download | yuzu-4632791a40f8ec5af7e166ff90fd4f8cd69b2745.tar.gz yuzu-4632791a40f8ec5af7e166ff90fd4f8cd69b2745.tar.xz yuzu-4632791a40f8ec5af7e166ff90fd4f8cd69b2745.zip | |
shader_jit_x64: Rewrite flow control to support arbitrary CALL and JMP instructions.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 122 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 32 |
2 files changed, 119 insertions, 35 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index d74b58d84..c798992ec 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -138,6 +138,15 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b; | |||
| 138 | static const u8 NO_DEST_REG_MASK = 0xf; | 138 | static const u8 NO_DEST_REG_MASK = 0xf; |
| 139 | 139 | ||
| 140 | /** | 140 | /** |
| 141 | * Get the vertex shader instruction for a given offset in the current shader program | ||
| 142 | * @param offset Offset in the current shader program of the instruction | ||
| 143 | * @return Instruction at the specified offset | ||
| 144 | */ | ||
| 145 | static Instruction GetVertexShaderInstruction(size_t offset) { | ||
| 146 | return { g_state.vs.program_code[offset] }; | ||
| 147 | } | ||
| 148 | |||
| 149 | /** | ||
| 141 | * Loads and swizzles a source register into the specified XMM register. | 150 | * Loads and swizzles a source register into the specified XMM register. |
| 142 | * @param instr VS instruction, used for determining how to load the source register | 151 | * @param instr VS instruction, used for determining how to load the source register |
| 143 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) | 152 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) |
| @@ -564,10 +573,23 @@ void JitCompiler::Compile_END(Instruction instr) { | |||
| 564 | } | 573 | } |
| 565 | 574 | ||
| 566 | void JitCompiler::Compile_CALL(Instruction instr) { | 575 | void JitCompiler::Compile_CALL(Instruction instr) { |
| 567 | unsigned offset = instr.flow_control.dest_offset; | 576 | // Need to advance the return address past the following instructions; this is the number of bytes to skip |
| 568 | while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { | 577 | constexpr unsigned SKIP = 21; |
| 569 | Compile_NextInstr(&offset); | 578 | const uintptr_t start = reinterpret_cast<uintptr_t>(GetCodePtr()); |
| 570 | } | 579 | |
| 580 | // Push return address - not using CALL because we also want to push the offset of the return before jumping | ||
| 581 | MOV(64, R(RAX), ImmPtr(GetCodePtr() + SKIP)); | ||
| 582 | PUSH(RAX); | ||
| 583 | |||
| 584 | // Push offset of the return | ||
| 585 | PUSH(32, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); | ||
| 586 | |||
| 587 | // Jump | ||
| 588 | FixupBranch b = J(true); | ||
| 589 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | ||
| 590 | |||
| 591 | // Make sure that if the above code changes, SKIP gets updated | ||
| 592 | ASSERT(reinterpret_cast<uintptr_t>(GetCodePtr()) - start == SKIP); | ||
| 571 | } | 593 | } |
| 572 | 594 | ||
| 573 | void JitCompiler::Compile_CALLC(Instruction instr) { | 595 | void JitCompiler::Compile_CALLC(Instruction instr) { |
| @@ -645,8 +667,8 @@ void JitCompiler::Compile_MAD(Instruction instr) { | |||
| 645 | } | 667 | } |
| 646 | 668 | ||
| 647 | void JitCompiler::Compile_IF(Instruction instr) { | 669 | void JitCompiler::Compile_IF(Instruction instr) { |
| 648 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", | 670 | ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards if-statements (%d -> %d) not supported", |
| 649 | *offset_ptr, instr.flow_control.dest_offset.Value()); | 671 | last_program_counter, instr.flow_control.dest_offset.Value()); |
| 650 | 672 | ||
| 651 | // Evaluate the "IF" condition | 673 | // Evaluate the "IF" condition |
| 652 | if (instr.opcode.Value() == OpCode::Id::IFU) { | 674 | if (instr.opcode.Value() == OpCode::Id::IFU) { |
| @@ -677,8 +699,8 @@ void JitCompiler::Compile_IF(Instruction instr) { | |||
| 677 | } | 699 | } |
| 678 | 700 | ||
| 679 | void JitCompiler::Compile_LOOP(Instruction instr) { | 701 | void JitCompiler::Compile_LOOP(Instruction instr) { |
| 680 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", | 702 | ASSERT_MSG(instr.flow_control.dest_offset > last_program_counter, "Backwards loops (%d -> %d) not supported", |
| 681 | *offset_ptr, instr.flow_control.dest_offset.Value()); | 703 | last_program_counter, instr.flow_control.dest_offset.Value()); |
| 682 | ASSERT_MSG(!looping, "Nested loops not supported"); | 704 | ASSERT_MSG(!looping, "Nested loops not supported"); |
| 683 | 705 | ||
| 684 | looping = true; | 706 | looping = true; |
| @@ -706,9 +728,6 @@ void JitCompiler::Compile_LOOP(Instruction instr) { | |||
| 706 | } | 728 | } |
| 707 | 729 | ||
| 708 | void JitCompiler::Compile_JMP(Instruction instr) { | 730 | void JitCompiler::Compile_JMP(Instruction instr) { |
| 709 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported", | ||
| 710 | *offset_ptr, instr.flow_control.dest_offset.Value()); | ||
| 711 | |||
| 712 | if (instr.opcode.Value() == OpCode::Id::JMPC) | 731 | if (instr.opcode.Value() == OpCode::Id::JMPC) |
| 713 | Compile_EvaluateCondition(instr); | 732 | Compile_EvaluateCondition(instr); |
| 714 | else if (instr.opcode.Value() == OpCode::Id::JMPU) | 733 | else if (instr.opcode.Value() == OpCode::Id::JMPU) |
| @@ -718,31 +737,42 @@ void JitCompiler::Compile_JMP(Instruction instr) { | |||
| 718 | 737 | ||
| 719 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && | 738 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && |
| 720 | (instr.flow_control.num_instructions & 1); | 739 | (instr.flow_control.num_instructions & 1); |
| 721 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); | ||
| 722 | |||
| 723 | Compile_Block(instr.flow_control.dest_offset); | ||
| 724 | 740 | ||
| 725 | SetJumpTarget(b); | 741 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); |
| 742 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | ||
| 726 | } | 743 | } |
| 727 | 744 | ||
| 728 | void JitCompiler::Compile_Block(unsigned end) { | 745 | void JitCompiler::Compile_Block(unsigned end) { |
| 729 | // Save current offset pointer | 746 | while (program_counter < end) { |
| 730 | unsigned* prev_offset_ptr = offset_ptr; | 747 | Compile_NextInstr(); |
| 731 | unsigned offset = *prev_offset_ptr; | 748 | } |
| 749 | } | ||
| 732 | 750 | ||
| 733 | while (offset < end) | 751 | void JitCompiler::Compile_Return() { |
| 734 | Compile_NextInstr(&offset); | 752 | // Peek return offset on the stack and check if we're at that offset |
| 753 | MOV(64, R(RAX), MDisp(RSP, 0)); | ||
| 754 | CMP(32, R(RAX), Imm32(program_counter)); | ||
| 735 | 755 | ||
| 736 | // Restore current offset pointer | 756 | // If so, jump back to the return address pushed by CALL |
| 737 | offset_ptr = prev_offset_ptr; | 757 | FixupBranch b = J_CC(CC_NZ, true); |
| 738 | *offset_ptr = offset; | 758 | ADD(64, R(RSP), Imm32(8)); // Ignore return offset that's on the stack |
| 759 | POP(RAX); // Pop off return address | ||
| 760 | JMPptr(R(RAX)); | ||
| 761 | SetJumpTarget(b); | ||
| 739 | } | 762 | } |
| 740 | 763 | ||
| 741 | void JitCompiler::Compile_NextInstr(unsigned* offset) { | 764 | void JitCompiler::Compile_NextInstr() { |
| 742 | offset_ptr = offset; | 765 | last_program_counter = program_counter; |
| 766 | |||
| 767 | auto search = return_offsets.find(program_counter); | ||
| 768 | if (search != return_offsets.end()) { | ||
| 769 | Compile_Return(); | ||
| 770 | } | ||
| 771 | |||
| 772 | ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); | ||
| 773 | code_ptr[program_counter] = GetCodePtr(); | ||
| 743 | 774 | ||
| 744 | Instruction instr; | 775 | Instruction instr = GetVertexShaderInstruction(program_counter++); |
| 745 | std::memcpy(&instr, &g_state.vs.program_code[(*offset_ptr)++], sizeof(Instruction)); | ||
| 746 | 776 | ||
| 747 | OpCode::Id opcode = instr.opcode.Value(); | 777 | OpCode::Id opcode = instr.opcode.Value(); |
| 748 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; | 778 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; |
| @@ -757,9 +787,24 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) { | |||
| 757 | } | 787 | } |
| 758 | } | 788 | } |
| 759 | 789 | ||
| 790 | void JitCompiler::FindReturnOffsets() { | ||
| 791 | return_offsets.clear(); | ||
| 792 | |||
| 793 | for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { | ||
| 794 | Instruction instr = GetVertexShaderInstruction(offset); | ||
| 795 | |||
| 796 | switch (instr.opcode.Value()) { | ||
| 797 | case OpCode::Id::CALL: | ||
| 798 | case OpCode::Id::CALLC: | ||
| 799 | case OpCode::Id::CALLU: | ||
| 800 | return_offsets.insert(instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 801 | break; | ||
| 802 | } | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 760 | CompiledShader* JitCompiler::Compile() { | 806 | CompiledShader* JitCompiler::Compile() { |
| 761 | const u8* start = GetCodePtr(); | 807 | const u8* start = GetCodePtr(); |
| 762 | unsigned offset = g_state.regs.vs.main_offset; | ||
| 763 | 808 | ||
| 764 | // The stack pointer is 8 modulo 16 at the entry of a procedure | 809 | // The stack pointer is 8 modulo 16 at the entry of a procedure |
| 765 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 810 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| @@ -782,10 +827,27 @@ CompiledShader* JitCompiler::Compile() { | |||
| 782 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); | 827 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); |
| 783 | MOVAPS(NEGBIT, MatR(RAX)); | 828 | MOVAPS(NEGBIT, MatR(RAX)); |
| 784 | 829 | ||
| 830 | // Find all `CALL` instructions and identify return locations | ||
| 831 | FindReturnOffsets(); | ||
| 832 | |||
| 833 | // Reset flow control state | ||
| 834 | last_program_counter = 0; | ||
| 835 | program_counter = 0; | ||
| 785 | looping = false; | 836 | looping = false; |
| 837 | code_ptr.fill(nullptr); | ||
| 838 | fixup_branches.clear(); | ||
| 839 | |||
| 840 | // Jump to start of the shader program | ||
| 841 | if (g_state.regs.vs.main_offset != 0) { | ||
| 842 | fixup_branches.push_back({ J(true), g_state.regs.vs.main_offset }); | ||
| 843 | } | ||
| 844 | |||
| 845 | // Compile entire program | ||
| 846 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | ||
| 786 | 847 | ||
| 787 | while (offset < g_state.vs.program_code.size()) { | 848 | // Set the target for any incomplete branches now that the entire shader program has been emitted |
| 788 | Compile_NextInstr(&offset); | 849 | for (const auto& branch : fixup_branches) { |
| 850 | SetJumpTarget(branch.first, code_ptr[branch.second]); | ||
| 789 | } | 851 | } |
| 790 | 852 | ||
| 791 | return (CompiledShader*)start; | 853 | return (CompiledShader*)start; |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5357c964b..d6f03892d 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -4,6 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <set> | ||
| 8 | #include <utility> | ||
| 9 | |||
| 7 | #include <nihstro/shader_bytecode.h> | 10 | #include <nihstro/shader_bytecode.h> |
| 8 | 11 | ||
| 9 | #include "common/x64/emitter.h" | 12 | #include "common/x64/emitter.h" |
| @@ -66,8 +69,9 @@ public: | |||
| 66 | void Compile_MAD(Instruction instr); | 69 | void Compile_MAD(Instruction instr); |
| 67 | 70 | ||
| 68 | private: | 71 | private: |
| 72 | |||
| 69 | void Compile_Block(unsigned end); | 73 | void Compile_Block(unsigned end); |
| 70 | void Compile_NextInstr(unsigned* offset); | 74 | void Compile_NextInstr(); |
| 71 | 75 | ||
| 72 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); | 76 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); |
| 73 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); | 77 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); |
| @@ -81,13 +85,31 @@ private: | |||
| 81 | void Compile_EvaluateCondition(Instruction instr); | 85 | void Compile_EvaluateCondition(Instruction instr); |
| 82 | void Compile_UniformCondition(Instruction instr); | 86 | void Compile_UniformCondition(Instruction instr); |
| 83 | 87 | ||
| 88 | /** | ||
| 89 | * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction. | ||
| 90 | */ | ||
| 91 | void Compile_Return(); | ||
| 92 | |||
| 84 | BitSet32 PersistentCallerSavedRegs(); | 93 | BitSet32 PersistentCallerSavedRegs(); |
| 85 | 94 | ||
| 86 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. | 95 | /** |
| 87 | unsigned* offset_ptr = nullptr; | 96 | * Analyzes the entire shader program for `CALL` instructions before emitting any code, |
| 97 | * identifying the locations where a return needs to be inserted. | ||
| 98 | */ | ||
| 99 | void FindReturnOffsets(); | ||
| 100 | |||
| 101 | /// Mapping of Pica VS instructions to pointers in the emitted code | ||
| 102 | std::array<const u8*, 1024> code_ptr; | ||
| 103 | |||
| 104 | /// Offsets in code where a return needs to be inserted | ||
| 105 | std::set<unsigned> return_offsets; | ||
| 106 | |||
| 107 | unsigned last_program_counter; ///< Offset of the most recent instruction decoded | ||
| 108 | unsigned program_counter; ///< Offset of the next instruction to decode | ||
| 109 | bool looping = false; ///< True if compiling a loop, used to check for nested loops | ||
| 88 | 110 | ||
| 89 | /// Set to true if currently in a loop, used to check for the existence of nested loops | 111 | /// Branches that need to be fixed up once the entire shader program is compiled |
| 90 | bool looping = false; | 112 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; |
| 91 | }; | 113 | }; |
| 92 | 114 | ||
| 93 | } // Shader | 115 | } // Shader |