diff options
| -rw-r--r-- | src/video_core/shader/shader.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 25 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 32 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 6 |
6 files changed, 50 insertions, 32 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index e93a9d92a..161097610 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | |||
| 64 | 64 | ||
| 65 | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | 65 | OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { |
| 66 | auto& config = g_state.regs.vs; | 66 | auto& config = g_state.regs.vs; |
| 67 | auto& setup = g_state.vs; | ||
| 67 | 68 | ||
| 68 | MICROPROFILE_SCOPE(GPU_Shader); | 69 | MICROPROFILE_SCOPE(GPU_Shader); |
| 69 | 70 | ||
| @@ -81,11 +82,11 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, | |||
| 81 | 82 | ||
| 82 | #ifdef ARCHITECTURE_x86_64 | 83 | #ifdef ARCHITECTURE_x86_64 |
| 83 | if (VideoCore::g_shader_jit_enabled) | 84 | if (VideoCore::g_shader_jit_enabled) |
| 84 | jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); | 85 | jit_shader->Run(setup, state, config.main_offset); |
| 85 | else | 86 | else |
| 86 | RunInterpreter(state); | 87 | RunInterpreter(setup, state, config.main_offset); |
| 87 | #else | 88 | #else |
| 88 | RunInterpreter(state); | 89 | RunInterpreter(setup, state, config.main_offset); |
| 89 | #endif // ARCHITECTURE_x86_64 | 90 | #endif // ARCHITECTURE_x86_64 |
| 90 | 91 | ||
| 91 | // Setup output data | 92 | // Setup output data |
| @@ -156,7 +157,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ | |||
| 156 | state.conditional_code[0] = false; | 157 | state.conditional_code[0] = false; |
| 157 | state.conditional_code[1] = false; | 158 | state.conditional_code[1] = false; |
| 158 | 159 | ||
| 159 | RunInterpreter(state); | 160 | RunInterpreter(setup, state, config.main_offset); |
| 160 | return state.debug; | 161 | return state.debug; |
| 161 | } | 162 | } |
| 162 | 163 | ||
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 983e4a967..84898f21c 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -283,10 +283,10 @@ struct UnitState { | |||
| 283 | static size_t InputOffset(const SourceRegister& reg) { | 283 | static size_t InputOffset(const SourceRegister& reg) { |
| 284 | switch (reg.GetRegisterType()) { | 284 | switch (reg.GetRegisterType()) { |
| 285 | case RegisterType::Input: | 285 | case RegisterType::Input: |
| 286 | return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 286 | return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 287 | 287 | ||
| 288 | case RegisterType::Temporary: | 288 | case RegisterType::Temporary: |
| 289 | return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 289 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 290 | 290 | ||
| 291 | default: | 291 | default: |
| 292 | UNREACHABLE(); | 292 | UNREACHABLE(); |
| @@ -297,10 +297,10 @@ struct UnitState { | |||
| 297 | static size_t OutputOffset(const DestRegister& reg) { | 297 | static size_t OutputOffset(const DestRegister& reg) { |
| 298 | switch (reg.GetRegisterType()) { | 298 | switch (reg.GetRegisterType()) { |
| 299 | case RegisterType::Output: | 299 | case RegisterType::Output: |
| 300 | return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 300 | return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 301 | 301 | ||
| 302 | case RegisterType::Temporary: | 302 | case RegisterType::Temporary: |
| 303 | return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 303 | return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 304 | 304 | ||
| 305 | default: | 305 | default: |
| 306 | UNREACHABLE(); | 306 | UNREACHABLE(); |
| @@ -323,6 +323,23 @@ struct ShaderSetup { | |||
| 323 | std::array<Math::Vec4<u8>, 4> i; | 323 | std::array<Math::Vec4<u8>, 4> i; |
| 324 | } uniforms; | 324 | } uniforms; |
| 325 | 325 | ||
| 326 | static size_t UniformOffset(RegisterType type, unsigned index) { | ||
| 327 | switch (type) { | ||
| 328 | case RegisterType::FloatUniform: | ||
| 329 | return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); | ||
| 330 | |||
| 331 | case RegisterType::BoolUniform: | ||
| 332 | return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); | ||
| 333 | |||
| 334 | case RegisterType::IntUniform: | ||
| 335 | return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); | ||
| 336 | |||
| 337 | default: | ||
| 338 | UNREACHABLE(); | ||
| 339 | return 0; | ||
| 340 | } | ||
| 341 | } | ||
| 342 | |||
| 326 | std::array<u32, 1024> program_code; | 343 | std::array<u32, 1024> program_code; |
| 327 | std::array<u32, 1024> swizzle_data; | 344 | std::array<u32, 1024> swizzle_data; |
| 328 | 345 | ||
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 3a827d11f..714e8bfd5 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -41,11 +41,11 @@ struct CallStackElement { | |||
| 41 | }; | 41 | }; |
| 42 | 42 | ||
| 43 | template<bool Debug> | 43 | template<bool Debug> |
| 44 | void RunInterpreter(UnitState<Debug>& state) { | 44 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { |
| 45 | // TODO: Is there a maximal size for this? | 45 | // TODO: Is there a maximal size for this? |
| 46 | boost::container::static_vector<CallStackElement, 16> call_stack; | 46 | boost::container::static_vector<CallStackElement, 16> call_stack; |
| 47 | 47 | ||
| 48 | u32 program_counter = g_state.regs.vs.main_offset; | 48 | u32 program_counter = offset; |
| 49 | 49 | ||
| 50 | const auto& uniforms = g_state.vs.uniforms; | 50 | const auto& uniforms = g_state.vs.uniforms; |
| 51 | const auto& swizzle_data = g_state.vs.swizzle_data; | 51 | const auto& swizzle_data = g_state.vs.swizzle_data; |
| @@ -647,8 +647,8 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 647 | } | 647 | } |
| 648 | 648 | ||
| 649 | // Explicit instantiation | 649 | // Explicit instantiation |
| 650 | template void RunInterpreter(UnitState<false>& state); | 650 | template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset); |
| 651 | template void RunInterpreter(UnitState<true>& state); | 651 | template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset); |
| 652 | 652 | ||
| 653 | } // namespace | 653 | } // namespace |
| 654 | 654 | ||
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 6048cdf3a..bb3ce1c6e 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h | |||
| @@ -11,7 +11,7 @@ namespace Shader { | |||
| 11 | template <bool Debug> struct UnitState; | 11 | template <bool Debug> struct UnitState; |
| 12 | 12 | ||
| 13 | template<bool Debug> | 13 | template<bool Debug> |
| 14 | void RunInterpreter(UnitState<Debug>& state); | 14 | void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); |
| 15 | 15 | ||
| 16 | } // namespace | 16 | } // namespace |
| 17 | 17 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 99f6c51eb..43e7e6b4c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -102,7 +102,7 @@ const JitFunction instr_table[64] = { | |||
| 102 | // purposes, as documented below: | 102 | // purposes, as documented below: |
| 103 | 103 | ||
| 104 | /// Pointer to the uniform memory | 104 | /// Pointer to the uniform memory |
| 105 | static const X64Reg UNIFORMS = R9; | 105 | static const X64Reg SETUP = R9; |
| 106 | /// The two 32-bit VS address offset registers set by the MOVA instruction | 106 | /// The two 32-bit VS address offset registers set by the MOVA instruction |
| 107 | static const X64Reg ADDROFFS_REG_0 = R10; | 107 | static const X64Reg ADDROFFS_REG_0 = R10; |
| 108 | static const X64Reg ADDROFFS_REG_1 = R11; | 108 | static const X64Reg ADDROFFS_REG_1 = R11; |
| @@ -117,7 +117,7 @@ static const X64Reg COND0 = R13; | |||
| 117 | /// Result of the previous CMP instruction for the Y-component comparison | 117 | /// Result of the previous CMP instruction for the Y-component comparison |
| 118 | static const X64Reg COND1 = R14; | 118 | static const X64Reg COND1 = R14; |
| 119 | /// Pointer to the UnitState instance for the current VS unit | 119 | /// Pointer to the UnitState instance for the current VS unit |
| 120 | static const X64Reg REGISTERS = R15; | 120 | static const X64Reg STATE = R15; |
| 121 | /// SIMD scratch register | 121 | /// SIMD scratch register |
| 122 | static const X64Reg SCRATCH = XMM0; | 122 | static const X64Reg SCRATCH = XMM0; |
| 123 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register | 123 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register |
| @@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15; | |||
| 136 | // State registers that must not be modified by external functions calls | 136 | // State registers that must not be modified by external functions calls |
| 137 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | 137 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed |
| 138 | static const BitSet32 persistent_regs = { | 138 | static const BitSet32 persistent_regs = { |
| 139 | UNIFORMS, REGISTERS, // Pointers to register blocks | 139 | SETUP, STATE, // Pointers to register blocks |
| 140 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers | 140 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers |
| 141 | ONE+16, NEGBIT+16, // Constants | 141 | ONE+16, NEGBIT+16, // Constants |
| 142 | }; | 142 | }; |
| @@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe | |||
| 177 | size_t src_offset; | 177 | size_t src_offset; |
| 178 | 178 | ||
| 179 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { | 179 | if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { |
| 180 | src_ptr = UNIFORMS; | 180 | src_ptr = SETUP; |
| 181 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | 181 | src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); |
| 182 | } else { | 182 | } else { |
| 183 | src_ptr = REGISTERS; | 183 | src_ptr = STATE; |
| 184 | src_offset = UnitState<false>::InputOffset(src_reg); | 184 | src_offset = UnitState<false>::InputOffset(src_reg); |
| 185 | } | 185 | } |
| 186 | 186 | ||
| @@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 264 | // If all components are enabled, write the result to the destination register | 264 | // If all components are enabled, write the result to the destination register |
| 265 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 265 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| 266 | // Store dest back to memory | 266 | // Store dest back to memory |
| 267 | MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); | 267 | MOVAPS(MDisp(STATE, dest_offset_disp), src); |
| 268 | 268 | ||
| 269 | } else { | 269 | } else { |
| 270 | // Not all components are enabled, so mask the result when storing to the destination register... | 270 | // Not all components are enabled, so mask the result when storing to the destination register... |
| 271 | MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); | 271 | MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); |
| 272 | 272 | ||
| 273 | if (Common::GetCPUCaps().sse4_1) { | 273 | if (Common::GetCPUCaps().sse4_1) { |
| 274 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 274 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |
| @@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 287 | } | 287 | } |
| 288 | 288 | ||
| 289 | // Store dest back to memory | 289 | // Store dest back to memory |
| 290 | MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); | 290 | MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH); |
| 291 | } | 291 | } |
| 292 | } | 292 | } |
| 293 | 293 | ||
| @@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { | |||
| 336 | } | 336 | } |
| 337 | 337 | ||
| 338 | void JitShader::Compile_UniformCondition(Instruction instr) { | 338 | void JitShader::Compile_UniformCondition(Instruction instr) { |
| 339 | int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); | 339 | int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); |
| 340 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | 340 | CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | BitSet32 JitShader::PersistentCallerSavedRegs() { | 343 | BitSet32 JitShader::PersistentCallerSavedRegs() { |
| @@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
| 714 | 714 | ||
| 715 | looping = true; | 715 | looping = true; |
| 716 | 716 | ||
| 717 | int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); | 717 | int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); |
| 718 | MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); | 718 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); |
| 719 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); | 719 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); |
| 720 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); | 720 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); |
| 721 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start | 721 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start |
| @@ -826,8 +826,8 @@ void JitShader::Compile() { | |||
| 826 | // The stack pointer is 8 modulo 16 at the entry of a procedure | 826 | // The stack pointer is 8 modulo 16 at the entry of a procedure |
| 827 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 827 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| 828 | 828 | ||
| 829 | MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); | 829 | MOV(PTRBITS, R(SETUP), R(ABI_PARAM1)); |
| 830 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); | 830 | MOV(PTRBITS, R(STATE), R(ABI_PARAM2)); |
| 831 | 831 | ||
| 832 | // Zero address/loop registers | 832 | // Zero address/loop registers |
| 833 | XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); | 833 | XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); |
| @@ -845,7 +845,7 @@ void JitShader::Compile() { | |||
| 845 | MOVAPS(NEGBIT, MatR(RAX)); | 845 | MOVAPS(NEGBIT, MatR(RAX)); |
| 846 | 846 | ||
| 847 | // Jump to start of the shader program | 847 | // Jump to start of the shader program |
| 848 | JMPptr(R(ABI_PARAM2)); | 848 | JMPptr(R(ABI_PARAM3)); |
| 849 | 849 | ||
| 850 | // Compile entire program | 850 | // Compile entire program |
| 851 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | 851 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 30aa7ff30..5468459d4 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock { | |||
| 36 | public: | 36 | public: |
| 37 | JitShader(); | 37 | JitShader(); |
| 38 | 38 | ||
| 39 | void Run(void* registers, unsigned offset) const { | 39 | void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { |
| 40 | program(registers, code_ptr[offset]); | 40 | program(&setup, &state, code_ptr[offset]); |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | void Compile(); | 43 | void Compile(); |
| @@ -117,7 +117,7 @@ private: | |||
| 117 | /// Branches that need to be fixed up once the entire shader program is compiled | 117 | /// Branches that need to be fixed up once the entire shader program is compiled |
| 118 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | 118 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; |
| 119 | 119 | ||
| 120 | using CompiledShader = void(void* registers, const u8* start_addr); | 120 | using CompiledShader = void(const void* setup, void* state, const u8* start_addr); |
| 121 | CompiledShader* program = nullptr; | 121 | CompiledShader* program = nullptr; |
| 122 | }; | 122 | }; |
| 123 | 123 | ||