summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jannik Vogel, 2016-05-13 08:46:14 +0200
committer: Jannik Vogel, 2016-05-13 09:20:14 +0200
commit: 4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e (patch)
tree: 5c7873636fe7317c6c4489292c48a2a8f7f41595
parent: Merge pull request #1695 from Subv/tls_alloc (diff)
download: yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar.gz
yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.tar.xz
yuzu-4e01e9ffc54ddfc9a1a5b285b4434e2f3ac4854e.zip
Refactor access to state in shader-jit
-rw-r--r-- src/video_core/shader/shader.cpp 3
-rw-r--r-- src/video_core/shader/shader.h 25
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 32
-rw-r--r-- src/video_core/shader/shader_jit_x64.h 6
4 files changed, 42 insertions, 24 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index e93a9d92a..d36d3d78e 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
64 64
65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
66 auto& config = g_state.regs.vs; 66 auto& config = g_state.regs.vs;
67 auto& setup = g_state.vs;
67 68
68 MICROPROFILE_SCOPE(GPU_Shader); 69 MICROPROFILE_SCOPE(GPU_Shader);
69 70
@@ -81,7 +82,7 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
81 82
82#ifdef ARCHITECTURE_x86_64 83#ifdef ARCHITECTURE_x86_64
83 if (VideoCore::g_shader_jit_enabled) 84 if (VideoCore::g_shader_jit_enabled)
84 jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); 85 jit_shader->Run(setup, state, config.main_offset);
85 else 86 else
86 RunInterpreter(state); 87 RunInterpreter(state);
87#else 88#else
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 983e4a967..84898f21c 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -283,10 +283,10 @@ struct UnitState {
283 static size_t InputOffset(const SourceRegister& reg) { 283 static size_t InputOffset(const SourceRegister& reg) {
284 switch (reg.GetRegisterType()) { 284 switch (reg.GetRegisterType()) {
285 case RegisterType::Input: 285 case RegisterType::Input:
286 return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 286 return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
287 287
288 case RegisterType::Temporary: 288 case RegisterType::Temporary:
289 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 289 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
290 290
291 default: 291 default:
292 UNREACHABLE(); 292 UNREACHABLE();
@@ -297,10 +297,10 @@ struct UnitState {
297 static size_t OutputOffset(const DestRegister& reg) { 297 static size_t OutputOffset(const DestRegister& reg) {
298 switch (reg.GetRegisterType()) { 298 switch (reg.GetRegisterType()) {
299 case RegisterType::Output: 299 case RegisterType::Output:
300 return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 300 return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
301 301
302 case RegisterType::Temporary: 302 case RegisterType::Temporary:
303 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 303 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
304 304
305 default: 305 default:
306 UNREACHABLE(); 306 UNREACHABLE();
@@ -323,6 +323,23 @@ struct ShaderSetup {
323 std::array<Math::Vec4<u8>, 4> i; 323 std::array<Math::Vec4<u8>, 4> i;
324 } uniforms; 324 } uniforms;
325 325
// Returns the byte offset, measured from the start of a ShaderSetup object, of element
// `index` of the uniform array selected by `type`:
//   FloatUniform -> uniforms.f, stride sizeof(Math::Vec4<float24>)
//   BoolUniform  -> uniforms.b, stride sizeof(bool)
//   IntUniform   -> uniforms.i, stride sizeof(Math::Vec4<u8>)
// `index` is not range-checked here; the caller must pass a valid element index.
// Any other register type hits UNREACHABLE(); the trailing `return 0` only gives the
// function a value on that path.
// The JIT hunks in shader_jit_x64.cpp use the result as a displacement from the register
// that holds the ShaderSetup pointer.
326 static size_t UniformOffset(RegisterType type, unsigned index) {
327 switch (type) {
328 case RegisterType::FloatUniform:
329 return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
330
331 case RegisterType::BoolUniform:
332 return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
333
334 case RegisterType::IntUniform:
335 return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
336
337 default:
338 UNREACHABLE();
339 return 0;
340 }
341 }
342
326 std::array<u32, 1024> program_code; 343 std::array<u32, 1024> program_code;
327 std::array<u32, 1024> swizzle_data; 344 std::array<u32, 1024> swizzle_data;
328 345
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 99f6c51eb..43e7e6b4c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
102// purposes, as documented below: 102// purposes, as documented below:
103 103
// NOTE(review): in the new (right-hand) column this register is renamed from UNIFORMS to
// SETUP. Per the Compile() hunk further down it is now loaded from ABI_PARAM1, and
// JitShader::Run passes &setup (a ShaderSetup) as that first argument. The old column loaded
// it with ImmPtr(&g_state.vs.uniforms). "Pointer to the uniform memory" therefore only
// describes the old binding; uniforms are now addressed relative to the ShaderSetup object
// via ShaderSetup::UniformOffset().
104/// Pointer to the uniform memory 104/// Pointer to the uniform memory
105static const X64Reg UNIFORMS = R9; 105static const X64Reg SETUP = R9;
106/// The two 32-bit VS address offset registers set by the MOVA instruction 106/// The two 32-bit VS address offset registers set by the MOVA instruction
107static const X64Reg ADDROFFS_REG_0 = R10; 107static const X64Reg ADDROFFS_REG_0 = R10;
108static const X64Reg ADDROFFS_REG_1 = R11; 108static const X64Reg ADDROFFS_REG_1 = R11;
@@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
117/// Result of the previous CMP instruction for the Y-component comparison 117/// Result of the previous CMP instruction for the Y-component comparison
118static const X64Reg COND1 = R14; 118static const X64Reg COND1 = R14;
119/// Pointer to the UnitState instance for the current VS unit 119/// Pointer to the UnitState instance for the current VS unit
120static const X64Reg REGISTERS = R15; 120static const X64Reg STATE = R15;
121/// SIMD scratch register 121/// SIMD scratch register
122static const X64Reg SCRATCH = XMM0; 122static const X64Reg SCRATCH = XMM0;
123/// Loaded with the first swizzled source register, otherwise can be used as a scratch register 123/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
136// State registers that must not be modified by external functions calls 136// State registers that must not be modified by external functions calls
137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
// Set of registers listed as persistent across external calls: the two base pointers
// (UNIFORMS/REGISTERS in the old column, SETUP/STATE in the new one), the cached
// address-offset, loop-counter and condition registers, and the ONE / NEGBIT constants.
// Only the two pointer names differ between the columns; membership is otherwise unchanged.
// NOTE(review): ONE and NEGBIT are SIMD registers (NEGBIT = XMM15 above); the "+16"
// presumably shifts them into a separate bit range of the BitSet32 - confirm against the
// ABI/BitSet conventions used by the emitter.
138static const BitSet32 persistent_regs = { 138static const BitSet32 persistent_regs = {
139 UNIFORMS, REGISTERS, // Pointers to register blocks 139 SETUP, STATE, // Pointers to register blocks
140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers 140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
141 ONE+16, NEGBIT+16, // Constants 141 ONE+16, NEGBIT+16, // Constants
142}; 142};
@@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
177 size_t src_offset; 177 size_t src_offset;
178 178
179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { 179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
180 src_ptr = UNIFORMS; 180 src_ptr = SETUP;
181 src_offset = src_reg.GetIndex() * sizeof(float24) * 4; 181 src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
182 } else { 182 } else {
183 src_ptr = REGISTERS; 183 src_ptr = STATE;
184 src_offset = UnitState<false>::InputOffset(src_reg); 184 src_offset = UnitState<false>::InputOffset(src_reg);
185 } 185 }
186 186
@@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
264 // If all components are enabled, write the result to the destination register 264 // If all components are enabled, write the result to the destination register
265 if (swiz.dest_mask == NO_DEST_REG_MASK) { 265 if (swiz.dest_mask == NO_DEST_REG_MASK) {
266 // Store dest back to memory 266 // Store dest back to memory
267 MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); 267 MOVAPS(MDisp(STATE, dest_offset_disp), src);
268 268
269 } else { 269 } else {
270 // Not all components are enabled, so mask the result when storing to the destination register... 270 // Not all components are enabled, so mask the result when storing to the destination register...
271 MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); 271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
272 272
273 if (Common::GetCPUCaps().sse4_1) { 273 if (Common::GetCPUCaps().sse4_1) {
274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
287 } 287 }
288 288
289 // Store dest back to memory 289 // Store dest back to memory
290 MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); 290 MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
291 } 291 }
292} 292}
293 293
@@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
336} 336}
337 337
// Emits a CMP of the boolean uniform selected by instr.flow_control.bool_uniform_id
// against the immediate 0, with an operand width of sizeof(bool) * 8 bits.
// Old column: the displacement is relative to the uniforms struct and the base register is
// UNIFORMS. New column: the displacement comes from ShaderSetup::UniformOffset(), so it is
// relative to the whole ShaderSetup object, and the base register is SETUP.
// NOTE(review): UniformOffset() returns size_t but the result is stored in an `int`, an
// implicit narrowing conversion. Presumably harmless for offsets inside ShaderSetup, but
// confirm, or make the conversion explicit.
338void JitShader::Compile_UniformCondition(Instruction instr) { 338void JitShader::Compile_UniformCondition(Instruction instr) {
339 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); 339 int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
340 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 340 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
341} 341}
342 342
343BitSet32 JitShader::PersistentCallerSavedRegs() { 343BitSet32 JitShader::PersistentCallerSavedRegs() {
@@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
714 714
715 looping = true; 715 looping = true;
716 716
717 int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); 717 int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
718 MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); 718 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
720 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 720 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@@ -826,8 +826,8 @@ void JitShader::Compile() {
826 // The stack pointer is 8 modulo 16 at the entry of a procedure 826 // The stack pointer is 8 modulo 16 at the entry of a procedure
827 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 827 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
828 828
829 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); 829 MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
830 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); 830 MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
831 831
832 // Zero address/loop registers 832 // Zero address/loop registers
833 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); 833 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@@ -845,7 +845,7 @@ void JitShader::Compile() {
845 MOVAPS(NEGBIT, MatR(RAX)); 845 MOVAPS(NEGBIT, MatR(RAX));
846 846
847 // Jump to start of the shader program 847 // Jump to start of the shader program
848 JMPptr(R(ABI_PARAM2)); 848 JMPptr(R(ABI_PARAM3));
849 849
850 // Compile entire program 850 // Compile entire program
851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); 851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 30aa7ff30..5468459d4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
36public: 36public:
37 JitShader(); 37 JitShader();
38 38
// Invokes the compiled shader through the `program` function pointer.
// Old column: passes a raw pointer to the register block plus the entry address.
// New column: passes the address of the ShaderSetup, the address of the UnitState, and
// code_ptr[offset] as the address at which execution of the generated code starts.
// `offset` indexes code_ptr without a bounds check here.
// NOTE(review): `program` is declared with a nullptr initializer below and is not checked
// before the call - presumably Compile() must have run first; confirm.
39 void Run(void* registers, unsigned offset) const { 39 void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
40 program(registers, code_ptr[offset]); 40 program(&setup, &state, code_ptr[offset]);
41 } 41 }
42 42
43 void Compile(); 43 void Compile();
@@ -117,7 +117,7 @@ private:
117 /// Branches that need to be fixed up once the entire shader program is compiled 117 /// Branches that need to be fixed up once the entire shader program is compiled
118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; 118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
119 119
// Function type of the generated entry point, and the pointer through which Run() calls it.
// New column argument order: ShaderSetup pointer, UnitState pointer, start address inside
// the generated code. This matches the Compile() hunk, which moves ABI_PARAM1 into SETUP,
// ABI_PARAM2 into STATE, and jumps through ABI_PARAM3. The old column had only the register
// block pointer and the start address.
120 using CompiledShader = void(void* registers, const u8* start_addr); 120 using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
121 CompiledShader* program = nullptr; 121 CompiledShader* program = nullptr;
122}; 122};
123 123