diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 36 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 21 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 2 |
5 files changed, 43 insertions, 40 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 06c1fe653..6a27a8015 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -67,29 +67,29 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 67 | // Setup input register table | 67 | // Setup input register table |
| 68 | const auto& attribute_register_map = config.input_register_map; | 68 | const auto& attribute_register_map = config.input_register_map; |
| 69 | 69 | ||
| 70 | if (num_attributes > 0) state.input_registers[attribute_register_map.attribute0_register] = input.attr[0]; | 70 | if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; |
| 71 | if (num_attributes > 1) state.input_registers[attribute_register_map.attribute1_register] = input.attr[1]; | 71 | if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; |
| 72 | if (num_attributes > 2) state.input_registers[attribute_register_map.attribute2_register] = input.attr[2]; | 72 | if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; |
| 73 | if (num_attributes > 3) state.input_registers[attribute_register_map.attribute3_register] = input.attr[3]; | 73 | if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3]; |
| 74 | if (num_attributes > 4) state.input_registers[attribute_register_map.attribute4_register] = input.attr[4]; | 74 | if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4]; |
| 75 | if (num_attributes > 5) state.input_registers[attribute_register_map.attribute5_register] = input.attr[5]; | 75 | if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5]; |
| 76 | if (num_attributes > 6) state.input_registers[attribute_register_map.attribute6_register] = input.attr[6]; | 76 | if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6]; |
| 77 | if (num_attributes > 7) state.input_registers[attribute_register_map.attribute7_register] = input.attr[7]; | 77 | if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7]; |
| 78 | if (num_attributes > 8) state.input_registers[attribute_register_map.attribute8_register] = input.attr[8]; | 78 | if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8]; |
| 79 | if (num_attributes > 9) state.input_registers[attribute_register_map.attribute9_register] = input.attr[9]; | 79 | if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9]; |
| 80 | if (num_attributes > 10) state.input_registers[attribute_register_map.attribute10_register] = input.attr[10]; | 80 | if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10]; |
| 81 | if (num_attributes > 11) state.input_registers[attribute_register_map.attribute11_register] = input.attr[11]; | 81 | if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11]; |
| 82 | if (num_attributes > 12) state.input_registers[attribute_register_map.attribute12_register] = input.attr[12]; | 82 | if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12]; |
| 83 | if (num_attributes > 13) state.input_registers[attribute_register_map.attribute13_register] = input.attr[13]; | 83 | if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13]; |
| 84 | if (num_attributes > 14) state.input_registers[attribute_register_map.attribute14_register] = input.attr[14]; | 84 | if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14]; |
| 85 | if (num_attributes > 15) state.input_registers[attribute_register_map.attribute15_register] = input.attr[15]; | 85 | if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15]; |
| 86 | 86 | ||
| 87 | state.conditional_code[0] = false; | 87 | state.conditional_code[0] = false; |
| 88 | state.conditional_code[1] = false; | 88 | state.conditional_code[1] = false; |
| 89 | 89 | ||
| 90 | #ifdef ARCHITECTURE_x86_64 | 90 | #ifdef ARCHITECTURE_x86_64 |
| 91 | if (VideoCore::g_shader_jit_enabled) | 91 | if (VideoCore::g_shader_jit_enabled) |
| 92 | jit_shader(&state); | 92 | jit_shader(&state.registers); |
| 93 | else | 93 | else |
| 94 | RunInterpreter(state); | 94 | RunInterpreter(state); |
| 95 | #else | 95 | #else |
| @@ -117,7 +117,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | |||
| 117 | for (int comp = 0; comp < 4; ++comp) { | 117 | for (int comp = 0; comp < 4; ++comp) { |
| 118 | float24* out = ((float24*)&ret) + semantics[comp]; | 118 | float24* out = ((float24*)&ret) + semantics[comp]; |
| 119 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | 119 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |
| 120 | *out = state.output_registers[i][comp]; | 120 | *out = state.registers.output[i][comp]; |
| 121 | } else { | 121 | } else { |
| 122 | // Zero output so that attributes which aren't output won't have denormals in them, | 122 | // Zero output so that attributes which aren't output won't have denormals in them, |
| 123 | // which would slow us down later. | 123 | // which would slow us down later. |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 5825e9983..2007a2844 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -79,11 +79,14 @@ static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has inva | |||
| 79 | * here will make it easier for us to parallelize the shader processing later. | 79 | * here will make it easier for us to parallelize the shader processing later. |
| 80 | */ | 80 | */ |
| 81 | struct UnitState { | 81 | struct UnitState { |
| 82 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore | 82 | struct Registers { |
| 83 | // required to be 16-byte aligned. | 83 | // The registers are accessed by the shader JIT using SSE instructions, and are therefore |
| 84 | Math::Vec4<float24> MEMORY_ALIGNED16(input_registers[16]); | 84 | // required to be 16-byte aligned. |
| 85 | Math::Vec4<float24> MEMORY_ALIGNED16(output_registers[16]); | 85 | Math::Vec4<float24> MEMORY_ALIGNED16(input[16]); |
| 86 | Math::Vec4<float24> MEMORY_ALIGNED16(temporary_registers[16]); | 86 | Math::Vec4<float24> MEMORY_ALIGNED16(output[16]); |
| 87 | Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]); | ||
| 88 | } registers; | ||
| 89 | static_assert(std::is_pod<Registers>::value, "Structure is not POD"); | ||
| 87 | 90 | ||
| 88 | u32 program_counter; | 91 | u32 program_counter; |
| 89 | bool conditional_code[2]; | 92 | bool conditional_code[2]; |
| @@ -116,10 +119,10 @@ struct UnitState { | |||
| 116 | static int InputOffset(const SourceRegister& reg) { | 119 | static int InputOffset(const SourceRegister& reg) { |
| 117 | switch (reg.GetRegisterType()) { | 120 | switch (reg.GetRegisterType()) { |
| 118 | case RegisterType::Input: | 121 | case RegisterType::Input: |
| 119 | return (int)offsetof(UnitState, input_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 122 | return (int)offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 120 | 123 | ||
| 121 | case RegisterType::Temporary: | 124 | case RegisterType::Temporary: |
| 122 | return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 125 | return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 123 | 126 | ||
| 124 | default: | 127 | default: |
| 125 | UNREACHABLE(); | 128 | UNREACHABLE(); |
| @@ -130,10 +133,10 @@ struct UnitState { | |||
| 130 | static int OutputOffset(const DestRegister& reg) { | 133 | static int OutputOffset(const DestRegister& reg) { |
| 131 | switch (reg.GetRegisterType()) { | 134 | switch (reg.GetRegisterType()) { |
| 132 | case RegisterType::Output: | 135 | case RegisterType::Output: |
| 133 | return (int)offsetof(UnitState, output_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 136 | return (int)offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 134 | 137 | ||
| 135 | case RegisterType::Temporary: | 138 | case RegisterType::Temporary: |
| 136 | return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); | 139 | return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
| 137 | 140 | ||
| 138 | default: | 141 | default: |
| 139 | UNREACHABLE(); | 142 | UNREACHABLE(); |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index eb48e7053..c8489f920 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -62,10 +62,10 @@ void RunInterpreter(UnitState& state) { | |||
| 62 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | 62 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |
| 63 | switch (source_reg.GetRegisterType()) { | 63 | switch (source_reg.GetRegisterType()) { |
| 64 | case RegisterType::Input: | 64 | case RegisterType::Input: |
| 65 | return &state.input_registers[source_reg.GetIndex()].x; | 65 | return &state.registers.input[source_reg.GetIndex()].x; |
| 66 | 66 | ||
| 67 | case RegisterType::Temporary: | 67 | case RegisterType::Temporary: |
| 68 | return &state.temporary_registers[source_reg.GetIndex()].x; | 68 | return &state.registers.temporary[source_reg.GetIndex()].x; |
| 69 | 69 | ||
| 70 | case RegisterType::FloatUniform: | 70 | case RegisterType::FloatUniform: |
| 71 | return &uniforms.f[source_reg.GetIndex()].x; | 71 | return &uniforms.f[source_reg.GetIndex()].x; |
| @@ -114,8 +114,8 @@ void RunInterpreter(UnitState& state) { | |||
| 114 | src2[3] = src2[3] * float24::FromFloat32(-1); | 114 | src2[3] = src2[3] * float24::FromFloat32(-1); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0] | 117 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] |
| 118 | : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] | 118 | : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] |
| 119 | : dummy_vec4_float24; | 119 | : dummy_vec4_float24; |
| 120 | 120 | ||
| 121 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | 121 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); |
| @@ -355,8 +355,8 @@ void RunInterpreter(UnitState& state) { | |||
| 355 | src3[3] = src3[3] * float24::FromFloat32(-1); | 355 | src3[3] = src3[3] * float24::FromFloat32(-1); |
| 356 | } | 356 | } |
| 357 | 357 | ||
| 358 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0] | 358 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] |
| 359 | : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] | 359 | : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |
| 360 | : dummy_vec4_float24; | 360 | : dummy_vec4_float24; |
| 361 | 361 | ||
| 362 | for (int i = 0; i < 4; ++i) { | 362 | for (int i = 0; i < 4; ++i) { |
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index a7be433df..ce47774d5 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -106,7 +106,7 @@ static const X64Reg COND0 = R13; | |||
| 106 | /// Result of the previous CMP instruction for the Y-component comparison | 106 | /// Result of the previous CMP instruction for the Y-component comparison |
| 107 | static const X64Reg COND1 = R14; | 107 | static const X64Reg COND1 = R14; |
| 108 | /// Pointer to the UnitState instance for the current VS unit | 108 | /// Pointer to the UnitState instance for the current VS unit |
| 109 | static const X64Reg STATE = R15; | 109 | static const X64Reg REGISTERS = R15; |
| 110 | /// SIMD scratch register | 110 | /// SIMD scratch register |
| 111 | static const X64Reg SCRATCH = XMM0; | 111 | static const X64Reg SCRATCH = XMM0; |
| 112 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register | 112 | /// Loaded with the first swizzled source register, otherwise can be used as a scratch register |
| @@ -140,7 +140,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | |||
| 140 | src_ptr = UNIFORMS; | 140 | src_ptr = UNIFORMS; |
| 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | 141 | src_offset = src_reg.GetIndex() * sizeof(float24) * 4; |
| 142 | } else { | 142 | } else { |
| 143 | src_ptr = STATE; | 143 | src_ptr = REGISTERS; |
| 144 | src_offset = UnitState::InputOffset(src_reg); | 144 | src_offset = UnitState::InputOffset(src_reg); |
| 145 | } | 145 | } |
| 146 | 146 | ||
| @@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 217 | // If all components are enabled, write the result to the destination register | 217 | // If all components are enabled, write the result to the destination register |
| 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { | 218 | if (swiz.dest_mask == NO_DEST_REG_MASK) { |
| 219 | // Store dest back to memory | 219 | // Store dest back to memory |
| 220 | MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), src); | 220 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); |
| 221 | 221 | ||
| 222 | } else { | 222 | } else { |
| 223 | // Not all components are enabled, so mask the result when storing to the destination register... | 223 | // Not all components are enabled, so mask the result when storing to the destination register... |
| 224 | MOVAPS(SCRATCH, MDisp(STATE, UnitState::OutputOffset(dest))); | 224 | MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); |
| 225 | 225 | ||
| 226 | if (Common::GetCPUCaps().sse4_1) { | 226 | if (Common::GetCPUCaps().sse4_1) { |
| 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | 227 | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |
| @@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | // Store dest back to memory | 242 | // Store dest back to memory |
| 243 | MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), SCRATCH); | 243 | MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); |
| 244 | } | 244 | } |
| 245 | } | 245 | } |
| 246 | 246 | ||
| @@ -635,7 +635,7 @@ CompiledShader* JitCompiler::Compile() { | |||
| 635 | 635 | ||
| 636 | ABI_PushAllCalleeSavedRegsAndAdjustStack(); | 636 | ABI_PushAllCalleeSavedRegsAndAdjustStack(); |
| 637 | 637 | ||
| 638 | MOV(PTRBITS, R(STATE), R(ABI_PARAM1)); | 638 | MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); |
| 639 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); | 639 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); |
| 640 | 640 | ||
| 641 | // Zero address/loop registers | 641 | // Zero address/loop registers |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 719a24210..b88f2a0d2 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -20,7 +20,7 @@ namespace Pica { | |||
| 20 | 20 | ||
| 21 | namespace Shader { | 21 | namespace Shader { |
| 22 | 22 | ||
| 23 | using CompiledShader = void(void* state); | 23 | using CompiledShader = void(void* registers); |
| 24 | 24 | ||
| 25 | /** | 25 | /** |
| 26 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 | 26 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 |