summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/shader/shader.cpp36
-rw-r--r--src/video_core/shader/shader.h21
-rw-r--r--src/video_core/shader/shader_interpreter.cpp12
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp12
-rw-r--r--src/video_core/shader/shader_jit_x64.h2
5 files changed, 43 insertions, 40 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 06c1fe653..6a27a8015 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -67,29 +67,29 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
67 // Setup input register table 67 // Setup input register table
68 const auto& attribute_register_map = config.input_register_map; 68 const auto& attribute_register_map = config.input_register_map;
69 69
70 if (num_attributes > 0) state.input_registers[attribute_register_map.attribute0_register] = input.attr[0]; 70 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
71 if (num_attributes > 1) state.input_registers[attribute_register_map.attribute1_register] = input.attr[1]; 71 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
72 if (num_attributes > 2) state.input_registers[attribute_register_map.attribute2_register] = input.attr[2]; 72 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
73 if (num_attributes > 3) state.input_registers[attribute_register_map.attribute3_register] = input.attr[3]; 73 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
74 if (num_attributes > 4) state.input_registers[attribute_register_map.attribute4_register] = input.attr[4]; 74 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
75 if (num_attributes > 5) state.input_registers[attribute_register_map.attribute5_register] = input.attr[5]; 75 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
76 if (num_attributes > 6) state.input_registers[attribute_register_map.attribute6_register] = input.attr[6]; 76 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
77 if (num_attributes > 7) state.input_registers[attribute_register_map.attribute7_register] = input.attr[7]; 77 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
78 if (num_attributes > 8) state.input_registers[attribute_register_map.attribute8_register] = input.attr[8]; 78 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
79 if (num_attributes > 9) state.input_registers[attribute_register_map.attribute9_register] = input.attr[9]; 79 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
80 if (num_attributes > 10) state.input_registers[attribute_register_map.attribute10_register] = input.attr[10]; 80 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
81 if (num_attributes > 11) state.input_registers[attribute_register_map.attribute11_register] = input.attr[11]; 81 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
82 if (num_attributes > 12) state.input_registers[attribute_register_map.attribute12_register] = input.attr[12]; 82 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
83 if (num_attributes > 13) state.input_registers[attribute_register_map.attribute13_register] = input.attr[13]; 83 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
84 if (num_attributes > 14) state.input_registers[attribute_register_map.attribute14_register] = input.attr[14]; 84 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
85 if (num_attributes > 15) state.input_registers[attribute_register_map.attribute15_register] = input.attr[15]; 85 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
86 86
87 state.conditional_code[0] = false; 87 state.conditional_code[0] = false;
88 state.conditional_code[1] = false; 88 state.conditional_code[1] = false;
89 89
90#ifdef ARCHITECTURE_x86_64 90#ifdef ARCHITECTURE_x86_64
91 if (VideoCore::g_shader_jit_enabled) 91 if (VideoCore::g_shader_jit_enabled)
92 jit_shader(&state); 92 jit_shader(&state.registers);
93 else 93 else
94 RunInterpreter(state); 94 RunInterpreter(state);
95#else 95#else
@@ -117,7 +117,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
117 for (int comp = 0; comp < 4; ++comp) { 117 for (int comp = 0; comp < 4; ++comp) {
118 float24* out = ((float24*)&ret) + semantics[comp]; 118 float24* out = ((float24*)&ret) + semantics[comp];
119 if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { 119 if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
120 *out = state.output_registers[i][comp]; 120 *out = state.registers.output[i][comp];
121 } else { 121 } else {
122 // Zero output so that attributes which aren't output won't have denormals in them, 122 // Zero output so that attributes which aren't output won't have denormals in them,
123 // which would slow us down later. 123 // which would slow us down later.
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 5825e9983..2007a2844 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -79,11 +79,14 @@ static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has inva
79 * here will make it easier for us to parallelize the shader processing later. 79 * here will make it easier for us to parallelize the shader processing later.
80 */ 80 */
81struct UnitState { 81struct UnitState {
82 // The registers are accessed by the shader JIT using SSE instructions, and are therefore 82 struct Registers {
83 // required to be 16-byte aligned. 83 // The registers are accessed by the shader JIT using SSE instructions, and are therefore
84 Math::Vec4<float24> MEMORY_ALIGNED16(input_registers[16]); 84 // required to be 16-byte aligned.
85 Math::Vec4<float24> MEMORY_ALIGNED16(output_registers[16]); 85 Math::Vec4<float24> MEMORY_ALIGNED16(input[16]);
86 Math::Vec4<float24> MEMORY_ALIGNED16(temporary_registers[16]); 86 Math::Vec4<float24> MEMORY_ALIGNED16(output[16]);
87 Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]);
88 } registers;
89 static_assert(std::is_pod<Registers>::value, "Structure is not POD");
87 90
88 u32 program_counter; 91 u32 program_counter;
89 bool conditional_code[2]; 92 bool conditional_code[2];
@@ -116,10 +119,10 @@ struct UnitState {
116 static int InputOffset(const SourceRegister& reg) { 119 static int InputOffset(const SourceRegister& reg) {
117 switch (reg.GetRegisterType()) { 120 switch (reg.GetRegisterType()) {
118 case RegisterType::Input: 121 case RegisterType::Input:
119 return (int)offsetof(UnitState, input_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 122 return (int)offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
120 123
121 case RegisterType::Temporary: 124 case RegisterType::Temporary:
122 return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 125 return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
123 126
124 default: 127 default:
125 UNREACHABLE(); 128 UNREACHABLE();
@@ -130,10 +133,10 @@ struct UnitState {
130 static int OutputOffset(const DestRegister& reg) { 133 static int OutputOffset(const DestRegister& reg) {
131 switch (reg.GetRegisterType()) { 134 switch (reg.GetRegisterType()) {
132 case RegisterType::Output: 135 case RegisterType::Output:
133 return (int)offsetof(UnitState, output_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 136 return (int)offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
134 137
135 case RegisterType::Temporary: 138 case RegisterType::Temporary:
136 return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 139 return (int)offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
137 140
138 default: 141 default:
139 UNREACHABLE(); 142 UNREACHABLE();
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index eb48e7053..c8489f920 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -62,10 +62,10 @@ void RunInterpreter(UnitState& state) {
62 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { 62 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
63 switch (source_reg.GetRegisterType()) { 63 switch (source_reg.GetRegisterType()) {
64 case RegisterType::Input: 64 case RegisterType::Input:
65 return &state.input_registers[source_reg.GetIndex()].x; 65 return &state.registers.input[source_reg.GetIndex()].x;
66 66
67 case RegisterType::Temporary: 67 case RegisterType::Temporary:
68 return &state.temporary_registers[source_reg.GetIndex()].x; 68 return &state.registers.temporary[source_reg.GetIndex()].x;
69 69
70 case RegisterType::FloatUniform: 70 case RegisterType::FloatUniform:
71 return &uniforms.f[source_reg.GetIndex()].x; 71 return &uniforms.f[source_reg.GetIndex()].x;
@@ -114,8 +114,8 @@ void RunInterpreter(UnitState& state) {
114 src2[3] = src2[3] * float24::FromFloat32(-1); 114 src2[3] = src2[3] * float24::FromFloat32(-1);
115 } 115 }
116 116
117 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0] 117 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
118 : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] 118 : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
119 : dummy_vec4_float24; 119 : dummy_vec4_float24;
120 120
121 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); 121 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
@@ -355,8 +355,8 @@ void RunInterpreter(UnitState& state) {
355 src3[3] = src3[3] * float24::FromFloat32(-1); 355 src3[3] = src3[3] * float24::FromFloat32(-1);
356 } 356 }
357 357
358 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0] 358 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
359 : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] 359 : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
360 : dummy_vec4_float24; 360 : dummy_vec4_float24;
361 361
362 for (int i = 0; i < 4; ++i) { 362 for (int i = 0; i < 4; ++i) {
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index a7be433df..ce47774d5 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -106,7 +106,7 @@ static const X64Reg COND0 = R13;
106/// Result of the previous CMP instruction for the Y-component comparison 106/// Result of the previous CMP instruction for the Y-component comparison
107static const X64Reg COND1 = R14; 107static const X64Reg COND1 = R14;
108/// Pointer to the UnitState instance for the current VS unit 108/// Pointer to the UnitState instance for the current VS unit
109static const X64Reg STATE = R15; 109static const X64Reg REGISTERS = R15;
110/// SIMD scratch register 110/// SIMD scratch register
111static const X64Reg SCRATCH = XMM0; 111static const X64Reg SCRATCH = XMM0;
112/// Loaded with the first swizzled source register, otherwise can be used as a scratch register 112/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -140,7 +140,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
140 src_ptr = UNIFORMS; 140 src_ptr = UNIFORMS;
141 src_offset = src_reg.GetIndex() * sizeof(float24) * 4; 141 src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
142 } else { 142 } else {
143 src_ptr = STATE; 143 src_ptr = REGISTERS;
144 src_offset = UnitState::InputOffset(src_reg); 144 src_offset = UnitState::InputOffset(src_reg);
145 } 145 }
146 146
@@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
217 // If all components are enabled, write the result to the destination register 217 // If all components are enabled, write the result to the destination register
218 if (swiz.dest_mask == NO_DEST_REG_MASK) { 218 if (swiz.dest_mask == NO_DEST_REG_MASK) {
219 // Store dest back to memory 219 // Store dest back to memory
220 MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), src); 220 MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src);
221 221
222 } else { 222 } else {
223 // Not all components are enabled, so mask the result when storing to the destination register... 223 // Not all components are enabled, so mask the result when storing to the destination register...
224 MOVAPS(SCRATCH, MDisp(STATE, UnitState::OutputOffset(dest))); 224 MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest)));
225 225
226 if (Common::GetCPUCaps().sse4_1) { 226 if (Common::GetCPUCaps().sse4_1) {
227 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 227 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
240 } 240 }
241 241
242 // Store dest back to memory 242 // Store dest back to memory
243 MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), SCRATCH); 243 MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH);
244 } 244 }
245} 245}
246 246
@@ -635,7 +635,7 @@ CompiledShader* JitCompiler::Compile() {
635 635
636 ABI_PushAllCalleeSavedRegsAndAdjustStack(); 636 ABI_PushAllCalleeSavedRegsAndAdjustStack();
637 637
638 MOV(PTRBITS, R(STATE), R(ABI_PARAM1)); 638 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
639 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); 639 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
640 640
641 // Zero address/loop registers 641 // Zero address/loop registers
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 719a24210..b88f2a0d2 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -20,7 +20,7 @@ namespace Pica {
20 20
21namespace Shader { 21namespace Shader {
22 22
23using CompiledShader = void(void* state); 23using CompiledShader = void(void* registers);
24 24
25/** 25/**
26 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 26 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64