summaryrefslogtreecommitdiff
path: root/src/video_core/vertex_shader.cpp
diff options
context:
space:
mode:
author: Tony Wasserka, 2014-07-26 19:17:09 +0200
committer: Tony Wasserka, 2014-08-12 13:48:10 +0200
commit: c52651261916b136f2ea4ff022fb9cead5a73a93 (patch)
tree: 11bf295b77b14d1d82c7f992a47cf20f5fffbda6 /src/video_core/vertex_shader.cpp
parent: Pica: Implement vertex loading. (diff)
downloadyuzu-c52651261916b136f2ea4ff022fb9cead5a73a93.tar.gz
yuzu-c52651261916b136f2ea4ff022fb9cead5a73a93.tar.xz
yuzu-c52651261916b136f2ea4ff022fb9cead5a73a93.zip
Pica: Add vertex shader implementation.
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
-rw-r--r-- src/video_core/vertex_shader.cpp 270
1 files changed, 270 insertions, 0 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
new file mode 100644
index 000000000..93830a96a
--- /dev/null
+++ b/src/video_core/vertex_shader.cpp
@@ -0,0 +1,270 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include "pica.h"
6#include "vertex_shader.h"
7#include <core/mem_map.h>
8#include <common/file_util.h>
9
10namespace Pica {
11
12namespace VertexShader {
13
14static struct {
15 Math::Vec4<float24> f[96];
16} shader_uniforms;
17
18
19// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
20// For now, we just keep these local arrays around.
21static u32 shader_memory[1024];
22static u32 swizzle_data[1024];
23
24void SubmitShaderMemoryChange(u32 addr, u32 value)
25{
26 shader_memory[addr] = value;
27}
28
29void SubmitSwizzleDataChange(u32 addr, u32 value)
30{
31 swizzle_data[addr] = value;
32}
33
34Math::Vec4<float24>& GetFloatUniform(u32 index)
35{
36 return shader_uniforms.f[index];
37}
38
39struct VertexShaderState {
40 u32* program_counter;
41
42 const float24* input_register_table[16];
43 float24* output_register_table[7*4];
44
45 Math::Vec4<float24> temporary_registers[16];
46 bool status_registers[2];
47
48 enum {
49 INVALID_ADDRESS = 0xFFFFFFFF
50 };
51 u32 call_stack[8]; // TODO: What is the maximal call stack depth?
52 u32* call_stack_pointer;
53};
54
55static void ProcessShaderCode(VertexShaderState& state) {
56 while (true) {
57 bool increment_pc = true;
58 bool exit_loop = false;
59 const Instruction& instr = *(const Instruction*)state.program_counter;
60
61 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
62 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
63 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
64 : nullptr;
65 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
66 : &state.temporary_registers[instr.common.src2-0x10].x;
67 // TODO: Unsure about the limit values
68 float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
69 : (instr.common.dest <= 0x3C) ? nullptr
70 : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
71 : nullptr;
72
73 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
74
75 const float24 src1[4] = {
76 src1_[(int)swizzle.GetSelectorSrc1(0)],
77 src1_[(int)swizzle.GetSelectorSrc1(1)],
78 src1_[(int)swizzle.GetSelectorSrc1(2)],
79 src1_[(int)swizzle.GetSelectorSrc1(3)],
80 };
81 const float24 src2[4] = {
82 src2_[(int)swizzle.GetSelectorSrc2(0)],
83 src2_[(int)swizzle.GetSelectorSrc2(1)],
84 src2_[(int)swizzle.GetSelectorSrc2(2)],
85 src2_[(int)swizzle.GetSelectorSrc2(3)],
86 };
87
88 switch (instr.opcode) {
89 case Instruction::OpCode::ADD:
90 {
91 for (int i = 0; i < 4; ++i) {
92 if (!swizzle.DestComponentEnabled(i))
93 continue;
94
95 dest[i] = src1[i] + src2[i];
96 }
97
98 break;
99 }
100
101 case Instruction::OpCode::MUL:
102 {
103 for (int i = 0; i < 4; ++i) {
104 if (!swizzle.DestComponentEnabled(i))
105 continue;
106
107 dest[i] = src1[i] * src2[i];
108 }
109
110 break;
111 }
112
113 case Instruction::OpCode::DP3:
114 case Instruction::OpCode::DP4:
115 {
116 float24 dot = float24::FromFloat32(0.f);
117 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
118 for (int i = 0; i < num_components; ++i)
119 dot = dot + src1[i] * src2[i];
120
121 for (int i = 0; i < num_components; ++i) {
122 if (!swizzle.DestComponentEnabled(i))
123 continue;
124
125 dest[i] = dot;
126 }
127 break;
128 }
129
130 // Reciprocal
131 case Instruction::OpCode::RCP:
132 {
133 for (int i = 0; i < 4; ++i) {
134 if (!swizzle.DestComponentEnabled(i))
135 continue;
136
137 // TODO: Be stable against division by zero!
138 // TODO: I think this might be wrong... we should only use one component here
139 dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
140 }
141
142 break;
143 }
144
145 // Reciprocal Square Root
146 case Instruction::OpCode::RSQ:
147 {
148 for (int i = 0; i < 4; ++i) {
149 if (!swizzle.DestComponentEnabled(i))
150 continue;
151
152 // TODO: Be stable against division by zero!
153 // TODO: I think this might be wrong... we should only use one component here
154 dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
155 }
156
157 break;
158 }
159
160 case Instruction::OpCode::MOV:
161 {
162 for (int i = 0; i < 4; ++i) {
163 if (!swizzle.DestComponentEnabled(i))
164 continue;
165
166 dest[i] = src1[i];
167 }
168 break;
169 }
170
171 case Instruction::OpCode::RET:
172 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
173 exit_loop = true;
174 } else {
175 state.program_counter = &shader_memory[*state.call_stack_pointer--];
176 *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS;
177 }
178
179 break;
180
181 case Instruction::OpCode::CALL:
182 increment_pc = false;
183
184 _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack));
185
186 *++state.call_stack_pointer = state.program_counter - shader_memory;
187 // TODO: Does this offset refer to the beginning of shader memory?
188 state.program_counter = &shader_memory[instr.flow_control.offset_words];
189 break;
190
191 case Instruction::OpCode::FLS:
192 // TODO: Do whatever needs to be done here?
193 break;
194
195 default:
196 ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
197 (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex);
198 break;
199 }
200
201 if (increment_pc)
202 ++state.program_counter;
203
204 if (exit_loop)
205 break;
206 }
207}
208
209OutputVertex RunShader(const InputVertex& input, int num_attributes)
210{
211 VertexShaderState state;
212
213 const u32* main = &shader_memory[registers.vs_main_offset];
214 state.program_counter = (u32*)main;
215
216 // Setup input register table
217 const auto& attribute_register_map = registers.vs_input_register_map;
218 float24 dummy_register;
219 std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register);
220 if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
221 if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
222 if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
223 if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
224 if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
225 if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
226 if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
227 if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
228 if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
229 if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
230 if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
231 if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
232 if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
233 if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
234 if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
235 if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
236
237 // Setup output register table
238 OutputVertex ret;
239 for (int i = 0; i < 7; ++i) {
240 const auto& output_register_map = registers.vs_output_attributes[i];
241
242 u32 semantics[4] = {
243 output_register_map.map_x, output_register_map.map_y,
244 output_register_map.map_z, output_register_map.map_w
245 };
246
247 for (int comp = 0; comp < 4; ++comp)
248 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
249 }
250
251 state.status_registers[0] = false;
252 state.status_registers[1] = false;
253 std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]),
254 VertexShaderState::INVALID_ADDRESS);
255 state.call_stack_pointer = &state.call_stack[0];
256
257 ProcessShaderCode(state);
258
259 DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
260 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
261 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
262 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
263
264 return ret;
265}
266
267
268} // namespace
269
270} // namespace