summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp108
-rw-r--r--src/video_core/pica.h137
-rw-r--r--src/video_core/vertex_shader.cpp270
-rw-r--r--src/video_core/vertex_shader.h211
-rw-r--r--src/video_core/video_core.vcxproj2
-rw-r--r--src/video_core/video_core.vcxproj.filters2
7 files changed, 722 insertions, 10 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 8977c8dca..74304ee49 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,5 +1,6 @@
1set(SRCS command_processor.cpp 1set(SRCS command_processor.cpp
2 utils.cpp 2 utils.cpp
3 vertex_shader.cpp
3 video_core.cpp 4 video_core.cpp
4 renderer_opengl/renderer_opengl.cpp) 5 renderer_opengl/renderer_opengl.cpp)
5 6
@@ -8,6 +9,7 @@ set(HEADERS command_processor.h
8 utils.h 9 utils.h
9 video_core.h 10 video_core.h
10 renderer_base.h 11 renderer_base.h
12 vertex_shader.h
11 video_core.h 13 video_core.h
12 renderer_opengl/renderer_opengl.h) 14 renderer_opengl/renderer_opengl.h)
13 15
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index e909c8c32..339fa7726 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -2,9 +2,10 @@
2// Licensed under GPLv2 2// Licensed under GPLv2
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "pica.h"
6#include "command_processor.h" 5#include "command_processor.h"
7#include "math.h" 6#include "math.h"
7#include "pica.h"
8#include "vertex_shader.h"
8 9
9 10
10namespace Pica { 11namespace Pica {
@@ -13,6 +14,14 @@ Regs registers;
13 14
14namespace CommandProcessor { 15namespace CommandProcessor {
15 16
17static int float_regs_counter = 0;
18
19static u32 uniform_write_buffer[4];
20
21// Used for VSLoadProgramData and VSLoadSwizzleData
22static u32 vs_binary_write_offset = 0;
23static u32 vs_swizzle_write_offset = 0;
24
16static inline void WritePicaReg(u32 id, u32 value) { 25static inline void WritePicaReg(u32 id, u32 value) {
17 u32 old_value = registers[id]; 26 u32 old_value = registers[id];
18 registers[id] = value; 27 registers[id] = value;
@@ -67,9 +76,7 @@ static inline void WritePicaReg(u32 id, u32 value) {
67 } 76 }
68 77
69 // Initialize data for the current vertex 78 // Initialize data for the current vertex
70 struct { 79 VertexShader::InputVertex input;
71 Math::Vec4<float24> attr[16];
72 } input;
73 80
74 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { 81 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
75 for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 82 for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
@@ -87,7 +94,7 @@ static inline void WritePicaReg(u32 id, u32 value) {
87 input.attr[i][comp].ToFloat32()); 94 input.attr[i][comp].ToFloat32());
88 } 95 }
89 } 96 }
90 // TODO: Run vertex data through vertex shader 97 VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes());
91 98
92 if (is_indexed) { 99 if (is_indexed) {
93 // TODO: Add processed vertex to vertex cache! 100 // TODO: Add processed vertex to vertex cache!
@@ -98,6 +105,97 @@ static inline void WritePicaReg(u32 id, u32 value) {
98 break; 105 break;
99 } 106 }
100 107
108 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
109 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
110 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
111 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4):
112 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5):
113 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6):
114 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7):
115 case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8):
116 {
117 auto& uniform_setup = registers.vs_uniform_setup;
118
119 // TODO: Does actual hardware indeed keep an intermediate buffer or does
120 // it directly write the values?
121 uniform_write_buffer[float_regs_counter++] = value;
122
123 // Uniforms are written in a packed format such that 4 float24 values are encoded in
124 // three 32-bit numbers. We write to internal memory once a full such vector is
125 // written.
126 if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
127 (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
128 float_regs_counter = 0;
129
130 auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index);
131
132 if (uniform_setup.index > 95) {
133 ERROR_LOG(GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
134 break;
135 }
136
137 // NOTE: The destination component order indeed is "backwards"
138 if (uniform_setup.IsFloat32()) {
139 for (auto i : {0,1,2,3})
140 uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
141 } else {
142 // TODO: Untested
143 uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8);
144 uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
145 uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
146 uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF);
147 }
148
149 DEBUG_LOG(GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
150 uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
151 uniform.w.ToFloat32());
152
153 // TODO: Verify that this actually modifies the register!
154 uniform_setup.index = uniform_setup.index + 1;
155 }
156 break;
157 }
158
159 // Seems to be used to reset the write pointer for VSLoadProgramData
160 case PICA_REG_INDEX(vs_program.begin_load):
161 vs_binary_write_offset = 0;
162 break;
163
164 // Load shader program code
165 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc):
166 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd):
167 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce):
168 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf):
169 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0):
170 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1):
171 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2):
172 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3):
173 {
174 VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value);
175 vs_binary_write_offset++;
176 break;
177 }
178
179 // Seems to be used to reset the write pointer for VSLoadSwizzleData
180 case PICA_REG_INDEX(vs_swizzle_patterns.begin_load):
181 vs_swizzle_write_offset = 0;
182 break;
183
184 // Load swizzle pattern data
185 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6):
186 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7):
187 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8):
188 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9):
189 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da):
190 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db):
191 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc):
192 case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd):
193 {
194 VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value);
195 vs_swizzle_write_offset++;
196 break;
197 }
198
101 default: 199 default:
102 break; 200 break;
103 } 201 }
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index faf124c3d..42303a585 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -50,7 +50,39 @@ struct Regs {
50 INSERT_PADDING_WORDS(0x1); 50 INSERT_PADDING_WORDS(0x1);
51 BitField<0, 24, u32> viewport_size_y; 51 BitField<0, 24, u32> viewport_size_y;
52 52
53 INSERT_PADDING_WORDS(0x1bc); 53 INSERT_PADDING_WORDS(0xc);
54
55 union {
56 // Maps components of output vertex attributes to semantics
57 enum Semantic : u32
58 {
59 POSITION_X = 0,
60 POSITION_Y = 1,
61 POSITION_Z = 2,
62 POSITION_W = 3,
63
64 COLOR_R = 8,
65 COLOR_G = 9,
66 COLOR_B = 10,
67 COLOR_A = 11,
68
69 TEXCOORD0_U = 12,
70 TEXCOORD0_V = 13,
71 TEXCOORD1_U = 14,
72 TEXCOORD1_V = 15,
73 TEXCOORD2_U = 22,
74 TEXCOORD2_V = 23,
75
76 INVALID = 31,
77 };
78
79 BitField< 0, 5, Semantic> map_x;
80 BitField< 8, 5, Semantic> map_y;
81 BitField<16, 5, Semantic> map_z;
82 BitField<24, 5, Semantic> map_w;
83 } vs_output_attributes[7];
84
85 INSERT_PADDING_WORDS(0x1a9);
54 86
55 struct { 87 struct {
56 enum class Format : u64 { 88 enum class Format : u64 {
@@ -133,7 +165,7 @@ struct Regs {
133 165
134 // Attribute loaders map the source vertex data to input attributes 166 // Attribute loaders map the source vertex data to input attributes
135 // This e.g. allows to load different attributes from different memory locations 167 // This e.g. allows to load different attributes from different memory locations
136 struct Loader { 168 struct {
137 // Source attribute data offset from the base address 169 // Source attribute data offset from the base address
138 u32 data_offset; 170 u32 data_offset;
139 171
@@ -189,7 +221,90 @@ struct Regs {
189 u32 trigger_draw; 221 u32 trigger_draw;
190 u32 trigger_draw_indexed; 222 u32 trigger_draw_indexed;
191 223
192 INSERT_PADDING_WORDS(0xd0); 224 INSERT_PADDING_WORDS(0x8a);
225
226 // Offset to shader program entry point (in words)
227 BitField<0, 16, u32> vs_main_offset;
228
229 union {
230 BitField< 0, 4, u64> attribute0_register;
231 BitField< 4, 4, u64> attribute1_register;
232 BitField< 8, 4, u64> attribute2_register;
233 BitField<12, 4, u64> attribute3_register;
234 BitField<16, 4, u64> attribute4_register;
235 BitField<20, 4, u64> attribute5_register;
236 BitField<24, 4, u64> attribute6_register;
237 BitField<28, 4, u64> attribute7_register;
238 BitField<32, 4, u64> attribute8_register;
239 BitField<36, 4, u64> attribute9_register;
240 BitField<40, 4, u64> attribute10_register;
241 BitField<44, 4, u64> attribute11_register;
242 BitField<48, 4, u64> attribute12_register;
243 BitField<52, 4, u64> attribute13_register;
244 BitField<56, 4, u64> attribute14_register;
245 BitField<60, 4, u64> attribute15_register;
246
247 int GetRegisterForAttribute(int attribute_index) {
248 u64 fields[] = {
249 attribute0_register, attribute1_register, attribute2_register, attribute3_register,
250 attribute4_register, attribute5_register, attribute6_register, attribute7_register,
251 attribute8_register, attribute9_register, attribute10_register, attribute11_register,
252 attribute12_register, attribute13_register, attribute14_register, attribute15_register,
253 };
254 return (int)fields[attribute_index];
255 }
256 } vs_input_register_map;
257
258 INSERT_PADDING_WORDS(0x3);
259
260 struct {
261 enum Format : u32
262 {
263 FLOAT24 = 0,
264 FLOAT32 = 1
265 };
266
267 bool IsFloat32() const {
268 return format == FLOAT32;
269 }
270
271 union {
272 // Index of the next uniform to write to
273 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices
274 BitField<0, 7, u32> index;
275
276 BitField<31, 1, Format> format;
277 };
278
279 // Writing to these registers sets the "current" uniform.
280 // TODO: It's not clear how the hardware stores what the "current" uniform is.
281 u32 set_value[8];
282
283 } vs_uniform_setup;
284
285 INSERT_PADDING_WORDS(0x2);
286
287 struct {
288 u32 begin_load;
289
290 // Writing to these registers sets the "current" word in the shader program.
291 // TODO: It's not clear how the hardware stores what the "current" word is.
292 u32 set_word[8];
293 } vs_program;
294
295 INSERT_PADDING_WORDS(0x1);
296
297 // This register group is used to load an internal table of swizzling patterns,
298 // which are indexed by each shader instruction to specify vector component swizzling.
299 struct {
300 u32 begin_load;
301
302 // Writing to these registers sets the "current" swizzle pattern in the table.
303 // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is.
304 u32 set_word[8];
305 } vs_swizzle_patterns;
306
307 INSERT_PADDING_WORDS(0x22);
193 308
194#undef INSERT_PADDING_WORDS_HELPER1 309#undef INSERT_PADDING_WORDS_HELPER1
195#undef INSERT_PADDING_WORDS_HELPER2 310#undef INSERT_PADDING_WORDS_HELPER2
@@ -219,6 +334,11 @@ struct Regs {
219 ADD_FIELD(num_vertices); 334 ADD_FIELD(num_vertices);
220 ADD_FIELD(trigger_draw); 335 ADD_FIELD(trigger_draw);
221 ADD_FIELD(trigger_draw_indexed); 336 ADD_FIELD(trigger_draw_indexed);
337 ADD_FIELD(vs_main_offset);
338 ADD_FIELD(vs_input_register_map);
339 ADD_FIELD(vs_uniform_setup);
340 ADD_FIELD(vs_program);
341 ADD_FIELD(vs_swizzle_patterns);
222 342
223 #undef ADD_FIELD 343 #undef ADD_FIELD
224 #endif // _MSC_VER 344 #endif // _MSC_VER
@@ -259,17 +379,25 @@ private:
259 379
260ASSERT_REG_POSITION(viewport_size_x, 0x41); 380ASSERT_REG_POSITION(viewport_size_x, 0x41);
261ASSERT_REG_POSITION(viewport_size_y, 0x43); 381ASSERT_REG_POSITION(viewport_size_y, 0x43);
382ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
383ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
262ASSERT_REG_POSITION(vertex_attributes, 0x200); 384ASSERT_REG_POSITION(vertex_attributes, 0x200);
263ASSERT_REG_POSITION(index_array, 0x227); 385ASSERT_REG_POSITION(index_array, 0x227);
264ASSERT_REG_POSITION(num_vertices, 0x228); 386ASSERT_REG_POSITION(num_vertices, 0x228);
265ASSERT_REG_POSITION(trigger_draw, 0x22e); 387ASSERT_REG_POSITION(trigger_draw, 0x22e);
266ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); 388ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
389ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
390ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
391ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
392ASSERT_REG_POSITION(vs_program, 0x2cb);
393ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5);
267 394
268#undef ASSERT_REG_POSITION 395#undef ASSERT_REG_POSITION
269#endif // !defined(_MSC_VER) 396#endif // !defined(_MSC_VER)
270 397
271// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. 398// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
272static_assert(sizeof(Regs) == 0x300 * sizeof(u32), "Invalid total size of register set"); 399static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
400static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
273 401
274extern Regs registers; // TODO: Not sure if we want to have one global instance for this 402extern Regs registers; // TODO: Not sure if we want to have one global instance for this
275 403
@@ -347,7 +475,6 @@ private:
347 float value; 475 float value;
348}; 476};
349 477
350
351union CommandHeader { 478union CommandHeader {
352 CommandHeader(u32 h) : hex(h) {} 479 CommandHeader(u32 h) : hex(h) {}
353 480
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
new file mode 100644
index 000000000..93830a96a
--- /dev/null
+++ b/src/video_core/vertex_shader.cpp
@@ -0,0 +1,270 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include "pica.h"
6#include "vertex_shader.h"
7#include <core/mem_map.h>
8#include <common/file_util.h>
9
10namespace Pica {
11
12namespace VertexShader {
13
14static struct {
15 Math::Vec4<float24> f[96];
16} shader_uniforms;
17
18
19// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
20// For now, we just keep these local arrays around.
21static u32 shader_memory[1024];
22static u32 swizzle_data[1024];
23
24void SubmitShaderMemoryChange(u32 addr, u32 value)
25{
26 shader_memory[addr] = value;
27}
28
29void SubmitSwizzleDataChange(u32 addr, u32 value)
30{
31 swizzle_data[addr] = value;
32}
33
34Math::Vec4<float24>& GetFloatUniform(u32 index)
35{
36 return shader_uniforms.f[index];
37}
38
39struct VertexShaderState {
40 u32* program_counter;
41
42 const float24* input_register_table[16];
43 float24* output_register_table[7*4];
44
45 Math::Vec4<float24> temporary_registers[16];
46 bool status_registers[2];
47
48 enum {
49 INVALID_ADDRESS = 0xFFFFFFFF
50 };
51 u32 call_stack[8]; // TODO: What is the maximal call stack depth?
52 u32* call_stack_pointer;
53};
54
55static void ProcessShaderCode(VertexShaderState& state) {
56 while (true) {
57 bool increment_pc = true;
58 bool exit_loop = false;
59 const Instruction& instr = *(const Instruction*)state.program_counter;
60
61 const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
62 : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
63 : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
64 : nullptr;
65 const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
66 : &state.temporary_registers[instr.common.src2-0x10].x;
67 // TODO: Unsure about the limit values
68 float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
69 : (instr.common.dest <= 0x3C) ? nullptr
70 : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
71 : nullptr;
72
73 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
74
75 const float24 src1[4] = {
76 src1_[(int)swizzle.GetSelectorSrc1(0)],
77 src1_[(int)swizzle.GetSelectorSrc1(1)],
78 src1_[(int)swizzle.GetSelectorSrc1(2)],
79 src1_[(int)swizzle.GetSelectorSrc1(3)],
80 };
81 const float24 src2[4] = {
82 src2_[(int)swizzle.GetSelectorSrc2(0)],
83 src2_[(int)swizzle.GetSelectorSrc2(1)],
84 src2_[(int)swizzle.GetSelectorSrc2(2)],
85 src2_[(int)swizzle.GetSelectorSrc2(3)],
86 };
87
88 switch (instr.opcode) {
89 case Instruction::OpCode::ADD:
90 {
91 for (int i = 0; i < 4; ++i) {
92 if (!swizzle.DestComponentEnabled(i))
93 continue;
94
95 dest[i] = src1[i] + src2[i];
96 }
97
98 break;
99 }
100
101 case Instruction::OpCode::MUL:
102 {
103 for (int i = 0; i < 4; ++i) {
104 if (!swizzle.DestComponentEnabled(i))
105 continue;
106
107 dest[i] = src1[i] * src2[i];
108 }
109
110 break;
111 }
112
113 case Instruction::OpCode::DP3:
114 case Instruction::OpCode::DP4:
115 {
116 float24 dot = float24::FromFloat32(0.f);
117 int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
118 for (int i = 0; i < num_components; ++i)
119 dot = dot + src1[i] * src2[i];
120
121 for (int i = 0; i < num_components; ++i) {
122 if (!swizzle.DestComponentEnabled(i))
123 continue;
124
125 dest[i] = dot;
126 }
127 break;
128 }
129
130 // Reciprocal
131 case Instruction::OpCode::RCP:
132 {
133 for (int i = 0; i < 4; ++i) {
134 if (!swizzle.DestComponentEnabled(i))
135 continue;
136
137 // TODO: Be stable against division by zero!
138 // TODO: I think this might be wrong... we should only use one component here
139 dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
140 }
141
142 break;
143 }
144
145 // Reciprocal Square Root
146 case Instruction::OpCode::RSQ:
147 {
148 for (int i = 0; i < 4; ++i) {
149 if (!swizzle.DestComponentEnabled(i))
150 continue;
151
152 // TODO: Be stable against division by zero!
153 // TODO: I think this might be wrong... we should only use one component here
154 dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
155 }
156
157 break;
158 }
159
160 case Instruction::OpCode::MOV:
161 {
162 for (int i = 0; i < 4; ++i) {
163 if (!swizzle.DestComponentEnabled(i))
164 continue;
165
166 dest[i] = src1[i];
167 }
168 break;
169 }
170
171 case Instruction::OpCode::RET:
172 if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
173 exit_loop = true;
174 } else {
175 state.program_counter = &shader_memory[*state.call_stack_pointer--];
176 *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS;
177 }
178
179 break;
180
181 case Instruction::OpCode::CALL:
182 increment_pc = false;
183
184 _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack));
185
186 *++state.call_stack_pointer = state.program_counter - shader_memory;
187 // TODO: Does this offset refer to the beginning of shader memory?
188 state.program_counter = &shader_memory[instr.flow_control.offset_words];
189 break;
190
191 case Instruction::OpCode::FLS:
192 // TODO: Do whatever needs to be done here?
193 break;
194
195 default:
196 ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
197 (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex);
198 break;
199 }
200
201 if (increment_pc)
202 ++state.program_counter;
203
204 if (exit_loop)
205 break;
206 }
207}
208
209OutputVertex RunShader(const InputVertex& input, int num_attributes)
210{
211 VertexShaderState state;
212
213 const u32* main = &shader_memory[registers.vs_main_offset];
214 state.program_counter = (u32*)main;
215
216 // Setup input register table
217 const auto& attribute_register_map = registers.vs_input_register_map;
218 float24 dummy_register;
219 std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register);
220 if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
221 if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
222 if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
223 if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
224 if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
225 if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
226 if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
227 if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
228 if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
229 if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
230 if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
231 if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
232 if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
233 if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
234 if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
235 if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
236
237 // Setup output register table
238 OutputVertex ret;
239 for (int i = 0; i < 7; ++i) {
240 const auto& output_register_map = registers.vs_output_attributes[i];
241
242 u32 semantics[4] = {
243 output_register_map.map_x, output_register_map.map_y,
244 output_register_map.map_z, output_register_map.map_w
245 };
246
247 for (int comp = 0; comp < 4; ++comp)
248 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
249 }
250
251 state.status_registers[0] = false;
252 state.status_registers[1] = false;
253 std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]),
254 VertexShaderState::INVALID_ADDRESS);
255 state.call_stack_pointer = &state.call_stack[0];
256
257 ProcessShaderCode(state);
258
259 DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
260 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
261 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
262 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
263
264 return ret;
265}
266
267
268} // namespace
269
270} // namespace
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
new file mode 100644
index 000000000..1b71e367b
--- /dev/null
+++ b/src/video_core/vertex_shader.h
@@ -0,0 +1,211 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <initializer_list>
8
9#include <common/common_types.h>
10
11#include "math.h"
12#include "pica.h"
13
14namespace Pica {
15
16namespace VertexShader {
17
18struct InputVertex {
19 Math::Vec4<float24> attr[16];
20};
21
22struct OutputVertex {
23 OutputVertex() = default;
24
25 // VS output attributes
26 Math::Vec4<float24> pos;
27 Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
28 Math::Vec4<float24> color;
29 Math::Vec2<float24> tc0;
30 float24 tc0_v;
31
32 // Padding for optimal alignment
33 float24 pad[14];
34
35 // Attributes used to store intermediate results
36
37 // position after perspective divide
38 Math::Vec3<float24> screenpos;
39
40 // Linear interpolation
41 // factor: 0=this, 1=vtx
42 void Lerp(float24 factor, const OutputVertex& vtx) {
43 pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
44
45 // TODO: Should perform perspective correct interpolation here...
46 tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
47
48 screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
49
50 color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
51 }
52
53 // Linear interpolation
54 // factor: 0=v0, 1=v1
55 static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
56 OutputVertex ret = v0;
57 ret.Lerp(factor, v1);
58 return ret;
59 }
60};
61static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
62
63union Instruction {
64 enum class OpCode : u32 {
65 ADD = 0x0,
66 DP3 = 0x1,
67 DP4 = 0x2,
68
69 MUL = 0x8,
70
71 MAX = 0xC,
72 MIN = 0xD,
73 RCP = 0xE,
74 RSQ = 0xF,
75
76 MOV = 0x13,
77
78 RET = 0x21,
79 FLS = 0x22, // Flush
80 CALL = 0x24,
81 };
82
83 std::string GetOpCodeName() const {
84 std::map<OpCode, std::string> map = {
85 { OpCode::ADD, "ADD" },
86 { OpCode::DP3, "DP3" },
87 { OpCode::DP4, "DP4" },
88 { OpCode::MUL, "MUL" },
89 { OpCode::MAX, "MAX" },
90 { OpCode::MIN, "MIN" },
91 { OpCode::RCP, "RCP" },
92 { OpCode::RSQ, "RSQ" },
93 { OpCode::MOV, "MOV" },
94 { OpCode::RET, "RET" },
95 { OpCode::FLS, "FLS" },
96 };
97 auto it = map.find(opcode);
98 if (it == map.end())
99 return "UNK";
100 else
101 return it->second;
102 }
103
104 u32 hex;
105
106 BitField<0x1a, 0x6, OpCode> opcode;
107
108 // General notes:
109 //
110 // When two input registers are used, one of them uses a 5-bit index while the other
111 // one uses a 7-bit index. This is because at most one floating point uniform may be used
112 // as an input.
113
114
115 // Format used e.g. by arithmetic instructions and comparisons
116 // "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats),
117 // while "dest" addresses individual floats.
118 union {
119 BitField<0x00, 0x5, u32> operand_desc_id;
120 BitField<0x07, 0x5, u32> src2;
121 BitField<0x0c, 0x7, u32> src1;
122 BitField<0x13, 0x7, u32> dest;
123 } common;
124
125 // Format used for flow control instructions ("if")
126 union {
127 BitField<0x00, 0x8, u32> num_instructions;
128 BitField<0x0a, 0xc, u32> offset_words;
129 } flow_control;
130};
131
132union SwizzlePattern {
133 u32 hex;
134
135 enum class Selector : u32 {
136 x = 0,
137 y = 1,
138 z = 2,
139 w = 3
140 };
141
142 Selector GetSelectorSrc1(int comp) const {
143 Selector selectors[] = {
144 src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3
145 };
146 return selectors[comp];
147 }
148
149 Selector GetSelectorSrc2(int comp) const {
150 Selector selectors[] = {
151 src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3
152 };
153 return selectors[comp];
154 }
155
156 bool DestComponentEnabled(int i) const {
157 return (dest_mask & (0x8 >> i));
158 }
159
160 std::string SelectorToString(bool src2) const {
161 std::map<Selector, std::string> map = {
162 { Selector::x, "x" },
163 { Selector::y, "y" },
164 { Selector::z, "z" },
165 { Selector::w, "w" }
166 };
167 std::string ret;
168 for (int i = 0; i < 4; ++i) {
169 ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i));
170 }
171 return ret;
172 }
173
174 std::string DestMaskToString() const {
175 std::string ret;
176 for (int i = 0; i < 4; ++i) {
177 if (!DestComponentEnabled(i))
178 ret += "_";
179 else
180 ret += "xyzw"[i];
181 }
182 return ret;
183 }
184
185 // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
186 BitField< 0, 4, u32> dest_mask;
187
188 BitField< 5, 2, Selector> src1_selector_3;
189 BitField< 7, 2, Selector> src1_selector_2;
190 BitField< 9, 2, Selector> src1_selector_1;
191 BitField<11, 2, Selector> src1_selector_0;
192
193 BitField<14, 2, Selector> src2_selector_3;
194 BitField<16, 2, Selector> src2_selector_2;
195 BitField<18, 2, Selector> src2_selector_1;
196 BitField<20, 2, Selector> src2_selector_0;
197
198 BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign?
199};
200
201void SubmitShaderMemoryChange(u32 addr, u32 value);
202void SubmitSwizzleDataChange(u32 addr, u32 value);
203
204OutputVertex RunShader(const InputVertex& input, int num_attributes);
205
206Math::Vec4<float24>& GetFloatUniform(u32 index);
207
208} // namespace
209
210} // namespace
211
diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj
index 28eb21284..56729dc03 100644
--- a/src/video_core/video_core.vcxproj
+++ b/src/video_core/video_core.vcxproj
@@ -22,6 +22,7 @@
22 <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> 22 <ClCompile Include="renderer_opengl\renderer_opengl.cpp" />
23 <ClCompile Include="command_processor.cpp" /> 23 <ClCompile Include="command_processor.cpp" />
24 <ClCompile Include="utils.cpp" /> 24 <ClCompile Include="utils.cpp" />
25 <ClCompile Include="vertex_shader.cpp" />
25 <ClCompile Include="video_core.cpp" /> 26 <ClCompile Include="video_core.cpp" />
26 </ItemGroup> 27 </ItemGroup>
27 <ItemGroup> 28 <ItemGroup>
@@ -31,6 +32,7 @@
31 <ClInclude Include="pica.h" /> 32 <ClInclude Include="pica.h" />
32 <ClInclude Include="renderer_base.h" /> 33 <ClInclude Include="renderer_base.h" />
33 <ClInclude Include="utils.h" /> 34 <ClInclude Include="utils.h" />
35 <ClInclude Include="vertex_shader.h" />
34 <ClInclude Include="video_core.h" /> 36 <ClInclude Include="video_core.h" />
35 <ClInclude Include="renderer_opengl\renderer_opengl.h" /> 37 <ClInclude Include="renderer_opengl\renderer_opengl.h" />
36 </ItemGroup> 38 </ItemGroup>
diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters
index 713458fcf..db0b37018 100644
--- a/src/video_core/video_core.vcxproj.filters
+++ b/src/video_core/video_core.vcxproj.filters
@@ -11,6 +11,7 @@
11 </ClCompile> 11 </ClCompile>
12 <ClCompile Include="command_processor.cpp" /> 12 <ClCompile Include="command_processor.cpp" />
13 <ClCompile Include="utils.cpp" /> 13 <ClCompile Include="utils.cpp" />
14 <ClCompile Include="vertex_shader.cpp" />
14 <ClCompile Include="video_core.cpp" /> 15 <ClCompile Include="video_core.cpp" />
15 </ItemGroup> 16 </ItemGroup>
16 <ItemGroup> 17 <ItemGroup>
@@ -23,6 +24,7 @@
23 <ClInclude Include="pica.h" /> 24 <ClInclude Include="pica.h" />
24 <ClInclude Include="renderer_base.h" /> 25 <ClInclude Include="renderer_base.h" />
25 <ClInclude Include="utils.h" /> 26 <ClInclude Include="utils.h" />
27 <ClInclude Include="vertex_shader.h" />
26 <ClInclude Include="video_core.h" /> 28 <ClInclude Include="video_core.h" />
27 </ItemGroup> 29 </ItemGroup>
28 <ItemGroup> 30 <ItemGroup>