diff options
| author | 2014-07-26 19:17:09 +0200 | |
|---|---|---|
| committer | 2014-08-12 13:48:10 +0200 | |
| commit | c52651261916b136f2ea4ff022fb9cead5a73a93 (patch) | |
| tree | 11bf295b77b14d1d82c7f992a47cf20f5fffbda6 /src/video_core/vertex_shader.cpp | |
| parent | Pica: Implement vertex loading. (diff) | |
| download | yuzu-c52651261916b136f2ea4ff022fb9cead5a73a93.tar.gz yuzu-c52651261916b136f2ea4ff022fb9cead5a73a93.tar.xz yuzu-c52651261916b136f2ea4ff022fb9cead5a73a93.zip | |
Pica: Add vertex shader implementation.
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp new file mode 100644 index 000000000..93830a96a --- /dev/null +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -0,0 +1,270 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
#include <algorithm>
#include <cmath>

#include "pica.h"
#include "vertex_shader.h"
#include <core/mem_map.h>
#include <common/file_util.h>
| 9 | |||
| 10 | namespace Pica { | ||
| 11 | |||
| 12 | namespace VertexShader { | ||
| 13 | |||
| 14 | static struct { | ||
| 15 | Math::Vec4<float24> f[96]; | ||
| 16 | } shader_uniforms; | ||
| 17 | |||
| 18 | |||
| 19 | // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! | ||
| 20 | // For now, we just keep these local arrays around. | ||
| 21 | static u32 shader_memory[1024]; | ||
| 22 | static u32 swizzle_data[1024]; | ||
| 23 | |||
| 24 | void SubmitShaderMemoryChange(u32 addr, u32 value) | ||
| 25 | { | ||
| 26 | shader_memory[addr] = value; | ||
| 27 | } | ||
| 28 | |||
| 29 | void SubmitSwizzleDataChange(u32 addr, u32 value) | ||
| 30 | { | ||
| 31 | swizzle_data[addr] = value; | ||
| 32 | } | ||
| 33 | |||
| 34 | Math::Vec4<float24>& GetFloatUniform(u32 index) | ||
| 35 | { | ||
| 36 | return shader_uniforms.f[index]; | ||
| 37 | } | ||
| 38 | |||
| 39 | struct VertexShaderState { | ||
| 40 | u32* program_counter; | ||
| 41 | |||
| 42 | const float24* input_register_table[16]; | ||
| 43 | float24* output_register_table[7*4]; | ||
| 44 | |||
| 45 | Math::Vec4<float24> temporary_registers[16]; | ||
| 46 | bool status_registers[2]; | ||
| 47 | |||
| 48 | enum { | ||
| 49 | INVALID_ADDRESS = 0xFFFFFFFF | ||
| 50 | }; | ||
| 51 | u32 call_stack[8]; // TODO: What is the maximal call stack depth? | ||
| 52 | u32* call_stack_pointer; | ||
| 53 | }; | ||
| 54 | |||
| 55 | static void ProcessShaderCode(VertexShaderState& state) { | ||
| 56 | while (true) { | ||
| 57 | bool increment_pc = true; | ||
| 58 | bool exit_loop = false; | ||
| 59 | const Instruction& instr = *(const Instruction*)state.program_counter; | ||
| 60 | |||
| 61 | const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] | ||
| 62 | : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x | ||
| 63 | : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x | ||
| 64 | : nullptr; | ||
| 65 | const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] | ||
| 66 | : &state.temporary_registers[instr.common.src2-0x10].x; | ||
| 67 | // TODO: Unsure about the limit values | ||
| 68 | float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] | ||
| 69 | : (instr.common.dest <= 0x3C) ? nullptr | ||
| 70 | : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] | ||
| 71 | : nullptr; | ||
| 72 | |||
| 73 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; | ||
| 74 | |||
| 75 | const float24 src1[4] = { | ||
| 76 | src1_[(int)swizzle.GetSelectorSrc1(0)], | ||
| 77 | src1_[(int)swizzle.GetSelectorSrc1(1)], | ||
| 78 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 79 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 80 | }; | ||
| 81 | const float24 src2[4] = { | ||
| 82 | src2_[(int)swizzle.GetSelectorSrc2(0)], | ||
| 83 | src2_[(int)swizzle.GetSelectorSrc2(1)], | ||
| 84 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 85 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 86 | }; | ||
| 87 | |||
| 88 | switch (instr.opcode) { | ||
| 89 | case Instruction::OpCode::ADD: | ||
| 90 | { | ||
| 91 | for (int i = 0; i < 4; ++i) { | ||
| 92 | if (!swizzle.DestComponentEnabled(i)) | ||
| 93 | continue; | ||
| 94 | |||
| 95 | dest[i] = src1[i] + src2[i]; | ||
| 96 | } | ||
| 97 | |||
| 98 | break; | ||
| 99 | } | ||
| 100 | |||
| 101 | case Instruction::OpCode::MUL: | ||
| 102 | { | ||
| 103 | for (int i = 0; i < 4; ++i) { | ||
| 104 | if (!swizzle.DestComponentEnabled(i)) | ||
| 105 | continue; | ||
| 106 | |||
| 107 | dest[i] = src1[i] * src2[i]; | ||
| 108 | } | ||
| 109 | |||
| 110 | break; | ||
| 111 | } | ||
| 112 | |||
| 113 | case Instruction::OpCode::DP3: | ||
| 114 | case Instruction::OpCode::DP4: | ||
| 115 | { | ||
| 116 | float24 dot = float24::FromFloat32(0.f); | ||
| 117 | int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; | ||
| 118 | for (int i = 0; i < num_components; ++i) | ||
| 119 | dot = dot + src1[i] * src2[i]; | ||
| 120 | |||
| 121 | for (int i = 0; i < num_components; ++i) { | ||
| 122 | if (!swizzle.DestComponentEnabled(i)) | ||
| 123 | continue; | ||
| 124 | |||
| 125 | dest[i] = dot; | ||
| 126 | } | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | |||
| 130 | // Reciprocal | ||
| 131 | case Instruction::OpCode::RCP: | ||
| 132 | { | ||
| 133 | for (int i = 0; i < 4; ++i) { | ||
| 134 | if (!swizzle.DestComponentEnabled(i)) | ||
| 135 | continue; | ||
| 136 | |||
| 137 | // TODO: Be stable against division by zero! | ||
| 138 | // TODO: I think this might be wrong... we should only use one component here | ||
| 139 | dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32()); | ||
| 140 | } | ||
| 141 | |||
| 142 | break; | ||
| 143 | } | ||
| 144 | |||
| 145 | // Reciprocal Square Root | ||
| 146 | case Instruction::OpCode::RSQ: | ||
| 147 | { | ||
| 148 | for (int i = 0; i < 4; ++i) { | ||
| 149 | if (!swizzle.DestComponentEnabled(i)) | ||
| 150 | continue; | ||
| 151 | |||
| 152 | // TODO: Be stable against division by zero! | ||
| 153 | // TODO: I think this might be wrong... we should only use one component here | ||
| 154 | dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32())); | ||
| 155 | } | ||
| 156 | |||
| 157 | break; | ||
| 158 | } | ||
| 159 | |||
| 160 | case Instruction::OpCode::MOV: | ||
| 161 | { | ||
| 162 | for (int i = 0; i < 4; ++i) { | ||
| 163 | if (!swizzle.DestComponentEnabled(i)) | ||
| 164 | continue; | ||
| 165 | |||
| 166 | dest[i] = src1[i]; | ||
| 167 | } | ||
| 168 | break; | ||
| 169 | } | ||
| 170 | |||
| 171 | case Instruction::OpCode::RET: | ||
| 172 | if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { | ||
| 173 | exit_loop = true; | ||
| 174 | } else { | ||
| 175 | state.program_counter = &shader_memory[*state.call_stack_pointer--]; | ||
| 176 | *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; | ||
| 177 | } | ||
| 178 | |||
| 179 | break; | ||
| 180 | |||
| 181 | case Instruction::OpCode::CALL: | ||
| 182 | increment_pc = false; | ||
| 183 | |||
| 184 | _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); | ||
| 185 | |||
| 186 | *++state.call_stack_pointer = state.program_counter - shader_memory; | ||
| 187 | // TODO: Does this offset refer to the beginning of shader memory? | ||
| 188 | state.program_counter = &shader_memory[instr.flow_control.offset_words]; | ||
| 189 | break; | ||
| 190 | |||
| 191 | case Instruction::OpCode::FLS: | ||
| 192 | // TODO: Do whatever needs to be done here? | ||
| 193 | break; | ||
| 194 | |||
| 195 | default: | ||
| 196 | ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||
| 197 | (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); | ||
| 198 | break; | ||
| 199 | } | ||
| 200 | |||
| 201 | if (increment_pc) | ||
| 202 | ++state.program_counter; | ||
| 203 | |||
| 204 | if (exit_loop) | ||
| 205 | break; | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | OutputVertex RunShader(const InputVertex& input, int num_attributes) | ||
| 210 | { | ||
| 211 | VertexShaderState state; | ||
| 212 | |||
| 213 | const u32* main = &shader_memory[registers.vs_main_offset]; | ||
| 214 | state.program_counter = (u32*)main; | ||
| 215 | |||
| 216 | // Setup input register table | ||
| 217 | const auto& attribute_register_map = registers.vs_input_register_map; | ||
| 218 | float24 dummy_register; | ||
| 219 | std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register); | ||
| 220 | if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||
| 221 | if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||
| 222 | if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||
| 223 | if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||
| 224 | if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||
| 225 | if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||
| 226 | if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||
| 227 | if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||
| 228 | if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||
| 229 | if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||
| 230 | if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||
| 231 | if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||
| 232 | if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||
| 233 | if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||
| 234 | if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||
| 235 | if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||
| 236 | |||
| 237 | // Setup output register table | ||
| 238 | OutputVertex ret; | ||
| 239 | for (int i = 0; i < 7; ++i) { | ||
| 240 | const auto& output_register_map = registers.vs_output_attributes[i]; | ||
| 241 | |||
| 242 | u32 semantics[4] = { | ||
| 243 | output_register_map.map_x, output_register_map.map_y, | ||
| 244 | output_register_map.map_z, output_register_map.map_w | ||
| 245 | }; | ||
| 246 | |||
| 247 | for (int comp = 0; comp < 4; ++comp) | ||
| 248 | state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; | ||
| 249 | } | ||
| 250 | |||
| 251 | state.status_registers[0] = false; | ||
| 252 | state.status_registers[1] = false; | ||
| 253 | std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]), | ||
| 254 | VertexShaderState::INVALID_ADDRESS); | ||
| 255 | state.call_stack_pointer = &state.call_stack[0]; | ||
| 256 | |||
| 257 | ProcessShaderCode(state); | ||
| 258 | |||
| 259 | DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||
| 260 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||
| 261 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||
| 262 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | ||
| 263 | |||
| 264 | return ret; | ||
| 265 | } | ||
| 266 | |||
| 267 | |||
| 268 | } // namespace | ||
| 269 | |||
| 270 | } // namespace | ||