diff options
| author | 2015-07-21 19:04:05 -0400 | |
|---|---|---|
| committer | 2015-08-15 17:33:41 -0400 | |
| commit | 642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2 (patch) | |
| tree | 85643112608a15fafc304d41c4457a50c453bfcd /src/video_core/shader_interpreter.cpp | |
| parent | Merge pull request #1027 from lioncash/debugger (diff) | |
| download | yuzu-642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2.tar.gz yuzu-642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2.tar.xz yuzu-642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2.zip | |
GPU: Refactor "VertexShader" namespace to "Shader".
- Also renames "vertex_shader.*" to "shader_interpreter.*"
Diffstat (limited to 'src/video_core/shader_interpreter.cpp')
| -rw-r--r-- | src/video_core/shader_interpreter.cpp | 629 |
1 files changed, 629 insertions, 0 deletions
diff --git a/src/video_core/shader_interpreter.cpp b/src/video_core/shader_interpreter.cpp new file mode 100644 index 000000000..3cce26d36 --- /dev/null +++ b/src/video_core/shader_interpreter.cpp | |||
| @@ -0,0 +1,629 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <boost/container/static_vector.hpp> | ||
| 6 | #include <boost/range/algorithm.hpp> | ||
| 7 | |||
| 8 | #include <common/file_util.h> | ||
| 9 | |||
| 10 | #include <nihstro/shader_bytecode.h> | ||
| 11 | |||
| 12 | #include "common/profiler.h" | ||
| 13 | |||
| 14 | #include "pica.h" | ||
| 15 | #include "shader_interpreter.h" | ||
| 16 | #include "debug_utils/debug_utils.h" | ||
| 17 | |||
| 18 | using nihstro::OpCode; | ||
| 19 | using nihstro::Instruction; | ||
| 20 | using nihstro::RegisterType; | ||
| 21 | using nihstro::SourceRegister; | ||
| 22 | using nihstro::SwizzlePattern; | ||
| 23 | |||
| 24 | namespace Pica { | ||
| 25 | |||
| 26 | namespace Shader { | ||
| 27 | |||
| 28 | struct ShaderState { | ||
| 29 | u32 program_counter; | ||
| 30 | |||
| 31 | const float24* input_register_table[16]; | ||
| 32 | Math::Vec4<float24> output_registers[16]; | ||
| 33 | |||
| 34 | Math::Vec4<float24> temporary_registers[16]; | ||
| 35 | bool conditional_code[2]; | ||
| 36 | |||
| 37 | // Two Address registers and one loop counter | ||
| 38 | // TODO: How many bits do these actually have? | ||
| 39 | s32 address_registers[3]; | ||
| 40 | |||
| 41 | enum { | ||
| 42 | INVALID_ADDRESS = 0xFFFFFFFF | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct CallStackElement { | ||
| 46 | u32 final_address; // Address upon which we jump to return_address | ||
| 47 | u32 return_address; // Where to jump when leaving scope | ||
| 48 | u8 repeat_counter; // How often to repeat until this call stack element is removed | ||
| 49 | u8 loop_increment; // Which value to add to the loop counter after an iteration | ||
| 50 | // TODO: Should this be a signed value? Does it even matter? | ||
| 51 | u32 loop_address; // The address where we'll return to after each loop iteration | ||
| 52 | }; | ||
| 53 | |||
| 54 | // TODO: Is there a maximal size for this? | ||
| 55 | boost::container::static_vector<CallStackElement, 16> call_stack; | ||
| 56 | |||
| 57 | struct { | ||
| 58 | u32 max_offset; // maximum program counter ever reached | ||
| 59 | u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||
| 60 | } debug; | ||
| 61 | }; | ||
| 62 | |||
| 63 | static void ProcessShaderCode(ShaderState& state) { | ||
| 64 | const auto& uniforms = g_state.vs.uniforms; | ||
| 65 | const auto& swizzle_data = g_state.vs.swizzle_data; | ||
| 66 | const auto& program_code = g_state.vs.program_code; | ||
| 67 | |||
| 68 | // Placeholder for invalid inputs | ||
| 69 | static float24 dummy_vec4_float24[4]; | ||
| 70 | |||
| 71 | while (true) { | ||
| 72 | if (!state.call_stack.empty()) { | ||
| 73 | auto& top = state.call_stack.back(); | ||
| 74 | if (state.program_counter == top.final_address) { | ||
| 75 | state.address_registers[2] += top.loop_increment; | ||
| 76 | |||
| 77 | if (top.repeat_counter-- == 0) { | ||
| 78 | state.program_counter = top.return_address; | ||
| 79 | state.call_stack.pop_back(); | ||
| 80 | } else { | ||
| 81 | state.program_counter = top.loop_address; | ||
| 82 | } | ||
| 83 | |||
| 84 | // TODO: Is "trying again" accurate to hardware? | ||
| 85 | continue; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | bool exit_loop = false; | ||
| 90 | const Instruction instr = { program_code[state.program_counter] }; | ||
| 91 | const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | ||
| 92 | |||
| 93 | static auto call = [](ShaderState& state, u32 offset, u32 num_instructions, | ||
| 94 | u32 return_offset, u8 repeat_count, u8 loop_increment) { | ||
| 95 | state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | ||
| 96 | ASSERT(state.call_stack.size() < state.call_stack.capacity()); | ||
| 97 | state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | ||
| 98 | }; | ||
| 99 | state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | ||
| 100 | |||
| 101 | auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | ||
| 102 | switch (source_reg.GetRegisterType()) { | ||
| 103 | case RegisterType::Input: | ||
| 104 | return state.input_register_table[source_reg.GetIndex()]; | ||
| 105 | |||
| 106 | case RegisterType::Temporary: | ||
| 107 | return &state.temporary_registers[source_reg.GetIndex()].x; | ||
| 108 | |||
| 109 | case RegisterType::FloatUniform: | ||
| 110 | return &uniforms.f[source_reg.GetIndex()].x; | ||
| 111 | |||
| 112 | default: | ||
| 113 | return dummy_vec4_float24; | ||
| 114 | } | ||
| 115 | }; | ||
| 116 | |||
| 117 | switch (instr.opcode.Value().GetInfo().type) { | ||
| 118 | case OpCode::Type::Arithmetic: | ||
| 119 | { | ||
| 120 | const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); | ||
| 121 | |||
| 122 | const int address_offset = (instr.common.address_register_index == 0) | ||
| 123 | ? 0 : state.address_registers[instr.common.address_register_index - 1]; | ||
| 124 | |||
| 125 | const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); | ||
| 126 | const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); | ||
| 127 | |||
| 128 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | ||
| 129 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | ||
| 130 | |||
| 131 | float24 src1[4] = { | ||
| 132 | src1_[(int)swizzle.GetSelectorSrc1(0)], | ||
| 133 | src1_[(int)swizzle.GetSelectorSrc1(1)], | ||
| 134 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 135 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 136 | }; | ||
| 137 | if (negate_src1) { | ||
| 138 | src1[0] = src1[0] * float24::FromFloat32(-1); | ||
| 139 | src1[1] = src1[1] * float24::FromFloat32(-1); | ||
| 140 | src1[2] = src1[2] * float24::FromFloat32(-1); | ||
| 141 | src1[3] = src1[3] * float24::FromFloat32(-1); | ||
| 142 | } | ||
| 143 | float24 src2[4] = { | ||
| 144 | src2_[(int)swizzle.GetSelectorSrc2(0)], | ||
| 145 | src2_[(int)swizzle.GetSelectorSrc2(1)], | ||
| 146 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 147 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 148 | }; | ||
| 149 | if (negate_src2) { | ||
| 150 | src2[0] = src2[0] * float24::FromFloat32(-1); | ||
| 151 | src2[1] = src2[1] * float24::FromFloat32(-1); | ||
| 152 | src2[2] = src2[2] * float24::FromFloat32(-1); | ||
| 153 | src2[3] = src2[3] * float24::FromFloat32(-1); | ||
| 154 | } | ||
| 155 | |||
| 156 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0] | ||
| 157 | : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] | ||
| 158 | : dummy_vec4_float24; | ||
| 159 | |||
| 160 | state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); | ||
| 161 | |||
| 162 | switch (instr.opcode.Value().EffectiveOpCode()) { | ||
| 163 | case OpCode::Id::ADD: | ||
| 164 | { | ||
| 165 | for (int i = 0; i < 4; ++i) { | ||
| 166 | if (!swizzle.DestComponentEnabled(i)) | ||
| 167 | continue; | ||
| 168 | |||
| 169 | dest[i] = src1[i] + src2[i]; | ||
| 170 | } | ||
| 171 | |||
| 172 | break; | ||
| 173 | } | ||
| 174 | |||
| 175 | case OpCode::Id::MUL: | ||
| 176 | { | ||
| 177 | for (int i = 0; i < 4; ++i) { | ||
| 178 | if (!swizzle.DestComponentEnabled(i)) | ||
| 179 | continue; | ||
| 180 | |||
| 181 | dest[i] = src1[i] * src2[i]; | ||
| 182 | } | ||
| 183 | |||
| 184 | break; | ||
| 185 | } | ||
| 186 | |||
| 187 | case OpCode::Id::FLR: | ||
| 188 | for (int i = 0; i < 4; ++i) { | ||
| 189 | if (!swizzle.DestComponentEnabled(i)) | ||
| 190 | continue; | ||
| 191 | |||
| 192 | dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | ||
| 193 | } | ||
| 194 | break; | ||
| 195 | |||
| 196 | case OpCode::Id::MAX: | ||
| 197 | for (int i = 0; i < 4; ++i) { | ||
| 198 | if (!swizzle.DestComponentEnabled(i)) | ||
| 199 | continue; | ||
| 200 | |||
| 201 | dest[i] = std::max(src1[i], src2[i]); | ||
| 202 | } | ||
| 203 | break; | ||
| 204 | |||
| 205 | case OpCode::Id::MIN: | ||
| 206 | for (int i = 0; i < 4; ++i) { | ||
| 207 | if (!swizzle.DestComponentEnabled(i)) | ||
| 208 | continue; | ||
| 209 | |||
| 210 | dest[i] = std::min(src1[i], src2[i]); | ||
| 211 | } | ||
| 212 | break; | ||
| 213 | |||
| 214 | case OpCode::Id::DP3: | ||
| 215 | case OpCode::Id::DP4: | ||
| 216 | { | ||
| 217 | float24 dot = float24::FromFloat32(0.f); | ||
| 218 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | ||
| 219 | for (int i = 0; i < num_components; ++i) | ||
| 220 | dot = dot + src1[i] * src2[i]; | ||
| 221 | |||
| 222 | for (int i = 0; i < 4; ++i) { | ||
| 223 | if (!swizzle.DestComponentEnabled(i)) | ||
| 224 | continue; | ||
| 225 | |||
| 226 | dest[i] = dot; | ||
| 227 | } | ||
| 228 | break; | ||
| 229 | } | ||
| 230 | |||
| 231 | // Reciprocal | ||
| 232 | case OpCode::Id::RCP: | ||
| 233 | { | ||
| 234 | for (int i = 0; i < 4; ++i) { | ||
| 235 | if (!swizzle.DestComponentEnabled(i)) | ||
| 236 | continue; | ||
| 237 | |||
| 238 | // TODO: Be stable against division by zero! | ||
| 239 | // TODO: I think this might be wrong... we should only use one component here | ||
| 240 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | ||
| 241 | } | ||
| 242 | |||
| 243 | break; | ||
| 244 | } | ||
| 245 | |||
| 246 | // Reciprocal Square Root | ||
| 247 | case OpCode::Id::RSQ: | ||
| 248 | { | ||
| 249 | for (int i = 0; i < 4; ++i) { | ||
| 250 | if (!swizzle.DestComponentEnabled(i)) | ||
| 251 | continue; | ||
| 252 | |||
| 253 | // TODO: Be stable against division by zero! | ||
| 254 | // TODO: I think this might be wrong... we should only use one component here | ||
| 255 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | ||
| 256 | } | ||
| 257 | |||
| 258 | break; | ||
| 259 | } | ||
| 260 | |||
| 261 | case OpCode::Id::MOVA: | ||
| 262 | { | ||
| 263 | for (int i = 0; i < 2; ++i) { | ||
| 264 | if (!swizzle.DestComponentEnabled(i)) | ||
| 265 | continue; | ||
| 266 | |||
| 267 | // TODO: Figure out how the rounding is done on hardware | ||
| 268 | state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | ||
| 269 | } | ||
| 270 | |||
| 271 | break; | ||
| 272 | } | ||
| 273 | |||
| 274 | case OpCode::Id::MOV: | ||
| 275 | { | ||
| 276 | for (int i = 0; i < 4; ++i) { | ||
| 277 | if (!swizzle.DestComponentEnabled(i)) | ||
| 278 | continue; | ||
| 279 | |||
| 280 | dest[i] = src1[i]; | ||
| 281 | } | ||
| 282 | break; | ||
| 283 | } | ||
| 284 | |||
| 285 | case OpCode::Id::SLT: | ||
| 286 | case OpCode::Id::SLTI: | ||
| 287 | for (int i = 0; i < 4; ++i) { | ||
| 288 | if (!swizzle.DestComponentEnabled(i)) | ||
| 289 | continue; | ||
| 290 | |||
| 291 | dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||
| 292 | } | ||
| 293 | break; | ||
| 294 | |||
| 295 | case OpCode::Id::CMP: | ||
| 296 | for (int i = 0; i < 2; ++i) { | ||
| 297 | // TODO: Can you restrict to one compare via dest masking? | ||
| 298 | |||
| 299 | auto compare_op = instr.common.compare_op; | ||
| 300 | auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | ||
| 301 | |||
| 302 | switch (op) { | ||
| 303 | case compare_op.Equal: | ||
| 304 | state.conditional_code[i] = (src1[i] == src2[i]); | ||
| 305 | break; | ||
| 306 | |||
| 307 | case compare_op.NotEqual: | ||
| 308 | state.conditional_code[i] = (src1[i] != src2[i]); | ||
| 309 | break; | ||
| 310 | |||
| 311 | case compare_op.LessThan: | ||
| 312 | state.conditional_code[i] = (src1[i] < src2[i]); | ||
| 313 | break; | ||
| 314 | |||
| 315 | case compare_op.LessEqual: | ||
| 316 | state.conditional_code[i] = (src1[i] <= src2[i]); | ||
| 317 | break; | ||
| 318 | |||
| 319 | case compare_op.GreaterThan: | ||
| 320 | state.conditional_code[i] = (src1[i] > src2[i]); | ||
| 321 | break; | ||
| 322 | |||
| 323 | case compare_op.GreaterEqual: | ||
| 324 | state.conditional_code[i] = (src1[i] >= src2[i]); | ||
| 325 | break; | ||
| 326 | |||
| 327 | default: | ||
| 328 | LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); | ||
| 329 | break; | ||
| 330 | } | ||
| 331 | } | ||
| 332 | break; | ||
| 333 | |||
| 334 | default: | ||
| 335 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | ||
| 336 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 337 | DEBUG_ASSERT(false); | ||
| 338 | break; | ||
| 339 | } | ||
| 340 | |||
| 341 | break; | ||
| 342 | } | ||
| 343 | |||
| 344 | case OpCode::Type::MultiplyAdd: | ||
| 345 | { | ||
| 346 | if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || | ||
| 347 | (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { | ||
| 348 | const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; | ||
| 349 | |||
| 350 | bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); | ||
| 351 | |||
| 352 | const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); | ||
| 353 | const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted)); | ||
| 354 | const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted)); | ||
| 355 | |||
| 356 | const bool negate_src1 = ((bool)swizzle.negate_src1 != false); | ||
| 357 | const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | ||
| 358 | const bool negate_src3 = ((bool)swizzle.negate_src3 != false); | ||
| 359 | |||
| 360 | float24 src1[4] = { | ||
| 361 | src1_[(int)swizzle.GetSelectorSrc1(0)], | ||
| 362 | src1_[(int)swizzle.GetSelectorSrc1(1)], | ||
| 363 | src1_[(int)swizzle.GetSelectorSrc1(2)], | ||
| 364 | src1_[(int)swizzle.GetSelectorSrc1(3)], | ||
| 365 | }; | ||
| 366 | if (negate_src1) { | ||
| 367 | src1[0] = src1[0] * float24::FromFloat32(-1); | ||
| 368 | src1[1] = src1[1] * float24::FromFloat32(-1); | ||
| 369 | src1[2] = src1[2] * float24::FromFloat32(-1); | ||
| 370 | src1[3] = src1[3] * float24::FromFloat32(-1); | ||
| 371 | } | ||
| 372 | float24 src2[4] = { | ||
| 373 | src2_[(int)swizzle.GetSelectorSrc2(0)], | ||
| 374 | src2_[(int)swizzle.GetSelectorSrc2(1)], | ||
| 375 | src2_[(int)swizzle.GetSelectorSrc2(2)], | ||
| 376 | src2_[(int)swizzle.GetSelectorSrc2(3)], | ||
| 377 | }; | ||
| 378 | if (negate_src2) { | ||
| 379 | src2[0] = src2[0] * float24::FromFloat32(-1); | ||
| 380 | src2[1] = src2[1] * float24::FromFloat32(-1); | ||
| 381 | src2[2] = src2[2] * float24::FromFloat32(-1); | ||
| 382 | src2[3] = src2[3] * float24::FromFloat32(-1); | ||
| 383 | } | ||
| 384 | float24 src3[4] = { | ||
| 385 | src3_[(int)swizzle.GetSelectorSrc3(0)], | ||
| 386 | src3_[(int)swizzle.GetSelectorSrc3(1)], | ||
| 387 | src3_[(int)swizzle.GetSelectorSrc3(2)], | ||
| 388 | src3_[(int)swizzle.GetSelectorSrc3(3)], | ||
| 389 | }; | ||
| 390 | if (negate_src3) { | ||
| 391 | src3[0] = src3[0] * float24::FromFloat32(-1); | ||
| 392 | src3[1] = src3[1] * float24::FromFloat32(-1); | ||
| 393 | src3[2] = src3[2] * float24::FromFloat32(-1); | ||
| 394 | src3[3] = src3[3] * float24::FromFloat32(-1); | ||
| 395 | } | ||
| 396 | |||
| 397 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0] | ||
| 398 | : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] | ||
| 399 | : dummy_vec4_float24; | ||
| 400 | |||
| 401 | for (int i = 0; i < 4; ++i) { | ||
| 402 | if (!swizzle.DestComponentEnabled(i)) | ||
| 403 | continue; | ||
| 404 | |||
| 405 | dest[i] = src1[i] * src2[i] + src3[i]; | ||
| 406 | } | ||
| 407 | } else { | ||
| 408 | LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | ||
| 409 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 410 | } | ||
| 411 | break; | ||
| 412 | } | ||
| 413 | |||
| 414 | default: | ||
| 415 | { | ||
| 416 | static auto evaluate_condition = [](const ShaderState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | ||
| 417 | bool results[2] = { refx == state.conditional_code[0], | ||
| 418 | refy == state.conditional_code[1] }; | ||
| 419 | |||
| 420 | switch (flow_control.op) { | ||
| 421 | case flow_control.Or: | ||
| 422 | return results[0] || results[1]; | ||
| 423 | |||
| 424 | case flow_control.And: | ||
| 425 | return results[0] && results[1]; | ||
| 426 | |||
| 427 | case flow_control.JustX: | ||
| 428 | return results[0]; | ||
| 429 | |||
| 430 | case flow_control.JustY: | ||
| 431 | return results[1]; | ||
| 432 | } | ||
| 433 | }; | ||
| 434 | |||
| 435 | // Handle each instruction on its own | ||
| 436 | switch (instr.opcode.Value()) { | ||
| 437 | case OpCode::Id::END: | ||
| 438 | exit_loop = true; | ||
| 439 | break; | ||
| 440 | |||
| 441 | case OpCode::Id::JMPC: | ||
| 442 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||
| 443 | state.program_counter = instr.flow_control.dest_offset - 1; | ||
| 444 | } | ||
| 445 | break; | ||
| 446 | |||
| 447 | case OpCode::Id::JMPU: | ||
| 448 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||
| 449 | state.program_counter = instr.flow_control.dest_offset - 1; | ||
| 450 | } | ||
| 451 | break; | ||
| 452 | |||
| 453 | case OpCode::Id::CALL: | ||
| 454 | call(state, | ||
| 455 | instr.flow_control.dest_offset, | ||
| 456 | instr.flow_control.num_instructions, | ||
| 457 | state.program_counter + 1, 0, 0); | ||
| 458 | break; | ||
| 459 | |||
| 460 | case OpCode::Id::CALLU: | ||
| 461 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||
| 462 | call(state, | ||
| 463 | instr.flow_control.dest_offset, | ||
| 464 | instr.flow_control.num_instructions, | ||
| 465 | state.program_counter + 1, 0, 0); | ||
| 466 | } | ||
| 467 | break; | ||
| 468 | |||
| 469 | case OpCode::Id::CALLC: | ||
| 470 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||
| 471 | call(state, | ||
| 472 | instr.flow_control.dest_offset, | ||
| 473 | instr.flow_control.num_instructions, | ||
| 474 | state.program_counter + 1, 0, 0); | ||
| 475 | } | ||
| 476 | break; | ||
| 477 | |||
| 478 | case OpCode::Id::NOP: | ||
| 479 | break; | ||
| 480 | |||
| 481 | case OpCode::Id::IFU: | ||
| 482 | if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||
| 483 | call(state, | ||
| 484 | state.program_counter + 1, | ||
| 485 | instr.flow_control.dest_offset - state.program_counter - 1, | ||
| 486 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 487 | } else { | ||
| 488 | call(state, | ||
| 489 | instr.flow_control.dest_offset, | ||
| 490 | instr.flow_control.num_instructions, | ||
| 491 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 492 | } | ||
| 493 | |||
| 494 | break; | ||
| 495 | |||
| 496 | case OpCode::Id::IFC: | ||
| 497 | { | ||
| 498 | // TODO: Do we need to consider swizzlers here? | ||
| 499 | |||
| 500 | if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||
| 501 | call(state, | ||
| 502 | state.program_counter + 1, | ||
| 503 | instr.flow_control.dest_offset - state.program_counter - 1, | ||
| 504 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 505 | } else { | ||
| 506 | call(state, | ||
| 507 | instr.flow_control.dest_offset, | ||
| 508 | instr.flow_control.num_instructions, | ||
| 509 | instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); | ||
| 510 | } | ||
| 511 | |||
| 512 | break; | ||
| 513 | } | ||
| 514 | |||
| 515 | case OpCode::Id::LOOP: | ||
| 516 | { | ||
| 517 | state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; | ||
| 518 | |||
| 519 | call(state, | ||
| 520 | state.program_counter + 1, | ||
| 521 | instr.flow_control.dest_offset - state.program_counter + 1, | ||
| 522 | instr.flow_control.dest_offset + 1, | ||
| 523 | uniforms.i[instr.flow_control.int_uniform_id].x, | ||
| 524 | uniforms.i[instr.flow_control.int_uniform_id].z); | ||
| 525 | break; | ||
| 526 | } | ||
| 527 | |||
| 528 | default: | ||
| 529 | LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||
| 530 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | ||
| 531 | break; | ||
| 532 | } | ||
| 533 | |||
| 534 | break; | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | ++state.program_counter; | ||
| 539 | |||
| 540 | if (exit_loop) | ||
| 541 | break; | ||
| 542 | } | ||
| 543 | } | ||
| 544 | |||
| 545 | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | ||
| 546 | |||
| 547 | OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { | ||
| 548 | Common::Profiling::ScopeTimer timer(shader_category); | ||
| 549 | |||
| 550 | ShaderState state; | ||
| 551 | |||
| 552 | state.program_counter = config.main_offset; | ||
| 553 | state.debug.max_offset = 0; | ||
| 554 | state.debug.max_opdesc_id = 0; | ||
| 555 | |||
| 556 | // Setup input register table | ||
| 557 | const auto& attribute_register_map = config.input_register_map; | ||
| 558 | float24 dummy_register; | ||
| 559 | boost::fill(state.input_register_table, &dummy_register); | ||
| 560 | |||
| 561 | if (num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||
| 562 | if (num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||
| 563 | if (num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||
| 564 | if (num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||
| 565 | if (num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||
| 566 | if (num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||
| 567 | if (num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||
| 568 | if (num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||
| 569 | if (num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||
| 570 | if (num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||
| 571 | if (num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||
| 572 | if (num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||
| 573 | if (num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||
| 574 | if (num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||
| 575 | if (num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||
| 576 | if (num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||
| 577 | |||
| 578 | state.conditional_code[0] = false; | ||
| 579 | state.conditional_code[1] = false; | ||
| 580 | |||
| 581 | ProcessShaderCode(state); | ||
| 582 | #if PICA_DUMP_SHADERS | ||
| 583 | DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), | ||
| 584 | state.debug.max_opdesc_id, config.main_offset, | ||
| 585 | g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here | ||
| 586 | #endif | ||
| 587 | |||
| 588 | // Setup output data | ||
| 589 | OutputVertex ret; | ||
| 590 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||
| 591 | // figure out what those circumstances are and enable the remaining outputs then. | ||
| 592 | for (int i = 0; i < 7; ++i) { | ||
| 593 | const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here | ||
| 594 | |||
| 595 | u32 semantics[4] = { | ||
| 596 | output_register_map.map_x, output_register_map.map_y, | ||
| 597 | output_register_map.map_z, output_register_map.map_w | ||
| 598 | }; | ||
| 599 | |||
| 600 | for (int comp = 0; comp < 4; ++comp) { | ||
| 601 | float24* out = ((float24*)&ret) + semantics[comp]; | ||
| 602 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||
| 603 | *out = state.output_registers[i][comp]; | ||
| 604 | } else { | ||
| 605 | // Zero output so that attributes which aren't output won't have denormals in them, | ||
| 606 | // which would slow us down later. | ||
| 607 | memset(out, 0, sizeof(*out)); | ||
| 608 | } | ||
| 609 | } | ||
| 610 | } | ||
| 611 | |||
| 612 | // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation | ||
| 613 | for (int i = 0; i < 4; ++i) { | ||
| 614 | ret.color[i] = float24::FromFloat32( | ||
| 615 | std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||
| 616 | } | ||
| 617 | |||
| 618 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||
| 619 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||
| 620 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||
| 621 | ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | ||
| 622 | |||
| 623 | return ret; | ||
| 624 | } | ||
| 625 | |||
| 626 | |||
| 627 | } // namespace | ||
| 628 | |||
| 629 | } // namespace | ||