summaryrefslogtreecommitdiff
path: root/src/video_core/shader_interpreter.cpp
diff options
context:
space:
mode:
authorGravatar bunnei2015-07-21 19:04:05 -0400
committerGravatar bunnei2015-08-15 17:33:41 -0400
commit642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2 (patch)
tree85643112608a15fafc304d41c4457a50c453bfcd /src/video_core/shader_interpreter.cpp
parentMerge pull request #1027 from lioncash/debugger (diff)
downloadyuzu-642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2.tar.gz
yuzu-642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2.tar.xz
yuzu-642b9b503040f7da02dcb2c52f3cd4cbf6fee4b2.zip
GPU: Refactor "VertexShader" namespace to "Shader".
- Also renames "vertex_shader.*" to "shader_interpreter.*"
Diffstat (limited to 'src/video_core/shader_interpreter.cpp')
-rw-r--r--src/video_core/shader_interpreter.cpp629
1 files changed, 629 insertions, 0 deletions
diff --git a/src/video_core/shader_interpreter.cpp b/src/video_core/shader_interpreter.cpp
new file mode 100644
index 000000000..3cce26d36
--- /dev/null
+++ b/src/video_core/shader_interpreter.cpp
@@ -0,0 +1,629 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <boost/container/static_vector.hpp>
6#include <boost/range/algorithm.hpp>
7
8#include <common/file_util.h>
9
10#include <nihstro/shader_bytecode.h>
11
12#include "common/profiler.h"
13
14#include "pica.h"
15#include "shader_interpreter.h"
16#include "debug_utils/debug_utils.h"
17
18using nihstro::OpCode;
19using nihstro::Instruction;
20using nihstro::RegisterType;
21using nihstro::SourceRegister;
22using nihstro::SwizzlePattern;
23
24namespace Pica {
25
26namespace Shader {
27
28struct ShaderState {
29 u32 program_counter;
30
31 const float24* input_register_table[16];
32 Math::Vec4<float24> output_registers[16];
33
34 Math::Vec4<float24> temporary_registers[16];
35 bool conditional_code[2];
36
37 // Two Address registers and one loop counter
38 // TODO: How many bits do these actually have?
39 s32 address_registers[3];
40
41 enum {
42 INVALID_ADDRESS = 0xFFFFFFFF
43 };
44
45 struct CallStackElement {
46 u32 final_address; // Address upon which we jump to return_address
47 u32 return_address; // Where to jump when leaving scope
48 u8 repeat_counter; // How often to repeat until this call stack element is removed
49 u8 loop_increment; // Which value to add to the loop counter after an iteration
50 // TODO: Should this be a signed value? Does it even matter?
51 u32 loop_address; // The address where we'll return to after each loop iteration
52 };
53
54 // TODO: Is there a maximal size for this?
55 boost::container::static_vector<CallStackElement, 16> call_stack;
56
57 struct {
58 u32 max_offset; // maximum program counter ever reached
59 u32 max_opdesc_id; // maximum swizzle pattern index ever used
60 } debug;
61};
62
63static void ProcessShaderCode(ShaderState& state) {
64 const auto& uniforms = g_state.vs.uniforms;
65 const auto& swizzle_data = g_state.vs.swizzle_data;
66 const auto& program_code = g_state.vs.program_code;
67
68 // Placeholder for invalid inputs
69 static float24 dummy_vec4_float24[4];
70
71 while (true) {
72 if (!state.call_stack.empty()) {
73 auto& top = state.call_stack.back();
74 if (state.program_counter == top.final_address) {
75 state.address_registers[2] += top.loop_increment;
76
77 if (top.repeat_counter-- == 0) {
78 state.program_counter = top.return_address;
79 state.call_stack.pop_back();
80 } else {
81 state.program_counter = top.loop_address;
82 }
83
84 // TODO: Is "trying again" accurate to hardware?
85 continue;
86 }
87 }
88
89 bool exit_loop = false;
90 const Instruction instr = { program_code[state.program_counter] };
91 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
92
93 static auto call = [](ShaderState& state, u32 offset, u32 num_instructions,
94 u32 return_offset, u8 repeat_count, u8 loop_increment) {
95 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
96 ASSERT(state.call_stack.size() < state.call_stack.capacity());
97 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
98 };
99 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter);
100
101 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
102 switch (source_reg.GetRegisterType()) {
103 case RegisterType::Input:
104 return state.input_register_table[source_reg.GetIndex()];
105
106 case RegisterType::Temporary:
107 return &state.temporary_registers[source_reg.GetIndex()].x;
108
109 case RegisterType::FloatUniform:
110 return &uniforms.f[source_reg.GetIndex()].x;
111
112 default:
113 return dummy_vec4_float24;
114 }
115 };
116
117 switch (instr.opcode.Value().GetInfo().type) {
118 case OpCode::Type::Arithmetic:
119 {
120 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
121
122 const int address_offset = (instr.common.address_register_index == 0)
123 ? 0 : state.address_registers[instr.common.address_register_index - 1];
124
125 const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset));
126 const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset));
127
128 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
129 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
130
131 float24 src1[4] = {
132 src1_[(int)swizzle.GetSelectorSrc1(0)],
133 src1_[(int)swizzle.GetSelectorSrc1(1)],
134 src1_[(int)swizzle.GetSelectorSrc1(2)],
135 src1_[(int)swizzle.GetSelectorSrc1(3)],
136 };
137 if (negate_src1) {
138 src1[0] = src1[0] * float24::FromFloat32(-1);
139 src1[1] = src1[1] * float24::FromFloat32(-1);
140 src1[2] = src1[2] * float24::FromFloat32(-1);
141 src1[3] = src1[3] * float24::FromFloat32(-1);
142 }
143 float24 src2[4] = {
144 src2_[(int)swizzle.GetSelectorSrc2(0)],
145 src2_[(int)swizzle.GetSelectorSrc2(1)],
146 src2_[(int)swizzle.GetSelectorSrc2(2)],
147 src2_[(int)swizzle.GetSelectorSrc2(3)],
148 };
149 if (negate_src2) {
150 src2[0] = src2[0] * float24::FromFloat32(-1);
151 src2[1] = src2[1] * float24::FromFloat32(-1);
152 src2[2] = src2[2] * float24::FromFloat32(-1);
153 src2[3] = src2[3] * float24::FromFloat32(-1);
154 }
155
156 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
157 : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
158 : dummy_vec4_float24;
159
160 state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
161
162 switch (instr.opcode.Value().EffectiveOpCode()) {
163 case OpCode::Id::ADD:
164 {
165 for (int i = 0; i < 4; ++i) {
166 if (!swizzle.DestComponentEnabled(i))
167 continue;
168
169 dest[i] = src1[i] + src2[i];
170 }
171
172 break;
173 }
174
175 case OpCode::Id::MUL:
176 {
177 for (int i = 0; i < 4; ++i) {
178 if (!swizzle.DestComponentEnabled(i))
179 continue;
180
181 dest[i] = src1[i] * src2[i];
182 }
183
184 break;
185 }
186
187 case OpCode::Id::FLR:
188 for (int i = 0; i < 4; ++i) {
189 if (!swizzle.DestComponentEnabled(i))
190 continue;
191
192 dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
193 }
194 break;
195
196 case OpCode::Id::MAX:
197 for (int i = 0; i < 4; ++i) {
198 if (!swizzle.DestComponentEnabled(i))
199 continue;
200
201 dest[i] = std::max(src1[i], src2[i]);
202 }
203 break;
204
205 case OpCode::Id::MIN:
206 for (int i = 0; i < 4; ++i) {
207 if (!swizzle.DestComponentEnabled(i))
208 continue;
209
210 dest[i] = std::min(src1[i], src2[i]);
211 }
212 break;
213
214 case OpCode::Id::DP3:
215 case OpCode::Id::DP4:
216 {
217 float24 dot = float24::FromFloat32(0.f);
218 int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4;
219 for (int i = 0; i < num_components; ++i)
220 dot = dot + src1[i] * src2[i];
221
222 for (int i = 0; i < 4; ++i) {
223 if (!swizzle.DestComponentEnabled(i))
224 continue;
225
226 dest[i] = dot;
227 }
228 break;
229 }
230
231 // Reciprocal
232 case OpCode::Id::RCP:
233 {
234 for (int i = 0; i < 4; ++i) {
235 if (!swizzle.DestComponentEnabled(i))
236 continue;
237
238 // TODO: Be stable against division by zero!
239 // TODO: I think this might be wrong... we should only use one component here
240 dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32());
241 }
242
243 break;
244 }
245
246 // Reciprocal Square Root
247 case OpCode::Id::RSQ:
248 {
249 for (int i = 0; i < 4; ++i) {
250 if (!swizzle.DestComponentEnabled(i))
251 continue;
252
253 // TODO: Be stable against division by zero!
254 // TODO: I think this might be wrong... we should only use one component here
255 dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32()));
256 }
257
258 break;
259 }
260
261 case OpCode::Id::MOVA:
262 {
263 for (int i = 0; i < 2; ++i) {
264 if (!swizzle.DestComponentEnabled(i))
265 continue;
266
267 // TODO: Figure out how the rounding is done on hardware
268 state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
269 }
270
271 break;
272 }
273
274 case OpCode::Id::MOV:
275 {
276 for (int i = 0; i < 4; ++i) {
277 if (!swizzle.DestComponentEnabled(i))
278 continue;
279
280 dest[i] = src1[i];
281 }
282 break;
283 }
284
285 case OpCode::Id::SLT:
286 case OpCode::Id::SLTI:
287 for (int i = 0; i < 4; ++i) {
288 if (!swizzle.DestComponentEnabled(i))
289 continue;
290
291 dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
292 }
293 break;
294
295 case OpCode::Id::CMP:
296 for (int i = 0; i < 2; ++i) {
297 // TODO: Can you restrict to one compare via dest masking?
298
299 auto compare_op = instr.common.compare_op;
300 auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
301
302 switch (op) {
303 case compare_op.Equal:
304 state.conditional_code[i] = (src1[i] == src2[i]);
305 break;
306
307 case compare_op.NotEqual:
308 state.conditional_code[i] = (src1[i] != src2[i]);
309 break;
310
311 case compare_op.LessThan:
312 state.conditional_code[i] = (src1[i] < src2[i]);
313 break;
314
315 case compare_op.LessEqual:
316 state.conditional_code[i] = (src1[i] <= src2[i]);
317 break;
318
319 case compare_op.GreaterThan:
320 state.conditional_code[i] = (src1[i] > src2[i]);
321 break;
322
323 case compare_op.GreaterEqual:
324 state.conditional_code[i] = (src1[i] >= src2[i]);
325 break;
326
327 default:
328 LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
329 break;
330 }
331 }
332 break;
333
334 default:
335 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
336 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
337 DEBUG_ASSERT(false);
338 break;
339 }
340
341 break;
342 }
343
344 case OpCode::Type::MultiplyAdd:
345 {
346 if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
347 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
348 const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
349
350 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
351
352 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
353 const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
354 const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
355
356 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
357 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
358 const bool negate_src3 = ((bool)swizzle.negate_src3 != false);
359
360 float24 src1[4] = {
361 src1_[(int)swizzle.GetSelectorSrc1(0)],
362 src1_[(int)swizzle.GetSelectorSrc1(1)],
363 src1_[(int)swizzle.GetSelectorSrc1(2)],
364 src1_[(int)swizzle.GetSelectorSrc1(3)],
365 };
366 if (negate_src1) {
367 src1[0] = src1[0] * float24::FromFloat32(-1);
368 src1[1] = src1[1] * float24::FromFloat32(-1);
369 src1[2] = src1[2] * float24::FromFloat32(-1);
370 src1[3] = src1[3] * float24::FromFloat32(-1);
371 }
372 float24 src2[4] = {
373 src2_[(int)swizzle.GetSelectorSrc2(0)],
374 src2_[(int)swizzle.GetSelectorSrc2(1)],
375 src2_[(int)swizzle.GetSelectorSrc2(2)],
376 src2_[(int)swizzle.GetSelectorSrc2(3)],
377 };
378 if (negate_src2) {
379 src2[0] = src2[0] * float24::FromFloat32(-1);
380 src2[1] = src2[1] * float24::FromFloat32(-1);
381 src2[2] = src2[2] * float24::FromFloat32(-1);
382 src2[3] = src2[3] * float24::FromFloat32(-1);
383 }
384 float24 src3[4] = {
385 src3_[(int)swizzle.GetSelectorSrc3(0)],
386 src3_[(int)swizzle.GetSelectorSrc3(1)],
387 src3_[(int)swizzle.GetSelectorSrc3(2)],
388 src3_[(int)swizzle.GetSelectorSrc3(3)],
389 };
390 if (negate_src3) {
391 src3[0] = src3[0] * float24::FromFloat32(-1);
392 src3[1] = src3[1] * float24::FromFloat32(-1);
393 src3[2] = src3[2] * float24::FromFloat32(-1);
394 src3[3] = src3[3] * float24::FromFloat32(-1);
395 }
396
397 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
398 : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
399 : dummy_vec4_float24;
400
401 for (int i = 0; i < 4; ++i) {
402 if (!swizzle.DestComponentEnabled(i))
403 continue;
404
405 dest[i] = src1[i] * src2[i] + src3[i];
406 }
407 } else {
408 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
409 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
410 }
411 break;
412 }
413
414 default:
415 {
416 static auto evaluate_condition = [](const ShaderState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) {
417 bool results[2] = { refx == state.conditional_code[0],
418 refy == state.conditional_code[1] };
419
420 switch (flow_control.op) {
421 case flow_control.Or:
422 return results[0] || results[1];
423
424 case flow_control.And:
425 return results[0] && results[1];
426
427 case flow_control.JustX:
428 return results[0];
429
430 case flow_control.JustY:
431 return results[1];
432 }
433 };
434
435 // Handle each instruction on its own
436 switch (instr.opcode.Value()) {
437 case OpCode::Id::END:
438 exit_loop = true;
439 break;
440
441 case OpCode::Id::JMPC:
442 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
443 state.program_counter = instr.flow_control.dest_offset - 1;
444 }
445 break;
446
447 case OpCode::Id::JMPU:
448 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
449 state.program_counter = instr.flow_control.dest_offset - 1;
450 }
451 break;
452
453 case OpCode::Id::CALL:
454 call(state,
455 instr.flow_control.dest_offset,
456 instr.flow_control.num_instructions,
457 state.program_counter + 1, 0, 0);
458 break;
459
460 case OpCode::Id::CALLU:
461 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
462 call(state,
463 instr.flow_control.dest_offset,
464 instr.flow_control.num_instructions,
465 state.program_counter + 1, 0, 0);
466 }
467 break;
468
469 case OpCode::Id::CALLC:
470 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
471 call(state,
472 instr.flow_control.dest_offset,
473 instr.flow_control.num_instructions,
474 state.program_counter + 1, 0, 0);
475 }
476 break;
477
478 case OpCode::Id::NOP:
479 break;
480
481 case OpCode::Id::IFU:
482 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
483 call(state,
484 state.program_counter + 1,
485 instr.flow_control.dest_offset - state.program_counter - 1,
486 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
487 } else {
488 call(state,
489 instr.flow_control.dest_offset,
490 instr.flow_control.num_instructions,
491 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
492 }
493
494 break;
495
496 case OpCode::Id::IFC:
497 {
498 // TODO: Do we need to consider swizzlers here?
499
500 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
501 call(state,
502 state.program_counter + 1,
503 instr.flow_control.dest_offset - state.program_counter - 1,
504 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
505 } else {
506 call(state,
507 instr.flow_control.dest_offset,
508 instr.flow_control.num_instructions,
509 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
510 }
511
512 break;
513 }
514
515 case OpCode::Id::LOOP:
516 {
517 state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y;
518
519 call(state,
520 state.program_counter + 1,
521 instr.flow_control.dest_offset - state.program_counter + 1,
522 instr.flow_control.dest_offset + 1,
523 uniforms.i[instr.flow_control.int_uniform_id].x,
524 uniforms.i[instr.flow_control.int_uniform_id].z);
525 break;
526 }
527
528 default:
529 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
530 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
531 break;
532 }
533
534 break;
535 }
536 }
537
538 ++state.program_counter;
539
540 if (exit_loop)
541 break;
542 }
543}
544
545static Common::Profiling::TimingCategory shader_category("Vertex Shader");
546
547OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) {
548 Common::Profiling::ScopeTimer timer(shader_category);
549
550 ShaderState state;
551
552 state.program_counter = config.main_offset;
553 state.debug.max_offset = 0;
554 state.debug.max_opdesc_id = 0;
555
556 // Setup input register table
557 const auto& attribute_register_map = config.input_register_map;
558 float24 dummy_register;
559 boost::fill(state.input_register_table, &dummy_register);
560
561 if (num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
562 if (num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
563 if (num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
564 if (num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
565 if (num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
566 if (num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
567 if (num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
568 if (num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
569 if (num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
570 if (num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
571 if (num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
572 if (num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
573 if (num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
574 if (num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
575 if (num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
576 if (num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
577
578 state.conditional_code[0] = false;
579 state.conditional_code[1] = false;
580
581 ProcessShaderCode(state);
582#if PICA_DUMP_SHADERS
583 DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(),
584 state.debug.max_opdesc_id, config.main_offset,
585 g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here
586#endif
587
588 // Setup output data
589 OutputVertex ret;
590 // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
591 // figure out what those circumstances are and enable the remaining outputs then.
592 for (int i = 0; i < 7; ++i) {
593 const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here
594
595 u32 semantics[4] = {
596 output_register_map.map_x, output_register_map.map_y,
597 output_register_map.map_z, output_register_map.map_w
598 };
599
600 for (int comp = 0; comp < 4; ++comp) {
601 float24* out = ((float24*)&ret) + semantics[comp];
602 if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
603 *out = state.output_registers[i][comp];
604 } else {
605 // Zero output so that attributes which aren't output won't have denormals in them,
606 // which would slow us down later.
607 memset(out, 0, sizeof(*out));
608 }
609 }
610 }
611
612 // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
613 for (int i = 0; i < 4; ++i) {
614 ret.color[i] = float24::FromFloat32(
615 std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
616 }
617
618 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
619 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
620 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
621 ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
622
623 return ret;
624}
625
626
627} // namespace
628
629} // namespace