summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Yuri Kunde Schlesner2015-08-24 01:48:15 -0300
committerGravatar Yuri Kunde Schlesner2015-08-24 01:48:15 -0300
commit630a850d4d5a0509b16e96aaccc81e9384e1fba8 (patch)
tree01a553c46e23e4f5d2b92ec1faf1a4b72a8e1466 /src
parentShaders: Explicitly conform to PICA semantics in MAX/MIN (diff)
downloadyuzu-630a850d4d5a0509b16e96aaccc81e9384e1fba8.tar.gz
yuzu-630a850d4d5a0509b16e96aaccc81e9384e1fba8.tar.xz
yuzu-630a850d4d5a0509b16e96aaccc81e9384e1fba8.zip
Shaders: Fix multiplications between 0.0 and inf
The PICA200 semantics for multiplication are such that multiplying inf by exactly 0.0 yields 0.0, rather than the NaN that IEEE 754 specifies. Games rely on this behavior. Fixes #1024 (missing OoT interface items)
Diffstat (limited to 'src')
-rw-r--r--src/video_core/pica.h14
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp78
-rw-r--r--src/video_core/shader/shader_jit_x64.h6
3 files changed, 58 insertions, 40 deletions
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 58b924f9e..cf148de50 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -1021,12 +1021,20 @@ struct float24 {
1021 return ret; 1021 return ret;
1022 } 1022 }
1023 1023
1024 static float24 Zero() {
1025 return FromFloat32(0.f);
1026 }
1027
1024 // Not recommended for anything but logging 1028 // Not recommended for anything but logging
1025 float ToFloat32() const { 1029 float ToFloat32() const {
1026 return value; 1030 return value;
1027 } 1031 }
1028 1032
1029 float24 operator * (const float24& flt) const { 1033 float24 operator * (const float24& flt) const {
1034 if ((this->value == 0.f && flt.value == flt.value) ||
1035 (flt.value == 0.f && this->value == this->value))
1036 // PICA gives 0 instead of NaN when multiplying by inf
1037 return Zero();
1030 return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); 1038 return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
1031 } 1039 }
1032 1040
@@ -1043,7 +1051,11 @@ struct float24 {
1043 } 1051 }
1044 1052
1045 float24& operator *= (const float24& flt) { 1053 float24& operator *= (const float24& flt) {
1046 value *= flt.ToFloat32(); 1054 if ((this->value == 0.f && flt.value == flt.value) ||
1055 (flt.value == 0.f && this->value == this->value))
1056 // PICA gives 0 instead of NaN when multiplying by inf
1057 *this = Zero();
1058 else value *= flt.ToFloat32();
1047 return *this; 1059 return *this;
1048 } 1060 }
1049 1061
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 456c8567d..ddae61cae 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -246,6 +246,19 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
246 } 246 }
247} 247}
248 248
249void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
250 MOVAPS(scratch, R(src1));
251 CMPPS(scratch, R(src2), CMP_ORD);
252
253 MULPS(src1, R(src2));
254
255 MOVAPS(src2, R(src1));
256 CMPPS(src2, R(src2), CMP_UNORD);
257
258 XORPS(scratch, R(src2));
259 ANDPS(src1, R(scratch));
260}
261
249void JitCompiler::Compile_EvaluateCondition(Instruction instr) { 262void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
250 // Note: NXOR is used below to check for equality 263 // Note: NXOR is used below to check for equality
251 switch (instr.flow_control.op) { 264 switch (instr.flow_control.op) {
@@ -309,21 +322,17 @@ void JitCompiler::Compile_DP3(Instruction instr) {
309 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 322 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
310 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 323 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
311 324
312 if (Common::GetCPUCaps().sse4_1) { 325 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
313 DPPS(SRC1, R(SRC2), 0x7f);
314 } else {
315 MULPS(SRC1, R(SRC2));
316 326
317 MOVAPS(SRC2, R(SRC1)); 327 MOVAPS(SRC2, R(SRC1));
318 SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1)); 328 SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
319 329
320 MOVAPS(SRC3, R(SRC1)); 330 MOVAPS(SRC3, R(SRC1));
321 SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2)); 331 SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
322 332
323 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); 333 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
324 ADDPS(SRC1, R(SRC2)); 334 ADDPS(SRC1, R(SRC2));
325 ADDPS(SRC1, R(SRC3)); 335 ADDPS(SRC1, R(SRC3));
326 }
327 336
328 Compile_DestEnable(instr, SRC1); 337 Compile_DestEnable(instr, SRC1);
329} 338}
@@ -332,19 +341,15 @@ void JitCompiler::Compile_DP4(Instruction instr) {
332 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 341 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
333 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 342 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
334 343
335 if (Common::GetCPUCaps().sse4_1) { 344 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
336 DPPS(SRC1, R(SRC2), 0xff);
337 } else {
338 MULPS(SRC1, R(SRC2));
339 345
340 MOVAPS(SRC2, R(SRC1)); 346 MOVAPS(SRC2, R(SRC1));
341 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY 347 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
342 ADDPS(SRC1, R(SRC2)); 348 ADDPS(SRC1, R(SRC2));
343 349
344 MOVAPS(SRC2, R(SRC1)); 350 MOVAPS(SRC2, R(SRC1));
345 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX 351 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
346 ADDPS(SRC1, R(SRC2)); 352 ADDPS(SRC1, R(SRC2));
347 }
348 353
349 Compile_DestEnable(instr, SRC1); 354 Compile_DestEnable(instr, SRC1);
350} 355}
@@ -361,23 +366,22 @@ void JitCompiler::Compile_DPH(Instruction instr) {
361 if (Common::GetCPUCaps().sse4_1) { 366 if (Common::GetCPUCaps().sse4_1) {
362 // Set 4th component to 1.0 367 // Set 4th component to 1.0
363 BLENDPS(SRC1, R(ONE), 0x8); // 0b1000 368 BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
364 DPPS(SRC1, R(SRC2), 0xff);
365 } else { 369 } else {
366 // Reverse to set the 4th component to 1.0 370 // Reverse to set the 4th component to 1.0
367 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); 371 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
368 MOVSS(SRC1, R(ONE)); 372 MOVSS(SRC1, R(ONE));
369 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); 373 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
374 }
370 375
371 MULPS(SRC1, R(SRC2)); 376 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
372 377
373 MOVAPS(SRC2, R(SRC1)); 378 MOVAPS(SRC2, R(SRC1));
374 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY 379 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
375 ADDPS(SRC1, R(SRC2)); 380 ADDPS(SRC1, R(SRC2));
376 381
377 MOVAPS(SRC2, R(SRC1)); 382 MOVAPS(SRC2, R(SRC1));
378 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX 383 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
379 ADDPS(SRC1, R(SRC2)); 384 ADDPS(SRC1, R(SRC2));
380 }
381 385
382 Compile_DestEnable(instr, SRC1); 386 Compile_DestEnable(instr, SRC1);
383} 387}
@@ -417,7 +421,7 @@ void JitCompiler::Compile_LG2(Instruction instr) {
417void JitCompiler::Compile_MUL(Instruction instr) { 421void JitCompiler::Compile_MUL(Instruction instr) {
418 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 422 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
419 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 423 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
420 MULPS(SRC1, R(SRC2)); 424 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
421 Compile_DestEnable(instr, SRC1); 425 Compile_DestEnable(instr, SRC1);
422} 426}
423 427
@@ -635,12 +639,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
635 Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3); 639 Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
636 } 640 }
637 641
638 if (Common::GetCPUCaps().fma) { 642 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
639 VFMADD213PS(SRC1, SRC2, R(SRC3)); 643 ADDPS(SRC1, R(SRC3));
640 } else {
641 MULPS(SRC1, R(SRC2));
642 ADDPS(SRC1, R(SRC3));
643 }
644 644
645 Compile_DestEnable(instr, SRC1); 645 Compile_DestEnable(instr, SRC1);
646} 646}
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index fbe19fe93..58828ecc8 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -68,6 +68,12 @@ private:
68 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); 68 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
69 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); 69 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
70 70
71 /**
72 * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
73 * zero by inf. Clobbers `src2` and `scratch`.
74 */
75 void Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch);
76
71 void Compile_EvaluateCondition(Instruction instr); 77 void Compile_EvaluateCondition(Instruction instr);
72 void Compile_UniformCondition(Instruction instr); 78 void Compile_UniformCondition(Instruction instr);
73 79