summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
author: Yuri Kunde Schlesner, 2017-02-12 13:29:14 -0800
committer: Yuri Kunde Schlesner, 2017-02-12 13:29:14 -0800
commite10b11a5d06de6efd1ccb39a0ed6bb602761df6d (patch)
tree1fef798fc03ecfd0a789cc17d7f6ab57304e3f5c /src/video_core
parentMerge pull request #2550 from yuriks/pica-refactor2 (diff)
downloadyuzu-e10b11a5d06de6efd1ccb39a0ed6bb602761df6d.tar.gz
yuzu-e10b11a5d06de6efd1ccb39a0ed6bb602761df6d.tar.xz
yuzu-e10b11a5d06de6efd1ccb39a0ed6bb602761df6d.zip
video_core/shader: Document sanitized MUL operation
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/shader/shader_jit_x64_compiler.cpp 8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
index 92b35dbc0..2dbc8b147 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -295,14 +295,22 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
295} 295}
296 296
297void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) { 297void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) {
298 // 0 * inf and inf * 0 in the PICA should return 0 instead of NaN. This can be implemented by
299 // checking for NaNs before and after the multiplication. If the multiplication result is NaN
300 // where neither source was, this NaN was generated by a 0 * inf multiplication, and so the
301 // result should be transformed to 0 to match PICA fp rules.
302
303 // Set scratch to mask of (src1 != NaN and src2 != NaN)
298 movaps(scratch, src1); 304 movaps(scratch, src1);
299 cmpordps(scratch, src2); 305 cmpordps(scratch, src2);
300 306
301 mulps(src1, src2); 307 mulps(src1, src2);
302 308
309 // Set src2 to mask of (result == NaN)
303 movaps(src2, src1); 310 movaps(src2, src1);
304 cmpunordps(src2, src2); 311 cmpunordps(src2, src2);
305 312
313 // Clear components where scratch == src2 (i.e. if result is NaN where neither source was NaN)
306 xorps(scratch, src2); 314 xorps(scratch, src2);
307 andps(src1, scratch); 315 andps(src1, scratch);
308} 316}