diff options
| author | 2015-08-23 17:47:09 -0400 | |
|---|---|---|
| committer | 2015-08-23 17:47:09 -0400 | |
| commit | 387bd3a1e49bc5d6e631798753aa8e72a930eebe (patch) | |
| tree | a9c7552ce502fc5b24c3924300a70a0df0bb6724 /src | |
| parent | Merge pull request #1058 from lioncash/ptr (diff) | |
| parent | Shader: implement DPH/DPHI in JIT (diff) | |
| download | yuzu-387bd3a1e49bc5d6e631798753aa8e72a930eebe.tar.gz yuzu-387bd3a1e49bc5d6e631798753aa8e72a930eebe.tar.xz yuzu-387bd3a1e49bc5d6e631798753aa8e72a930eebe.zip | |
Merge pull request #1057 from aroulin/shader-dph-dphi
Shader: Implement DPH and DPHI in interpreter/JIT
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 37 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 1 |
3 files changed, 44 insertions, 3 deletions
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 063cc38f0..6b83d2c1c 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -197,12 +197,19 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 197 | 197 | ||
| 198 | case OpCode::Id::DP3: | 198 | case OpCode::Id::DP3: |
| 199 | case OpCode::Id::DP4: | 199 | case OpCode::Id::DP4: |
| 200 | case OpCode::Id::DPH: | ||
| 201 | case OpCode::Id::DPHI: | ||
| 200 | { | 202 | { |
| 201 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 203 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 202 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 204 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 203 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 205 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 206 | |||
| 207 | OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); | ||
| 208 | if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) | ||
| 209 | src1[3] = float24::FromFloat32(1.0f); | ||
| 210 | |||
| 204 | float24 dot = float24::FromFloat32(0.f); | 211 | float24 dot = float24::FromFloat32(0.f); |
| 205 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | 212 | int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; |
| 206 | for (int i = 0; i < num_components; ++i) | 213 | for (int i = 0; i < num_components; ++i) |
| 207 | dot = dot + src1[i] * src2[i]; | 214 | dot = dot + src1[i] * src2[i]; |
| 208 | 215 | ||
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index a1bdd8456..366be3901 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -23,7 +23,7 @@ const JitFunction instr_table[64] = { | |||
| 23 | &JitCompiler::Compile_ADD, // add | 23 | &JitCompiler::Compile_ADD, // add |
| 24 | &JitCompiler::Compile_DP3, // dp3 | 24 | &JitCompiler::Compile_DP3, // dp3 |
| 25 | &JitCompiler::Compile_DP4, // dp4 | 25 | &JitCompiler::Compile_DP4, // dp4 |
| 26 | nullptr, // dph | 26 | &JitCompiler::Compile_DPH, // dph |
| 27 | nullptr, // unknown | 27 | nullptr, // unknown |
| 28 | &JitCompiler::Compile_EX2, // ex2 | 28 | &JitCompiler::Compile_EX2, // ex2 |
| 29 | &JitCompiler::Compile_LG2, // lg2 | 29 | &JitCompiler::Compile_LG2, // lg2 |
| @@ -44,7 +44,7 @@ const JitFunction instr_table[64] = { | |||
| 44 | nullptr, // unknown | 44 | nullptr, // unknown |
| 45 | nullptr, // unknown | 45 | nullptr, // unknown |
| 46 | nullptr, // unknown | 46 | nullptr, // unknown |
| 47 | nullptr, // dphi | 47 | &JitCompiler::Compile_DPH, // dphi |
| 48 | nullptr, // unknown | 48 | nullptr, // unknown |
| 49 | &JitCompiler::Compile_SGE, // sgei | 49 | &JitCompiler::Compile_SGE, // sgei |
| 50 | &JitCompiler::Compile_SLT, // slti | 50 | &JitCompiler::Compile_SLT, // slti |
| @@ -347,6 +347,39 @@ void JitCompiler::Compile_DP4(Instruction instr) { | |||
| 347 | Compile_DestEnable(instr, SRC1); | 347 | Compile_DestEnable(instr, SRC1); |
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | void JitCompiler::Compile_DPH(Instruction instr) { | ||
| 351 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { | ||
| 352 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 353 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 354 | } else { | ||
| 355 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 356 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 357 | } | ||
| 358 | |||
| 359 | if (Common::GetCPUCaps().sse4_1) { | ||
| 360 | // Set 4th component to 1.0 | ||
| 361 | BLENDPS(SRC1, R(ONE), 0x8); // 0b1000 | ||
| 362 | DPPS(SRC1, R(SRC2), 0xff); | ||
| 363 | } else { | ||
| 364 | // Reverse to set the 4th component to 1.0 | ||
| 365 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); | ||
| 366 | MOVSS(SRC1, R(ONE)); | ||
| 367 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); | ||
| 368 | |||
| 369 | MULPS(SRC1, R(SRC2)); | ||
| 370 | |||
| 371 | MOVAPS(SRC2, R(SRC1)); | ||
| 372 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ | ||
| 373 | ADDPS(SRC1, R(SRC2)); | ||
| 374 | |||
| 375 | MOVAPS(SRC2, R(SRC1)); | ||
| 376 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX | ||
| 377 | ADDPS(SRC1, R(SRC2)); | ||
| 378 | } | ||
| 379 | |||
| 380 | Compile_DestEnable(instr, SRC1); | ||
| 381 | } | ||
| 382 | |||
| 350 | void JitCompiler::Compile_EX2(Instruction instr) { | 383 | void JitCompiler::Compile_EX2(Instruction instr) { |
| 351 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 384 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 352 | MOVSS(XMM0, R(SRC1)); | 385 | MOVSS(XMM0, R(SRC1)); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index b2aa5293c..fbe19fe93 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -37,6 +37,7 @@ public: | |||
| 37 | void Compile_ADD(Instruction instr); | 37 | void Compile_ADD(Instruction instr); |
| 38 | void Compile_DP3(Instruction instr); | 38 | void Compile_DP3(Instruction instr); |
| 39 | void Compile_DP4(Instruction instr); | 39 | void Compile_DP4(Instruction instr); |
| 40 | void Compile_DPH(Instruction instr); | ||
| 40 | void Compile_EX2(Instruction instr); | 41 | void Compile_EX2(Instruction instr); |
| 41 | void Compile_LG2(Instruction instr); | 42 | void Compile_LG2(Instruction instr); |
| 42 | void Compile_MUL(Instruction instr); | 43 | void Compile_MUL(Instruction instr); |