diff options
| author | 2016-04-13 23:36:39 -0400 | |
|---|---|---|
| committer | 2016-04-13 23:36:39 -0400 | |
| commit | d89e48679e93f8a6242d9c8d0837053f5aa708d0 (patch) | |
| tree | 7bb6c9fd8e9a659369e92661478925512ac8eddd /src | |
| parent | Merge pull request #1660 from MerryMage/file_util (diff) | |
| parent | shader_jit_x64: Rename RuntimeAssert to Compile_Assert. (diff) | |
| download | yuzu-d89e48679e93f8a6242d9c8d0837053f5aa708d0.tar.gz yuzu-d89e48679e93f8a6242d9c8d0837053f5aa708d0.tar.xz yuzu-d89e48679e93f8a6242d9c8d0837053f5aa708d0.zip | |
Merge pull request #1546 from bunnei/refactor-shader-jit
Shader JIT Part 2
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/x64/emitter.cpp | 28 | ||||
| -rw-r--r-- | src/common/x64/emitter.h | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 302 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 58 |
7 files changed, 270 insertions, 161 deletions
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 1dcf2416c..5662f7f86 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp | |||
| @@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr) | |||
| 455 | Write32(u32(distance)); | 455 | Write32(u32(distance)); |
| 456 | } | 456 | } |
| 457 | 457 | ||
| 458 | FixupBranch XEmitter::CALL() | ||
| 459 | { | ||
| 460 | FixupBranch branch; | ||
| 461 | branch.type = 1; | ||
| 462 | branch.ptr = code + 5; | ||
| 463 | |||
| 464 | Write8(0xE8); | ||
| 465 | Write32(0); | ||
| 466 | |||
| 467 | return branch; | ||
| 468 | } | ||
| 469 | |||
| 458 | FixupBranch XEmitter::J(bool force5bytes) | 470 | FixupBranch XEmitter::J(bool force5bytes) |
| 459 | { | 471 | { |
| 460 | FixupBranch branch; | 472 | FixupBranch branch; |
| @@ -531,6 +543,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch) | |||
| 531 | } | 543 | } |
| 532 | } | 544 | } |
| 533 | 545 | ||
| 546 | void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) | ||
| 547 | { | ||
| 548 | if (branch.type == 0) | ||
| 549 | { | ||
| 550 | s64 distance = (s64)(target - branch.ptr); | ||
| 551 | ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); | ||
| 552 | branch.ptr[-1] = (u8)(s8)distance; | ||
| 553 | } | ||
| 554 | else if (branch.type == 1) | ||
| 555 | { | ||
| 556 | s64 distance = (s64)(target - branch.ptr); | ||
| 557 | ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); | ||
| 558 | ((s32*)branch.ptr)[-1] = (s32)distance; | ||
| 559 | } | ||
| 560 | } | ||
| 561 | |||
| 534 | //Single byte opcodes | 562 | //Single byte opcodes |
| 535 | //There is no PUSHAD/POPAD in 64-bit mode. | 563 | //There is no PUSHAD/POPAD in 64-bit mode. |
| 536 | void XEmitter::INT3() {Write8(0xCC);} | 564 | void XEmitter::INT3() {Write8(0xCC);} |
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index 7c6548fb5..a33724146 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h | |||
| @@ -425,12 +425,14 @@ public: | |||
| 425 | #undef CALL | 425 | #undef CALL |
| 426 | #endif | 426 | #endif |
| 427 | void CALL(const void* fnptr); | 427 | void CALL(const void* fnptr); |
| 428 | FixupBranch CALL(); | ||
| 428 | void CALLptr(OpArg arg); | 429 | void CALLptr(OpArg arg); |
| 429 | 430 | ||
| 430 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); | 431 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); |
| 431 | void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); | 432 | void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); |
| 432 | 433 | ||
| 433 | void SetJumpTarget(const FixupBranch& branch); | 434 | void SetJumpTarget(const FixupBranch& branch); |
| 435 | void SetJumpTarget(const FixupBranch& branch, const u8* target); | ||
| 434 | 436 | ||
| 435 | void SETcc(CCFlags flag, OpArg dest); | 437 | void SETcc(CCFlags flag, OpArg dest); |
| 436 | // Note: CMOV brings small if any benefit on current cpus. | 438 | // Note: CMOV brings small if any benefit on current cpus. |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 08ec2907a..3abe79c09 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -140,7 +140,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 140 | immediate_attribute_id = 0; | 140 | immediate_attribute_id = 0; |
| 141 | 141 | ||
| 142 | Shader::UnitState<false> shader_unit; | 142 | Shader::UnitState<false> shader_unit; |
| 143 | Shader::Setup(shader_unit); | 143 | Shader::Setup(); |
| 144 | 144 | ||
| 145 | if (g_debug_context) | 145 | if (g_debug_context) |
| 146 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input)); | 146 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input)); |
| @@ -300,7 +300,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 300 | vertex_cache_ids.fill(-1); | 300 | vertex_cache_ids.fill(-1); |
| 301 | 301 | ||
| 302 | Shader::UnitState<false> shader_unit; | 302 | Shader::UnitState<false> shader_unit; |
| 303 | Shader::Setup(shader_unit); | 303 | Shader::Setup(); |
| 304 | 304 | ||
| 305 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 305 | for (unsigned int index = 0; index < regs.num_vertices; ++index) |
| 306 | { | 306 | { |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 78d295c76..75301accd 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -28,36 +28,24 @@ namespace Pica { | |||
| 28 | namespace Shader { | 28 | namespace Shader { |
| 29 | 29 | ||
| 30 | #ifdef ARCHITECTURE_x86_64 | 30 | #ifdef ARCHITECTURE_x86_64 |
| 31 | static std::unordered_map<u64, CompiledShader*> shader_map; | 31 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; |
| 32 | static JitCompiler jit; | 32 | static const JitShader* jit_shader; |
| 33 | static CompiledShader* jit_shader; | ||
| 34 | |||
| 35 | static void ClearCache() { | ||
| 36 | shader_map.clear(); | ||
| 37 | jit.Clear(); | ||
| 38 | LOG_INFO(HW_GPU, "Shader JIT cache cleared"); | ||
| 39 | } | ||
| 40 | #endif // ARCHITECTURE_x86_64 | 33 | #endif // ARCHITECTURE_x86_64 |
| 41 | 34 | ||
| 42 | void Setup(UnitState<false>& state) { | 35 | void Setup() { |
| 43 | #ifdef ARCHITECTURE_x86_64 | 36 | #ifdef ARCHITECTURE_x86_64 |
| 44 | if (VideoCore::g_shader_jit_enabled) { | 37 | if (VideoCore::g_shader_jit_enabled) { |
| 45 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 38 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| 46 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ | 39 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); |
| 47 | g_state.regs.vs.main_offset); | ||
| 48 | 40 | ||
| 49 | auto iter = shader_map.find(cache_key); | 41 | auto iter = shader_map.find(cache_key); |
| 50 | if (iter != shader_map.end()) { | 42 | if (iter != shader_map.end()) { |
| 51 | jit_shader = iter->second; | 43 | jit_shader = iter->second.get(); |
| 52 | } else { | 44 | } else { |
| 53 | // Check if remaining JIT code space is enough for at least one more (massive) shader | 45 | auto shader = std::make_unique<JitShader>(); |
| 54 | if (jit.GetSpaceLeft() < jit_shader_size) { | 46 | shader->Compile(); |
| 55 | // If not, clear the cache of all previously compiled shaders | 47 | jit_shader = shader.get(); |
| 56 | ClearCache(); | 48 | shader_map[cache_key] = std::move(shader); |
| 57 | } | ||
| 58 | |||
| 59 | jit_shader = jit.Compile(); | ||
| 60 | shader_map.emplace(cache_key, jit_shader); | ||
| 61 | } | 49 | } |
| 62 | } | 50 | } |
| 63 | #endif // ARCHITECTURE_x86_64 | 51 | #endif // ARCHITECTURE_x86_64 |
| @@ -65,7 +53,7 @@ void Setup(UnitState<false>& state) { | |||
| 65 | 53 | ||
| 66 | void Shutdown() { | 54 | void Shutdown() { |
| 67 | #ifdef ARCHITECTURE_x86_64 | 55 | #ifdef ARCHITECTURE_x86_64 |
| 68 | ClearCache(); | 56 | shader_map.clear(); |
| 69 | #endif // ARCHITECTURE_x86_64 | 57 | #endif // ARCHITECTURE_x86_64 |
| 70 | } | 58 | } |
| 71 | 59 | ||
| @@ -109,7 +97,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr | |||
| 109 | 97 | ||
| 110 | #ifdef ARCHITECTURE_x86_64 | 98 | #ifdef ARCHITECTURE_x86_64 |
| 111 | if (VideoCore::g_shader_jit_enabled) | 99 | if (VideoCore::g_shader_jit_enabled) |
| 112 | jit_shader(&state.registers); | 100 | jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); |
| 113 | else | 101 | else |
| 114 | RunInterpreter(state); | 102 | RunInterpreter(state); |
| 115 | #else | 103 | #else |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 7af8f1fa1..9c5bd97bd 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -339,9 +339,8 @@ struct UnitState { | |||
| 339 | /** | 339 | /** |
| 340 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per | 340 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per |
| 341 | * vertex, which would happen within the `Run` function). | 341 | * vertex, which would happen within the `Run` function). |
| 342 | * @param state Shader unit state, must be setup per shader and per shader unit | ||
| 343 | */ | 342 | */ |
| 344 | void Setup(UnitState<false>& state); | 343 | void Setup(); |
| 345 | 344 | ||
| 346 | /// Performs any cleanup when the emulator is shutdown | 345 | /// Performs any cleanup when the emulator is shutdown |
| 347 | void Shutdown(); | 346 | void Shutdown(); |
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index dffe051ef..b47d3beda 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <smmintrin.h> | 6 | #include <smmintrin.h> |
| 6 | 7 | ||
| 7 | #include "common/x64/abi.h" | 8 | #include "common/x64/abi.h" |
| @@ -19,73 +20,73 @@ namespace Shader { | |||
| 19 | 20 | ||
| 20 | using namespace Gen; | 21 | using namespace Gen; |
| 21 | 22 | ||
| 22 | typedef void (JitCompiler::*JitFunction)(Instruction instr); | 23 | typedef void (JitShader::*JitFunction)(Instruction instr); |
| 23 | 24 | ||
| 24 | const JitFunction instr_table[64] = { | 25 | const JitFunction instr_table[64] = { |
| 25 | &JitCompiler::Compile_ADD, // add | 26 | &JitShader::Compile_ADD, // add |
| 26 | &JitCompiler::Compile_DP3, // dp3 | 27 | &JitShader::Compile_DP3, // dp3 |
| 27 | &JitCompiler::Compile_DP4, // dp4 | 28 | &JitShader::Compile_DP4, // dp4 |
| 28 | &JitCompiler::Compile_DPH, // dph | 29 | &JitShader::Compile_DPH, // dph |
| 29 | nullptr, // unknown | 30 | nullptr, // unknown |
| 30 | &JitCompiler::Compile_EX2, // ex2 | 31 | &JitShader::Compile_EX2, // ex2 |
| 31 | &JitCompiler::Compile_LG2, // lg2 | 32 | &JitShader::Compile_LG2, // lg2 |
| 32 | nullptr, // unknown | 33 | nullptr, // unknown |
| 33 | &JitCompiler::Compile_MUL, // mul | 34 | &JitShader::Compile_MUL, // mul |
| 34 | &JitCompiler::Compile_SGE, // sge | 35 | &JitShader::Compile_SGE, // sge |
| 35 | &JitCompiler::Compile_SLT, // slt | 36 | &JitShader::Compile_SLT, // slt |
| 36 | &JitCompiler::Compile_FLR, // flr | 37 | &JitShader::Compile_FLR, // flr |
| 37 | &JitCompiler::Compile_MAX, // max | 38 | &JitShader::Compile_MAX, // max |
| 38 | &JitCompiler::Compile_MIN, // min | 39 | &JitShader::Compile_MIN, // min |
| 39 | &JitCompiler::Compile_RCP, // rcp | 40 | &JitShader::Compile_RCP, // rcp |
| 40 | &JitCompiler::Compile_RSQ, // rsq | 41 | &JitShader::Compile_RSQ, // rsq |
| 41 | nullptr, // unknown | 42 | nullptr, // unknown |
| 42 | nullptr, // unknown | 43 | nullptr, // unknown |
| 43 | &JitCompiler::Compile_MOVA, // mova | 44 | &JitShader::Compile_MOVA, // mova |
| 44 | &JitCompiler::Compile_MOV, // mov | 45 | &JitShader::Compile_MOV, // mov |
| 45 | nullptr, // unknown | 46 | nullptr, // unknown |
| 46 | nullptr, // unknown | 47 | nullptr, // unknown |
| 47 | nullptr, // unknown | 48 | nullptr, // unknown |
| 48 | nullptr, // unknown | 49 | nullptr, // unknown |
| 49 | &JitCompiler::Compile_DPH, // dphi | 50 | &JitShader::Compile_DPH, // dphi |
| 50 | nullptr, // unknown | 51 | nullptr, // unknown |
| 51 | &JitCompiler::Compile_SGE, // sgei | 52 | &JitShader::Compile_SGE, // sgei |
| 52 | &JitCompiler::Compile_SLT, // slti | 53 | &JitShader::Compile_SLT, // slti |
| 53 | nullptr, // unknown | 54 | nullptr, // unknown |
| 54 | nullptr, // unknown | 55 | nullptr, // unknown |
| 55 | nullptr, // unknown | 56 | nullptr, // unknown |
| 56 | nullptr, // unknown | 57 | nullptr, // unknown |
| 57 | nullptr, // unknown | 58 | nullptr, // unknown |
| 58 | &JitCompiler::Compile_NOP, // nop | 59 | &JitShader::Compile_NOP, // nop |
| 59 | &JitCompiler::Compile_END, // end | 60 | &JitShader::Compile_END, // end |
| 60 | nullptr, // break | 61 | nullptr, // break |
| 61 | &JitCompiler::Compile_CALL, // call | 62 | &JitShader::Compile_CALL, // call |
| 62 | &JitCompiler::Compile_CALLC, // callc | 63 | &JitShader::Compile_CALLC, // callc |
| 63 | &JitCompiler::Compile_CALLU, // callu | 64 | &JitShader::Compile_CALLU, // callu |
| 64 | &JitCompiler::Compile_IF, // ifu | 65 | &JitShader::Compile_IF, // ifu |
| 65 | &JitCompiler::Compile_IF, // ifc | 66 | &JitShader::Compile_IF, // ifc |
| 66 | &JitCompiler::Compile_LOOP, // loop | 67 | &JitShader::Compile_LOOP, // loop |
| 67 | nullptr, // emit | 68 | nullptr, // emit |
| 68 | nullptr, // sete | 69 | nullptr, // sete |
| 69 | &JitCompiler::Compile_JMP, // jmpc | 70 | &JitShader::Compile_JMP, // jmpc |
| 70 | &JitCompiler::Compile_JMP, // jmpu | 71 | &JitShader::Compile_JMP, // jmpu |
| 71 | &JitCompiler::Compile_CMP, // cmp | 72 | &JitShader::Compile_CMP, // cmp |
| 72 | &JitCompiler::Compile_CMP, // cmp | 73 | &JitShader::Compile_CMP, // cmp |
| 73 | &JitCompiler::Compile_MAD, // madi | 74 | &JitShader::Compile_MAD, // madi |
| 74 | &JitCompiler::Compile_MAD, // madi | 75 | &JitShader::Compile_MAD, // madi |
| 75 | &JitCompiler::Compile_MAD, // madi | 76 | &JitShader::Compile_MAD, // madi |
| 76 | &JitCompiler::Compile_MAD, // madi | 77 | &JitShader::Compile_MAD, // madi |
| 77 | &JitCompiler::Compile_MAD, // madi | 78 | &JitShader::Compile_MAD, // madi |
| 78 | &JitCompiler::Compile_MAD, // madi | 79 | &JitShader::Compile_MAD, // madi |
| 79 | &JitCompiler::Compile_MAD, // madi | 80 | &JitShader::Compile_MAD, // madi |
| 80 | &JitCompiler::Compile_MAD, // madi | 81 | &JitShader::Compile_MAD, // madi |
| 81 | &JitCompiler::Compile_MAD, // mad | 82 | &JitShader::Compile_MAD, // mad |
| 82 | &JitCompiler::Compile_MAD, // mad | 83 | &JitShader::Compile_MAD, // mad |
| 83 | &JitCompiler::Compile_MAD, // mad | 84 | &JitShader::Compile_MAD, // mad |
| 84 | &JitCompiler::Compile_MAD, // mad | 85 | &JitShader::Compile_MAD, // mad |
| 85 | &JitCompiler::Compile_MAD, // mad | 86 | &JitShader::Compile_MAD, // mad |
| 86 | &JitCompiler::Compile_MAD, // mad | 87 | &JitShader::Compile_MAD, // mad |
| 87 | &JitCompiler::Compile_MAD, // mad | 88 | &JitShader::Compile_MAD, // mad |
| 88 | &JitCompiler::Compile_MAD, // mad | 89 | &JitShader::Compile_MAD, // mad |
| 89 | }; | 90 | }; |
| 90 | 91 | ||
| 91 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can | 92 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can |
| @@ -138,13 +139,32 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b; | |||
| 138 | static const u8 NO_DEST_REG_MASK = 0xf; | 139 | static const u8 NO_DEST_REG_MASK = 0xf; |
| 139 | 140 | ||
| 140 | /** | 141 | /** |
| 142 | * Get the vertex shader instruction for a given offset in the current shader program | ||
| 143 | * @param offset Offset in the current shader program of the instruction | ||
| 144 | * @return Instruction at the specified offset | ||
| 145 | */ | ||
| 146 | static Instruction GetVertexShaderInstruction(size_t offset) { | ||
| 147 | return { g_state.vs.program_code[offset] }; | ||
| 148 | } | ||
| 149 | |||
| 150 | static void LogCritical(const char* msg) { | ||
| 151 | LOG_CRITICAL(HW_GPU, msg); | ||
| 152 | } | ||
| 153 | |||
| 154 | void JitShader::Compile_Assert(bool condition, const char* msg) { | ||
| 155 | if (!condition) { | ||
| 156 | ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg)); | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | /** | ||
| 141 | * Loads and swizzles a source register into the specified XMM register. | 161 | * Loads and swizzles a source register into the specified XMM register. |
| 142 | * @param instr VS instruction, used for determining how to load the source register | 162 | * @param instr VS instruction, used for determining how to load the source register |
| 143 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) | 163 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) |
| 144 | * @param src_reg SourceRegister object corresponding to the source register to load | 164 | * @param src_reg SourceRegister object corresponding to the source register to load |
| 145 | * @param dest Destination XMM register to store the loaded, swizzled source register | 165 | * @param dest Destination XMM register to store the loaded, swizzled source register |
| 146 | */ | 166 | */ |
| 147 | void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { | 167 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { |
| 148 | X64Reg src_ptr; | 168 | X64Reg src_ptr; |
| 149 | size_t src_offset; | 169 | size_t src_offset; |
| 150 | 170 | ||
| @@ -216,7 +236,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | |||
| 216 | } | 236 | } |
| 217 | } | 237 | } |
| 218 | 238 | ||
| 219 | void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | 239 | void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { |
| 220 | DestRegister dest; | 240 | DestRegister dest; |
| 221 | unsigned operand_desc_id; | 241 | unsigned operand_desc_id; |
| 222 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | 242 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || |
| @@ -263,7 +283,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 263 | } | 283 | } |
| 264 | } | 284 | } |
| 265 | 285 | ||
| 266 | void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { | 286 | void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { |
| 267 | MOVAPS(scratch, R(src1)); | 287 | MOVAPS(scratch, R(src1)); |
| 268 | CMPPS(scratch, R(src2), CMP_ORD); | 288 | CMPPS(scratch, R(src2), CMP_ORD); |
| 269 | 289 | ||
| @@ -276,7 +296,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen:: | |||
| 276 | ANDPS(src1, R(scratch)); | 296 | ANDPS(src1, R(scratch)); |
| 277 | } | 297 | } |
| 278 | 298 | ||
| 279 | void JitCompiler::Compile_EvaluateCondition(Instruction instr) { | 299 | void JitShader::Compile_EvaluateCondition(Instruction instr) { |
| 280 | // Note: NXOR is used below to check for equality | 300 | // Note: NXOR is used below to check for equality |
| 281 | switch (instr.flow_control.op) { | 301 | switch (instr.flow_control.op) { |
| 282 | case Instruction::FlowControlType::Or: | 302 | case Instruction::FlowControlType::Or: |
| @@ -307,23 +327,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) { | |||
| 307 | } | 327 | } |
| 308 | } | 328 | } |
| 309 | 329 | ||
| 310 | void JitCompiler::Compile_UniformCondition(Instruction instr) { | 330 | void JitShader::Compile_UniformCondition(Instruction instr) { |
| 311 | int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); | 331 | int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); |
| 312 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | 332 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); |
| 313 | } | 333 | } |
| 314 | 334 | ||
| 315 | BitSet32 JitCompiler::PersistentCallerSavedRegs() { | 335 | BitSet32 JitShader::PersistentCallerSavedRegs() { |
| 316 | return persistent_regs & ABI_ALL_CALLER_SAVED; | 336 | return persistent_regs & ABI_ALL_CALLER_SAVED; |
| 317 | } | 337 | } |
| 318 | 338 | ||
| 319 | void JitCompiler::Compile_ADD(Instruction instr) { | 339 | void JitShader::Compile_ADD(Instruction instr) { |
| 320 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 340 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 321 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 341 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 322 | ADDPS(SRC1, R(SRC2)); | 342 | ADDPS(SRC1, R(SRC2)); |
| 323 | Compile_DestEnable(instr, SRC1); | 343 | Compile_DestEnable(instr, SRC1); |
| 324 | } | 344 | } |
| 325 | 345 | ||
| 326 | void JitCompiler::Compile_DP3(Instruction instr) { | 346 | void JitShader::Compile_DP3(Instruction instr) { |
| 327 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 347 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 328 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 348 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 329 | 349 | ||
| @@ -342,7 +362,7 @@ void JitCompiler::Compile_DP3(Instruction instr) { | |||
| 342 | Compile_DestEnable(instr, SRC1); | 362 | Compile_DestEnable(instr, SRC1); |
| 343 | } | 363 | } |
| 344 | 364 | ||
| 345 | void JitCompiler::Compile_DP4(Instruction instr) { | 365 | void JitShader::Compile_DP4(Instruction instr) { |
| 346 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 366 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 347 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 367 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 348 | 368 | ||
| @@ -359,7 +379,7 @@ void JitCompiler::Compile_DP4(Instruction instr) { | |||
| 359 | Compile_DestEnable(instr, SRC1); | 379 | Compile_DestEnable(instr, SRC1); |
| 360 | } | 380 | } |
| 361 | 381 | ||
| 362 | void JitCompiler::Compile_DPH(Instruction instr) { | 382 | void JitShader::Compile_DPH(Instruction instr) { |
| 363 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { | 383 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { |
| 364 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | 384 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); |
| 365 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | 385 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); |
| @@ -391,7 +411,7 @@ void JitCompiler::Compile_DPH(Instruction instr) { | |||
| 391 | Compile_DestEnable(instr, SRC1); | 411 | Compile_DestEnable(instr, SRC1); |
| 392 | } | 412 | } |
| 393 | 413 | ||
| 394 | void JitCompiler::Compile_EX2(Instruction instr) { | 414 | void JitShader::Compile_EX2(Instruction instr) { |
| 395 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 415 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 396 | MOVSS(XMM0, R(SRC1)); | 416 | MOVSS(XMM0, R(SRC1)); |
| 397 | 417 | ||
| @@ -404,7 +424,7 @@ void JitCompiler::Compile_EX2(Instruction instr) { | |||
| 404 | Compile_DestEnable(instr, SRC1); | 424 | Compile_DestEnable(instr, SRC1); |
| 405 | } | 425 | } |
| 406 | 426 | ||
| 407 | void JitCompiler::Compile_LG2(Instruction instr) { | 427 | void JitShader::Compile_LG2(Instruction instr) { |
| 408 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 428 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 409 | MOVSS(XMM0, R(SRC1)); | 429 | MOVSS(XMM0, R(SRC1)); |
| 410 | 430 | ||
| @@ -417,14 +437,14 @@ void JitCompiler::Compile_LG2(Instruction instr) { | |||
| 417 | Compile_DestEnable(instr, SRC1); | 437 | Compile_DestEnable(instr, SRC1); |
| 418 | } | 438 | } |
| 419 | 439 | ||
| 420 | void JitCompiler::Compile_MUL(Instruction instr) { | 440 | void JitShader::Compile_MUL(Instruction instr) { |
| 421 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 441 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 422 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 442 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 423 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | 443 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); |
| 424 | Compile_DestEnable(instr, SRC1); | 444 | Compile_DestEnable(instr, SRC1); |
| 425 | } | 445 | } |
| 426 | 446 | ||
| 427 | void JitCompiler::Compile_SGE(Instruction instr) { | 447 | void JitShader::Compile_SGE(Instruction instr) { |
| 428 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { | 448 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { |
| 429 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | 449 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); |
| 430 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | 450 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); |
| @@ -439,7 +459,7 @@ void JitCompiler::Compile_SGE(Instruction instr) { | |||
| 439 | Compile_DestEnable(instr, SRC2); | 459 | Compile_DestEnable(instr, SRC2); |
| 440 | } | 460 | } |
| 441 | 461 | ||
| 442 | void JitCompiler::Compile_SLT(Instruction instr) { | 462 | void JitShader::Compile_SLT(Instruction instr) { |
| 443 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { | 463 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { |
| 444 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | 464 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); |
| 445 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | 465 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); |
| @@ -454,7 +474,7 @@ void JitCompiler::Compile_SLT(Instruction instr) { | |||
| 454 | Compile_DestEnable(instr, SRC1); | 474 | Compile_DestEnable(instr, SRC1); |
| 455 | } | 475 | } |
| 456 | 476 | ||
| 457 | void JitCompiler::Compile_FLR(Instruction instr) { | 477 | void JitShader::Compile_FLR(Instruction instr) { |
| 458 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 478 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 459 | 479 | ||
| 460 | if (Common::GetCPUCaps().sse4_1) { | 480 | if (Common::GetCPUCaps().sse4_1) { |
| @@ -467,7 +487,7 @@ void JitCompiler::Compile_FLR(Instruction instr) { | |||
| 467 | Compile_DestEnable(instr, SRC1); | 487 | Compile_DestEnable(instr, SRC1); |
| 468 | } | 488 | } |
| 469 | 489 | ||
| 470 | void JitCompiler::Compile_MAX(Instruction instr) { | 490 | void JitShader::Compile_MAX(Instruction instr) { |
| 471 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 491 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 472 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 492 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 473 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | 493 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. |
| @@ -475,7 +495,7 @@ void JitCompiler::Compile_MAX(Instruction instr) { | |||
| 475 | Compile_DestEnable(instr, SRC1); | 495 | Compile_DestEnable(instr, SRC1); |
| 476 | } | 496 | } |
| 477 | 497 | ||
| 478 | void JitCompiler::Compile_MIN(Instruction instr) { | 498 | void JitShader::Compile_MIN(Instruction instr) { |
| 479 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 499 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 480 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 500 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 481 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | 501 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. |
| @@ -483,7 +503,7 @@ void JitCompiler::Compile_MIN(Instruction instr) { | |||
| 483 | Compile_DestEnable(instr, SRC1); | 503 | Compile_DestEnable(instr, SRC1); |
| 484 | } | 504 | } |
| 485 | 505 | ||
| 486 | void JitCompiler::Compile_MOVA(Instruction instr) { | 506 | void JitShader::Compile_MOVA(Instruction instr) { |
| 487 | SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; | 507 | SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; |
| 488 | 508 | ||
| 489 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { | 509 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { |
| @@ -528,12 +548,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) { | |||
| 528 | } | 548 | } |
| 529 | } | 549 | } |
| 530 | 550 | ||
| 531 | void JitCompiler::Compile_MOV(Instruction instr) { | 551 | void JitShader::Compile_MOV(Instruction instr) { |
| 532 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 552 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 533 | Compile_DestEnable(instr, SRC1); | 553 | Compile_DestEnable(instr, SRC1); |
| 534 | } | 554 | } |
| 535 | 555 | ||
| 536 | void JitCompiler::Compile_RCP(Instruction instr) { | 556 | void JitShader::Compile_RCP(Instruction instr) { |
| 537 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 557 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 538 | 558 | ||
| 539 | // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica | 559 | // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica |
| @@ -544,7 +564,7 @@ void JitCompiler::Compile_RCP(Instruction instr) { | |||
| 544 | Compile_DestEnable(instr, SRC1); | 564 | Compile_DestEnable(instr, SRC1); |
| 545 | } | 565 | } |
| 546 | 566 | ||
| 547 | void JitCompiler::Compile_RSQ(Instruction instr) { | 567 | void JitShader::Compile_RSQ(Instruction instr) { |
| 548 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 568 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 549 | 569 | ||
| 550 | // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica | 570 | // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica |
| @@ -555,36 +575,41 @@ void JitCompiler::Compile_RSQ(Instruction instr) { | |||
| 555 | Compile_DestEnable(instr, SRC1); | 575 | Compile_DestEnable(instr, SRC1); |
| 556 | } | 576 | } |
| 557 | 577 | ||
| 558 | void JitCompiler::Compile_NOP(Instruction instr) { | 578 | void JitShader::Compile_NOP(Instruction instr) { |
| 559 | } | 579 | } |
| 560 | 580 | ||
| 561 | void JitCompiler::Compile_END(Instruction instr) { | 581 | void JitShader::Compile_END(Instruction instr) { |
| 562 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 582 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| 563 | RET(); | 583 | RET(); |
| 564 | } | 584 | } |
| 565 | 585 | ||
| 566 | void JitCompiler::Compile_CALL(Instruction instr) { | 586 | void JitShader::Compile_CALL(Instruction instr) { |
| 567 | unsigned offset = instr.flow_control.dest_offset; | 587 | // Push offset of the return |
| 568 | while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { | 588 | PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); |
| 569 | Compile_NextInstr(&offset); | 589 | |
| 570 | } | 590 | // Call the subroutine |
| 591 | FixupBranch b = CALL(); | ||
| 592 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | ||
| 593 | |||
| 594 | // Skip over the return offset that's on the stack | ||
| 595 | ADD(64, R(RSP), Imm32(8)); | ||
| 571 | } | 596 | } |
| 572 | 597 | ||
| 573 | void JitCompiler::Compile_CALLC(Instruction instr) { | 598 | void JitShader::Compile_CALLC(Instruction instr) { |
| 574 | Compile_EvaluateCondition(instr); | 599 | Compile_EvaluateCondition(instr); |
| 575 | FixupBranch b = J_CC(CC_Z, true); | 600 | FixupBranch b = J_CC(CC_Z, true); |
| 576 | Compile_CALL(instr); | 601 | Compile_CALL(instr); |
| 577 | SetJumpTarget(b); | 602 | SetJumpTarget(b); |
| 578 | } | 603 | } |
| 579 | 604 | ||
| 580 | void JitCompiler::Compile_CALLU(Instruction instr) { | 605 | void JitShader::Compile_CALLU(Instruction instr) { |
| 581 | Compile_UniformCondition(instr); | 606 | Compile_UniformCondition(instr); |
| 582 | FixupBranch b = J_CC(CC_Z, true); | 607 | FixupBranch b = J_CC(CC_Z, true); |
| 583 | Compile_CALL(instr); | 608 | Compile_CALL(instr); |
| 584 | SetJumpTarget(b); | 609 | SetJumpTarget(b); |
| 585 | } | 610 | } |
| 586 | 611 | ||
| 587 | void JitCompiler::Compile_CMP(Instruction instr) { | 612 | void JitShader::Compile_CMP(Instruction instr) { |
| 588 | using Op = Instruction::Common::CompareOpType::Op; | 613 | using Op = Instruction::Common::CompareOpType::Op; |
| 589 | Op op_x = instr.common.compare_op.x; | 614 | Op op_x = instr.common.compare_op.x; |
| 590 | Op op_y = instr.common.compare_op.y; | 615 | Op op_y = instr.common.compare_op.y; |
| @@ -627,7 +652,7 @@ void JitCompiler::Compile_CMP(Instruction instr) { | |||
| 627 | SHR(64, R(COND1), Imm8(63)); | 652 | SHR(64, R(COND1), Imm8(63)); |
| 628 | } | 653 | } |
| 629 | 654 | ||
| 630 | void JitCompiler::Compile_MAD(Instruction instr) { | 655 | void JitShader::Compile_MAD(Instruction instr) { |
| 631 | Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); | 656 | Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); |
| 632 | 657 | ||
| 633 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | 658 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { |
| @@ -644,9 +669,8 @@ void JitCompiler::Compile_MAD(Instruction instr) { | |||
| 644 | Compile_DestEnable(instr, SRC1); | 669 | Compile_DestEnable(instr, SRC1); |
| 645 | } | 670 | } |
| 646 | 671 | ||
| 647 | void JitCompiler::Compile_IF(Instruction instr) { | 672 | void JitShader::Compile_IF(Instruction instr) { |
| 648 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", | 673 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); |
| 649 | *offset_ptr, instr.flow_control.dest_offset.Value()); | ||
| 650 | 674 | ||
| 651 | // Evaluate the "IF" condition | 675 | // Evaluate the "IF" condition |
| 652 | if (instr.opcode.Value() == OpCode::Id::IFU) { | 676 | if (instr.opcode.Value() == OpCode::Id::IFU) { |
| @@ -676,10 +700,9 @@ void JitCompiler::Compile_IF(Instruction instr) { | |||
| 676 | SetJumpTarget(b2); | 700 | SetJumpTarget(b2); |
| 677 | } | 701 | } |
| 678 | 702 | ||
| 679 | void JitCompiler::Compile_LOOP(Instruction instr) { | 703 | void JitShader::Compile_LOOP(Instruction instr) { |
| 680 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", | 704 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); |
| 681 | *offset_ptr, instr.flow_control.dest_offset.Value()); | 705 | Compile_Assert(!looping, "Nested loops not supported"); |
| 682 | ASSERT_MSG(!looping, "Nested loops not supported"); | ||
| 683 | 706 | ||
| 684 | looping = true; | 707 | looping = true; |
| 685 | 708 | ||
| @@ -705,10 +728,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) { | |||
| 705 | looping = false; | 728 | looping = false; |
| 706 | } | 729 | } |
| 707 | 730 | ||
| 708 | void JitCompiler::Compile_JMP(Instruction instr) { | 731 | void JitShader::Compile_JMP(Instruction instr) { |
| 709 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported", | ||
| 710 | *offset_ptr, instr.flow_control.dest_offset.Value()); | ||
| 711 | |||
| 712 | if (instr.opcode.Value() == OpCode::Id::JMPC) | 732 | if (instr.opcode.Value() == OpCode::Id::JMPC) |
| 713 | Compile_EvaluateCondition(instr); | 733 | Compile_EvaluateCondition(instr); |
| 714 | else if (instr.opcode.Value() == OpCode::Id::JMPU) | 734 | else if (instr.opcode.Value() == OpCode::Id::JMPU) |
| @@ -718,30 +738,38 @@ void JitCompiler::Compile_JMP(Instruction instr) { | |||
| 718 | 738 | ||
| 719 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && | 739 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && |
| 720 | (instr.flow_control.num_instructions & 1); | 740 | (instr.flow_control.num_instructions & 1); |
| 741 | |||
| 721 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); | 742 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); |
| 743 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | ||
| 744 | } | ||
| 722 | 745 | ||
| 723 | Compile_Block(instr.flow_control.dest_offset); | 746 | void JitShader::Compile_Block(unsigned end) { |
| 747 | while (program_counter < end) { | ||
| 748 | Compile_NextInstr(); | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 752 | void JitShader::Compile_Return() { | ||
| 753 | // Peek return offset on the stack and check if we're at that offset | ||
| 754 | MOV(64, R(RAX), MDisp(RSP, 8)); | ||
| 755 | CMP(32, R(RAX), Imm32(program_counter)); | ||
| 724 | 756 | ||
| 757 | // If so, return to the instruction following the CALL | ||
| 758 | FixupBranch b = J_CC(CC_NZ, true); | ||
| 759 | RET(); | ||
| 725 | SetJumpTarget(b); | 760 | SetJumpTarget(b); |
| 726 | } | 761 | } |
| 727 | 762 | ||
| 728 | void JitCompiler::Compile_Block(unsigned end) { | 763 | void JitShader::Compile_NextInstr() { |
| 729 | // Save current offset pointer | 764 | if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) { |
| 730 | unsigned* prev_offset_ptr = offset_ptr; | 765 | Compile_Return(); |
| 731 | unsigned offset = *prev_offset_ptr; | 766 | } |
| 732 | 767 | ||
| 733 | while (offset < end) | 768 | ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); |
| 734 | Compile_NextInstr(&offset); | 769 | code_ptr[program_counter] = GetCodePtr(); |
| 735 | 770 | ||
| 736 | // Restore current offset pointer | 771 | Instruction instr = GetVertexShaderInstruction(program_counter++); |
| 737 | offset_ptr = prev_offset_ptr; | ||
| 738 | *offset_ptr = offset; | ||
| 739 | } | ||
| 740 | 772 | ||
| 741 | void JitCompiler::Compile_NextInstr(unsigned* offset) { | ||
| 742 | offset_ptr = offset; | ||
| 743 | |||
| 744 | Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++]; | ||
| 745 | OpCode::Id opcode = instr.opcode.Value(); | 773 | OpCode::Id opcode = instr.opcode.Value(); |
| 746 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; | 774 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; |
| 747 | 775 | ||
| @@ -755,9 +783,35 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) { | |||
| 755 | } | 783 | } |
| 756 | } | 784 | } |
| 757 | 785 | ||
| 758 | CompiledShader* JitCompiler::Compile() { | 786 | void JitShader::FindReturnOffsets() { |
| 759 | const u8* start = GetCodePtr(); | 787 | return_offsets.clear(); |
| 760 | unsigned offset = g_state.regs.vs.main_offset; | 788 | |
| 789 | for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { | ||
| 790 | Instruction instr = GetVertexShaderInstruction(offset); | ||
| 791 | |||
| 792 | switch (instr.opcode.Value()) { | ||
| 793 | case OpCode::Id::CALL: | ||
| 794 | case OpCode::Id::CALLC: | ||
| 795 | case OpCode::Id::CALLU: | ||
| 796 | return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 797 | break; | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 801 | // Sort for efficient binary search later | ||
| 802 | std::sort(return_offsets.begin(), return_offsets.end()); | ||
| 803 | } | ||
| 804 | |||
| 805 | void JitShader::Compile() { | ||
| 806 | // Reset flow control state | ||
| 807 | program = (CompiledShader*)GetCodePtr(); | ||
| 808 | program_counter = 0; | ||
| 809 | looping = false; | ||
| 810 | code_ptr.fill(nullptr); | ||
| 811 | fixup_branches.clear(); | ||
| 812 | |||
| 813 | // Find all `CALL` instructions and identify return locations | ||
| 814 | FindReturnOffsets(); | ||
| 761 | 815 | ||
| 762 | // The stack pointer is 8 modulo 16 at the entry of a procedure | 816 | // The stack pointer is 8 modulo 16 at the entry of a procedure |
| 763 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 817 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| @@ -780,21 +834,31 @@ CompiledShader* JitCompiler::Compile() { | |||
| 780 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); | 834 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); |
| 781 | MOVAPS(NEGBIT, MatR(RAX)); | 835 | MOVAPS(NEGBIT, MatR(RAX)); |
| 782 | 836 | ||
| 783 | looping = false; | 837 | // Jump to start of the shader program |
| 838 | JMPptr(R(ABI_PARAM2)); | ||
| 839 | |||
| 840 | // Compile entire program | ||
| 841 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | ||
| 784 | 842 | ||
| 785 | while (offset < g_state.vs.program_code.size()) { | 843 | // Set the target for any incomplete branches now that the entire shader program has been emitted |
| 786 | Compile_NextInstr(&offset); | 844 | for (const auto& branch : fixup_branches) { |
| 845 | SetJumpTarget(branch.first, code_ptr[branch.second]); | ||
| 787 | } | 846 | } |
| 788 | 847 | ||
| 789 | return (CompiledShader*)start; | 848 | // Free memory that's no longer needed |
| 790 | } | 849 | return_offsets.clear(); |
| 850 | return_offsets.shrink_to_fit(); | ||
| 851 | fixup_branches.clear(); | ||
| 852 | fixup_branches.shrink_to_fit(); | ||
| 853 | |||
| 854 | uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); | ||
| 855 | ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); | ||
| 791 | 856 | ||
| 792 | JitCompiler::JitCompiler() { | 857 | LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); |
| 793 | AllocCodeSpace(jit_cache_size); | ||
| 794 | } | 858 | } |
| 795 | 859 | ||
| 796 | void JitCompiler::Clear() { | 860 | JitShader::JitShader() { |
| 797 | ClearCodeSpace(); | 861 | AllocCodeSpace(MAX_SHADER_SIZE); |
| 798 | } | 862 | } |
| 799 | 863 | ||
| 800 | } // namespace Shader | 864 | } // namespace Shader |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5357c964b..cd6280ade 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -4,6 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 7 | #include <nihstro/shader_bytecode.h> | 10 | #include <nihstro/shader_bytecode.h> |
| 8 | 11 | ||
| 9 | #include "common/x64/emitter.h" | 12 | #include "common/x64/emitter.h" |
| @@ -19,24 +22,22 @@ namespace Pica { | |||
| 19 | 22 | ||
| 20 | namespace Shader { | 23 | namespace Shader { |
| 21 | 24 | ||
| 22 | /// Memory needed to be available to compile the next shader (otherwise, clear the cache) | 25 | /// Memory allocated for each compiled shader (64 KiB) |
| 23 | constexpr size_t jit_shader_size = 1024 * 512; | 26 | constexpr size_t MAX_SHADER_SIZE = 1024 * 64; |
| 24 | /// Memory allocated for the JIT code space cache | ||
| 25 | constexpr size_t jit_cache_size = 1024 * 1024 * 8; | ||
| 26 | |||
| 27 | using CompiledShader = void(void* registers); | ||
| 28 | 27 | ||
| 29 | /** | 28 | /** |
| 30 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 | 29 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 |
| 31 | * code that can be executed on the host machine directly. | 30 | * code that can be executed on the host machine directly. |
| 32 | */ | 31 | */ |
| 33 | class JitCompiler : public Gen::XCodeBlock { | 32 | class JitShader : public Gen::XCodeBlock { |
| 34 | public: | 33 | public: |
| 35 | JitCompiler(); | 34 | JitShader(); |
| 36 | 35 | ||
| 37 | CompiledShader* Compile(); | 36 | void Run(void* registers, unsigned offset) const { |
| 37 | program(registers, code_ptr[offset]); | ||
| 38 | } | ||
| 38 | 39 | ||
| 39 | void Clear(); | 40 | void Compile(); |
| 40 | 41 | ||
| 41 | void Compile_ADD(Instruction instr); | 42 | void Compile_ADD(Instruction instr); |
| 42 | void Compile_DP3(Instruction instr); | 43 | void Compile_DP3(Instruction instr); |
| @@ -66,8 +67,9 @@ public: | |||
| 66 | void Compile_MAD(Instruction instr); | 67 | void Compile_MAD(Instruction instr); |
| 67 | 68 | ||
| 68 | private: | 69 | private: |
| 70 | |||
| 69 | void Compile_Block(unsigned end); | 71 | void Compile_Block(unsigned end); |
| 70 | void Compile_NextInstr(unsigned* offset); | 72 | void Compile_NextInstr(); |
| 71 | 73 | ||
| 72 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); | 74 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); |
| 73 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); | 75 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); |
| @@ -81,13 +83,39 @@ private: | |||
| 81 | void Compile_EvaluateCondition(Instruction instr); | 83 | void Compile_EvaluateCondition(Instruction instr); |
| 82 | void Compile_UniformCondition(Instruction instr); | 84 | void Compile_UniformCondition(Instruction instr); |
| 83 | 85 | ||
| 86 | /** | ||
| 87 | * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction. | ||
| 88 | */ | ||
| 89 | void Compile_Return(); | ||
| 90 | |||
| 84 | BitSet32 PersistentCallerSavedRegs(); | 91 | BitSet32 PersistentCallerSavedRegs(); |
| 85 | 92 | ||
| 86 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. | 93 | /** |
| 87 | unsigned* offset_ptr = nullptr; | 94 | * Assertion evaluated at compile-time, but only triggered if executed at runtime. |
| 95 | * @param msg Message to be logged if the assertion fails. | ||
| 96 | */ | ||
| 97 | void Compile_Assert(bool condition, const char* msg); | ||
| 98 | |||
| 99 | /** | ||
| 100 | * Analyzes the entire shader program for `CALL` instructions before emitting any code, | ||
| 101 | * identifying the locations where a return needs to be inserted. | ||
| 102 | */ | ||
| 103 | void FindReturnOffsets(); | ||
| 104 | |||
| 105 | /// Mapping of Pica VS instructions to pointers in the emitted code | ||
| 106 | std::array<const u8*, 1024> code_ptr; | ||
| 107 | |||
| 108 | /// Offsets in code where a return needs to be inserted | ||
| 109 | std::vector<unsigned> return_offsets; | ||
| 110 | |||
| 111 | unsigned program_counter = 0; ///< Offset of the next instruction to decode | ||
| 112 | bool looping = false; ///< True if compiling a loop, used to check for nested loops | ||
| 113 | |||
| 114 | /// Branches that need to be fixed up once the entire shader program is compiled | ||
| 115 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | ||
| 88 | 116 | ||
| 89 | /// Set to true if currently in a loop, used to check for the existence of nested loops | 117 | using CompiledShader = void(void* registers, const u8* start_addr); |
| 90 | bool looping = false; | 118 | CompiledShader* program = nullptr; |
| 91 | }; | 119 | }; |
| 92 | 120 | ||
| 93 | } // Shader | 121 | } // Shader |