summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2016-04-13 23:36:39 -0400
committerGravatar bunnei2016-04-13 23:36:39 -0400
commitd89e48679e93f8a6242d9c8d0837053f5aa708d0 (patch)
tree7bb6c9fd8e9a659369e92661478925512ac8eddd /src
parentMerge pull request #1660 from MerryMage/file_util (diff)
parentshader_jit_x64: Rename RuntimeAssert to Compile_Assert. (diff)
downloadyuzu-d89e48679e93f8a6242d9c8d0837053f5aa708d0.tar.gz
yuzu-d89e48679e93f8a6242d9c8d0837053f5aa708d0.tar.xz
yuzu-d89e48679e93f8a6242d9c8d0837053f5aa708d0.zip
Merge pull request #1546 from bunnei/refactor-shader-jit
Shader JIT Part 2
Diffstat (limited to 'src')
-rw-r--r--src/common/x64/emitter.cpp28
-rw-r--r--src/common/x64/emitter.h2
-rw-r--r--src/video_core/command_processor.cpp4
-rw-r--r--src/video_core/shader/shader.cpp34
-rw-r--r--src/video_core/shader/shader.h3
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp302
-rw-r--r--src/video_core/shader/shader_jit_x64.h58
7 files changed, 270 insertions, 161 deletions
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 1dcf2416c..5662f7f86 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr)
455 Write32(u32(distance)); 455 Write32(u32(distance));
456} 456}
457 457
458FixupBranch XEmitter::CALL()
459{
460 FixupBranch branch;
461 branch.type = 1;
462 branch.ptr = code + 5;
463
464 Write8(0xE8);
465 Write32(0);
466
467 return branch;
468}
469
458FixupBranch XEmitter::J(bool force5bytes) 470FixupBranch XEmitter::J(bool force5bytes)
459{ 471{
460 FixupBranch branch; 472 FixupBranch branch;
@@ -531,6 +543,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch)
531 } 543 }
532} 544}
533 545
546void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target)
547{
548 if (branch.type == 0)
549 {
550 s64 distance = (s64)(target - branch.ptr);
551 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
552 branch.ptr[-1] = (u8)(s8)distance;
553 }
554 else if (branch.type == 1)
555 {
556 s64 distance = (s64)(target - branch.ptr);
557 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
558 ((s32*)branch.ptr)[-1] = (s32)distance;
559 }
560}
561
534//Single byte opcodes 562//Single byte opcodes
535//There is no PUSHAD/POPAD in 64-bit mode. 563//There is no PUSHAD/POPAD in 64-bit mode.
536void XEmitter::INT3() {Write8(0xCC);} 564void XEmitter::INT3() {Write8(0xCC);}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 7c6548fb5..a33724146 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -425,12 +425,14 @@ public:
425#undef CALL 425#undef CALL
426#endif 426#endif
427 void CALL(const void* fnptr); 427 void CALL(const void* fnptr);
428 FixupBranch CALL();
428 void CALLptr(OpArg arg); 429 void CALLptr(OpArg arg);
429 430
430 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); 431 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
431 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); 432 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
432 433
433 void SetJumpTarget(const FixupBranch& branch); 434 void SetJumpTarget(const FixupBranch& branch);
435 void SetJumpTarget(const FixupBranch& branch, const u8* target);
434 436
435 void SETcc(CCFlags flag, OpArg dest); 437 void SETcc(CCFlags flag, OpArg dest);
436 // Note: CMOV brings small if any benefit on current cpus. 438 // Note: CMOV brings small if any benefit on current cpus.
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 08ec2907a..3abe79c09 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -140,7 +140,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
140 immediate_attribute_id = 0; 140 immediate_attribute_id = 0;
141 141
142 Shader::UnitState<false> shader_unit; 142 Shader::UnitState<false> shader_unit;
143 Shader::Setup(shader_unit); 143 Shader::Setup();
144 144
145 if (g_debug_context) 145 if (g_debug_context)
146 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input)); 146 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
@@ -300,7 +300,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
300 vertex_cache_ids.fill(-1); 300 vertex_cache_ids.fill(-1);
301 301
302 Shader::UnitState<false> shader_unit; 302 Shader::UnitState<false> shader_unit;
303 Shader::Setup(shader_unit); 303 Shader::Setup();
304 304
305 for (unsigned int index = 0; index < regs.num_vertices; ++index) 305 for (unsigned int index = 0; index < regs.num_vertices; ++index)
306 { 306 {
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 78d295c76..75301accd 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -28,36 +28,24 @@ namespace Pica {
28namespace Shader { 28namespace Shader {
29 29
30#ifdef ARCHITECTURE_x86_64 30#ifdef ARCHITECTURE_x86_64
31static std::unordered_map<u64, CompiledShader*> shader_map; 31static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
32static JitCompiler jit; 32static const JitShader* jit_shader;
33static CompiledShader* jit_shader;
34
35static void ClearCache() {
36 shader_map.clear();
37 jit.Clear();
38 LOG_INFO(HW_GPU, "Shader JIT cache cleared");
39}
40#endif // ARCHITECTURE_x86_64 33#endif // ARCHITECTURE_x86_64
41 34
42void Setup(UnitState<false>& state) { 35void Setup() {
43#ifdef ARCHITECTURE_x86_64 36#ifdef ARCHITECTURE_x86_64
44 if (VideoCore::g_shader_jit_enabled) { 37 if (VideoCore::g_shader_jit_enabled) {
45 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 38 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
46 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ 39 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
47 g_state.regs.vs.main_offset);
48 40
49 auto iter = shader_map.find(cache_key); 41 auto iter = shader_map.find(cache_key);
50 if (iter != shader_map.end()) { 42 if (iter != shader_map.end()) {
51 jit_shader = iter->second; 43 jit_shader = iter->second.get();
52 } else { 44 } else {
53 // Check if remaining JIT code space is enough for at least one more (massive) shader 45 auto shader = std::make_unique<JitShader>();
54 if (jit.GetSpaceLeft() < jit_shader_size) { 46 shader->Compile();
55 // If not, clear the cache of all previously compiled shaders 47 jit_shader = shader.get();
56 ClearCache(); 48 shader_map[cache_key] = std::move(shader);
57 }
58
59 jit_shader = jit.Compile();
60 shader_map.emplace(cache_key, jit_shader);
61 } 49 }
62 } 50 }
63#endif // ARCHITECTURE_x86_64 51#endif // ARCHITECTURE_x86_64
@@ -65,7 +53,7 @@ void Setup(UnitState<false>& state) {
65 53
66void Shutdown() { 54void Shutdown() {
67#ifdef ARCHITECTURE_x86_64 55#ifdef ARCHITECTURE_x86_64
68 ClearCache(); 56 shader_map.clear();
69#endif // ARCHITECTURE_x86_64 57#endif // ARCHITECTURE_x86_64
70} 58}
71 59
@@ -109,7 +97,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
109 97
110#ifdef ARCHITECTURE_x86_64 98#ifdef ARCHITECTURE_x86_64
111 if (VideoCore::g_shader_jit_enabled) 99 if (VideoCore::g_shader_jit_enabled)
112 jit_shader(&state.registers); 100 jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
113 else 101 else
114 RunInterpreter(state); 102 RunInterpreter(state);
115#else 103#else
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 7af8f1fa1..9c5bd97bd 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -339,9 +339,8 @@ struct UnitState {
339/** 339/**
340 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 340 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
341 * vertex, which would happen within the `Run` function). 341 * vertex, which would happen within the `Run` function).
342 * @param state Shader unit state, must be setup per shader and per shader unit
343 */ 342 */
344void Setup(UnitState<false>& state); 343void Setup();
345 344
346/// Performs any cleanup when the emulator is shutdown 345/// Performs any cleanup when the emulator is shutdown
347void Shutdown(); 346void Shutdown();
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index dffe051ef..b47d3beda 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <smmintrin.h> 6#include <smmintrin.h>
6 7
7#include "common/x64/abi.h" 8#include "common/x64/abi.h"
@@ -19,73 +20,73 @@ namespace Shader {
19 20
20using namespace Gen; 21using namespace Gen;
21 22
22typedef void (JitCompiler::*JitFunction)(Instruction instr); 23typedef void (JitShader::*JitFunction)(Instruction instr);
23 24
24const JitFunction instr_table[64] = { 25const JitFunction instr_table[64] = {
25 &JitCompiler::Compile_ADD, // add 26 &JitShader::Compile_ADD, // add
26 &JitCompiler::Compile_DP3, // dp3 27 &JitShader::Compile_DP3, // dp3
27 &JitCompiler::Compile_DP4, // dp4 28 &JitShader::Compile_DP4, // dp4
28 &JitCompiler::Compile_DPH, // dph 29 &JitShader::Compile_DPH, // dph
29 nullptr, // unknown 30 nullptr, // unknown
30 &JitCompiler::Compile_EX2, // ex2 31 &JitShader::Compile_EX2, // ex2
31 &JitCompiler::Compile_LG2, // lg2 32 &JitShader::Compile_LG2, // lg2
32 nullptr, // unknown 33 nullptr, // unknown
33 &JitCompiler::Compile_MUL, // mul 34 &JitShader::Compile_MUL, // mul
34 &JitCompiler::Compile_SGE, // sge 35 &JitShader::Compile_SGE, // sge
35 &JitCompiler::Compile_SLT, // slt 36 &JitShader::Compile_SLT, // slt
36 &JitCompiler::Compile_FLR, // flr 37 &JitShader::Compile_FLR, // flr
37 &JitCompiler::Compile_MAX, // max 38 &JitShader::Compile_MAX, // max
38 &JitCompiler::Compile_MIN, // min 39 &JitShader::Compile_MIN, // min
39 &JitCompiler::Compile_RCP, // rcp 40 &JitShader::Compile_RCP, // rcp
40 &JitCompiler::Compile_RSQ, // rsq 41 &JitShader::Compile_RSQ, // rsq
41 nullptr, // unknown 42 nullptr, // unknown
42 nullptr, // unknown 43 nullptr, // unknown
43 &JitCompiler::Compile_MOVA, // mova 44 &JitShader::Compile_MOVA, // mova
44 &JitCompiler::Compile_MOV, // mov 45 &JitShader::Compile_MOV, // mov
45 nullptr, // unknown 46 nullptr, // unknown
46 nullptr, // unknown 47 nullptr, // unknown
47 nullptr, // unknown 48 nullptr, // unknown
48 nullptr, // unknown 49 nullptr, // unknown
49 &JitCompiler::Compile_DPH, // dphi 50 &JitShader::Compile_DPH, // dphi
50 nullptr, // unknown 51 nullptr, // unknown
51 &JitCompiler::Compile_SGE, // sgei 52 &JitShader::Compile_SGE, // sgei
52 &JitCompiler::Compile_SLT, // slti 53 &JitShader::Compile_SLT, // slti
53 nullptr, // unknown 54 nullptr, // unknown
54 nullptr, // unknown 55 nullptr, // unknown
55 nullptr, // unknown 56 nullptr, // unknown
56 nullptr, // unknown 57 nullptr, // unknown
57 nullptr, // unknown 58 nullptr, // unknown
58 &JitCompiler::Compile_NOP, // nop 59 &JitShader::Compile_NOP, // nop
59 &JitCompiler::Compile_END, // end 60 &JitShader::Compile_END, // end
60 nullptr, // break 61 nullptr, // break
61 &JitCompiler::Compile_CALL, // call 62 &JitShader::Compile_CALL, // call
62 &JitCompiler::Compile_CALLC, // callc 63 &JitShader::Compile_CALLC, // callc
63 &JitCompiler::Compile_CALLU, // callu 64 &JitShader::Compile_CALLU, // callu
64 &JitCompiler::Compile_IF, // ifu 65 &JitShader::Compile_IF, // ifu
65 &JitCompiler::Compile_IF, // ifc 66 &JitShader::Compile_IF, // ifc
66 &JitCompiler::Compile_LOOP, // loop 67 &JitShader::Compile_LOOP, // loop
67 nullptr, // emit 68 nullptr, // emit
68 nullptr, // sete 69 nullptr, // sete
69 &JitCompiler::Compile_JMP, // jmpc 70 &JitShader::Compile_JMP, // jmpc
70 &JitCompiler::Compile_JMP, // jmpu 71 &JitShader::Compile_JMP, // jmpu
71 &JitCompiler::Compile_CMP, // cmp 72 &JitShader::Compile_CMP, // cmp
72 &JitCompiler::Compile_CMP, // cmp 73 &JitShader::Compile_CMP, // cmp
73 &JitCompiler::Compile_MAD, // madi 74 &JitShader::Compile_MAD, // madi
74 &JitCompiler::Compile_MAD, // madi 75 &JitShader::Compile_MAD, // madi
75 &JitCompiler::Compile_MAD, // madi 76 &JitShader::Compile_MAD, // madi
76 &JitCompiler::Compile_MAD, // madi 77 &JitShader::Compile_MAD, // madi
77 &JitCompiler::Compile_MAD, // madi 78 &JitShader::Compile_MAD, // madi
78 &JitCompiler::Compile_MAD, // madi 79 &JitShader::Compile_MAD, // madi
79 &JitCompiler::Compile_MAD, // madi 80 &JitShader::Compile_MAD, // madi
80 &JitCompiler::Compile_MAD, // madi 81 &JitShader::Compile_MAD, // madi
81 &JitCompiler::Compile_MAD, // mad 82 &JitShader::Compile_MAD, // mad
82 &JitCompiler::Compile_MAD, // mad 83 &JitShader::Compile_MAD, // mad
83 &JitCompiler::Compile_MAD, // mad 84 &JitShader::Compile_MAD, // mad
84 &JitCompiler::Compile_MAD, // mad 85 &JitShader::Compile_MAD, // mad
85 &JitCompiler::Compile_MAD, // mad 86 &JitShader::Compile_MAD, // mad
86 &JitCompiler::Compile_MAD, // mad 87 &JitShader::Compile_MAD, // mad
87 &JitCompiler::Compile_MAD, // mad 88 &JitShader::Compile_MAD, // mad
88 &JitCompiler::Compile_MAD, // mad 89 &JitShader::Compile_MAD, // mad
89}; 90};
90 91
91// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can 92// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -138,13 +139,32 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
138static const u8 NO_DEST_REG_MASK = 0xf; 139static const u8 NO_DEST_REG_MASK = 0xf;
139 140
140/** 141/**
142 * Get the vertex shader instruction for a given offset in the current shader program
143 * @param offset Offset in the current shader program of the instruction
144 * @return Instruction at the specified offset
145 */
146static Instruction GetVertexShaderInstruction(size_t offset) {
147 return { g_state.vs.program_code[offset] };
148}
149
150static void LogCritical(const char* msg) {
151 LOG_CRITICAL(HW_GPU, msg);
152}
153
154void JitShader::Compile_Assert(bool condition, const char* msg) {
155 if (!condition) {
156 ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg));
157 }
158}
159
160/**
141 * Loads and swizzles a source register into the specified XMM register. 161 * Loads and swizzles a source register into the specified XMM register.
142 * @param instr VS instruction, used for determining how to load the source register 162 * @param instr VS instruction, used for determining how to load the source register
143 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) 163 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
144 * @param src_reg SourceRegister object corresponding to the source register to load 164 * @param src_reg SourceRegister object corresponding to the source register to load
145 * @param dest Destination XMM register to store the loaded, swizzled source register 165 * @param dest Destination XMM register to store the loaded, swizzled source register
146 */ 166 */
147void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { 167void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
148 X64Reg src_ptr; 168 X64Reg src_ptr;
149 size_t src_offset; 169 size_t src_offset;
150 170
@@ -216,7 +236,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
216 } 236 }
217} 237}
218 238
219void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { 239void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
220 DestRegister dest; 240 DestRegister dest;
221 unsigned operand_desc_id; 241 unsigned operand_desc_id;
222 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || 242 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -263,7 +283,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
263 } 283 }
264} 284}
265 285
266void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { 286void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
267 MOVAPS(scratch, R(src1)); 287 MOVAPS(scratch, R(src1));
268 CMPPS(scratch, R(src2), CMP_ORD); 288 CMPPS(scratch, R(src2), CMP_ORD);
269 289
@@ -276,7 +296,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::
276 ANDPS(src1, R(scratch)); 296 ANDPS(src1, R(scratch));
277} 297}
278 298
279void JitCompiler::Compile_EvaluateCondition(Instruction instr) { 299void JitShader::Compile_EvaluateCondition(Instruction instr) {
280 // Note: NXOR is used below to check for equality 300 // Note: NXOR is used below to check for equality
281 switch (instr.flow_control.op) { 301 switch (instr.flow_control.op) {
282 case Instruction::FlowControlType::Or: 302 case Instruction::FlowControlType::Or:
@@ -307,23 +327,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
307 } 327 }
308} 328}
309 329
310void JitCompiler::Compile_UniformCondition(Instruction instr) { 330void JitShader::Compile_UniformCondition(Instruction instr) {
311 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); 331 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
312 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 332 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
313} 333}
314 334
315BitSet32 JitCompiler::PersistentCallerSavedRegs() { 335BitSet32 JitShader::PersistentCallerSavedRegs() {
316 return persistent_regs & ABI_ALL_CALLER_SAVED; 336 return persistent_regs & ABI_ALL_CALLER_SAVED;
317} 337}
318 338
319void JitCompiler::Compile_ADD(Instruction instr) { 339void JitShader::Compile_ADD(Instruction instr) {
320 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 340 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
321 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 341 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
322 ADDPS(SRC1, R(SRC2)); 342 ADDPS(SRC1, R(SRC2));
323 Compile_DestEnable(instr, SRC1); 343 Compile_DestEnable(instr, SRC1);
324} 344}
325 345
326void JitCompiler::Compile_DP3(Instruction instr) { 346void JitShader::Compile_DP3(Instruction instr) {
327 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 347 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
328 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 348 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
329 349
@@ -342,7 +362,7 @@ void JitCompiler::Compile_DP3(Instruction instr) {
342 Compile_DestEnable(instr, SRC1); 362 Compile_DestEnable(instr, SRC1);
343} 363}
344 364
345void JitCompiler::Compile_DP4(Instruction instr) { 365void JitShader::Compile_DP4(Instruction instr) {
346 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 366 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
347 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 367 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
348 368
@@ -359,7 +379,7 @@ void JitCompiler::Compile_DP4(Instruction instr) {
359 Compile_DestEnable(instr, SRC1); 379 Compile_DestEnable(instr, SRC1);
360} 380}
361 381
362void JitCompiler::Compile_DPH(Instruction instr) { 382void JitShader::Compile_DPH(Instruction instr) {
363 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { 383 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
364 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 384 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
365 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 385 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -391,7 +411,7 @@ void JitCompiler::Compile_DPH(Instruction instr) {
391 Compile_DestEnable(instr, SRC1); 411 Compile_DestEnable(instr, SRC1);
392} 412}
393 413
394void JitCompiler::Compile_EX2(Instruction instr) { 414void JitShader::Compile_EX2(Instruction instr) {
395 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 415 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
396 MOVSS(XMM0, R(SRC1)); 416 MOVSS(XMM0, R(SRC1));
397 417
@@ -404,7 +424,7 @@ void JitCompiler::Compile_EX2(Instruction instr) {
404 Compile_DestEnable(instr, SRC1); 424 Compile_DestEnable(instr, SRC1);
405} 425}
406 426
407void JitCompiler::Compile_LG2(Instruction instr) { 427void JitShader::Compile_LG2(Instruction instr) {
408 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 428 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
409 MOVSS(XMM0, R(SRC1)); 429 MOVSS(XMM0, R(SRC1));
410 430
@@ -417,14 +437,14 @@ void JitCompiler::Compile_LG2(Instruction instr) {
417 Compile_DestEnable(instr, SRC1); 437 Compile_DestEnable(instr, SRC1);
418} 438}
419 439
420void JitCompiler::Compile_MUL(Instruction instr) { 440void JitShader::Compile_MUL(Instruction instr) {
421 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 441 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
422 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 442 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
423 Compile_SanitizedMul(SRC1, SRC2, SCRATCH); 443 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
424 Compile_DestEnable(instr, SRC1); 444 Compile_DestEnable(instr, SRC1);
425} 445}
426 446
427void JitCompiler::Compile_SGE(Instruction instr) { 447void JitShader::Compile_SGE(Instruction instr) {
428 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { 448 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
429 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 449 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
430 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 450 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -439,7 +459,7 @@ void JitCompiler::Compile_SGE(Instruction instr) {
439 Compile_DestEnable(instr, SRC2); 459 Compile_DestEnable(instr, SRC2);
440} 460}
441 461
442void JitCompiler::Compile_SLT(Instruction instr) { 462void JitShader::Compile_SLT(Instruction instr) {
443 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { 463 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
444 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 464 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
445 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 465 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -454,7 +474,7 @@ void JitCompiler::Compile_SLT(Instruction instr) {
454 Compile_DestEnable(instr, SRC1); 474 Compile_DestEnable(instr, SRC1);
455} 475}
456 476
457void JitCompiler::Compile_FLR(Instruction instr) { 477void JitShader::Compile_FLR(Instruction instr) {
458 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 478 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
459 479
460 if (Common::GetCPUCaps().sse4_1) { 480 if (Common::GetCPUCaps().sse4_1) {
@@ -467,7 +487,7 @@ void JitCompiler::Compile_FLR(Instruction instr) {
467 Compile_DestEnable(instr, SRC1); 487 Compile_DestEnable(instr, SRC1);
468} 488}
469 489
470void JitCompiler::Compile_MAX(Instruction instr) { 490void JitShader::Compile_MAX(Instruction instr) {
471 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 491 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
472 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 492 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
473 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. 493 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -475,7 +495,7 @@ void JitCompiler::Compile_MAX(Instruction instr) {
475 Compile_DestEnable(instr, SRC1); 495 Compile_DestEnable(instr, SRC1);
476} 496}
477 497
478void JitCompiler::Compile_MIN(Instruction instr) { 498void JitShader::Compile_MIN(Instruction instr) {
479 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 499 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
480 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 500 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
481 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. 501 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -483,7 +503,7 @@ void JitCompiler::Compile_MIN(Instruction instr) {
483 Compile_DestEnable(instr, SRC1); 503 Compile_DestEnable(instr, SRC1);
484} 504}
485 505
486void JitCompiler::Compile_MOVA(Instruction instr) { 506void JitShader::Compile_MOVA(Instruction instr) {
487 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; 507 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
488 508
489 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { 509 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
@@ -528,12 +548,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
528 } 548 }
529} 549}
530 550
531void JitCompiler::Compile_MOV(Instruction instr) { 551void JitShader::Compile_MOV(Instruction instr) {
532 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 552 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
533 Compile_DestEnable(instr, SRC1); 553 Compile_DestEnable(instr, SRC1);
534} 554}
535 555
536void JitCompiler::Compile_RCP(Instruction instr) { 556void JitShader::Compile_RCP(Instruction instr) {
537 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 557 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
538 558
539 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica 559 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
@@ -544,7 +564,7 @@ void JitCompiler::Compile_RCP(Instruction instr) {
544 Compile_DestEnable(instr, SRC1); 564 Compile_DestEnable(instr, SRC1);
545} 565}
546 566
547void JitCompiler::Compile_RSQ(Instruction instr) { 567void JitShader::Compile_RSQ(Instruction instr) {
548 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 568 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
549 569
550 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica 570 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
@@ -555,36 +575,41 @@ void JitCompiler::Compile_RSQ(Instruction instr) {
555 Compile_DestEnable(instr, SRC1); 575 Compile_DestEnable(instr, SRC1);
556} 576}
557 577
558void JitCompiler::Compile_NOP(Instruction instr) { 578void JitShader::Compile_NOP(Instruction instr) {
559} 579}
560 580
561void JitCompiler::Compile_END(Instruction instr) { 581void JitShader::Compile_END(Instruction instr) {
562 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 582 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
563 RET(); 583 RET();
564} 584}
565 585
566void JitCompiler::Compile_CALL(Instruction instr) { 586void JitShader::Compile_CALL(Instruction instr) {
567 unsigned offset = instr.flow_control.dest_offset; 587 // Push offset of the return
568 while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { 588 PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
569 Compile_NextInstr(&offset); 589
570 } 590 // Call the subroutine
591 FixupBranch b = CALL();
592 fixup_branches.push_back({ b, instr.flow_control.dest_offset });
593
594 // Skip over the return offset that's on the stack
595 ADD(64, R(RSP), Imm32(8));
571} 596}
572 597
573void JitCompiler::Compile_CALLC(Instruction instr) { 598void JitShader::Compile_CALLC(Instruction instr) {
574 Compile_EvaluateCondition(instr); 599 Compile_EvaluateCondition(instr);
575 FixupBranch b = J_CC(CC_Z, true); 600 FixupBranch b = J_CC(CC_Z, true);
576 Compile_CALL(instr); 601 Compile_CALL(instr);
577 SetJumpTarget(b); 602 SetJumpTarget(b);
578} 603}
579 604
580void JitCompiler::Compile_CALLU(Instruction instr) { 605void JitShader::Compile_CALLU(Instruction instr) {
581 Compile_UniformCondition(instr); 606 Compile_UniformCondition(instr);
582 FixupBranch b = J_CC(CC_Z, true); 607 FixupBranch b = J_CC(CC_Z, true);
583 Compile_CALL(instr); 608 Compile_CALL(instr);
584 SetJumpTarget(b); 609 SetJumpTarget(b);
585} 610}
586 611
587void JitCompiler::Compile_CMP(Instruction instr) { 612void JitShader::Compile_CMP(Instruction instr) {
588 using Op = Instruction::Common::CompareOpType::Op; 613 using Op = Instruction::Common::CompareOpType::Op;
589 Op op_x = instr.common.compare_op.x; 614 Op op_x = instr.common.compare_op.x;
590 Op op_y = instr.common.compare_op.y; 615 Op op_y = instr.common.compare_op.y;
@@ -627,7 +652,7 @@ void JitCompiler::Compile_CMP(Instruction instr) {
627 SHR(64, R(COND1), Imm8(63)); 652 SHR(64, R(COND1), Imm8(63));
628} 653}
629 654
630void JitCompiler::Compile_MAD(Instruction instr) { 655void JitShader::Compile_MAD(Instruction instr) {
631 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); 656 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
632 657
633 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { 658 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
@@ -644,9 +669,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
644 Compile_DestEnable(instr, SRC1); 669 Compile_DestEnable(instr, SRC1);
645} 670}
646 671
647void JitCompiler::Compile_IF(Instruction instr) { 672void JitShader::Compile_IF(Instruction instr) {
648 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", 673 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
649 *offset_ptr, instr.flow_control.dest_offset.Value());
650 674
651 // Evaluate the "IF" condition 675 // Evaluate the "IF" condition
652 if (instr.opcode.Value() == OpCode::Id::IFU) { 676 if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -676,10 +700,9 @@ void JitCompiler::Compile_IF(Instruction instr) {
676 SetJumpTarget(b2); 700 SetJumpTarget(b2);
677} 701}
678 702
679void JitCompiler::Compile_LOOP(Instruction instr) { 703void JitShader::Compile_LOOP(Instruction instr) {
680 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", 704 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
681 *offset_ptr, instr.flow_control.dest_offset.Value()); 705 Compile_Assert(!looping, "Nested loops not supported");
682 ASSERT_MSG(!looping, "Nested loops not supported");
683 706
684 looping = true; 707 looping = true;
685 708
@@ -705,10 +728,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
705 looping = false; 728 looping = false;
706} 729}
707 730
708void JitCompiler::Compile_JMP(Instruction instr) { 731void JitShader::Compile_JMP(Instruction instr) {
709 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
710 *offset_ptr, instr.flow_control.dest_offset.Value());
711
712 if (instr.opcode.Value() == OpCode::Id::JMPC) 732 if (instr.opcode.Value() == OpCode::Id::JMPC)
713 Compile_EvaluateCondition(instr); 733 Compile_EvaluateCondition(instr);
714 else if (instr.opcode.Value() == OpCode::Id::JMPU) 734 else if (instr.opcode.Value() == OpCode::Id::JMPU)
@@ -718,30 +738,38 @@ void JitCompiler::Compile_JMP(Instruction instr) {
718 738
719 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && 739 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
720 (instr.flow_control.num_instructions & 1); 740 (instr.flow_control.num_instructions & 1);
741
721 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); 742 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
743 fixup_branches.push_back({ b, instr.flow_control.dest_offset });
744}
722 745
723 Compile_Block(instr.flow_control.dest_offset); 746void JitShader::Compile_Block(unsigned end) {
747 while (program_counter < end) {
748 Compile_NextInstr();
749 }
750}
751
752void JitShader::Compile_Return() {
753 // Peek return offset on the stack and check if we're at that offset
754 MOV(64, R(RAX), MDisp(RSP, 8));
755 CMP(32, R(RAX), Imm32(program_counter));
724 756
757 // If so, jump back to before CALL
758 FixupBranch b = J_CC(CC_NZ, true);
759 RET();
725 SetJumpTarget(b); 760 SetJumpTarget(b);
726} 761}
727 762
728void JitCompiler::Compile_Block(unsigned end) { 763void JitShader::Compile_NextInstr() {
729 // Save current offset pointer 764 if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
730 unsigned* prev_offset_ptr = offset_ptr; 765 Compile_Return();
731 unsigned offset = *prev_offset_ptr; 766 }
732 767
733 while (offset < end) 768 ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
734 Compile_NextInstr(&offset); 769 code_ptr[program_counter] = GetCodePtr();
735 770
736 // Restore current offset pointer 771 Instruction instr = GetVertexShaderInstruction(program_counter++);
737 offset_ptr = prev_offset_ptr;
738 *offset_ptr = offset;
739}
740 772
741void JitCompiler::Compile_NextInstr(unsigned* offset) {
742 offset_ptr = offset;
743
744 Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++];
745 OpCode::Id opcode = instr.opcode.Value(); 773 OpCode::Id opcode = instr.opcode.Value();
746 auto instr_func = instr_table[static_cast<unsigned>(opcode)]; 774 auto instr_func = instr_table[static_cast<unsigned>(opcode)];
747 775
@@ -755,9 +783,35 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
755 } 783 }
756} 784}
757 785
758CompiledShader* JitCompiler::Compile() { 786void JitShader::FindReturnOffsets() {
759 const u8* start = GetCodePtr(); 787 return_offsets.clear();
760 unsigned offset = g_state.regs.vs.main_offset; 788
789 for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
790 Instruction instr = GetVertexShaderInstruction(offset);
791
792 switch (instr.opcode.Value()) {
793 case OpCode::Id::CALL:
794 case OpCode::Id::CALLC:
795 case OpCode::Id::CALLU:
796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
797 break;
798 }
799 }
800
801 // Sort for efficient binary search later
802 std::sort(return_offsets.begin(), return_offsets.end());
803}
804
805void JitShader::Compile() {
806 // Reset flow control state
807 program = (CompiledShader*)GetCodePtr();
808 program_counter = 0;
809 looping = false;
810 code_ptr.fill(nullptr);
811 fixup_branches.clear();
812
813 // Find all `CALL` instructions and identify return locations
814 FindReturnOffsets();
761 815
762 // The stack pointer is 8 modulo 16 at the entry of a procedure 816 // The stack pointer is 8 modulo 16 at the entry of a procedure
763 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 817 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
@@ -780,21 +834,31 @@ CompiledShader* JitCompiler::Compile() {
780 MOV(PTRBITS, R(RAX), ImmPtr(&neg)); 834 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
781 MOVAPS(NEGBIT, MatR(RAX)); 835 MOVAPS(NEGBIT, MatR(RAX));
782 836
783 looping = false; 837 // Jump to start of the shader program
838 JMPptr(R(ABI_PARAM2));
839
840 // Compile entire program
841 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
784 842
785 while (offset < g_state.vs.program_code.size()) { 843 // Set the target for any incomplete branches now that the entire shader program has been emitted
786 Compile_NextInstr(&offset); 844 for (const auto& branch : fixup_branches) {
845 SetJumpTarget(branch.first, code_ptr[branch.second]);
787 } 846 }
788 847
789 return (CompiledShader*)start; 848 // Free memory that's no longer needed
790} 849 return_offsets.clear();
850 return_offsets.shrink_to_fit();
851 fixup_branches.clear();
852 fixup_branches.shrink_to_fit();
853
854 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
855 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
791 856
792JitCompiler::JitCompiler() { 857 LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size);
793 AllocCodeSpace(jit_cache_size);
794} 858}
795 859
796void JitCompiler::Clear() { 860JitShader::JitShader() {
797 ClearCodeSpace(); 861 AllocCodeSpace(MAX_SHADER_SIZE);
798} 862}
799 863
800} // namespace Shader 864} // namespace Shader
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5357c964b..cd6280ade 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -4,6 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <utility>
8#include <vector>
9
7#include <nihstro/shader_bytecode.h> 10#include <nihstro/shader_bytecode.h>
8 11
9#include "common/x64/emitter.h" 12#include "common/x64/emitter.h"
@@ -19,24 +22,22 @@ namespace Pica {
19 22
20namespace Shader { 23namespace Shader {
21 24
22/// Memory needed to be available to compile the next shader (otherwise, clear the cache) 25/// Memory allocated for each compiled shader (64 KiB)
23constexpr size_t jit_shader_size = 1024 * 512; 26constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
24/// Memory allocated for the JIT code space cache
25constexpr size_t jit_cache_size = 1024 * 1024 * 8;
26
27using CompiledShader = void(void* registers);
28 27
29/** 28/**
30 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 29 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
31 * code that can be executed on the host machine directly. 30 * code that can be executed on the host machine directly.
32 */ 31 */
33class JitCompiler : public Gen::XCodeBlock { 32class JitShader : public Gen::XCodeBlock {
34public: 33public:
35 JitCompiler(); 34 JitShader();
36 35
37 CompiledShader* Compile(); 36 void Run(void* registers, unsigned offset) const {
37 program(registers, code_ptr[offset]);
38 }
38 39
39 void Clear(); 40 void Compile();
40 41
41 void Compile_ADD(Instruction instr); 42 void Compile_ADD(Instruction instr);
42 void Compile_DP3(Instruction instr); 43 void Compile_DP3(Instruction instr);
@@ -66,8 +67,9 @@ public:
66 void Compile_MAD(Instruction instr); 67 void Compile_MAD(Instruction instr);
67 68
68private: 69private:
70
69 void Compile_Block(unsigned end); 71 void Compile_Block(unsigned end);
70 void Compile_NextInstr(unsigned* offset); 72 void Compile_NextInstr();
71 73
72 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); 74 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
73 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); 75 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
@@ -81,13 +83,39 @@ private:
81 void Compile_EvaluateCondition(Instruction instr); 83 void Compile_EvaluateCondition(Instruction instr);
82 void Compile_UniformCondition(Instruction instr); 84 void Compile_UniformCondition(Instruction instr);
83 85
86 /**
87 * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction.
88 */
89 void Compile_Return();
90
84 BitSet32 PersistentCallerSavedRegs(); 91 BitSet32 PersistentCallerSavedRegs();
85 92
86 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. 93 /**
87 unsigned* offset_ptr = nullptr; 94 * Assertion evaluated at compile-time, but only triggered if executed at runtime.
95 * @param msg Message to be logged if the assertion fails.
96 */
97 void Compile_Assert(bool condition, const char* msg);
98
99 /**
100 * Analyzes the entire shader program for `CALL` instructions before emitting any code,
101 * identifying the locations where a return needs to be inserted.
102 */
103 void FindReturnOffsets();
104
105 /// Mapping of Pica VS instructions to pointers in the emitted code
106 std::array<const u8*, 1024> code_ptr;
107
108 /// Offsets in code where a return needs to be inserted
109 std::vector<unsigned> return_offsets;
110
111 unsigned program_counter = 0; ///< Offset of the next instruction to decode
112 bool looping = false; ///< True if compiling a loop, used to check for nested loops
113
114 /// Branches that need to be fixed up once the entire shader program is compiled
115 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
88 116
89 /// Set to true if currently in a loop, used to check for the existence of nested loops 117 using CompiledShader = void(void* registers, const u8* start_addr);
90 bool looping = false; 118 CompiledShader* program = nullptr;
91}; 119};
92 120
93} // Shader 121} // Shader