summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2015-07-22 23:25:30 -0400
committer: bunnei, 2015-08-15 18:01:07 -0400
commit: 094ae6fadb57883e25d412443fcae987ddf240ef (patch)
tree: 8817c9b0672bd02be3fa32a02f3f126f0ecfb664 /src
parent: Common: Added MurmurHash3 hash function for general-purpose use. (diff)
download: yuzu-094ae6fadb57883e25d412443fcae987ddf240ef.tar.gz
yuzu-094ae6fadb57883e25d412443fcae987ddf240ef.tar.xz
yuzu-094ae6fadb57883e25d412443fcae987ddf240ef.zip
Shader: Initial implementation of x86_64 JIT compiler for Pica vertex shaders.
- Config: Add an option for selecting to use shader JIT or interpreter.
- Qt: Add a menu option for enabling/disabling the shader JIT.
Diffstat (limited to 'src')
-rw-r--r-- src/citra/citra.cpp | 1
-rw-r--r-- src/citra/config.cpp | 1
-rw-r--r-- src/citra/default_ini.h | 4
-rw-r--r-- src/citra_qt/config.cpp | 2
-rw-r--r-- src/citra_qt/main.cpp | 8
-rw-r--r-- src/citra_qt/main.h | 1
-rw-r--r-- src/citra_qt/main.ui | 9
-rw-r--r-- src/core/settings.h | 1
-rw-r--r-- src/video_core/CMakeLists.txt | 10
-rw-r--r-- src/video_core/pica.cpp | 3
-rw-r--r-- src/video_core/shader/shader.cpp | 42
-rw-r--r-- src/video_core/shader/shader.h | 3
-rw-r--r-- src/video_core/shader/shader_jit.cpp | 36
-rw-r--r-- src/video_core/shader/shader_jit.h | 85
-rw-r--r-- src/video_core/shader/shader_jit_fake.cpp | 91
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp | 669
-rw-r--r-- src/video_core/video_core.cpp | 1
-rw-r--r-- src/video_core/video_core.h | 3
18 files changed, 967 insertions, 3 deletions
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 182646f4c..d6fcb66a5 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -71,6 +71,7 @@ int main(int argc, char **argv) {
71 EmuWindow_GLFW* emu_window = new EmuWindow_GLFW; 71 EmuWindow_GLFW* emu_window = new EmuWindow_GLFW;
72 72
73 VideoCore::g_hw_renderer_enabled = Settings::values.use_hw_renderer; 73 VideoCore::g_hw_renderer_enabled = Settings::values.use_hw_renderer;
74 VideoCore::g_shader_jit_enabled = Settings::values.use_shader_jit;
74 75
75 System::Init(emu_window); 76 System::Init(emu_window);
76 77
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 2c1407a6f..8a98bda87 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -61,6 +61,7 @@ void Config::ReadValues() {
61 61
62 // Renderer 62 // Renderer
63 Settings::values.use_hw_renderer = glfw_config->GetBoolean("Renderer", "use_hw_renderer", false); 63 Settings::values.use_hw_renderer = glfw_config->GetBoolean("Renderer", "use_hw_renderer", false);
64 Settings::values.use_shader_jit = glfw_config->GetBoolean("Renderer", "use_shader_jit", true);
64 65
65 Settings::values.bg_red = (float)glfw_config->GetReal("Renderer", "bg_red", 1.0); 66 Settings::values.bg_red = (float)glfw_config->GetReal("Renderer", "bg_red", 1.0);
66 Settings::values.bg_green = (float)glfw_config->GetReal("Renderer", "bg_green", 1.0); 67 Settings::values.bg_green = (float)glfw_config->GetReal("Renderer", "bg_green", 1.0);
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index 1925bece8..7e5d49729 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -42,6 +42,10 @@ frame_skip =
42# 0 (default): Software, 1: Hardware 42# 0 (default): Software, 1: Hardware
43use_hw_renderer = 43use_hw_renderer =
44 44
45# Whether to use the Just-In-Time (JIT) compiler for shader emulation
46# 0 : Interpreter (slow), 1 (default): JIT (fast)
47use_shader_jit =
48
45# The clear color for the renderer. What shows up on the sides of the bottom screen. 49# The clear color for the renderer. What shows up on the sides of the bottom screen.
46# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 50# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
47bg_red = 51bg_red =
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index 5716634ee..a20351fb8 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -44,6 +44,7 @@ void Config::ReadValues() {
44 44
45 qt_config->beginGroup("Renderer"); 45 qt_config->beginGroup("Renderer");
46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); 46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool();
47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
47 48
48 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); 49 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat();
49 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); 50 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat();
@@ -77,6 +78,7 @@ void Config::SaveValues() {
77 78
78 qt_config->beginGroup("Renderer"); 79 qt_config->beginGroup("Renderer");
79 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); 80 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
81 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
80 82
81 // Cast to double because Qt's written float values are not human-readable 83 // Cast to double because Qt's written float values are not human-readable
82 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 84 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 6b030c178..4c3edf87a 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -131,6 +131,9 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
131 ui.action_Use_Hardware_Renderer->setChecked(Settings::values.use_hw_renderer); 131 ui.action_Use_Hardware_Renderer->setChecked(Settings::values.use_hw_renderer);
132 SetHardwareRendererEnabled(ui.action_Use_Hardware_Renderer->isChecked()); 132 SetHardwareRendererEnabled(ui.action_Use_Hardware_Renderer->isChecked());
133 133
134 ui.action_Use_Shader_JIT->setChecked(Settings::values.use_shader_jit);
135 SetShaderJITEnabled(ui.action_Use_Shader_JIT->isChecked());
136
134 ui.action_Single_Window_Mode->setChecked(settings.value("singleWindowMode", true).toBool()); 137 ui.action_Single_Window_Mode->setChecked(settings.value("singleWindowMode", true).toBool());
135 ToggleWindowMode(); 138 ToggleWindowMode();
136 139
@@ -144,6 +147,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
144 connect(ui.action_Pause, SIGNAL(triggered()), this, SLOT(OnPauseGame())); 147 connect(ui.action_Pause, SIGNAL(triggered()), this, SLOT(OnPauseGame()));
145 connect(ui.action_Stop, SIGNAL(triggered()), this, SLOT(OnStopGame())); 148 connect(ui.action_Stop, SIGNAL(triggered()), this, SLOT(OnStopGame()));
146 connect(ui.action_Use_Hardware_Renderer, SIGNAL(triggered(bool)), this, SLOT(SetHardwareRendererEnabled(bool))); 149 connect(ui.action_Use_Hardware_Renderer, SIGNAL(triggered(bool)), this, SLOT(SetHardwareRendererEnabled(bool)));
150 connect(ui.action_Use_Shader_JIT, SIGNAL(triggered(bool)), this, SLOT(SetShaderJITEnabled(bool)));
147 connect(ui.action_Single_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode())); 151 connect(ui.action_Single_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode()));
148 connect(ui.action_Hotkeys, SIGNAL(triggered()), this, SLOT(OnOpenHotkeysDialog())); 152 connect(ui.action_Hotkeys, SIGNAL(triggered()), this, SLOT(OnOpenHotkeysDialog()));
149 153
@@ -331,6 +335,10 @@ void GMainWindow::SetHardwareRendererEnabled(bool enabled) {
331 VideoCore::g_hw_renderer_enabled = enabled; 335 VideoCore::g_hw_renderer_enabled = enabled;
332} 336}
333 337
// Slot connected to the "Use Shader JIT" menu action's triggered(bool) signal (it is also called
// once from the constructor). Copies the checked state into VideoCore::g_shader_jit_enabled.
338void GMainWindow::SetShaderJITEnabled(bool enabled) {
339 VideoCore::g_shader_jit_enabled = enabled;
340}
341
334void GMainWindow::ToggleWindowMode() { 342void GMainWindow::ToggleWindowMode() {
335 if (ui.action_Single_Window_Mode->isChecked()) { 343 if (ui.action_Single_Window_Mode->isChecked()) {
336 // Render in the main window... 344 // Render in the main window...
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h
index 9fe9e0c9c..61114a04d 100644
--- a/src/citra_qt/main.h
+++ b/src/citra_qt/main.h
@@ -70,6 +70,7 @@ private slots:
70 void OnConfigure(); 70 void OnConfigure();
71 void OnDisplayTitleBars(bool); 71 void OnDisplayTitleBars(bool);
72 void SetHardwareRendererEnabled(bool); 72 void SetHardwareRendererEnabled(bool);
73 void SetShaderJITEnabled(bool);
73 void ToggleWindowMode(); 74 void ToggleWindowMode();
74 75
75private: 76private:
diff --git a/src/citra_qt/main.ui b/src/citra_qt/main.ui
index 9a809ee6c..b2ce8167d 100644
--- a/src/citra_qt/main.ui
+++ b/src/citra_qt/main.ui
@@ -66,6 +66,7 @@
66 <addaction name="action_Stop"/> 66 <addaction name="action_Stop"/>
67 <addaction name="separator"/> 67 <addaction name="separator"/>
68 <addaction name="action_Use_Hardware_Renderer"/> 68 <addaction name="action_Use_Hardware_Renderer"/>
69 <addaction name="action_Use_Shader_JIT"/>
69 <addaction name="action_Configure"/> 70 <addaction name="action_Configure"/>
70 </widget> 71 </widget>
71 <widget class="QMenu" name="menu_View"> 72 <widget class="QMenu" name="menu_View">
@@ -153,6 +154,14 @@
153 <string>Use Hardware Renderer</string> 154 <string>Use Hardware Renderer</string>
154 </property> 155 </property>
155 </action> 156 </action>
157 <action name="action_Use_Shader_JIT">
158 <property name="checkable">
159 <bool>true</bool>
160 </property>
161 <property name="text">
162 <string>Use Shader JIT</string>
163 </property>
164 </action>
156 <action name="action_Configure"> 165 <action name="action_Configure">
157 <property name="text"> 166 <property name="text">
158 <string>Configure ...</string> 167 <string>Configure ...</string>
diff --git a/src/core/settings.h b/src/core/settings.h
index 2775ee257..6ca0e1afc 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -53,6 +53,7 @@ struct Values {
53 53
54 // Renderer 54 // Renderer
55 bool use_hw_renderer; 55 bool use_hw_renderer;
56 bool use_shader_jit;
56 57
57 float bg_red; 58 float bg_red;
58 float bg_green; 59 float bg_green;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2b859a077..544ed0297 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SRCS
13 rasterizer.cpp 13 rasterizer.cpp
14 shader/shader.cpp 14 shader/shader.cpp
15 shader/shader_interpreter.cpp 15 shader/shader_interpreter.cpp
16 shader/shader_jit.cpp
16 utils.cpp 17 utils.cpp
17 video_core.cpp 18 video_core.cpp
18 ) 19 )
@@ -38,10 +39,19 @@ set(HEADERS
38 renderer_base.h 39 renderer_base.h
39 shader/shader.h 40 shader/shader.h
40 shader/shader_interpreter.h 41 shader/shader_interpreter.h
42 shader/shader_jit.h
41 utils.h 43 utils.h
42 video_core.h 44 video_core.h
43 ) 45 )
44 46
# Select the shader JIT backend source: the x64 code generator when _M_X86_64 is set,
# otherwise the stub implementation in shader_jit_fake.cpp.
# NOTE(review): shader.cpp gates its JIT code on ARCHITECTURE_x86_64 rather than _M_X86_64 -
# confirm both are defined consistently by the build.
47if(_M_X86_64)
48 set(SRCS ${SRCS}
49 shader/shader_jit_x64.cpp)
50else()
51 set(SRCS ${SRCS}
52 shader/shader_jit_fake.cpp)
53endif()
54
45create_directory_groups(${SRCS} ${HEADERS}) 55create_directory_groups(${SRCS} ${HEADERS})
46 56
47add_library(video_core STATIC ${SRCS} ${HEADERS}) 57add_library(video_core STATIC ${SRCS} ${HEADERS})
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index 17cb66780..c73a8178e 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -6,6 +6,7 @@
6#include <unordered_map> 6#include <unordered_map>
7 7
8#include "pica.h" 8#include "pica.h"
9#include "shader/shader.h"
9 10
10namespace Pica { 11namespace Pica {
11 12
@@ -84,6 +85,8 @@ void Init() {
84} 85}
85 86
86void Shutdown() { 87void Shutdown() {
88 Shader::Shutdown();
89
87 memset(&g_state, 0, sizeof(State)); 90 memset(&g_state, 0, sizeof(State));
88} 91}
89 92
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 13e22cb53..fa1f7cafe 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -2,21 +2,52 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include <memory>
6#include <unordered_map>
7
8#include "common/hash.h"
9#include "common/make_unique.h"
6#include "common/profiler.h" 10#include "common/profiler.h"
7 11
8#include "video_core/debug_utils/debug_utils.h" 12#include "video_core/debug_utils/debug_utils.h"
9#include "video_core/pica.h" 13#include "video_core/pica.h"
14#include "video_core/video_core.h"
10 15
11#include "shader.h" 16#include "shader.h"
12#include "shader_interpreter.h" 17#include "shader_interpreter.h"
18#include "shader_jit.h"
13 19
14namespace Pica { 20namespace Pica {
15 21
16namespace Shader { 22namespace Shader {
17 23
24#ifdef ARCHITECTURE_x86_64
25
26static std::unordered_map<u64, CompiledShader*> shader_map;
27static JitCompiler jit;
28static CompiledShader* jit_shader;
29
30#endif // ARCHITECTURE_x86_64
31
18void Setup(UnitState& state) { 32void Setup(UnitState& state) {
19 // TODO(bunnei): This will be used by the JIT in a subsequent patch 33#ifdef ARCHITECTURE_x86_64
34 if (VideoCore::g_shader_jit_enabled) {
35 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
36 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^
37 g_state.regs.vs.main_offset);
38
39 auto iter = shader_map.find(cache_key);
40 if (iter != shader_map.end()) {
41 jit_shader = iter->second;
42 } else {
43 jit_shader = jit.Compile();
44 shader_map.emplace(cache_key, jit_shader);
45 }
46 }
47}
48
49void Shutdown() {
50 shader_map.clear();
20} 51}
21 52
22static Common::Profiling::TimingCategory shader_category("Vertex Shader"); 53static Common::Profiling::TimingCategory shader_category("Vertex Shader");
@@ -54,7 +85,14 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes)
54 state.conditional_code[0] = false; 85 state.conditional_code[0] = false;
55 state.conditional_code[1] = false; 86 state.conditional_code[1] = false;
56 87
88#ifdef ARCHITECTURE_x86_64
89 if (VideoCore::g_shader_jit_enabled)
90 jit_shader(&state);
91 else
92 RunInterpreter(state);
93#else
57 RunInterpreter(state); 94 RunInterpreter(state);
95#endif
58 96
59#if PICA_DUMP_SHADERS 97#if PICA_DUMP_SHADERS
60 DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), 98 DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(),
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 38c00768d..5825e9983 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -149,6 +149,9 @@ struct UnitState {
149 */ 149 */
150void Setup(UnitState& state); 150void Setup(UnitState& state);
151 151
152/// Performs any cleanup when the emulator is shutdown
153void Shutdown();
154
152/** 155/**
153 * Runs the currently setup shader 156 * Runs the currently setup shader
154 * @param state Shader unit state, must be setup per shader and per shader unit 157 * @param state Shader unit state, must be setup per shader and per shader unit
diff --git a/src/video_core/shader/shader_jit.cpp b/src/video_core/shader/shader_jit.cpp
new file mode 100644
index 000000000..69fb7f6be
--- /dev/null
+++ b/src/video_core/shader/shader_jit.cpp
@@ -0,0 +1,36 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/pica.h"
6
7#include "shader.h"
8#include "shader_jit.h"
9
10namespace Pica {
11
12namespace Shader {
13
// Constructs a JitShader with no compiled code attached (jitted starts out as nullptr).
// NOTE(review): no JitShader declaration is visible in shader_jit.h in this change - confirm
// where the class is declared.
14JitShader::JitShader() : jitted(nullptr) {
15}
16
// Compiles with the given JitCompiler and stores the returned entry point in jitted.
17void JitShader::DoJit(JitCompiler& jit) {
18 jitted = jit.Compile();
19}
20
// Invokes the compiled shader with a pointer to the given unit state.
// Does nothing when no shader has been compiled yet (jitted is null).
21void JitShader::Run(UnitState& state) {
22 if (jitted)
23 jitted(&state);
24}
25
// Requests 1024 * 1024 * 4 bytes (4 MiB) of code space from the emitter base class.
26JitCompiler::JitCompiler() {
27 AllocCodeSpace(1024 * 1024 * 4);
28}
29
// Forwards to ClearCodeSpace() on the emitter base class.
// NOTE(review): presumably this invalidates previously returned CompiledShader pointers - confirm.
30void JitCompiler::Clear() {
31 ClearCodeSpace();
32}
33
34} // namespace Shader
35
36} // namespace Pica
diff --git a/src/video_core/shader/shader_jit.h b/src/video_core/shader/shader_jit.h
new file mode 100644
index 000000000..f05b64a92
--- /dev/null
+++ b/src/video_core/shader/shader_jit.h
@@ -0,0 +1,85 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <nihstro/shader_bytecode.h>
8
9#if defined(_M_X86_64)
10#include "common/x64_emitter.h"
11#else
12#include "common/fake_emitter.h"
13#endif
14
15#include "video_core/pica.h"
16
17#include "shader.h"
18
19using nihstro::Instruction;
20using nihstro::OpCode;
21using nihstro::SwizzlePattern;
22
23namespace Pica {
24
25namespace Shader {
26
/// Function type of a compiled shader entry point. Callers invoke it with a pointer to the
/// UnitState to operate on, passed as an untyped void*.
27using CompiledShader = void(void* state);
28
29/**
30 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
31 * code that can be executed on the host machine directly.
32 */
// NOTE(review): the base class is Gen::XCodeBlock even when fake_emitter.h is the included
// emitter - confirm that header provides it.
33class JitCompiler : public Gen::XCodeBlock {
34public:
// Allocates the code space used for generated code (see shader_jit.cpp).
35 JitCompiler();
36
// Compiles the current shader program and returns a pointer to the generated entry point.
37 CompiledShader* Compile();
38
// Clears the code space (see shader_jit.cpp).
39 void Clear();
40
// Per-opcode code generators, one per supported shader instruction. The x64 backend references
// them from its instr_table dispatch table.
41 void Compile_ADD(Instruction instr);
42 void Compile_DP3(Instruction instr);
43 void Compile_DP4(Instruction instr);
44 void Compile_MUL(Instruction instr);
45 void Compile_FLR(Instruction instr);
46 void Compile_MAX(Instruction instr);
47 void Compile_MIN(Instruction instr);
48 void Compile_RCP(Instruction instr);
49 void Compile_RSQ(Instruction instr);
50 void Compile_MOVA(Instruction instr);
51 void Compile_MOV(Instruction instr);
52 void Compile_SLTI(Instruction instr);
53 void Compile_NOP(Instruction instr);
54 void Compile_END(Instruction instr);
55 void Compile_CALL(Instruction instr);
56 void Compile_CALLC(Instruction instr);
57 void Compile_CALLU(Instruction instr);
58 void Compile_IF(Instruction instr);
59 void Compile_LOOP(Instruction instr);
60 void Compile_JMP(Instruction instr);
61 void Compile_CMP(Instruction instr);
62 void Compile_MAD(Instruction instr);
63
64private:
// NOTE(review): presumably compiles instructions up to (not including) offset `stop` and the
// single instruction at *offset respectively - their definitions are outside this view.
65 void Compile_Block(unsigned stop);
66 void Compile_NextInstr(unsigned* offset);
67
// Helpers that take x64 emitter types, so they are only declared on x86_64 builds.
68#if defined(_M_X86_64)
69 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
70 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
71
72 void Compile_EvaluateCondition(Instruction instr);
73 void Compile_UniformCondition(Instruction instr);
74#endif
75
76 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
77 unsigned* offset_ptr = nullptr;
78
// Compilation state flags; both start out false.
79 bool done = false;
80 bool looping = false;
81};
82
83} // Shader
84
85} // Pica
diff --git a/src/video_core/shader/shader_jit_fake.cpp b/src/video_core/shader/shader_jit_fake.cpp
new file mode 100644
index 000000000..e1e79b733
--- /dev/null
+++ b/src/video_core/shader/shader_jit_fake.cpp
@@ -0,0 +1,91 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/fake_emitter.h"
6
7#include "video_core/shader/shader.h"
8#include "video_core/shader/shader_jit.h"
9
10namespace Pica {
11
12namespace Shader {
13
14using namespace FakeGen;
15
16void Jit::Comp_ADD(Instruction instr) {
17}
18
19void Jit::Comp_DP3(Instruction instr) {
20}
21
22void Jit::Comp_DP4(Instruction instr) {
23}
24
25void Jit::Comp_MUL(Instruction instr) {
26}
27
28void Jit::Comp_FLR(Instruction instr) {
29}
30
31void Jit::Comp_MAX(Instruction instr) {
32}
33
34void Jit::Comp_MIN(Instruction instr) {
35}
36
37void Jit::Comp_MOVA(Instruction instr) {
38}
39
40void Jit::Comp_MOV(Instruction instr) {
41}
42
43void Jit::Comp_SLTI(Instruction instr) {
44}
45
46void Jit::Comp_RCP(Instruction instr) {
47}
48
49void Jit::Comp_RSQ(Instruction instr) {
50}
51
52void Jit::Comp_NOP(Instruction instr) {
53}
54
55void Jit::Comp_END(Instruction instr) {
56}
57
58void Jit::Comp_CALL(Instruction instr) {
59}
60
61void Jit::Comp_CALLC(Instruction instr) {
62}
63
64void Jit::Comp_CALLU(Instruction instr) {
65}
66
67void Jit::Comp_CMP(Instruction instr) {
68}
69
70void Jit::Comp_MAD(Instruction instr) {
71}
72
73void Jit::Comp_IF(Instruction instr) {
74}
75
76void Jit::Comp_LOOP(Instruction instr) {
77}
78
79void Jit::Comp_JMP(Instruction instr) {
80}
81
82void Jit::Comp_NextInstr(unsigned* offset) {
83}
84
85CompiledShader Jit::Compile() {
86 return nullptr;
87}
88
89} // namespace Shader
90
91} // namespace Pica
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
new file mode 100644
index 000000000..00c57afec
--- /dev/null
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -0,0 +1,669 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <smmintrin.h>
6
7#include "common/abi.h"
8#include "common/cpu_detect.h"
9#include "common/x64_emitter.h"
10
11#include "shader.h"
12#include "shader_jit.h"
13
14namespace Pica {
15
16namespace Shader {
17
18using namespace Gen;
19
// Pointer-to-member type shared by all per-opcode code generators.
20typedef void (JitCompiler::*JitFunction)(Instruction instr);
21
// Table of 64 code generators; the trailing comment on each entry names the shader opcode for that
// slot. nullptr marks opcodes that have no generator in this backend.
// NOTE(review): presumably indexed by the raw opcode value - the lookup itself is outside this view.
22const JitFunction instr_table[64] = {
23 &JitCompiler::Compile_ADD, // add
24 &JitCompiler::Compile_DP3, // dp3
25 &JitCompiler::Compile_DP4, // dp4
26 nullptr, // dph
27 nullptr, // unknown
28 nullptr, // ex2
29 nullptr, // lg2
30 nullptr, // unknown
31 &JitCompiler::Compile_MUL, // mul
32 nullptr, // sge
33 nullptr, // slt
34 &JitCompiler::Compile_FLR, // flr
35 &JitCompiler::Compile_MAX, // max
36 &JitCompiler::Compile_MIN, // min
37 &JitCompiler::Compile_RCP, // rcp
38 &JitCompiler::Compile_RSQ, // rsq
39 nullptr, // unknown
40 nullptr, // unknown
41 &JitCompiler::Compile_MOVA, // mova
42 &JitCompiler::Compile_MOV, // mov
43 nullptr, // unknown
44 nullptr, // unknown
45 nullptr, // unknown
46 nullptr, // unknown
47 nullptr, // dphi
48 nullptr, // unknown
49 nullptr, // sgei
50 &JitCompiler::Compile_SLTI, // slti
51 nullptr, // unknown
52 nullptr, // unknown
53 nullptr, // unknown
54 nullptr, // unknown
55 nullptr, // unknown
56 &JitCompiler::Compile_NOP, // nop
57 &JitCompiler::Compile_END, // end
58 nullptr, // break
59 &JitCompiler::Compile_CALL, // call
60 &JitCompiler::Compile_CALLC, // callc
61 &JitCompiler::Compile_CALLU, // callu
62 &JitCompiler::Compile_IF, // ifu
63 &JitCompiler::Compile_IF, // ifc
64 &JitCompiler::Compile_LOOP, // loop
65 nullptr, // emit
66 nullptr, // sete
67 &JitCompiler::Compile_JMP, // jmpc
68 &JitCompiler::Compile_JMP, // jmpu
69 &JitCompiler::Compile_CMP, // cmp
70 &JitCompiler::Compile_CMP, // cmp
71 &JitCompiler::Compile_MAD, // madi
72 &JitCompiler::Compile_MAD, // madi
73 &JitCompiler::Compile_MAD, // madi
74 &JitCompiler::Compile_MAD, // madi
75 &JitCompiler::Compile_MAD, // madi
76 &JitCompiler::Compile_MAD, // madi
77 &JitCompiler::Compile_MAD, // madi
78 &JitCompiler::Compile_MAD, // madi
79 &JitCompiler::Compile_MAD, // mad
80 &JitCompiler::Compile_MAD, // mad
81 &JitCompiler::Compile_MAD, // mad
82 &JitCompiler::Compile_MAD, // mad
83 &JitCompiler::Compile_MAD, // mad
84 &JitCompiler::Compile_MAD, // mad
85 &JitCompiler::Compile_MAD, // mad
86 &JitCompiler::Compile_MAD, // mad
87};
88
89// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
90// be used as scratch registers within a compiler function. The other registers have designated
91// purposes, as documented below:
92
93/// Pointer to the uniform memory
94static const X64Reg UNIFORMS = R10;
95/// The two 32-bit VS address offset registers set by the MOVA instruction
96static const X64Reg ADDROFFS_REG = R11;
97/// VS loop count register
98static const X64Reg LOOPCOUNT_REG = R12;
99/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
100static const X64Reg LOOPCOUNT = RSI;
101/// Number to increment LOOPCOUNT_REG by on each loop iteration
102static const X64Reg LOOPINC = RDI;
103/// Result of the previous CMP instruction for the X-component comparison
104static const X64Reg COND0 = R13;
105/// Result of the previous CMP instruction for the Y-component comparison
106static const X64Reg COND1 = R14;
107/// Pointer to the UnitState instance for the current VS unit
108static const X64Reg STATE = R15;
109/// SIMD scratch register
110static const X64Reg SCRATCH = XMM0;
111/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
112static const X64Reg SRC1 = XMM1;
113/// Loaded with the second swizzled source register, otherwise can be used as a scratch register
114static const X64Reg SRC2 = XMM2;
115/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
116static const X64Reg SRC3 = XMM3;
117/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
118static const X64Reg ONE = XMM14;
119/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
120static const X64Reg NEGBIT = XMM15;
121
122/// Raw constant for the source register selector that indicates no swizzling is performed
123static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
124/// Raw constant for the destination register enable mask that indicates all components are enabled
125static const u8 NO_DEST_REG_MASK = 0xf;
126
127/**
128 * Loads and swizzles a source register into the specified XMM register.
129 * @param instr VS instruction, used for determining how to load the source register
130 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
131 * @param src_reg SourceRegister object corresponding to the source register to load
132 * @param dest Destination XMM register to store the loaded, swizzled source register
133 */
134void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
135 X64Reg src_ptr;
136 std::size_t src_offset;
137
// Float uniforms are addressed relative to UNIFORMS; every other register type is addressed
// relative to STATE using the offset reported by UnitState::InputOffset
138 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
139 src_ptr = UNIFORMS;
140 src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
141 } else {
142 src_ptr = STATE;
143 src_offset = UnitState::InputOffset(src_reg);
144 }
145
146 unsigned operand_desc_id;
147 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
148 instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
149 // The MAD and MADI instructions do not use the address offset registers, so loading the
150 // source is a bit simpler here
151
152 operand_desc_id = instr.mad.operand_desc_id;
153
154 // Load the source
155 MOVAPS(dest, MDisp(src_ptr, src_offset));
156 } else {
157 operand_desc_id = instr.common.operand_desc_id;
158
// Only one source operand can be relatively addressed: src2 for opcodes whose subtype has the
// SrcInversed flag, src1 otherwise
159 const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
160 unsigned offset_src = is_inverted ? 2 : 1;
161
162 if (src_num == offset_src && instr.common.address_register_index != 0) {
// Copy the selected address offset into RBX so it can be used as the index of the load below.
// NOTE(review): cases 1 and 2 leave the upper 32 bits of RBX zero (32-bit move / logical shift);
// if an address offset can be negative it would not be sign-extended - confirm.
163 switch (instr.common.address_register_index) {
164 case 1: // address offset 1
165 MOV(32, R(RBX), R(ADDROFFS_REG));
166 break;
167 case 2: // address offset 2
168 MOV(64, R(RBX), R(ADDROFFS_REG));
169 SHR(64, R(RBX), Imm8(32));
170 break;
171 case 3: // address offset 3 (the loop count register)
172 MOV(64, R(RBX), R(LOOPCOUNT_REG));
173 break;
174 default:
175 UNREACHABLE();
176 break;
177 }
178
// Load the source from base + RBX + displacement
179 MOVAPS(dest, MComplex(src_ptr, RBX, 1, src_offset));
180 } else {
181 // Load the source
182 MOVAPS(dest, MDisp(src_ptr, src_offset));
183 }
184 }
185
// The swizzle pattern is read from g_state at compile time, so the selector and negate flags
// are baked into the generated code
186 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
187
188 // Generate instructions for source register swizzling as needed
189 u8 sel = swiz.GetRawSelector(src_num);
190 if (sel != NO_SRC_REG_SWIZZLE) {
191 // Selector component order needs to be reversed for the SHUFPS instruction
192 sel = ((sel & 0xc0) >> 6) | ((sel & 3) << 6) | ((sel & 0xc) << 2) | ((sel & 0x30) >> 2);
193
194 // Shuffle inputs for swizzle
195 SHUFPS(dest, R(dest), sel);
196 }
197
198 // If the source register should be negated, flip the negative bit using XOR
199 const bool negate[] = { swiz.negate_src1, swiz.negate_src2, swiz.negate_src3 };
200 if (negate[src_num - 1]) {
201 XORPS(dest, R(NEGBIT));
202 }
203}
204
// Emits code that stores the result held in XMM register `src` to the instruction's destination
// register in the unit state, writing only the components enabled by the destination mask.
// SCRATCH is clobbered when masking is needed, and XMM4 as well on the non-SSE4.1 path.
// Note: the header declares the second parameter as `dest`; here it is named `src`.
205void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
206 DestRegister dest;
207 unsigned operand_desc_id;
// MAD/MADI use a different instruction encoding, so the operand descriptor and destination are
// read from the `mad` view of the instruction instead of the `common` one
208 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
209 instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
210 operand_desc_id = instr.mad.operand_desc_id;
211 dest = instr.mad.dest.Value();
212 } else {
213 operand_desc_id = instr.common.operand_desc_id;
214 dest = instr.common.dest.Value();
215 }
216
217 SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
218
219 // If all components are enabled, write the result to the destination register
220 if (swiz.dest_mask == NO_DEST_REG_MASK) {
221 // Store dest back to memory
222 MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), src);
223
224 } else {
225 // Not all components are enabled, so mask the result when storing to the destination register...
226 MOVAPS(SCRATCH, MDisp(STATE, UnitState::OutputOffset(dest)));
227
228 if (Common::cpu_info.bSSE4_1) {
// BLENDPS takes one bit per lane starting at lane 0, so the 4-bit dest_mask is bit-reversed
229 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
230 BLENDPS(SCRATCH, R(src), mask);
231 } else {
232 MOVAPS(XMM4, R(src));
233 UNPCKHPS(XMM4, R(SCRATCH)); // Interleave the high (Z/W) lanes: XMM4 = [src.z, dest.z, src.w, dest.w]
234 UNPCKLPS(SCRATCH, R(src)); // Interleave the low (X/Y) lanes: SCRATCH = [dest.x, src.x, dest.y, src.y]
235
236 // Compute selector to selectively copy source components to destination for SHUFPS instruction
// (result lanes 0/1 are picked from SCRATCH, lanes 2/3 from XMM4)
237 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
238 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
239 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
240 ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6);
241 SHUFPS(SCRATCH, R(XMM4), sel);
242 }
243
244 // Store dest back to memory
245 MOVAPS(MDisp(STATE, UnitState::OutputOffset(dest)), SCRATCH);
246 }
247}
248
// Emits code that evaluates the instruction's flow-control condition against COND0/COND1 and
// leaves the result in EAX. The Or and And cases also overwrite EBX.
// Each compared value is XOR-ed with (ref ^ 1); assuming COND0/COND1 hold 0 or 1, that yields 1
// exactly when the condition register equals its reference value.
249void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
250 // Note: NXOR is used below to check for equality
251 switch (instr.flow_control.op) {
// Either component matches its reference
252 case Instruction::FlowControlType::Or:
253 MOV(32, R(RAX), R(COND0));
254 MOV(32, R(RBX), R(COND1));
255 XOR(32, R(RAX), Imm32(instr.flow_control.refx.Value() ^ 1));
256 XOR(32, R(RBX), Imm32(instr.flow_control.refy.Value() ^ 1));
257 OR(32, R(RAX), R(RBX));
258 break;
259
// Both components match their references
260 case Instruction::FlowControlType::And:
261 MOV(32, R(RAX), R(COND0));
262 MOV(32, R(RBX), R(COND1));
263 XOR(32, R(RAX), Imm32(instr.flow_control.refx.Value() ^ 1));
264 XOR(32, R(RBX), Imm32(instr.flow_control.refy.Value() ^ 1));
265 AND(32, R(RAX), R(RBX));
266 break;
267
// Only the X comparison result is considered
268 case Instruction::FlowControlType::JustX:
269 MOV(32, R(RAX), R(COND0));
270 XOR(32, R(RAX), Imm32(instr.flow_control.refx.Value() ^ 1));
271 break;
272
// Only the Y comparison result is considered
273 case Instruction::FlowControlType::JustY:
274 MOV(32, R(RAX), R(COND1));
275 XOR(32, R(RAX), Imm32(instr.flow_control.refy.Value() ^ 1));
276 break;
277 }
278}
279
// Emits a compare of the boolean uniform selected by the instruction against 0. The uniform is
// addressed relative to UNIFORMS using the offset of the `b` member plus the uniform's index.
// NOTE(review): presumably the caller branches on the resulting flags - callers are outside this view.
280void JitCompiler::Compile_UniformCondition(Instruction instr) {
281 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
282 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
283}
284
// ADD: loads both swizzled sources, adds them component-wise and stores the masked result.
285void JitCompiler::Compile_ADD(Instruction instr) {
286 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
287 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
288 ADDPS(SRC1, R(SRC2));
289 Compile_DestEnable(instr, SRC1);
290}
291
// DP3: three-component dot product of the two swizzled sources. The sum ends up in every lane of
// SRC1 before the masked store. SRC2 and SRC3 are clobbered on the non-SSE4.1 path.
292void JitCompiler::Compile_DP3(Instruction instr) {
293 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
294 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
295
296 if (Common::cpu_info.bSSE4_1) {
// 0x7f: multiply lanes 0-2 only, broadcast the sum to all four lanes
297 DPPS(SRC1, R(SRC2), 0x7f);
298 } else {
// Multiply component-wise, then broadcast the Y product into SRC2, the Z product into SRC3 and
// the X product into SRC1, and add the three together
299 MULPS(SRC1, R(SRC2));
300
301 MOVAPS(SRC2, R(SRC1));
302 SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
303
304 MOVAPS(SRC3, R(SRC1));
305 SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
306
307 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
308 ADDPS(SRC1, R(SRC2));
309 ADDPS(SRC1, R(SRC3));
310 }
311
312 Compile_DestEnable(instr, SRC1);
313}
314
// DP4: four-component dot product of the two swizzled sources. The sum ends up in every lane of
// SRC1 before the masked store.
315void JitCompiler::Compile_DP4(Instruction instr) {
316 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
317 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
318
319 if (Common::cpu_info.bSSE4_1) {
// 0xff: multiply all four lanes, broadcast the sum to all four lanes
320 DPPS(SRC1, R(SRC2), 0xff);
321 } else {
// Horizontal add in two steps: first add each product to its pair neighbour, then add the
// reversed vector so that every lane holds the full sum
322 MULPS(SRC1, R(SRC2));
323
324 MOVAPS(SRC2, R(SRC1));
325 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ
326 ADDPS(SRC1, R(SRC2));
327
328 MOVAPS(SRC2, R(SRC1));
329 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
330 ADDPS(SRC1, R(SRC2));
331 }
332
333 Compile_DestEnable(instr, SRC1);
334}
335
336void JitCompiler::Compile_MUL(Instruction instr) {
337 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
338 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
339 MULPS(SRC1, R(SRC2));
340 Compile_DestEnable(instr, SRC1);
341}
342
343void JitCompiler::Compile_FLR(Instruction instr) {
344 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
345
346 if (Common::cpu_info.bSSE4_1) {
347 ROUNDFLOORPS(SRC1, R(SRC1));
348 } else {
349 CVTPS2DQ(SRC1, R(SRC1));
350 CVTDQ2PS(SRC1, R(SRC1));
351 }
352
353 Compile_DestEnable(instr, SRC1);
354}
355
356void JitCompiler::Compile_MAX(Instruction instr) {
357 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
358 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
359 MAXPS(SRC1, R(SRC2));
360 Compile_DestEnable(instr, SRC1);
361}
362
363void JitCompiler::Compile_MIN(Instruction instr) {
364 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
365 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
366 MINPS(SRC1, R(SRC2));
367 Compile_DestEnable(instr, SRC1);
368}
369
370void JitCompiler::Compile_MOVA(Instruction instr) {
371 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
372
373 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
374 return; // NoOp
375 }
376
377 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
378
379 // Convert floats to integers (only care about X and Y components)
380 CVTPS2DQ(SRC1, R(SRC1));
381
382 // Get result
383 MOVQ_xmm(R(RAX), SRC1);
384 SHL(64, R(RAX), Imm8(4)); // Multiply by 16 to be used as an offset later
385
386 // Handle destination enable
387 if (swiz.DestComponentEnabled(0) && swiz.DestComponentEnabled(1)) {
388 MOV(64, R(ADDROFFS_REG), R(RAX)); // Overwrite both
389 } else {
390 if (swiz.DestComponentEnabled(0)) {
391 // Preserve Y-component
392
393 // Clear low 32 bits of previous address register
394 MOV(32, R(RBX), R(ADDROFFS_REG));
395 XOR(64, R(ADDROFFS_REG), R(RBX));
396
397 // Clear high 32-bits of new address register
398 MOV(32, R(RAX), R(RAX));
399 } else if (swiz.DestComponentEnabled(1)) {
400 // Preserve X-component
401
402 // Clear high 32-bits of previous address register
403 MOV(32, R(ADDROFFS_REG), R(ADDROFFS_REG));
404
405 // Clear low 32 bits of new address register
406 MOV(32, R(RBX), R(RAX));
407 XOR(64, R(RAX), R(RBX));
408 }
409
410 OR(64, R(ADDROFFS_REG), R(RAX)); // Combine result
411 }
412}
413
414void JitCompiler::Compile_MOV(Instruction instr) {
415 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
416 Compile_DestEnable(instr, SRC1);
417}
418
419void JitCompiler::Compile_SLTI(Instruction instr) {
420 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
421 Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2);
422
423 CMPSS(SRC1, R(SRC2), CMP_LT);
424 ANDPS(SRC1, R(ONE));
425
426 Compile_DestEnable(instr, SRC1);
427}
428
429void JitCompiler::Compile_RCP(Instruction instr) {
430 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
431
432 // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica
433 // performs this operation more accurately. This should be checked on hardware.
434 RCPPS(SRC1, R(SRC1));
435
436 Compile_DestEnable(instr, SRC1);
437}
438
439void JitCompiler::Compile_RSQ(Instruction instr) {
440 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
441
442 // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica
443 // performs this operation more accurately. This should be checked on hardware.
444 RSQRTPS(SRC1, R(SRC1));
445
446 Compile_DestEnable(instr, SRC1);
447}
448
449void JitCompiler::Compile_NOP(Instruction instr) {
450}
451
452void JitCompiler::Compile_END(Instruction instr) {
453 ABI_PopAllCalleeSavedRegsAndAdjustStack();
454 RET();
455 done = true;
456}
457
458void JitCompiler::Compile_CALL(Instruction instr) {
459 unsigned offset = instr.flow_control.dest_offset;
460 while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) {
461 Compile_NextInstr(&offset);
462 }
463}
464
465void JitCompiler::Compile_CALLC(Instruction instr) {
466 Compile_EvaluateCondition(instr);
467 FixupBranch b = J_CC(CC_Z, true);
468 Compile_CALL(instr);
469 SetJumpTarget(b);
470}
471
472void JitCompiler::Compile_CALLU(Instruction instr) {
473 Compile_UniformCondition(instr);
474 FixupBranch b = J_CC(CC_Z, true);
475 Compile_CALL(instr);
476 SetJumpTarget(b);
477}
478
479void JitCompiler::Compile_CMP(Instruction instr) {
480 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
481 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
482
483 static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_NLE, CMP_NLT };
484
485 if (instr.common.compare_op.x == instr.common.compare_op.y) {
486 // Compare X-component and Y-component together
487 CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.x]);
488
489 MOVQ_xmm(R(COND0), SRC1);
490 MOV(64, R(COND1), R(COND0));
491 } else {
492 // Compare X-component
493 MOVAPS(SCRATCH, R(SRC1));
494 CMPSS(SCRATCH, R(SRC2), cmp[instr.common.compare_op.x]);
495
496 // Compare Y-component
497 CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.y]);
498
499 MOVQ_xmm(R(COND0), SCRATCH);
500 MOVQ_xmm(R(COND1), SRC1);
501 }
502
503 SHR(32, R(COND0), Imm8(31));
504 SHR(64, R(COND1), Imm8(63));
505}
506
507void JitCompiler::Compile_MAD(Instruction instr) {
508 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
509
510 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
511 Compile_SwizzleSrc(instr, 2, instr.mad.src2i, SRC2);
512 Compile_SwizzleSrc(instr, 3, instr.mad.src3i, SRC3);
513 } else {
514 Compile_SwizzleSrc(instr, 2, instr.mad.src2, SRC2);
515 Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
516 }
517
518 if (Common::cpu_info.bFMA) {
519 VFMADD213PS(SRC1, SRC2, R(SRC3));
520 } else {
521 MULPS(SRC1, R(SRC2));
522 ADDPS(SRC1, R(SRC3));
523 }
524
525 Compile_DestEnable(instr, SRC1);
526}
527
528void JitCompiler::Compile_IF(Instruction instr) {
529 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements not supported");
530
531 // Evaluate the "IF" condition
532 if (instr.opcode.Value() == OpCode::Id::IFU) {
533 Compile_UniformCondition(instr);
534 } else if (instr.opcode.Value() == OpCode::Id::IFC) {
535 Compile_EvaluateCondition(instr);
536 }
537 FixupBranch b = J_CC(CC_Z, true);
538
539 // Compile the code that corresponds to the condition evaluating as true
540 Compile_Block(instr.flow_control.dest_offset - 1);
541
542 // If there isn't an "ELSE" condition, we are done here
543 if (instr.flow_control.num_instructions == 0) {
544 SetJumpTarget(b);
545 return;
546 }
547
548 FixupBranch b2 = J(true);
549
550 SetJumpTarget(b);
551
552 // This code corresponds to the "ELSE" condition
553 // Comple the code that corresponds to the condition evaluating as false
554 Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions - 1);
555
556 SetJumpTarget(b2);
557}
558
559void JitCompiler::Compile_LOOP(Instruction instr) {
560 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops not supported");
561 ASSERT_MSG(!looping, "Nested loops not supported");
562
563 looping = true;
564
565 int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>));
566 MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset));
567 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
568 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
569 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
570 MOV(32, R(LOOPINC), R(LOOPCOUNT));
571 SHR(32, R(LOOPINC), Imm8(16));
572 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer
573 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
574 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
575
576 auto loop_start = GetCodePtr();
577
578 Compile_Block(instr.flow_control.dest_offset);
579
580 ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
581 SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
582 J_CC(CC_NZ, loop_start); // Loop if not equal
583
584 looping = false;
585}
586
587void JitCompiler::Compile_JMP(Instruction instr) {
588 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps not supported");
589
590 if (instr.opcode.Value() == OpCode::Id::JMPC)
591 Compile_EvaluateCondition(instr);
592 else if (instr.opcode.Value() == OpCode::Id::JMPU)
593 Compile_UniformCondition(instr);
594 else
595 UNREACHABLE();
596
597 FixupBranch b = J_CC(CC_NZ, true);
598
599 Compile_Block(instr.flow_control.dest_offset);
600
601 SetJumpTarget(b);
602}
603
604void JitCompiler::Compile_Block(unsigned stop) {
605 // Save current offset pointer
606 unsigned* prev_offset_ptr = offset_ptr;
607 unsigned offset = *prev_offset_ptr;
608
609 while (offset <= stop)
610 Compile_NextInstr(&offset);
611
612 // Restore current offset pointer
613 offset_ptr = prev_offset_ptr;
614 *offset_ptr = offset;
615}
616
617void JitCompiler::Compile_NextInstr(unsigned* offset) {
618 offset_ptr = offset;
619
620 Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++];
621 OpCode::Id opcode = instr.opcode.Value();
622 auto instr_func = instr_table[static_cast<unsigned>(opcode)];
623
624 if (instr_func) {
625 // JIT the instruction!
626 ((*this).*instr_func)(instr);
627 } else {
628 // Unhandled instruction
629 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", instr.opcode.Value(), instr.hex);
630 }
631}
632
633CompiledShader* JitCompiler::Compile() {
634 const u8* start = GetCodePtr();
635 const auto& code = g_state.vs.program_code;
636 unsigned offset = g_state.regs.vs.main_offset;
637
638 ABI_PushAllCalleeSavedRegsAndAdjustStack();
639
640 MOV(PTRBITS, R(STATE), R(ABI_PARAM1));
641 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
642
643 // Zero address/loop registers
644 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
645 XOR(64, R(ADDROFFS_REG_1), R(ADDROFFS_REG_1));
646 XOR(64, R(LOOPCOUNT_REG), R(LOOPCOUNT_REG));
647
648 // Used to set a register to one
649 static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
650 MOV(PTRBITS, R(RAX), ImmPtr(&one));
651 MOVAPS(ONE, MDisp(RAX, 0));
652
653 // Used to negate registers
654 static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
655 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
656 MOVAPS(NEGBIT, MDisp(RAX, 0));
657
658 looping = false;
659 done = false;
660 while (offset < g_state.vs.program_code.size()) {
661 Compile_NextInstr(&offset);
662 }
663
664 return (CompiledShader*)start;
665}
666
667} // namespace Shader
668
669} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 3becc4261..943fde5ee 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -23,6 +23,7 @@ EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window
23RendererBase* g_renderer = nullptr; ///< Renderer plugin 23RendererBase* g_renderer = nullptr; ///< Renderer plugin
24 24
25std::atomic<bool> g_hw_renderer_enabled; 25std::atomic<bool> g_hw_renderer_enabled;
26std::atomic<bool> g_shader_jit_enabled;
26 27
27/// Initialize the video core 28/// Initialize the video core
28void Init(EmuWindow* emu_window) { 29void Init(EmuWindow* emu_window) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 14b33c9dd..2867bf03e 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -32,8 +32,9 @@ static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height
32extern RendererBase* g_renderer; ///< Renderer plugin 32extern RendererBase* g_renderer; ///< Renderer plugin
33extern EmuWindow* g_emu_window; ///< Emu window 33extern EmuWindow* g_emu_window; ///< Emu window
34 34
35// TODO: Wrap this in a user settings struct along with any other graphics settings (often set from qt ui) 35// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui)
36extern std::atomic<bool> g_hw_renderer_enabled; 36extern std::atomic<bool> g_hw_renderer_enabled;
37extern std::atomic<bool> g_shader_jit_enabled;
37 38
38/// Start the video core 39/// Start the video core
39void Start(); 40void Start();